Improve structured CV fallback extraction

This commit is contained in:
2026-03-28 22:56:55 +01:00
parent 3b6588397e
commit d8ab312f59
3 changed files with 325 additions and 9 deletions
@@ -128,6 +128,53 @@ public sealed class ProfileCvControllerTests
Assert.Contains(structured.Sections, section => section.Name == "Education");
}
[Fact]
public async Task Upload_populates_structured_fields_from_flattened_cv_when_ai_json_is_invalid()
{
var rawExtraction = "connor.babbington@cesnimda.co.uk cesnimda.co.uk +47 41 33 44 70 E D U C A T I O N E X T E N D E D D I P L O M A N V Q L E V E L 3 I N I C T 2012 - 2015 F O L L O W A B O U T M E Mid-level system developer with eight years of experience in UK local government, with expertise in full-stack development, backend, frontend and server administration. I N T E R E S T S I am interested in PC and board games, as well as cooking and learning new skills. E X P E R I E N C E S Y S T E M D E V E L O P E R 2015 - 2023 Developed and maintained multiple full-stack applications using C#, Python, Ruby on Rails, SQL, and JavaScript. + Warwickshire County Council, UK C O N T A C T Native English speaker, Norwegian level A2/B1.";
var user = new ApplicationUser();
var userManager = CreateUserManager();
userManager.Setup(x => x.GetUserAsync(It.IsAny<ClaimsPrincipal>())).ReturnsAsync(user);
userManager.Setup(x => x.UpdateAsync(user)).ReturnsAsync(IdentityResult.Success);
var aiService = new Mock<ISummarizerService>();
aiService
.Setup(x => x.ExtractTextAsync(It.IsAny<Stream>(), It.IsAny<string>(), It.IsAny<string?>(), It.IsAny<CancellationToken>()))
.ReturnsAsync(new AiTextExtractionResult(rawExtraction, false, "application/pdf", 1, rawExtraction.Length, "Resume.en.pdf"));
aiService
.Setup(x => x.SummarizeSectionAsync(It.Is<string>(instruction => instruction.Contains("Reconstruct this CV text extracted from a PDF", StringComparison.Ordinal)), rawExtraction, 2800, 900))
.ReturnsAsync(string.Empty);
aiService
.Setup(x => x.SummarizeSectionAsync(It.Is<string>(instruction => instruction.Contains("Extract this CV into structured JSON", StringComparison.Ordinal)), It.IsAny<string>(), 3200, 900))
.ReturnsAsync("not-json");
var controller = new ProfileCvController(userManager.Object, aiService.Object)
{
ControllerContext = new ControllerContext { HttpContext = new DefaultHttpContext() }
};
var bytes = Encoding.UTF8.GetBytes("fake pdf bytes");
var file = new FormFile(new MemoryStream(bytes), 0, bytes.Length, "file", "Resume.en.pdf")
{
Headers = new HeaderDictionary(),
ContentType = "application/pdf"
};
var result = await controller.Upload(file);
Assert.IsType<OkObjectResult>(result);
var structured = StructuredCvProfileJson.Deserialize(user.ProfileCvStructureJson);
Assert.Equal("Connor Babbington", structured.Contact.FullName);
Assert.Equal("connor.babbington@cesnimda.co.uk", structured.Contact.Email);
Assert.Equal("+47 41 33 44 70", structured.Contact.Phone);
Assert.Contains(structured.Summary, item => item.Contains("eight years of experience", StringComparison.OrdinalIgnoreCase));
Assert.Contains(structured.Skills, item => item.Equals("C#", StringComparison.OrdinalIgnoreCase));
Assert.Contains(structured.Interests, item => item.Contains("board games", StringComparison.OrdinalIgnoreCase) || item.Contains("cooking", StringComparison.OrdinalIgnoreCase));
Assert.Contains(structured.Languages, item => item.Name != null && item.Name.Equals("English", StringComparison.OrdinalIgnoreCase));
Assert.Contains(structured.Languages, item => item.Name != null && item.Name.StartsWith("Norwegian", StringComparison.OrdinalIgnoreCase));
Assert.DoesNotContain(structured.Sections, section => section.Name == "General");
}
[Fact]
public async Task Parse_returns_structured_cv_and_persists_it()
{