Improve classifier fallback for flat CV parsing

This commit is contained in:
2026-04-01 11:00:53 +02:00
parent 517c42250d
commit b283f8b9d2
2 changed files with 274 additions and 29 deletions
@@ -617,9 +617,47 @@ public sealed class ProfileCvControllerTests
Assert.Contains("Atlas Systems", matchedJob!.Company ?? string.Empty, StringComparison.Ordinal);
Assert.Contains("Python", structured.Skills);
Assert.Contains("SQL", structured.Skills);
Assert.Equal("classifier", structured.Metadata.Fields["jobs[0].title"].Method);
Assert.Equal("block-1", structured.Metadata.Fields["jobs[0].title"].SourceBlockId);
classifier.Verify(x => x.ClassifyBlockAsync(It.IsAny<string>(), It.IsAny<CancellationToken>()), Times.AtLeastOnce());
}
/// <summary>
/// Checks that a CV with no section headings still produces a structured education entry
/// when the summarizer returns non-JSON output for the extraction prompt and the block
/// classifier labels the text as "Education".
/// </summary>
[Fact]
public async Task Parse_uses_classifier_fallback_for_education_blocks_without_real_sections()
{
    // Arrange: a single headerless education block.
    var source = "BSc Computer Science\nUniversity of Oslo\nOslo\n2016 - 2019\nGraduated with focus on distributed systems.";
    var user = new ApplicationUser { Id = "user-1", ProfileCvText = source };

    var userManager = CreateUserManager();
    userManager.Setup(x => x.GetUserAsync(It.IsAny<ClaimsPrincipal>())).ReturnsAsync(user);
    userManager.Setup(x => x.UpdateAsync(user)).ReturnsAsync(IdentityResult.Success);

    // The structured-extraction prompt yields text that is not JSON.
    var aiService = new Mock<ISummarizerService>();
    aiService
        .Setup(x => x.SummarizeSectionAsync(It.Is<string>(instruction => instruction.Contains("Extract this CV into structured JSON", StringComparison.Ordinal)), source, 3200, 900))
        .ReturnsAsync("not-json");

    // Every block handed to the classifier is reported as an education entry.
    var classification = new CvBlockClassificationResult(
        "Education",
        0.87,
        "education block",
        "BSc Computer Science",
        "University of Oslo",
        "Oslo",
        "2016",
        "2019",
        new List<string> { "Graduated with focus on distributed systems." });
    var classifier = new Mock<ICvAiClassifier>();
    classifier
        .Setup(x => x.ClassifyBlockAsync(It.IsAny<string>(), It.IsAny<CancellationToken>()))
        .ReturnsAsync(classification);

    await using var db = CreateDb();
    var paths = CreatePaths();
    var controller = CreateController(userManager.Object, aiService.Object, db, paths, classifier.Object);

    // Act
    var result = await controller.Parse(new ProfileCvController.ParseCvRequest(source));

    // Assert: the HTTP payload carries the classified values.
    var ok = Assert.IsType<OkObjectResult>(result.Result);
    var json = JsonSerializer.Serialize(ok.Value);
    Assert.Contains("BSc Computer Science", json, StringComparison.Ordinal);
    Assert.Contains("University of Oslo", json, StringComparison.Ordinal);

    // Assert: the structure stored on the user has one education entry attributed to the classifier.
    var structured = StructuredCvProfileJson.Deserialize(user.ProfileCvStructureJson);
    var education = Assert.Single(structured.Education);
    Assert.Equal("BSc Computer Science", education.Qualification);
    Assert.Equal("University of Oslo", education.Institution);
    Assert.Equal("classifier", structured.Metadata.Fields["education[0].qualification"].Method);

    // Assert: the classifier was actually invoked, mirroring the sibling jobs test.
    classifier.Verify(x => x.ClassifyBlockAsync(It.IsAny<string>(), It.IsAny<CancellationToken>()), Times.AtLeastOnce());
}
[Fact]
public async Task Parse_keeps_general_fallback_when_classifier_returns_nothing()
{