Add typed structured CV extraction

This commit is contained in:
2026-03-28 15:01:32 +01:00
parent 19a4da9382
commit 8f8a34ad9c
5 changed files with 1029 additions and 77 deletions
+217 -3
View File
@@ -1,12 +1,12 @@
using System.Security.Claims;
using System.Text;
using System.Text.Json;
using JobTrackerApi.Controllers;
using JobTrackerApi.Models;
using JobTrackerApi.Services;
using Microsoft.AspNetCore.Http;
using Microsoft.AspNetCore.Identity;
using Microsoft.AspNetCore.Mvc;
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.Logging.Abstractions;
using Microsoft.Extensions.Options;
using Moq;
@@ -36,6 +36,200 @@ public sealed class ProfileCvControllerTests
Assert.True((badRequest.Value?.ToString() ?? string.Empty).Contains("supported", StringComparison.OrdinalIgnoreCase));
}
/// <summary>
/// Uploads a PDF whose extracted text is letter-spaced and out of order, and expects the
/// user to end up with the AI-reconstructed markdown as CV text plus a structured profile
/// built from the AI's JSON answer.
/// </summary>
[Fact]
public async Task Upload_reconstructs_flattened_pdf_cv_before_save()
{
    // Arrange: the flattened text a PDF extractor returns, the markdown the AI is expected
    // to rebuild from it, and the structured JSON the AI returns for that markdown.
    var rawExtraction = "connor.babbington@cesnimda.co.uk cesnimda.co.uk +47 41 33 44 70 E D U C A T I O N E X T E N D E D D I P L O M A N V Q L E V E L 3 I N I C T 2012 - 2015 F O L L O W A B O U T M E Mid-level system developer with eight years of experience in UK local government. I N T E R E S T S E X P E R I E N C E S Y S T E M D E V E L O P E R 2015 - 2023 Developed and maintained multiple full-stack applications using C#, Python, Ruby on Rails, SQL, and JavaScript. + Warwickshire County Council, UK C O N T A C T Native English speaker, Norwegian level A2/B1.";
    var reconstructed = "# Connor Babbington\n\n## Contact\nconnor.babbington@cesnimda.co.uk\ncesnimda.co.uk\n+47 41 33 44 70\nTønsberg, Norway\n\n## Professional Summary\nMid-level system developer with eight years of experience in UK local government.\n\n## Work Experience\n### System Developer\nWarwickshire County Council\nUK\n2015 - 2023\n- Developed and maintained multiple full-stack applications using C#, Python, Ruby on Rails, SQL, and JavaScript.\n\n## Education\n### Extended Diploma NVQ Level 3 in ICT\nWarwickshire College\n2012 - 2015\n\n## Languages\nEnglish: Native\nNorwegian: A2/B1";
    var structuredJson = """
        {
        "version": "1",
        "contact": {
        "fullName": "Connor Babbington",
        "email": "connor.babbington@cesnimda.co.uk",
        "phone": "+47 41 33 44 70",
        "location": "Tønsberg, Norway",
        "website": "cesnimda.co.uk"
        },
        "summary": ["Mid-level system developer with eight years of experience in UK local government."],
        "jobs": [
        {
        "title": "System Developer",
        "company": "Warwickshire County Council",
        "location": "UK",
        "start": "2015",
        "end": "2023",
        "isCurrent": false,
        "bullets": ["Developed and maintained multiple full-stack applications using C#, Python, Ruby on Rails, SQL, and JavaScript."],
        "skills": ["C#", "Python", "Ruby on Rails", "SQL", "JavaScript"]
        }
        ],
        "education": [
        {
        "qualification": "Extended Diploma NVQ Level 3 in ICT",
        "institution": "Warwickshire College",
        "start": "2012",
        "end": "2015",
        "details": []
        }
        ],
        "skills": ["C#", "Python", "Ruby on Rails", "SQL", "JavaScript"],
        "languages": [
        { "name": "English", "level": "Native" },
        { "name": "Norwegian", "level": "A2/B1" }
        ],
        "interests": [],
        "otherSections": []
        }
        """;

    var user = new ApplicationUser();
    var userManager = CreateUserManager();
    userManager.Setup(x => x.GetUserAsync(It.IsAny<ClaimsPrincipal>())).ReturnsAsync(user);
    userManager.Setup(x => x.UpdateAsync(user)).ReturnsAsync(IdentityResult.Success);

    var aiService = new Mock<ISummarizerService>();
    aiService
        .Setup(x => x.ExtractTextAsync(It.IsAny<Stream>(), It.IsAny<string>(), It.IsAny<string?>(), It.IsAny<CancellationToken>()))
        .ReturnsAsync(new AiTextExtractionResult(rawExtraction, false, "application/pdf", 1, rawExtraction.Length, "Resume.en.pdf"));
    // The two AI calls are told apart by a fragment of their instruction text; each setup
    // only matches when the expected input text and numeric arguments are passed.
    // NOTE(review): the meaning of the numeric arguments is defined by ISummarizerService,
    // which is not visible here.
    aiService
        .Setup(x => x.SummarizeSectionAsync(It.Is<string>(instruction => instruction.Contains("Reconstruct this CV text extracted from a PDF", StringComparison.Ordinal)), rawExtraction, 2800, 900))
        .ReturnsAsync(reconstructed);
    aiService
        .Setup(x => x.SummarizeSectionAsync(It.Is<string>(instruction => instruction.Contains("Extract this CV into structured JSON", StringComparison.Ordinal)), reconstructed, 3200, 900))
        .ReturnsAsync(structuredJson);

    var controller = new ProfileCvController(userManager.Object, aiService.Object)
    {
        ControllerContext = new ControllerContext { HttpContext = new DefaultHttpContext() }
    };

    var bytes = Encoding.UTF8.GetBytes("fake pdf bytes");
    // Dispose the backing stream when the test ends instead of leaving it to the GC.
    using var pdfStream = new MemoryStream(bytes);
    var file = new FormFile(pdfStream, 0, bytes.Length, "file", "Resume.en.pdf")
    {
        Headers = new HeaderDictionary(),
        ContentType = "application/pdf"
    };

    // Act
    var result = await controller.Upload(file);

    // Assert: the reconstructed markdown, not the raw extraction, is what ends up on the user.
    Assert.IsType<OkObjectResult>(result);
    Assert.Equal(reconstructed, user.ProfileCvText);

    // The test name promises a save, so check the user was actually handed to UpdateAsync
    // rather than only mutated in memory.
    userManager.Verify(x => x.UpdateAsync(user), Times.AtLeastOnce());

    var structured = StructuredCvProfileJson.Deserialize(user.ProfileCvStructureJson);
    Assert.Equal("Connor Babbington", structured.Contact.FullName);
    Assert.Single(structured.Summary);
    Assert.Single(structured.Jobs);
    Assert.Equal("System Developer", structured.Jobs[0].Title);
    Assert.Single(structured.Education);
    Assert.Equal("Extended Diploma NVQ Level 3 in ICT", structured.Education[0].Qualification);
    Assert.Contains(structured.Sections, section => section.Name == "Contact");
    Assert.Contains(structured.Sections, section => section.Name == "Professional Summary");
    Assert.Contains(structured.Sections, section => section.Name == "Work Experience");
    Assert.Contains(structured.Sections, section => section.Name == "Education");
}
/// <summary>
/// Parses an already-stored markdown CV and expects the response to carry the structured
/// CV while the same structure is stored on the user and saved through the user manager.
/// </summary>
[Fact]
public async Task Parse_returns_structured_cv_and_persists_it()
{
    // Arrange: a user with markdown CV text and the JSON the AI returns for that text.
    var user = new ApplicationUser
    {
        ProfileCvText = "# Connor Babbington\n\n## Contact\nconnor@example.com\n+47 41 33 44 70\n\n## Professional Summary\nBuilt backend systems.\n\n## Work Experience\n### System Developer\nWarwickshire County Council\n2015 - 2023\n- Built APIs\n\n## Education\n### Warwickshire College\n2012 - 2015"
    };
    var structuredJson = """
        {
        "version": "1",
        "contact": {
        "fullName": "Connor Babbington",
        "email": "connor@example.com",
        "phone": "+47 41 33 44 70"
        },
        "summary": ["Built backend systems."],
        "jobs": [
        {
        "title": "System Developer",
        "company": "Warwickshire County Council",
        "start": "2015",
        "end": "2023",
        "isCurrent": false,
        "bullets": ["Built APIs"],
        "skills": [".NET"]
        }
        ],
        "education": [
        {
        "qualification": "Warwickshire College",
        "start": "2012",
        "end": "2015",
        "details": []
        }
        ],
        "skills": [".NET"],
        "languages": [],
        "interests": [],
        "otherSections": []
        }
        """;

    var userManager = CreateUserManager();
    userManager.Setup(x => x.GetUserAsync(It.IsAny<ClaimsPrincipal>())).ReturnsAsync(user);
    userManager.Setup(x => x.UpdateAsync(user)).ReturnsAsync(IdentityResult.Success);

    var aiService = new Mock<ISummarizerService>();
    // Only the structured-extraction instruction for this exact CV text is answered.
    aiService
        .Setup(x => x.SummarizeSectionAsync(It.Is<string>(instruction => instruction.Contains("Extract this CV into structured JSON", StringComparison.Ordinal)), user.ProfileCvText, 3200, 900))
        .ReturnsAsync(structuredJson);

    var controller = new ProfileCvController(userManager.Object, aiService.Object)
    {
        ControllerContext = new ControllerContext { HttpContext = new DefaultHttpContext() }
    };

    // Act
    var result = await controller.Parse(new ProfileCvController.ParseCvRequest(user.ProfileCvText));

    // Assert: the response payload exposes the structured CV...
    var ok = Assert.IsType<OkObjectResult>(result.Result);
    var json = JsonSerializer.Serialize(ok.Value);
    Assert.Contains("structuredCv", json, StringComparison.OrdinalIgnoreCase);
    Assert.Contains("Connor Babbington", json);
    Assert.Contains("System Developer", json);

    // ...the same structure is stored on the user...
    var structured = StructuredCvProfileJson.Deserialize(user.ProfileCvStructureJson);
    Assert.Equal("Connor Babbington", structured.Contact.FullName);
    Assert.Single(structured.Jobs);
    Assert.Equal("System Developer", structured.Jobs[0].Title);

    // ...and the user was actually saved. Without this check the test would still pass
    // if the controller only mutated the in-memory user and never persisted it.
    userManager.Verify(x => x.UpdateAsync(user), Times.AtLeastOnce());
}
/// <summary>
/// When the AI answers the structured-extraction request with text that is not JSON,
/// Parse is still expected to succeed and to yield a structure derived from the
/// markdown sections of the CV itself.
/// </summary>
[Fact]
public async Task Parse_falls_back_to_section_parsing_when_ai_json_is_invalid()
{
    // Arrange: a CV with a heading, a summary section and a three-line skills section.
    var cvOwner = new ApplicationUser
    {
        ProfileCvText = "# Connor Babbington\n\n## Professional Summary\nBuilt backend systems.\n\n## Skills\n.NET\nSQL\nAzure"
    };

    var users = CreateUserManager();
    users.Setup(m => m.GetUserAsync(It.IsAny<ClaimsPrincipal>())).ReturnsAsync(cvOwner);
    users.Setup(m => m.UpdateAsync(cvOwner)).ReturnsAsync(IdentityResult.Success);

    // The summarizer replies with a string that cannot be deserialized as JSON.
    var summarizer = new Mock<ISummarizerService>();
    summarizer
        .Setup(s => s.SummarizeSectionAsync(
            It.Is<string>(prompt => prompt.Contains("Extract this CV into structured JSON", StringComparison.Ordinal)),
            cvOwner.ProfileCvText,
            3200,
            900))
        .ReturnsAsync("not-json");

    var httpContext = new DefaultHttpContext();
    var sut = new ProfileCvController(users.Object, summarizer.Object)
    {
        ControllerContext = new ControllerContext { HttpContext = httpContext }
    };
    var request = new ProfileCvController.ParseCvRequest(cvOwner.ProfileCvText);

    // Act
    var response = await sut.Parse(request);

    // Assert: the call succeeds and the payload mentions the section taken from the markdown.
    var okResult = Assert.IsType<OkObjectResult>(response.Result);
    var payload = JsonSerializer.Serialize(okResult.Value);
    Assert.Contains("Professional Summary", payload);

    // The structure stored on the user reflects the section contents and the heading name.
    var stored = StructuredCvProfileJson.Deserialize(cvOwner.ProfileCvStructureJson);
    Assert.Contains("Built backend systems.", stored.Summary);
    Assert.Contains(".NET", stored.Skills);
    Assert.Contains("SQL", stored.Skills);
    Assert.Equal("Connor Babbington", stored.Contact.FullName);
}
[Fact]
public async Task Upload_accepts_markdown_cv_and_saves_text()
{
@@ -46,18 +240,38 @@ public sealed class ProfileCvControllerTests
var aiService = new Mock<ISummarizerService>();
aiService
.Setup(x => x.ExtractTextAsync(It.IsAny<Stream>(), It.IsAny<string>(), It.IsAny<string?>(), It.IsAny<CancellationToken>()))
.ReturnsAsync(new AiTextExtractionResult("# CV\nBuilt APIs and UIs", false, "text/markdown", null, 22, "resume.md"));
.ReturnsAsync(new AiTextExtractionResult("# Connor Babbington\n\n## Professional Summary\nBuilt APIs and UIs", false, "text/markdown", null, 62, "resume.md"));
aiService
.Setup(x => x.SummarizeSectionAsync(It.Is<string>(instruction => instruction.Contains("Extract this CV into structured JSON", StringComparison.Ordinal)), It.IsAny<string>(), 3200, 900))
.ReturnsAsync("""
{
"version":"1",
"contact":{"fullName":"Connor Babbington"},
"summary":["Built APIs and UIs"],
"jobs":[],
"education":[],
"skills":[],
"languages":[],
"interests":[],
"otherSections":[]
}
""");
var controller = new ProfileCvController(userManager.Object, aiService.Object)
{
ControllerContext = new ControllerContext { HttpContext = new DefaultHttpContext() }
};
var file = new FormFile(new MemoryStream(Encoding.UTF8.GetBytes("# CV\nBuilt APIs and UIs")), 0, 23, "file", "resume.md");
var file = new FormFile(new MemoryStream(Encoding.UTF8.GetBytes("# Connor Babbington\n\n## Professional Summary\nBuilt APIs and UIs")), 0, 62, "file", "resume.md")
{
Headers = new HeaderDictionary(),
ContentType = "text/markdown"
};
var result = await controller.Upload(file);
Assert.IsType<OkObjectResult>(result);
Assert.Contains("Built APIs", user.ProfileCvText);
Assert.Equal("Connor Babbington", StructuredCvProfileJson.Deserialize(user.ProfileCvStructureJson).Contact.FullName);
}
private static Mock<UserManager<ApplicationUser>> CreateUserManager()