Add typed structured CV extraction
This commit is contained in:
@@ -1,12 +1,12 @@
|
||||
using System.Security.Claims;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using JobTrackerApi.Controllers;
|
||||
using JobTrackerApi.Models;
|
||||
using JobTrackerApi.Services;
|
||||
using Microsoft.AspNetCore.Http;
|
||||
using Microsoft.AspNetCore.Identity;
|
||||
using Microsoft.AspNetCore.Mvc;
|
||||
using Microsoft.Extensions.Configuration;
|
||||
using Microsoft.Extensions.Logging.Abstractions;
|
||||
using Microsoft.Extensions.Options;
|
||||
using Moq;
|
||||
@@ -36,6 +36,200 @@ public sealed class ProfileCvControllerTests
|
||||
Assert.True((badRequest.Value?.ToString() ?? string.Empty).Contains("supported", StringComparison.OrdinalIgnoreCase));
|
||||
}
|
||||
|
||||
/// <summary>
/// Uploading a PDF whose extracted text is flattened (letter-spaced headings, sections run together)
/// must store the AI-reconstructed markdown as <c>ProfileCvText</c> and the typed structured
/// extraction as <c>ProfileCvStructureJson</c>, and must persist the user through the user manager.
/// </summary>
[Fact]
public async Task Upload_reconstructs_flattened_pdf_cv_before_save()
{
    // Arrange: the flattened text the extractor stub returns for the uploaded PDF.
    var rawExtraction = "connor.babbington@cesnimda.co.uk cesnimda.co.uk +47 41 33 44 70 E D U C A T I O N E X T E N D E D D I P L O M A N V Q L E V E L 3 I N I C T 2012 - 2015 F O L L O W A B O U T M E Mid-level system developer with eight years of experience in UK local government. I N T E R E S T S E X P E R I E N C E S Y S T E M D E V E L O P E R 2015 - 2023 Developed and maintained multiple full-stack applications using C#, Python, Ruby on Rails, SQL, and JavaScript. + Warwickshire County Council, UK C O N T A C T Native English speaker, Norwegian level A2/B1.";

    // The markdown the summarizer stub returns for the "Reconstruct ..." instruction.
    var reconstructed = "# Connor Babbington\n\n## Contact\nconnor.babbington@cesnimda.co.uk\ncesnimda.co.uk\n+47 41 33 44 70\nTønsberg, Norway\n\n## Professional Summary\nMid-level system developer with eight years of experience in UK local government.\n\n## Work Experience\n### System Developer\nWarwickshire County Council\nUK\n2015 - 2023\n- Developed and maintained multiple full-stack applications using C#, Python, Ruby on Rails, SQL, and JavaScript.\n\n## Education\n### Extended Diploma NVQ Level 3 in ICT\nWarwickshire College\n2012 - 2015\n\n## Languages\nEnglish: Native\nNorwegian: A2/B1";

    // The JSON the summarizer stub returns for the "Extract this CV into structured JSON" instruction.
    var structuredJson = """
        {
          "version": "1",
          "contact": {
            "fullName": "Connor Babbington",
            "email": "connor.babbington@cesnimda.co.uk",
            "phone": "+47 41 33 44 70",
            "location": "Tønsberg, Norway",
            "website": "cesnimda.co.uk"
          },
          "summary": ["Mid-level system developer with eight years of experience in UK local government."],
          "jobs": [
            {
              "title": "System Developer",
              "company": "Warwickshire County Council",
              "location": "UK",
              "start": "2015",
              "end": "2023",
              "isCurrent": false,
              "bullets": ["Developed and maintained multiple full-stack applications using C#, Python, Ruby on Rails, SQL, and JavaScript."],
              "skills": ["C#", "Python", "Ruby on Rails", "SQL", "JavaScript"]
            }
          ],
          "education": [
            {
              "qualification": "Extended Diploma NVQ Level 3 in ICT",
              "institution": "Warwickshire College",
              "start": "2012",
              "end": "2015",
              "details": []
            }
          ],
          "skills": ["C#", "Python", "Ruby on Rails", "SQL", "JavaScript"],
          "languages": [
            { "name": "English", "level": "Native" },
            { "name": "Norwegian", "level": "A2/B1" }
          ],
          "interests": [],
          "otherSections": []
        }
        """;

    var user = new ApplicationUser();
    var userManager = CreateUserManager();
    userManager.Setup(x => x.GetUserAsync(It.IsAny<ClaimsPrincipal>())).ReturnsAsync(user);
    userManager.Setup(x => x.UpdateAsync(user)).ReturnsAsync(IdentityResult.Success);

    var aiService = new Mock<ISummarizerService>();
    aiService
        .Setup(x => x.ExtractTextAsync(It.IsAny<Stream>(), It.IsAny<string>(), It.IsAny<string?>(), It.IsAny<CancellationToken>()))
        .ReturnsAsync(new AiTextExtractionResult(rawExtraction, false, "application/pdf", 1, rawExtraction.Length, "Resume.en.pdf"));

    // NOTE(review): the numeric arguments (2800/900 and 3200/900) are matched exactly, so these stubs
    // only fire if the controller passes the same values - confirm against the controller.
    aiService
        .Setup(x => x.SummarizeSectionAsync(It.Is<string>(instruction => instruction.Contains("Reconstruct this CV text extracted from a PDF", StringComparison.Ordinal)), rawExtraction, 2800, 900))
        .ReturnsAsync(reconstructed);
    aiService
        .Setup(x => x.SummarizeSectionAsync(It.Is<string>(instruction => instruction.Contains("Extract this CV into structured JSON", StringComparison.Ordinal)), reconstructed, 3200, 900))
        .ReturnsAsync(structuredJson);

    var controller = new ProfileCvController(userManager.Object, aiService.Object)
    {
        ControllerContext = new ControllerContext { HttpContext = new DefaultHttpContext() }
    };

    var bytes = Encoding.UTF8.GetBytes("fake pdf bytes");

    // Dispose the backing stream when the test ends instead of leaving it to the GC.
    using var pdfStream = new MemoryStream(bytes);
    var file = new FormFile(pdfStream, 0, bytes.Length, "file", "Resume.en.pdf")
    {
        Headers = new HeaderDictionary(),
        ContentType = "application/pdf"
    };

    // Act
    var result = await controller.Upload(file);

    // Assert
    Assert.IsType<OkObjectResult>(result);

    // The test name promises a save: check the user was actually handed to the user manager,
    // not merely mutated in memory.
    userManager.Verify(x => x.UpdateAsync(user), Times.AtLeastOnce());

    Assert.Equal(reconstructed, user.ProfileCvText);

    var structured = StructuredCvProfileJson.Deserialize(user.ProfileCvStructureJson);
    Assert.Equal("Connor Babbington", structured.Contact.FullName);
    Assert.Single(structured.Summary);
    Assert.Single(structured.Jobs);
    Assert.Equal("System Developer", structured.Jobs[0].Title);
    Assert.Single(structured.Education);
    Assert.Equal("Extended Diploma NVQ Level 3 in ICT", structured.Education[0].Qualification);

    // The stubbed JSON carries no section list; these names match the "##" headings of `reconstructed`.
    // NOTE(review): presumably the controller derives Sections from the reconstructed text - confirm.
    Assert.Contains(structured.Sections, section => section.Name == "Contact");
    Assert.Contains(structured.Sections, section => section.Name == "Professional Summary");
    Assert.Contains(structured.Sections, section => section.Name == "Work Experience");
    Assert.Contains(structured.Sections, section => section.Name == "Education");
}
|
||||
|
||||
/// <summary>
/// Parsing CV text for which the summarizer returns valid structured JSON must return that structure
/// in the OK payload and persist it on the user as <c>ProfileCvStructureJson</c>.
/// </summary>
[Fact]
public async Task Parse_returns_structured_cv_and_persists_it()
{
    // Arrange
    var user = new ApplicationUser
    {
        ProfileCvText = "# Connor Babbington\n\n## Contact\nconnor@example.com\n+47 41 33 44 70\n\n## Professional Summary\nBuilt backend systems.\n\n## Work Experience\n### System Developer\nWarwickshire County Council\n2015 - 2023\n- Built APIs\n\n## Education\n### Warwickshire College\n2012 - 2015"
    };

    // The JSON the summarizer stub returns for the structured-extraction instruction.
    var structuredJson = """
        {
          "version": "1",
          "contact": {
            "fullName": "Connor Babbington",
            "email": "connor@example.com",
            "phone": "+47 41 33 44 70"
          },
          "summary": ["Built backend systems."],
          "jobs": [
            {
              "title": "System Developer",
              "company": "Warwickshire County Council",
              "start": "2015",
              "end": "2023",
              "isCurrent": false,
              "bullets": ["Built APIs"],
              "skills": [".NET"]
            }
          ],
          "education": [
            {
              "qualification": "Warwickshire College",
              "start": "2012",
              "end": "2015",
              "details": []
            }
          ],
          "skills": [".NET"],
          "languages": [],
          "interests": [],
          "otherSections": []
        }
        """;

    var userManager = CreateUserManager();
    userManager.Setup(x => x.GetUserAsync(It.IsAny<ClaimsPrincipal>())).ReturnsAsync(user);
    userManager.Setup(x => x.UpdateAsync(user)).ReturnsAsync(IdentityResult.Success);

    var aiService = new Mock<ISummarizerService>();

    // NOTE(review): 3200/900 are matched exactly; the stub only fires if the controller passes the
    // same values - confirm against the controller.
    aiService
        .Setup(x => x.SummarizeSectionAsync(It.Is<string>(instruction => instruction.Contains("Extract this CV into structured JSON", StringComparison.Ordinal)), user.ProfileCvText, 3200, 900))
        .ReturnsAsync(structuredJson);

    var controller = new ProfileCvController(userManager.Object, aiService.Object)
    {
        ControllerContext = new ControllerContext { HttpContext = new DefaultHttpContext() }
    };

    // Act
    var result = await controller.Parse(new ProfileCvController.ParseCvRequest(user.ProfileCvText));

    // Assert: the response payload exposes the structured CV.
    var ok = Assert.IsType<OkObjectResult>(result.Result);
    var json = JsonSerializer.Serialize(ok.Value);
    Assert.Contains("structuredCv", json, StringComparison.OrdinalIgnoreCase);
    Assert.Contains("Connor Babbington", json);
    Assert.Contains("System Developer", json);

    // Assert: the structure was persisted. Inspecting the in-memory user alone would pass even if
    // the controller never saved it, so check the user manager call as well.
    userManager.Verify(x => x.UpdateAsync(user), Times.AtLeastOnce());

    var structured = StructuredCvProfileJson.Deserialize(user.ProfileCvStructureJson);
    Assert.Equal("Connor Babbington", structured.Contact.FullName);
    Assert.Single(structured.Jobs);
    Assert.Equal("System Developer", structured.Jobs[0].Title);
}
|
||||
|
||||
/// <summary>
/// When the summarizer answers the structured-extraction instruction with text that is not JSON,
/// <c>Parse</c> must still return OK and store a structure containing the summary, skills and
/// full name found in the markdown CV.
/// </summary>
[Fact]
public async Task Parse_falls_back_to_section_parsing_when_ai_json_is_invalid()
{
    // Arrange: a user whose stored CV is simple markdown with a name heading and two sections.
    var account = new ApplicationUser();
    account.ProfileCvText = "# Connor Babbington\n\n## Professional Summary\nBuilt backend systems.\n\n## Skills\n.NET\nSQL\nAzure";

    var users = CreateUserManager();
    users
        .Setup(manager => manager.GetUserAsync(It.IsAny<ClaimsPrincipal>()))
        .ReturnsAsync(account);
    users
        .Setup(manager => manager.UpdateAsync(account))
        .ReturnsAsync(IdentityResult.Success);

    // The summarizer deliberately returns something that cannot be parsed as JSON.
    var summarizer = new Mock<ISummarizerService>();
    summarizer
        .Setup(service => service.SummarizeSectionAsync(
            It.Is<string>(prompt => prompt.Contains("Extract this CV into structured JSON", StringComparison.Ordinal)),
            account.ProfileCvText,
            3200,
            900))
        .ReturnsAsync("not-json");

    var controller = new ProfileCvController(users.Object, summarizer.Object);
    controller.ControllerContext = new ControllerContext { HttpContext = new DefaultHttpContext() };

    // Act
    var request = new ProfileCvController.ParseCvRequest(account.ProfileCvText);
    var response = await controller.Parse(request);

    // Assert: the payload still mentions the section heading taken from the markdown.
    var okResult = Assert.IsType<OkObjectResult>(response.Result);
    var payload = JsonSerializer.Serialize(okResult.Value);
    Assert.Contains("Professional Summary", payload);

    // Assert: the structure stored on the user carries the values present in the markdown.
    var profile = StructuredCvProfileJson.Deserialize(account.ProfileCvStructureJson);
    Assert.Contains("Built backend systems.", profile.Summary);
    Assert.Contains(".NET", profile.Skills);
    Assert.Contains("SQL", profile.Skills);
    Assert.Equal("Connor Babbington", profile.Contact.FullName);
}
|
||||
|
||||
[Fact]
|
||||
public async Task Upload_accepts_markdown_cv_and_saves_text()
|
||||
{
|
||||
@@ -46,18 +240,38 @@ public sealed class ProfileCvControllerTests
|
||||
var aiService = new Mock<ISummarizerService>();
|
||||
aiService
|
||||
.Setup(x => x.ExtractTextAsync(It.IsAny<Stream>(), It.IsAny<string>(), It.IsAny<string?>(), It.IsAny<CancellationToken>()))
|
||||
.ReturnsAsync(new AiTextExtractionResult("# CV\nBuilt APIs and UIs", false, "text/markdown", null, 22, "resume.md"));
|
||||
.ReturnsAsync(new AiTextExtractionResult("# Connor Babbington\n\n## Professional Summary\nBuilt APIs and UIs", false, "text/markdown", null, 62, "resume.md"));
|
||||
aiService
|
||||
.Setup(x => x.SummarizeSectionAsync(It.Is<string>(instruction => instruction.Contains("Extract this CV into structured JSON", StringComparison.Ordinal)), It.IsAny<string>(), 3200, 900))
|
||||
.ReturnsAsync("""
|
||||
{
|
||||
"version":"1",
|
||||
"contact":{"fullName":"Connor Babbington"},
|
||||
"summary":["Built APIs and UIs"],
|
||||
"jobs":[],
|
||||
"education":[],
|
||||
"skills":[],
|
||||
"languages":[],
|
||||
"interests":[],
|
||||
"otherSections":[]
|
||||
}
|
||||
""");
|
||||
|
||||
var controller = new ProfileCvController(userManager.Object, aiService.Object)
|
||||
{
|
||||
ControllerContext = new ControllerContext { HttpContext = new DefaultHttpContext() }
|
||||
};
|
||||
|
||||
var file = new FormFile(new MemoryStream(Encoding.UTF8.GetBytes("# CV\nBuilt APIs and UIs")), 0, 23, "file", "resume.md");
|
||||
var file = new FormFile(new MemoryStream(Encoding.UTF8.GetBytes("# Connor Babbington\n\n## Professional Summary\nBuilt APIs and UIs")), 0, 62, "file", "resume.md")
|
||||
{
|
||||
Headers = new HeaderDictionary(),
|
||||
ContentType = "text/markdown"
|
||||
};
|
||||
var result = await controller.Upload(file);
|
||||
|
||||
Assert.IsType<OkObjectResult>(result);
|
||||
Assert.Contains("Built APIs", user.ProfileCvText);
|
||||
Assert.Equal("Connor Babbington", StructuredCvProfileJson.Deserialize(user.ProfileCvStructureJson).Contact.FullName);
|
||||
}
|
||||
|
||||
private static Mock<UserManager<ApplicationUser>> CreateUserManager()
|
||||
|
||||
Reference in New Issue
Block a user