Extend CV classifier contract and provenance UI

This commit is contained in:
2026-04-01 11:06:55 +02:00
parent b283f8b9d2
commit f402213526
7 changed files with 49 additions and 17 deletions
@@ -595,10 +595,10 @@ public sealed class ProfileCvControllerTests
var classifier = new Mock<ICvAiClassifier>();
classifier
.Setup(x => x.ClassifyBlockAsync(It.Is<string>(block => block.Contains("Atlas Systems", StringComparison.Ordinal)), It.IsAny<CancellationToken>()))
.ReturnsAsync(new CvBlockClassificationResult("Work Experience", 0.93, "job block", "Senior Platform Engineer", "Atlas Systems", "Oslo", "2019", "Present", new List<string> { "Built event-driven APIs and migration tooling." }));
.ReturnsAsync(new CvBlockClassificationResult("Work Experience", 0.93, "job block", "Senior Platform Engineer", "Atlas Systems", "Oslo", "2019", "Present", new List<string> { "Built event-driven APIs and migration tooling." }, null, new List<string> { "Python", "SQL" }));
classifier
.Setup(x => x.ClassifyBlockAsync(It.Is<string>(block => block.Contains("Python", StringComparison.Ordinal)), It.IsAny<CancellationToken>()))
.ReturnsAsync(new CvBlockClassificationResult("Skills", 0.88, "skills block", null, null, null, null, null, new List<string>()));
.ReturnsAsync(new CvBlockClassificationResult("Skills", 0.88, "skills block", null, null, null, null, null, new List<string>(), null, new List<string> { "Python", "SQL", "Azure" }));
await using var db = CreateDb();
var paths = CreatePaths();
@@ -638,7 +638,7 @@ public sealed class ProfileCvControllerTests
var classifier = new Mock<ICvAiClassifier>();
classifier
.Setup(x => x.ClassifyBlockAsync(It.IsAny<string>(), It.IsAny<CancellationToken>()))
.ReturnsAsync(new CvBlockClassificationResult("Education", 0.87, "education block", "BSc Computer Science", "University of Oslo", "Oslo", "2016", "2019", new List<string> { "Graduated with focus on distributed systems." }));
.ReturnsAsync(new CvBlockClassificationResult("Education", 0.87, "education block", "BSc Computer Science", "University of Oslo", "Oslo", "2016", "2019", new List<string> { "Graduated with focus on distributed systems." }, null, null));
await using var db = CreateDb();
var paths = CreatePaths();
@@ -934,14 +934,18 @@ public sealed class ProfileCvController : ControllerBase
switch (block.SectionName)
{
case "Professional Summary":
foreach (var item in SplitClassifierContent(block.Content, 5))
foreach (var item in (block.Classification?.Summary is { Count: > 0 }
? block.Classification.Summary
: SplitClassifierContent(block.Content, 5)))
{
summary.Add(item);
}
ApplyClassifierFieldMetadata(profile, "summary", summary.FirstOrDefault(), block, now);
break;
case "Skills":
foreach (var item in SplitClassifierSkills(block.Content))
foreach (var item in (block.Classification?.Skills is { Count: > 0 }
? block.Classification.Skills.Where(skill => !string.IsNullOrWhiteSpace(skill)).Select(skill => skill.Trim())
: SplitClassifierSkills(block.Content)))
{
skills.Add(item);
}
@@ -1013,7 +1017,9 @@ public sealed class ProfileCvController : ControllerBase
End = NullIfWhitespace(classification.End),
IsCurrent = string.Equals(classification.End, "Present", StringComparison.OrdinalIgnoreCase) || string.Equals(classification.End, "Current", StringComparison.OrdinalIgnoreCase),
Bullets = bullets,
Skills = SplitClassifierSkills(block.OriginalBlock)
Skills = classification.Skills is { Count: > 0 }
? classification.Skills.Where(skill => !string.IsNullOrWhiteSpace(skill)).Select(skill => skill.Trim()).ToList()
: SplitClassifierSkills(block.OriginalBlock)
};
return StructuredCvProfileJson.Normalize(new StructuredCvProfile { Jobs = new List<StructuredCvJob> { job } }).Jobs.FirstOrDefault();
@@ -1140,12 +1146,20 @@ public sealed class ProfileCvController : ControllerBase
}
else if (string.Equals(sectionName, "Skills", StringComparison.OrdinalIgnoreCase))
{
var items = SplitClassifierSkills(block);
var items = classification?.Skills is { Count: > 0 }
? classification.Skills.Where(skill => !string.IsNullOrWhiteSpace(skill)).Select(skill => skill.Trim()).ToList()
: SplitClassifierSkills(block);
if (items.Count > 0) content = string.Join("\n", items);
}
else if (string.Equals(sectionName, "Professional Summary", StringComparison.OrdinalIgnoreCase) && classification?.Bullets is { Count: > 0 })
else if (string.Equals(sectionName, "Professional Summary", StringComparison.OrdinalIgnoreCase))
{
content = string.Join("\n", classification.Bullets.Where(bullet => !string.IsNullOrWhiteSpace(bullet)).Select(bullet => $"- {bullet.Trim()}"));
var items = classification?.Summary is { Count: > 0 }
? classification.Summary.Where(line => !string.IsNullOrWhiteSpace(line)).Select(line => $"- {line.Trim()}")
: classification?.Bullets is { Count: > 0 }
? classification.Bullets.Where(bullet => !string.IsNullOrWhiteSpace(bullet)).Select(bullet => $"- {bullet.Trim()}")
: Enumerable.Empty<string>();
var materialized = items.ToList();
if (materialized.Count > 0) content = string.Join("\n", materialized);
}
results.Add(new ClassifiedCvBlock(index + 1, block, sectionName, content, classification));
+3 -1
View File
@@ -13,7 +13,9 @@ public sealed record CvBlockClassificationResult(
string? Location,
string? Start,
string? End,
List<string>? Bullets);
List<string>? Bullets,
List<string>? Summary,
List<string>? Skills);
public interface ICvAiClassifier
{
+1
View File
@@ -109,6 +109,7 @@ function FieldReviewNote({ metadata }: { metadata?: StructuredCvFieldMetadata })
<Box sx={{ display: "flex", gap: 0.75, flexWrap: "wrap", mt: 0.75, alignItems: "center" }}>
<Chip size="small" color={tone.color} variant={tone.color === "default" ? "outlined" : "filled"} label={tone.label} />
{metadata.method ? <Chip size="small" variant="outlined" label={metadata.method} /> : null}
{metadata.sourceBlockId ? <Chip size="small" variant="outlined" label={metadata.sourceBlockId} /> : null}
{metadata.reviewState ? <Chip size="small" variant="outlined" label={metadata.reviewState} /> : null}
{metadata.sourceSnippet ? (
<Typography variant="caption" sx={{ color: "text.secondary" }}>
+4 -3
View File
@@ -29,9 +29,9 @@ const structuredCv = {
appliedExtractionRunId: 12,
updatedAtUtc: '2026-03-28T12:00:00Z',
fields: {
'contact.fullName': { confidence: 0.92, method: 'llm', reviewState: 'suggested', sourceSnippet: 'Demo User' },
summary: { confidence: 0.71, method: 'deterministic', reviewState: 'suggested', sourceSnippet: 'Built backend systems' },
skills: { confidence: 0.68, method: 'deterministic', reviewState: 'suggested', sourceSnippet: '.NET' },
'contact.fullName': { confidence: 0.92, method: 'llm', sourceBlockId: 'block-1', reviewState: 'suggested', sourceSnippet: 'Demo User' },
summary: { confidence: 0.71, method: 'deterministic', sourceBlockId: 'block-2', reviewState: 'suggested', sourceSnippet: 'Built backend systems' },
skills: { confidence: 0.68, method: 'deterministic', sourceBlockId: 'block-3', reviewState: 'suggested', sourceSnippet: '.NET' },
},
},
contact: {
@@ -153,6 +153,7 @@ test('profile page loads persisted structured cv and can re-parse it', async ()
expect(screen.getAllByText(/professional summary/i).length).toBeGreaterThan(0);
expect(screen.getByLabelText(/full name/i)).toHaveValue('Demo User');
expect(screen.getByText(/high 92%/i)).toBeInTheDocument();
expect(screen.getByText(/block-1/i)).toBeInTheDocument();
fireEvent.click(originalExtractionToggle);
expect(originalExtractionToggle).toHaveAttribute('aria-expanded', 'true');
+12 -4
View File
@@ -376,18 +376,24 @@ Return ONLY valid JSON with this exact shape:
"location": string|null,
"start": string|null,
"end": string|null,
"bullets": string[]
"bullets": string[],
"summary": string[],
"skills": string[]
}}
Rules:
- Preserve facts only.
- section must be one of the listed values.
- Use Work Experience only for job/employment blocks.
- For Contact blocks, keep title/company/start/end null and bullets empty.
- For non-work blocks, title/company/start/end should usually be null.
- Use Education only for degree/course/certification blocks.
- For Contact blocks, keep title/company/start/end null and bullets/summary/skills empty.
- For Professional Summary blocks, prefer summary for concise summary lines and keep bullets empty unless the source is already bullet-like.
- For Skills blocks, prefer skills for normalized skill items and keep title/company/start/end null.
- For non-work and non-education blocks, title/company/start/end should usually be null.
- location must look like a place, not a sentence.
- dates must be one of: year, month+year, dd/mm/yyyy, Present, Current.
- bullets should only be job tasks/achievements, not titles, companies, dates, or headings.
- bullets should only be concrete tasks/achievements/details, not titles, companies, dates, or headings.
- skills should be short normalized skill/tool terms, not sentences.
- If unsure, choose Other and keep fields null/empty.
Block:
@@ -405,6 +411,8 @@ Block:
"start": parsed.get("start"),
"end": parsed.get("end"),
"bullets": parsed.get("bullets") or [],
"summary": parsed.get("summary") or [],
"skills": parsed.get("skills") or [],
}
+6
View File
@@ -49,6 +49,8 @@ def test_classify_block_returns_structured_json(monkeypatch):
"start": "2019",
"end": "Present",
"bullets": ["Built event-driven APIs and migration tooling."],
"summary": [],
"skills": ["Python", "SQL"],
}
monkeypatch.setattr(module, "_ollama_generate_json", fake_generate_json)
@@ -62,6 +64,8 @@ def test_classify_block_returns_structured_json(monkeypatch):
assert payload["title"] == "Senior Platform Engineer"
assert payload["company"] == "Atlas Systems"
assert payload["bullets"] == ["Built event-driven APIs and migration tooling."]
assert payload["summary"] == []
assert payload["skills"] == ["Python", "SQL"]
def test_classify_block_defaults_missing_section_to_other(monkeypatch):
@@ -75,3 +79,5 @@ def test_classify_block_defaults_missing_section_to_other(monkeypatch):
payload = response.json()
assert payload["section"] == "Other"
assert payload["bullets"] == []
assert payload["summary"] == []
assert payload["skills"] == []