From f4022135263ed7e46353b6a0725d8c402fc6ba0f Mon Sep 17 00:00:00 2001 From: cesnimda Date: Wed, 1 Apr 2026 11:06:55 +0200 Subject: [PATCH] Extend CV classifier contract and provenance UI --- .../ProfileCvControllerTests.cs | 6 ++--- .../Controllers/ProfileCvController.cs | 26 ++++++++++++++----- JobTrackerApi/Services/CvAiClassifier.cs | 4 ++- job-tracker-ui/src/pages/ProfilePage.tsx | 1 + job-tracker-ui/src/profile-page.test.tsx | 7 ++--- tools/summarizer/app.py | 16 +++++++++--- tools/summarizer/tests/test_app.py | 6 +++++ 7 files changed, 49 insertions(+), 17 deletions(-) diff --git a/JobTrackerApi.Tests/ProfileCvControllerTests.cs b/JobTrackerApi.Tests/ProfileCvControllerTests.cs index 51f7635..9ab8b07 100644 --- a/JobTrackerApi.Tests/ProfileCvControllerTests.cs +++ b/JobTrackerApi.Tests/ProfileCvControllerTests.cs @@ -595,10 +595,10 @@ public sealed class ProfileCvControllerTests var classifier = new Mock(); classifier .Setup(x => x.ClassifyBlockAsync(It.Is(block => block.Contains("Atlas Systems", StringComparison.Ordinal)), It.IsAny())) - .ReturnsAsync(new CvBlockClassificationResult("Work Experience", 0.93, "job block", "Senior Platform Engineer", "Atlas Systems", "Oslo", "2019", "Present", new List { "Built event-driven APIs and migration tooling." })); + .ReturnsAsync(new CvBlockClassificationResult("Work Experience", 0.93, "job block", "Senior Platform Engineer", "Atlas Systems", "Oslo", "2019", "Present", new List { "Built event-driven APIs and migration tooling." }, null, new List { "Python", "SQL" })); classifier .Setup(x => x.ClassifyBlockAsync(It.Is(block => block.Contains("Python", StringComparison.Ordinal)), It.IsAny())) - .ReturnsAsync(new CvBlockClassificationResult("Skills", 0.88, "skills block", null, null, null, null, null, new List())); + .ReturnsAsync(new CvBlockClassificationResult("Skills", 0.88, "skills block", null, null, null, null, null, new List(), null, new List { "Python", "SQL", "Azure" })); await using var db = CreateDb(); var paths = CreatePaths(); @@ -638,7 +638,7 @@ public sealed class ProfileCvControllerTests var classifier = new Mock(); classifier .Setup(x => x.ClassifyBlockAsync(It.IsAny(), It.IsAny())) - .ReturnsAsync(new CvBlockClassificationResult("Education", 0.87, "education block", "BSc Computer Science", "University of Oslo", "Oslo", "2016", "2019", new List { "Graduated with focus on distributed systems." })); + .ReturnsAsync(new CvBlockClassificationResult("Education", 0.87, "education block", "BSc Computer Science", "University of Oslo", "Oslo", "2016", "2019", new List { "Graduated with focus on distributed systems." }, null, null)); await using var db = CreateDb(); var paths = CreatePaths(); diff --git a/JobTrackerApi/Controllers/ProfileCvController.cs b/JobTrackerApi/Controllers/ProfileCvController.cs index 94abc86..1162acb 100644 --- a/JobTrackerApi/Controllers/ProfileCvController.cs +++ b/JobTrackerApi/Controllers/ProfileCvController.cs @@ -934,14 +934,18 @@ public sealed class ProfileCvController : ControllerBase switch (block.SectionName) { case "Professional Summary": - foreach (var item in SplitClassifierContent(block.Content, 5)) + foreach (var item in (block.Classification?.Summary is { Count: > 0 } + ? block.Classification.Summary + : SplitClassifierContent(block.Content, 5))) { summary.Add(item); } ApplyClassifierFieldMetadata(profile, "summary", summary.FirstOrDefault(), block, now); break; case "Skills": - foreach (var item in SplitClassifierSkills(block.Content)) + foreach (var item in (block.Classification?.Skills is { Count: > 0 } + ? block.Classification.Skills.Where(skill => !string.IsNullOrWhiteSpace(skill)).Select(skill => skill.Trim()) + : SplitClassifierSkills(block.Content))) { skills.Add(item); } @@ -1013,7 +1017,9 @@ public sealed class ProfileCvController : ControllerBase End = NullIfWhitespace(classification.End), IsCurrent = string.Equals(classification.End, "Present", StringComparison.OrdinalIgnoreCase) || string.Equals(classification.End, "Current", StringComparison.OrdinalIgnoreCase), Bullets = bullets, - Skills = SplitClassifierSkills(block.OriginalBlock) + Skills = classification.Skills is { Count: > 0 } + ? classification.Skills.Where(skill => !string.IsNullOrWhiteSpace(skill)).Select(skill => skill.Trim()).ToList() + : SplitClassifierSkills(block.OriginalBlock) }; return StructuredCvProfileJson.Normalize(new StructuredCvProfile { Jobs = new List { job } }).Jobs.FirstOrDefault(); @@ -1140,12 +1146,20 @@ public sealed class ProfileCvController : ControllerBase } else if (string.Equals(sectionName, "Skills", StringComparison.OrdinalIgnoreCase)) { - var items = SplitClassifierSkills(block); + var items = classification?.Skills is { Count: > 0 } + ? classification.Skills.Where(skill => !string.IsNullOrWhiteSpace(skill)).Select(skill => skill.Trim()).ToList() + : SplitClassifierSkills(block); if (items.Count > 0) content = string.Join("\n", items); } - else if (string.Equals(sectionName, "Professional Summary", StringComparison.OrdinalIgnoreCase) && classification?.Bullets is { Count: > 0 }) + else if (string.Equals(sectionName, "Professional Summary", StringComparison.OrdinalIgnoreCase)) { - content = string.Join("\n", classification.Bullets.Where(bullet => !string.IsNullOrWhiteSpace(bullet)).Select(bullet => $"- {bullet.Trim()}")); + var items = classification?.Summary is { Count: > 0 } + ? classification.Summary.Where(line => !string.IsNullOrWhiteSpace(line)).Select(line => $"- {line.Trim()}") + : classification?.Bullets is { Count: > 0 } + ? classification.Bullets.Where(bullet => !string.IsNullOrWhiteSpace(bullet)).Select(bullet => $"- {bullet.Trim()}") + : Enumerable.Empty(); + var materialized = items.ToList(); + if (materialized.Count > 0) content = string.Join("\n", materialized); } results.Add(new ClassifiedCvBlock(index + 1, block, sectionName, content, classification)); diff --git a/JobTrackerApi/Services/CvAiClassifier.cs b/JobTrackerApi/Services/CvAiClassifier.cs index 70aba34..e598751 100644 --- a/JobTrackerApi/Services/CvAiClassifier.cs +++ b/JobTrackerApi/Services/CvAiClassifier.cs @@ -13,7 +13,9 @@ public sealed record CvBlockClassificationResult( string? Location, string? Start, string? End, - List? Bullets); + List? Bullets, + List? Summary, + List? Skills); public interface ICvAiClassifier { diff --git a/job-tracker-ui/src/pages/ProfilePage.tsx b/job-tracker-ui/src/pages/ProfilePage.tsx index d8c405f..28345b0 100644 --- a/job-tracker-ui/src/pages/ProfilePage.tsx +++ b/job-tracker-ui/src/pages/ProfilePage.tsx @@ -109,6 +109,7 @@ function FieldReviewNote({ metadata }: { metadata?: StructuredCvFieldMetadata }) {metadata.method ? : null} + {metadata.sourceBlockId ? : null} {metadata.reviewState ? : null} {metadata.sourceSnippet ? ( diff --git a/job-tracker-ui/src/profile-page.test.tsx b/job-tracker-ui/src/profile-page.test.tsx index 16214d4..fff5476 100644 --- a/job-tracker-ui/src/profile-page.test.tsx +++ b/job-tracker-ui/src/profile-page.test.tsx @@ -29,9 +29,9 @@ const structuredCv = { appliedExtractionRunId: 12, updatedAtUtc: '2026-03-28T12:00:00Z', fields: { - 'contact.fullName': { confidence: 0.92, method: 'llm', reviewState: 'suggested', sourceSnippet: 'Demo User' }, - summary: { confidence: 0.71, method: 'deterministic', reviewState: 'suggested', sourceSnippet: 'Built backend systems' }, - skills: { confidence: 0.68, method: 'deterministic', reviewState: 'suggested', sourceSnippet: '.NET' }, + 'contact.fullName': { confidence: 0.92, method: 'llm', sourceBlockId: 'block-1', reviewState: 'suggested', sourceSnippet: 'Demo User' }, + summary: { confidence: 0.71, method: 'deterministic', sourceBlockId: 'block-2', reviewState: 'suggested', sourceSnippet: 'Built backend systems' }, + skills: { confidence: 0.68, method: 'deterministic', sourceBlockId: 'block-3', reviewState: 'suggested', sourceSnippet: '.NET' }, }, }, contact: { @@ -153,6 +153,7 @@ test('profile page loads persisted structured cv and can re-parse it', async () expect(screen.getAllByText(/professional summary/i).length).toBeGreaterThan(0); expect(screen.getByLabelText(/full name/i)).toHaveValue('Demo User'); expect(screen.getByText(/high 92%/i)).toBeInTheDocument(); + expect(screen.getByText(/block-1/i)).toBeInTheDocument(); fireEvent.click(originalExtractionToggle); expect(originalExtractionToggle).toHaveAttribute('aria-expanded', 'true'); diff --git a/tools/summarizer/app.py b/tools/summarizer/app.py index 95c47f8..56ce59b 100644 --- a/tools/summarizer/app.py +++ b/tools/summarizer/app.py @@ -376,18 +376,24 @@ Return ONLY valid JSON with this exact shape: "location": string|null, "start": string|null, "end": string|null, - "bullets": string[] + "bullets": string[], + "summary": string[], + "skills": string[] }} Rules: - Preserve facts only. - section must be one of the listed values. - Use Work Experience only for job/employment blocks. -- For Contact blocks, keep title/company/start/end null and bullets empty. -- For non-work blocks, title/company/start/end should usually be null. +- Use Education only for degree/course/certification blocks. +- For Contact blocks, keep title/company/start/end null and bullets/summary/skills empty. +- For Professional Summary blocks, prefer summary for concise summary lines and keep bullets empty unless the source is already bullet-like. +- For Skills blocks, prefer skills for normalized skill items and keep title/company/start/end null. +- For non-work and non-education blocks, title/company/start/end should usually be null. - location must look like a place, not a sentence. - dates must be one of: year, month+year, dd/mm/yyyy, Present, Current. -- bullets should only be job tasks/achievements, not titles, companies, dates, or headings. +- bullets should only be concrete tasks/achievements/details, not titles, companies, dates, or headings. +- skills should be short normalized skill/tool terms, not sentences. - If unsure, choose Other and keep fields null/empty. Block: @@ -405,6 +411,8 @@ Block: "start": parsed.get("start"), "end": parsed.get("end"), "bullets": parsed.get("bullets") or [], + "summary": parsed.get("summary") or [], + "skills": parsed.get("skills") or [], } diff --git a/tools/summarizer/tests/test_app.py b/tools/summarizer/tests/test_app.py index 148e531..c9bc1b9 100644 --- a/tools/summarizer/tests/test_app.py +++ b/tools/summarizer/tests/test_app.py @@ -49,6 +49,8 @@ def test_classify_block_returns_structured_json(monkeypatch): "start": "2019", "end": "Present", "bullets": ["Built event-driven APIs and migration tooling."], + "summary": [], + "skills": ["Python", "SQL"], } monkeypatch.setattr(module, "_ollama_generate_json", fake_generate_json) @@ -62,6 +64,8 @@ def test_classify_block_returns_structured_json(monkeypatch): assert payload["title"] == "Senior Platform Engineer" assert payload["company"] == "Atlas Systems" assert payload["bullets"] == ["Built event-driven APIs and migration tooling."] + assert payload["summary"] == [] + assert payload["skills"] == ["Python", "SQL"] def test_classify_block_defaults_missing_section_to_other(monkeypatch): @@ -75,3 +79,5 @@ def test_classify_block_defaults_missing_section_to_other(monkeypatch): payload = response.json() assert payload["section"] == "Other" assert payload["bullets"] == [] + assert payload["summary"] == [] + assert payload["skills"] == []