Add CV extraction review surfaces

This commit is contained in:
2026-03-28 23:51:03 +01:00
parent 107c181506
commit 2392b135c2
7 changed files with 390 additions and 34 deletions
@@ -76,6 +76,18 @@ public sealed class ProfileCvController : ControllerBase
/// <summary>Request record carrying a single optional <c>Text</c> value.</summary>
// NOTE(review): presumably bound from the body of the CV parse endpoint; that action is outside this view — confirm how a null Text is handled there.
public sealed record ParseCvRequest(string? Text);
/// <summary>
/// Controller-private bundle of three values: the raw text, the normalized text,
/// and the <see cref="StructuredCvProfile"/> associated with them.
/// </summary>
// NOTE(review): presumably the return shape of the extraction pipeline helper; the producer is not visible here — confirm.
private sealed record ExtractionPipelineResult(string RawText, string NormalizedText, StructuredCvProfile StructuredCv);
/// <summary>
/// List-view projection of one CV extraction run, as returned by the <c>runs</c> endpoint.
/// Every member is copied from the matching column of the run row, except
/// <c>ArtifactFileName</c>, which comes from the run's related artifact.
/// </summary>
/// <param name="Id">Identifier of the run row.</param>
/// <param name="Trigger">The run's <c>Trigger</c> value, passed through unchanged.</param>
/// <param name="Status">The run's <c>Status</c> value, passed through unchanged.</param>
/// <param name="ArtifactFileName">Original file name of the run's artifact, or <c>null</c> when the run has no artifact.</param>
/// <param name="StartedAtUtc">When the run started; the list is ordered by this value, newest first.</param>
/// <param name="CompletedAtUtc">The run's <c>CompletedAtUtc</c> value; nullable.</param>
/// <param name="AppliedAtUtc">The run's <c>AppliedAtUtc</c> value; nullable.</param>
/// <param name="ParserVersion">The run's <c>ParserVersion</c> value.</param>
/// <param name="NormalizerVersion">The run's <c>NormalizerVersion</c> value.</param>
/// <param name="LlmPromptVersion">The run's <c>LlmPromptVersion</c> value.</param>
/// <param name="ErrorMessage">The run's <c>ErrorMessage</c> value; nullable.</param>
public sealed record CvExtractionRunListItem(
int Id,
string Trigger,
string Status,
string? ArtifactFileName,
DateTimeOffset StartedAtUtc,
DateTimeOffset? CompletedAtUtc,
DateTimeOffset? AppliedAtUtc,
string ParserVersion,
string NormalizerVersion,
string LlmPromptVersion,
string? ErrorMessage);
[HttpPost("upload")]
[RequestSizeLimit(MaxFileSizeBytes)]
@@ -163,6 +175,34 @@ public sealed class ProfileCvController : ControllerBase
}
}
/// <summary>
/// Returns the current user's most recent CV extraction runs, newest first.
/// </summary>
/// <returns>
/// <c>401 Unauthorized</c> when the request principal does not resolve to a user;
/// otherwise <c>200 OK</c> with at most ten <see cref="CvExtractionRunListItem"/> entries.
/// </returns>
[HttpGet("runs")]
public async Task<ActionResult<IEnumerable<CvExtractionRunListItem>>> GetRuns()
{
    var currentUser = await _users.GetUserAsync(User);
    if (currentUser is null)
    {
        return Unauthorized();
    }

    // Read-only query: only rows owned by the caller, latest start time first, capped at ten.
    var newestOwnedRuns = _db.CvExtractionRuns
        .AsNoTracking()
        .Where(run => run.OwnerUserId == currentUser.Id)
        .OrderByDescending(run => run.StartedAtUtc)
        .Take(10);

    // Project straight into the DTO so only the listed columns are read.
    var listItems = newestOwnedRuns.Select(run => new CvExtractionRunListItem(
        run.Id,
        run.Trigger,
        run.Status,
        run.Artifact == null ? null : run.Artifact.OriginalFileName,
        run.StartedAtUtc,
        run.CompletedAtUtc,
        run.AppliedAtUtc,
        run.ParserVersion,
        run.NormalizerVersion,
        run.LlmPromptVersion,
        run.ErrorMessage));

    // Tie the database call to the request so a dropped connection cancels it.
    var requestAborted = HttpContext.RequestAborted;
    var result = await listItems.ToListAsync(requestAborted);
    return Ok(result);
}
[HttpPost("reprocess")]
public async Task<IActionResult> Reprocess()
{
@@ -308,7 +348,9 @@ public sealed class ProfileCvController : ControllerBase
.ToList();
var sectionFallback = StructuredCvProfileJson.FromSections(fallbackSections);
AnnotateStructuredCv(sectionFallback, "repair", 0.56);
var heuristicFallback = BuildHeuristicStructuredCv(parseSource, text);
AnnotateStructuredCv(heuristicFallback, "deterministic", 0.68);
heuristicFallback.Sections = new List<StructuredCvSection>();
var fallback = StructuredCvProfileJson.Merge(heuristicFallback, sectionFallback);
fallback.Contact.FullName ??= GuessFullName(text) ?? GuessFullNameFromEmail(fallback.Contact.Email);
@@ -433,6 +475,40 @@ public sealed class ProfileCvController : ControllerBase
await _db.SaveChangesAsync(cancellationToken);
}
/// <summary>
/// Records review metadata for every populated top-level field of <paramref name="profile"/>.
/// </summary>
/// <remarks>
/// For each field whose representative value is not blank, an entry is written to
/// <c>profile.Metadata.Fields</c> (both the metadata object and the dictionary are created
/// on demand), replacing any entry already stored under the same key. Each entry carries the
/// supplied <paramref name="method"/> and <paramref name="confidence"/>, a snippet of the
/// value limited to 180 UTF-16 chars, the review state <c>"suggested"</c>, and one shared
/// timestamp taken at the start of the call. List-valued fields are represented by their
/// first element only.
/// </remarks>
/// <param name="profile">Profile to annotate; mutated in place.</param>
/// <param name="method">Label stored as each entry's <c>Method</c> (callers in this file pass "repair", "deterministic" and "llm").</param>
/// <param name="confidence">Score stored as each entry's <c>Confidence</c>.</param>
private static void AnnotateStructuredCv(StructuredCvProfile profile, string method, double confidence)
{
    const int MaxSnippetLength = 180;

    var now = DateTimeOffset.UtcNow;

    // Resolve the target dictionary once, so the local functions below work on a
    // definitely-assigned local instead of re-dereferencing nullable properties.
    var metadata = profile.Metadata ??= new StructuredCvMetadata();
    var fields = metadata.Fields ??= new Dictionary<string, StructuredCvFieldMetadata>();

    // Limits a snippet to MaxSnippetLength chars without cutting a surrogate pair in half:
    // if the last kept char is a high surrogate, its low half would be dropped, so drop both.
    string Truncate(string text)
    {
        if (text.Length <= MaxSnippetLength) return text;
        var cut = char.IsHighSurrogate(text[MaxSnippetLength - 1]) ? MaxSnippetLength - 1 : MaxSnippetLength;
        return text[..cut];
    }

    // Falls back to the secondary value when the primary is null, empty or whitespace.
    // A plain `??` would let an empty primary hide a populated secondary.
    string? FirstNonBlank(string? primary, string? secondary) =>
        string.IsNullOrWhiteSpace(primary) ? secondary : primary;

    void SetIf(string key, string? value)
    {
        if (string.IsNullOrWhiteSpace(value)) return;
        fields[key] = new StructuredCvFieldMetadata
        {
            Confidence = confidence,
            Method = method,
            SourceSnippet = Truncate(value),
            ReviewState = "suggested",
            LastUpdatedAtUtc = now,
        };
    }

    var contact = profile.Contact;
    SetIf("contact.fullName", contact.FullName);
    SetIf("contact.headline", contact.Headline);
    SetIf("contact.email", contact.Email);
    SetIf("contact.phone", contact.Phone);
    SetIf("contact.location", contact.Location);
    SetIf("contact.website", contact.Website);
    SetIf("contact.linkedIn", contact.LinkedIn);

    SetIf("summary", profile.Summary.FirstOrDefault());
    SetIf("skills", profile.Skills.FirstOrDefault());
    SetIf("languages", profile.Languages.FirstOrDefault()?.Name);
    SetIf("interests", profile.Interests.FirstOrDefault());

    // Look up the first job / education entry once and reuse it for both candidate values.
    var firstJob = profile.Jobs.FirstOrDefault();
    SetIf("jobs", FirstNonBlank(firstJob?.Title, firstJob?.Company));

    var firstEducation = profile.Education.FirstOrDefault();
    SetIf("education", FirstNonBlank(firstEducation?.Qualification, firstEducation?.Institution));
}
private async Task<StructuredCvProfile?> TryExtractStructuredCvAsync(string text, CancellationToken cancellationToken)
{
var structuredJson = await _aiService.SummarizeSectionAsync(
@@ -446,7 +522,10 @@ public sealed class ProfileCvController : ControllerBase
if (string.IsNullOrWhiteSpace(extracted)) return null;
var parsed = StructuredCvProfileJson.Deserialize(extracted);
return IsMeaningfullyStructured(parsed) ? parsed : null;
if (!IsMeaningfullyStructured(parsed)) return null;
AnnotateStructuredCv(parsed, "llm", 0.82);
return parsed;
}
private static bool IsMeaningfullyStructured(StructuredCvProfile profile)