Improve CV parsing and profile editor flow
This commit is contained in:
@@ -124,6 +124,10 @@ public sealed class AdminSystemController : ControllerBase
|
||||
GpuName: null,
|
||||
OcrAvailable: false,
|
||||
OcrLanguages: null,
|
||||
OllamaConfigured: null,
|
||||
OllamaReachable: null,
|
||||
OllamaModel: null,
|
||||
OllamaModelAvailable: null,
|
||||
HealthLatencyMs: null,
|
||||
ProbeLatencyMs: null,
|
||||
LastProbeAt: null,
|
||||
|
||||
@@ -61,13 +61,15 @@ public sealed class ProfileCvController : ControllerBase
|
||||
|
||||
private readonly UserManager<ApplicationUser> _users;
|
||||
private readonly ISummarizerService _aiService;
|
||||
private readonly ICvAiClassifier _cvAiClassifier;
|
||||
private readonly JobTrackerContext _db;
|
||||
private readonly AppPaths _paths;
|
||||
|
||||
public ProfileCvController(UserManager<ApplicationUser> users, ISummarizerService aiService, JobTrackerContext db, AppPaths paths)
|
||||
/// <summary>
/// Creates the controller and stores each injected service in its backing field.
/// </summary>
/// <param name="users">Stored in <c>_users</c>.</param>
/// <param name="aiService">Stored in <c>_aiService</c>.</param>
/// <param name="db">Stored in <c>_db</c>.</param>
/// <param name="paths">Stored in <c>_paths</c>.</param>
/// <param name="cvAiClassifier">
/// Optional classifier; when it is omitted or null, <c>NoOpCvAiClassifier.Instance</c> is used instead.
/// </param>
public ProfileCvController(UserManager<ApplicationUser> users, ISummarizerService aiService, JobTrackerContext db, AppPaths paths, ICvAiClassifier? cvAiClassifier = null)
{
    // Resolve the optional dependency first so the field is never left null.
    if (cvAiClassifier is null)
    {
        _cvAiClassifier = NoOpCvAiClassifier.Instance;
    }
    else
    {
        _cvAiClassifier = cvAiClassifier;
    }

    _paths = paths;
    _db = db;
    _aiService = aiService;
    _users = users;
}
|
||||
@@ -338,14 +340,7 @@ public sealed class ProfileCvController : ControllerBase
|
||||
private async Task<StructuredCvProfile> BuildStructuredCvAsync(string text, CancellationToken cancellationToken)
|
||||
{
|
||||
var parseSource = NormalizeTextForStructuredParsing(text);
|
||||
var fallbackSections = ParseSections(parseSource)
|
||||
.Select(section => new StructuredCvSection
|
||||
{
|
||||
Name = section.Name,
|
||||
Content = section.Content,
|
||||
WordCount = CountWords(section.Content),
|
||||
})
|
||||
.ToList();
|
||||
var fallbackSections = await BuildFallbackSectionsAsync(parseSource, cancellationToken);
|
||||
|
||||
var sectionFallback = StructuredCvProfileJson.FromSections(fallbackSections);
|
||||
AnnotateStructuredCv(sectionFallback, "repair", 0.56);
|
||||
@@ -729,12 +724,19 @@ public sealed class ProfileCvController : ControllerBase
|
||||
private static List<StructuredCvLanguage> ParseLanguagesHeuristically(string content)
|
||||
{
|
||||
var languages = new List<StructuredCvLanguage>();
|
||||
foreach (Match match in Regex.Matches(content, @"\b(English|Norwegian|Norsk|German|French|Spanish|Swedish|Danish)\b(?:[^\n.,;:]*?\b(Native|Fluent|Advanced|Intermediate|Beginner|A1|A2|B1|B2|C1|C2|Native speaker)\b)?", RegexOptions.IgnoreCase))
|
||||
var candidates = Regex.Split(content.Replace("\r\n", "\n"), @"[\n,;]+|(?<=[.!?])\s+")
|
||||
.Select(item => item.Trim())
|
||||
.Where(item => item.Length > 1);
|
||||
|
||||
foreach (var candidate in candidates)
|
||||
{
|
||||
var name = NullIfWhitespace(match.Groups[1].Value);
|
||||
var level = NullIfWhitespace(match.Groups[2].Value);
|
||||
if (name is null) continue;
|
||||
languages.Add(new StructuredCvLanguage { Name = name, Level = level });
|
||||
var level = HumanLanguageCatalog.ExtractLevel(candidate);
|
||||
if (level is null) continue;
|
||||
|
||||
foreach (var name in HumanLanguageCatalog.ExtractLanguageNames(candidate))
|
||||
{
|
||||
languages.Add(new StructuredCvLanguage { Name = name, Level = level });
|
||||
}
|
||||
}
|
||||
|
||||
return languages
|
||||
@@ -872,6 +874,86 @@ public sealed class ProfileCvController : ControllerBase
|
||||
.ToList();
|
||||
}
|
||||
|
||||
/// <summary>
/// Builds the fallback section list for a CV text.
/// </summary>
/// <remarks>
/// The output of <c>ParseSections</c> is used as-is whenever it contains at least one section whose
/// name is not "General" (case-insensitive). Otherwise the text is handed to
/// <c>ClassifyBlocksIntoSectionsAsync</c>, and the parsed result is kept only if that returns nothing.
/// </remarks>
/// <param name="parseSource">Text to split into sections.</param>
/// <param name="cancellationToken">Forwarded to the classifier-based path.</param>
/// <returns>The parsed sections, or the classifier-derived sections when parsing found only "General".</returns>
private async Task<List<StructuredCvSection>> BuildFallbackSectionsAsync(string parseSource, CancellationToken cancellationToken)
{
    var parsedSections = new List<StructuredCvSection>();
    foreach (var raw in ParseSections(parseSource))
    {
        parsedSections.Add(new StructuredCvSection
        {
            Name = raw.Name,
            Content = raw.Content,
            WordCount = CountWords(raw.Content),
        });
    }

    // Equivalent to "no section other than General exists" (also true for an empty list).
    var onlyGeneral = parsedSections.All(item => string.Equals(item.Name, "General", StringComparison.OrdinalIgnoreCase));
    if (!onlyGeneral)
    {
        return parsedSections;
    }

    var classified = await ClassifyBlocksIntoSectionsAsync(parseSource, cancellationToken);
    if (classified.Count > 0)
    {
        return classified;
    }

    return parsedSections;
}
|
||||
|
||||
/// <summary>
/// Splits the text into blank-line separated blocks, asks the CV classifier for a section per block,
/// and merges blocks that share a section name into one section, in first-seen order.
/// </summary>
/// <remarks>
/// Blocks shorter than 24 characters after trimming are ignored. A missing section name, or "Other",
/// is filed under "General". Blocks classified as "Work Experience" are rewritten as a "### title"
/// line, a "company | location | dates" line and "- " bullets when the classifier supplies any of them.
/// </remarks>
/// <param name="parseSource">Text to classify.</param>
/// <param name="cancellationToken">Forwarded to every classifier call.</param>
/// <returns>Sections with non-blank content; empty when no block qualifies.</returns>
private async Task<List<StructuredCvSection>> ClassifyBlocksIntoSectionsAsync(string parseSource, CancellationToken cancellationToken)
{
    var blocks = Regex.Split(parseSource.Replace("\r\n", "\n"), @"\n\s*\n")
        .Select(block => block.Trim())
        .Where(block => block.Length >= 24)
        .ToList();

    if (blocks.Count == 0) return new List<StructuredCvSection>();

    var sectionBuckets = new List<StructuredCvSection>();
    foreach (var block in blocks)
    {
        var classification = await _cvAiClassifier.ClassifyBlockAsync(block, cancellationToken);

        // Trim classifier output so stray whitespace cannot defeat the alias lookup below.
        var sectionName = classification?.Section?.Trim();
        if (!string.IsNullOrWhiteSpace(sectionName) && SectionAliases.TryGetValue(sectionName, out var canonical))
        {
            sectionName = canonical;
        }

        if (string.IsNullOrWhiteSpace(sectionName) || string.Equals(sectionName, "Other", StringComparison.OrdinalIgnoreCase))
        {
            sectionName = "General";
        }

        var content = block;
        if (string.Equals(sectionName, "Work Experience", StringComparison.OrdinalIgnoreCase) && classification is not null)
        {
            var lines = new List<string>();
            if (!string.IsNullOrWhiteSpace(classification.Title)) lines.Add($"### {classification.Title.Trim()}");

            var endText = classification.End?.Trim();
            var endIsCurrent = string.Equals(endText, "Present", StringComparison.OrdinalIgnoreCase)
                || string.Equals(endText, "Current", StringComparison.OrdinalIgnoreCase);
            var dateRange = FormatDateRangeForSection(classification.Start, endText, endIsCurrent);

            var meta = string.Join(" | ", new[] { classification.Company, classification.Location, dateRange }.Where(value => !string.IsNullOrWhiteSpace(value)));
            if (!string.IsNullOrWhiteSpace(meta)) lines.Add(meta);

            if (classification.Bullets is not null)
            {
                lines.AddRange(classification.Bullets.Where(bullet => !string.IsNullOrWhiteSpace(bullet)).Select(bullet => $"- {bullet.Trim()}"));
            }

            // NOTE(review): when the classifier returns a title/meta but no bullets, the original block
            // text is replaced by the header lines alone — confirm that dropping the body is intended.
            if (lines.Count > 0) content = string.Join("\n", lines);
        }

        // Match buckets case-insensitively, consistent with every other section-name comparison here,
        // so "skills" and "Skills" are merged instead of producing duplicate sections.
        var existing = sectionBuckets.FirstOrDefault(section => string.Equals(section.Name, sectionName, StringComparison.OrdinalIgnoreCase));
        if (existing is null)
        {
            sectionBuckets.Add(new StructuredCvSection { Name = sectionName, Content = content, WordCount = CountWords(content) });
        }
        else
        {
            existing.Content = $"{existing.Content}\n\n{content}".Trim();
            existing.WordCount = CountWords(existing.Content);
        }
    }

    return sectionBuckets.Where(section => !string.IsNullOrWhiteSpace(section.Content)).ToList();
}
|
||||
|
||||
/// <summary>
/// Formats a start/end pair as a single date-range string for a section's metadata line.
/// </summary>
/// <param name="start">Start date text; may be null or blank.</param>
/// <param name="end">End date text; may be null or blank.</param>
/// <param name="isCurrent">
/// When true and a start is present, the range ends in "Present" regardless of <paramref name="end"/>.
/// </param>
/// <returns>
/// <c>null</c> when both values are blank; the trimmed end alone when only the start is blank;
/// otherwise "start - end", where a blank end is rendered as "Present".
/// </returns>
private static string? FormatDateRangeForSection(string? start, string? end, bool isCurrent)
{
    var startText = string.IsNullOrWhiteSpace(start) ? null : start.Trim();
    var endText = string.IsNullOrWhiteSpace(end) ? null : end.Trim();

    // No start: fall back to the end alone (which is null when both are blank).
    if (startText is null) return endText;

    // Treat an empty or whitespace end like a missing one; a plain null-coalesce would
    // let "" through and produce a dangling "start - ".
    var endLabel = isCurrent || endText is null ? "Present" : endText;
    return $"{startText} - {endLabel}";
}
|
||||
|
||||
private async Task<string> MaybeReconstructStructuredCvAsync(string text, CancellationToken cancellationToken)
|
||||
{
|
||||
var normalized = text.Trim();
|
||||
|
||||
Reference in New Issue
Block a user