Improve CV parsing and profile editor flow
This commit is contained in:
@@ -124,6 +124,10 @@ public sealed class AdminSystemController : ControllerBase
|
||||
GpuName: null,
|
||||
OcrAvailable: false,
|
||||
OcrLanguages: null,
|
||||
OllamaConfigured: null,
|
||||
OllamaReachable: null,
|
||||
OllamaModel: null,
|
||||
OllamaModelAvailable: null,
|
||||
HealthLatencyMs: null,
|
||||
ProbeLatencyMs: null,
|
||||
LastProbeAt: null,
|
||||
|
||||
@@ -61,13 +61,15 @@ public sealed class ProfileCvController : ControllerBase
|
||||
|
||||
private readonly UserManager<ApplicationUser> _users;
|
||||
private readonly ISummarizerService _aiService;
|
||||
private readonly ICvAiClassifier _cvAiClassifier;
|
||||
private readonly JobTrackerContext _db;
|
||||
private readonly AppPaths _paths;
|
||||
|
||||
public ProfileCvController(UserManager<ApplicationUser> users, ISummarizerService aiService, JobTrackerContext db, AppPaths paths)
|
||||
/// <summary>
/// Creates the controller from its injected collaborators.
/// </summary>
/// <param name="users">Identity user manager.</param>
/// <param name="aiService">Summarizer service.</param>
/// <param name="db">Job tracker database context.</param>
/// <param name="paths">Application path settings.</param>
/// <param name="cvAiClassifier">
/// Optional block classifier; when omitted, <see cref="NoOpCvAiClassifier.Instance"/> is used instead.
/// </param>
public ProfileCvController(UserManager<ApplicationUser> users, ISummarizerService aiService, JobTrackerContext db, AppPaths paths, ICvAiClassifier? cvAiClassifier = null)
{
    (_users, _aiService, _db, _paths) = (users, aiService, db, paths);

    // The classifier parameter is optional, so substitute the no-op implementation when it is absent.
    _cvAiClassifier = cvAiClassifier is null ? NoOpCvAiClassifier.Instance : cvAiClassifier;
}
|
||||
@@ -338,14 +340,7 @@ public sealed class ProfileCvController : ControllerBase
|
||||
private async Task<StructuredCvProfile> BuildStructuredCvAsync(string text, CancellationToken cancellationToken)
|
||||
{
|
||||
var parseSource = NormalizeTextForStructuredParsing(text);
|
||||
var fallbackSections = ParseSections(parseSource)
|
||||
.Select(section => new StructuredCvSection
|
||||
{
|
||||
Name = section.Name,
|
||||
Content = section.Content,
|
||||
WordCount = CountWords(section.Content),
|
||||
})
|
||||
.ToList();
|
||||
var fallbackSections = await BuildFallbackSectionsAsync(parseSource, cancellationToken);
|
||||
|
||||
var sectionFallback = StructuredCvProfileJson.FromSections(fallbackSections);
|
||||
AnnotateStructuredCv(sectionFallback, "repair", 0.56);
|
||||
@@ -729,12 +724,19 @@ public sealed class ProfileCvController : ControllerBase
|
||||
private static List<StructuredCvLanguage> ParseLanguagesHeuristically(string content)
|
||||
{
|
||||
var languages = new List<StructuredCvLanguage>();
|
||||
foreach (Match match in Regex.Matches(content, @"\b(English|Norwegian|Norsk|German|French|Spanish|Swedish|Danish)\b(?:[^\n.,;:]*?\b(Native|Fluent|Advanced|Intermediate|Beginner|A1|A2|B1|B2|C1|C2|Native speaker)\b)?", RegexOptions.IgnoreCase))
|
||||
var candidates = Regex.Split(content.Replace("\r\n", "\n"), @"[\n,;]+|(?<=[.!?])\s+")
|
||||
.Select(item => item.Trim())
|
||||
.Where(item => item.Length > 1);
|
||||
|
||||
foreach (var candidate in candidates)
|
||||
{
|
||||
var name = NullIfWhitespace(match.Groups[1].Value);
|
||||
var level = NullIfWhitespace(match.Groups[2].Value);
|
||||
if (name is null) continue;
|
||||
languages.Add(new StructuredCvLanguage { Name = name, Level = level });
|
||||
var level = HumanLanguageCatalog.ExtractLevel(candidate);
|
||||
if (level is null) continue;
|
||||
|
||||
foreach (var name in HumanLanguageCatalog.ExtractLanguageNames(candidate))
|
||||
{
|
||||
languages.Add(new StructuredCvLanguage { Name = name, Level = level });
|
||||
}
|
||||
}
|
||||
|
||||
return languages
|
||||
@@ -872,6 +874,86 @@ public sealed class ProfileCvController : ControllerBase
|
||||
.ToList();
|
||||
}
|
||||
|
||||
/// <summary>
/// Builds the fallback section list for a CV: the sections found by <c>ParseSections</c> when at least
/// one of them is named something other than "General", otherwise the sections produced by the AI
/// block classifier (falling back to the parsed sections when the classifier yields nothing).
/// </summary>
/// <param name="parseSource">CV text to split into sections.</param>
/// <param name="cancellationToken">Forwarded to the block classification step.</param>
/// <returns>The chosen list of sections; may be empty.</returns>
private async Task<List<StructuredCvSection>> BuildFallbackSectionsAsync(string parseSource, CancellationToken cancellationToken)
{
    var heuristicSections = new List<StructuredCvSection>();
    foreach (var section in ParseSections(parseSource))
    {
        heuristicSections.Add(new StructuredCvSection
        {
            Name = section.Name,
            Content = section.Content,
            WordCount = CountWords(section.Content),
        });
    }

    // Only consult the classifier when every parsed section is the "General" bucket
    // (this is also the case when nothing was parsed at all).
    var onlyGeneral = heuristicSections.All(section => string.Equals(section.Name, "General", StringComparison.OrdinalIgnoreCase));
    if (!onlyGeneral)
    {
        return heuristicSections;
    }

    var classifiedSections = await ClassifyBlocksIntoSectionsAsync(parseSource, cancellationToken);
    if (classifiedSections.Count > 0)
    {
        return classifiedSections;
    }

    return heuristicSections;
}
|
||||
|
||||
/// <summary>
/// Splits the CV text into blank-line-separated blocks, asks the AI classifier to label each block,
/// and merges blocks that resolve to the same section name into a single section.
/// </summary>
/// <param name="parseSource">CV text to split and classify.</param>
/// <param name="cancellationToken">Checked before each classifier call and forwarded to the classifier.</param>
/// <returns>
/// Sections in first-seen order whose content is not blank; an empty list when no block is at least
/// 24 characters long after trimming.
/// </returns>
/// <exception cref="OperationCanceledException">
/// Thrown when <paramref name="cancellationToken"/> is cancelled between blocks.
/// </exception>
private async Task<List<StructuredCvSection>> ClassifyBlocksIntoSectionsAsync(string parseSource, CancellationToken cancellationToken)
{
    var blocks = Regex.Split(parseSource.Replace("\r\n", "\n"), @"\n\s*\n")
        .Select(block => block.Trim())
        .Where(block => block.Length >= 24)
        .ToList();

    var sectionBuckets = new List<StructuredCvSection>();
    if (blocks.Count == 0) return sectionBuckets;

    foreach (var block in blocks)
    {
        // One classifier round-trip per block: stop promptly once the caller has given up
        // instead of issuing a call for every remaining block.
        cancellationToken.ThrowIfCancellationRequested();

        var classification = await _cvAiClassifier.ClassifyBlockAsync(block, cancellationToken);

        var sectionName = classification?.Section;
        if (!string.IsNullOrWhiteSpace(sectionName) && SectionAliases.TryGetValue(sectionName, out var canonical))
        {
            sectionName = canonical;
        }

        // Unclassified blocks and the classifier's "Other" label both land in the "General" bucket.
        if (string.IsNullOrWhiteSpace(sectionName) || string.Equals(sectionName, "Other", StringComparison.OrdinalIgnoreCase))
        {
            sectionName = "General";
        }

        var content = block;
        if (classification is not null && string.Equals(sectionName, "Work Experience", StringComparison.OrdinalIgnoreCase))
        {
            // Keep the raw block when the classifier extracted no structured fields.
            content = RenderClassifiedWorkExperience(classification) ?? block;
        }

        // Match buckets case-insensitively, like every other section-name comparison in this method,
        // so "skills" and "Skills" are merged rather than emitted as two sections.
        var existing = sectionBuckets.FirstOrDefault(section => string.Equals(section.Name, sectionName, StringComparison.OrdinalIgnoreCase));
        if (existing is null)
        {
            sectionBuckets.Add(new StructuredCvSection { Name = sectionName, Content = content, WordCount = CountWords(content) });
        }
        else
        {
            existing.Content = $"{existing.Content}\n\n{content}".Trim();
            existing.WordCount = CountWords(existing.Content);
        }
    }

    return sectionBuckets.Where(section => !string.IsNullOrWhiteSpace(section.Content)).ToList();
}

/// <summary>
/// Renders a classified work-experience block as a "### title" line, a "company | location | dates"
/// line and "- bullet" lines, skipping any part that is blank.
/// </summary>
/// <param name="classification">Classifier output for the block.</param>
/// <returns>The rendered text, or <c>null</c> when the classification carries no usable field.</returns>
private static string? RenderClassifiedWorkExperience(CvBlockClassificationResult classification)
{
    var lines = new List<string>();

    if (!string.IsNullOrWhiteSpace(classification.Title))
    {
        lines.Add($"### {classification.Title.Trim()}");
    }

    var endIsCurrent = string.Equals(classification.End, "Present", StringComparison.OrdinalIgnoreCase)
        || string.Equals(classification.End, "Current", StringComparison.OrdinalIgnoreCase);
    var dateRange = FormatDateRangeForSection(classification.Start, classification.End, endIsCurrent);

    var meta = string.Join(" | ", new[] { classification.Company, classification.Location, dateRange }.Where(value => !string.IsNullOrWhiteSpace(value)));
    if (!string.IsNullOrWhiteSpace(meta))
    {
        lines.Add(meta);
    }

    if (classification.Bullets is not null)
    {
        lines.AddRange(classification.Bullets.Where(bullet => !string.IsNullOrWhiteSpace(bullet)).Select(bullet => $"- {bullet.Trim()}"));
    }

    return lines.Count > 0 ? string.Join("\n", lines) : null;
}
|
||||
|
||||
/// <summary>
/// Formats a start/end pair as "start - end" for a section's metadata line.
/// </summary>
/// <param name="start">Start date text; null, empty or whitespace means "unknown".</param>
/// <param name="end">End date text; null, empty or whitespace means "unknown".</param>
/// <param name="isCurrent">When true, the end of the range is rendered as "Present".</param>
/// <returns>
/// <c>null</c> when both values are missing; the trimmed end alone when only the start is missing;
/// otherwise "start - end", where a missing end (or <paramref name="isCurrent"/>) renders as "Present".
/// </returns>
private static string? FormatDateRangeForSection(string? start, string? end, bool isCurrent)
{
    // Treat whitespace-only values as missing everywhere, not just in the guards, so a blank
    // end can no longer produce a dangling "2020 - " range.
    var from = string.IsNullOrWhiteSpace(start) ? null : start.Trim();
    var to = string.IsNullOrWhiteSpace(end) ? null : end.Trim();

    if (from is null)
    {
        return to;
    }

    var until = isCurrent || to is null ? "Present" : to;
    return $"{from} - {until}";
}
|
||||
|
||||
private async Task<string> MaybeReconstructStructuredCvAsync(string text, CancellationToken cancellationToken)
|
||||
{
|
||||
var normalized = text.Trim();
|
||||
|
||||
@@ -132,6 +132,7 @@ builder.Services.AddHttpClient("ai-service", client =>
|
||||
|
||||
builder.Services.AddMemoryCache();
|
||||
builder.Services.AddSingleton<ISummarizerService, SummarizerService>();
|
||||
builder.Services.AddSingleton<ICvAiClassifier, CvAiClassifier>();
|
||||
builder.Services.AddSingleton<IGoogleTokenValidator, GoogleTokenValidator>();
|
||||
builder.Services.AddScoped<IGmailOAuthService, GmailOAuthService>();
|
||||
|
||||
|
||||
@@ -0,0 +1,65 @@
|
||||
using System.Net.Http;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
|
||||
namespace JobTrackerApi.Services;
|
||||
|
||||
/// <summary>
/// Result of classifying one block of CV text, as deserialized from the AI service's
/// <c>/cv/classify-block</c> response. Every member is optional.
/// </summary>
/// <param name="Section">Section label assigned to the block.</param>
/// <param name="Confidence">Confidence reported with the label (scale not visible here — TODO confirm).</param>
/// <param name="Reason">Explanation reported with the label, if any.</param>
/// <param name="Title">Extracted title for the block, if any.</param>
/// <param name="Company">Extracted company name, if any.</param>
/// <param name="Location">Extracted location, if any.</param>
/// <param name="Start">Extracted start date as free-form text.</param>
/// <param name="End">Extracted end date as free-form text.</param>
/// <param name="Bullets">Extracted bullet points, if any.</param>
public sealed record CvBlockClassificationResult(
    string? Section,
    double? Confidence,
    string? Reason,
    string? Title,
    string? Company,
    string? Location,
    string? Start,
    string? End,
    List<string>? Bullets);
|
||||
|
||||
/// <summary>
/// Classifies a single block of CV text.
/// </summary>
public interface ICvAiClassifier
{
    /// <summary>
    /// Classifies one block of CV text.
    /// </summary>
    /// <param name="block">The text of the block.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns>The classification, or <c>null</c> when none is available.</returns>
    Task<CvBlockClassificationResult?> ClassifyBlockAsync(string block, CancellationToken cancellationToken = default);
}
|
||||
|
||||
/// <summary>
/// <see cref="ICvAiClassifier"/> that posts each block to the "ai-service" HTTP client's
/// <c>/cv/classify-block</c> endpoint and deserializes the JSON response.
/// Classification is best-effort: any failure other than caller-requested cancellation yields <c>null</c>.
/// </summary>
public sealed class CvAiClassifier : ICvAiClassifier
{
    // JsonSerializerOptions caches serialization metadata per instance, so share one instance
    // rather than allocating (and re-warming) a new one on every call.
    private static readonly JsonSerializerOptions JsonOptions = new(JsonSerializerDefaults.Web)
    {
        PropertyNameCaseInsensitive = true
    };

    private readonly IHttpClientFactory _httpClientFactory;

    /// <summary>Creates the classifier.</summary>
    /// <param name="httpClientFactory">Factory used to obtain the named "ai-service" client.</param>
    public CvAiClassifier(IHttpClientFactory httpClientFactory)
    {
        _httpClientFactory = httpClientFactory;
    }

    /// <summary>
    /// Sends <paramref name="block"/> to the AI service for classification.
    /// </summary>
    /// <param name="block">Text of the block; null, empty or whitespace short-circuits to <c>null</c>.</param>
    /// <param name="cancellationToken">Token forwarded to the HTTP call and to deserialization.</param>
    /// <returns>
    /// The deserialized classification, or <c>null</c> when the input is blank, the service answers
    /// with a non-success status, or the request or deserialization fails.
    /// </returns>
    /// <exception cref="OperationCanceledException">
    /// Thrown when <paramref name="cancellationToken"/> itself was cancelled.
    /// </exception>
    public async Task<CvBlockClassificationResult?> ClassifyBlockAsync(string block, CancellationToken cancellationToken = default)
    {
        if (string.IsNullOrWhiteSpace(block)) return null;

        try
        {
            var client = _httpClientFactory.CreateClient("ai-service");
            var payload = JsonSerializer.Serialize(new { block });
            using var content = new StringContent(payload, Encoding.UTF8, "application/json");
            using var response = await client.PostAsync("/cv/classify-block", content, cancellationToken);
            if (!response.IsSuccessStatusCode) return null;

            await using var stream = await response.Content.ReadAsStreamAsync(cancellationToken);
            return await JsonSerializer.DeserializeAsync<CvBlockClassificationResult>(stream, JsonOptions, cancellationToken);
        }
        catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
        {
            // The caller asked to stop: surface that instead of reporting "no classification".
            // Cancellations that did not come from the caller's token still fall through to the
            // best-effort handler below.
            throw;
        }
        catch (Exception)
        {
            // Deliberate best-effort: transport and JSON failures mean "no classification".
            return null;
        }
    }
}
|
||||
|
||||
/// <summary>
/// <see cref="ICvAiClassifier"/> that never classifies anything; exposed as a single shared instance.
/// </summary>
public sealed class NoOpCvAiClassifier : ICvAiClassifier
{
    // Private so that the shared Instance is the only way to obtain this type.
    private NoOpCvAiClassifier()
    {
    }

    /// <summary>The shared instance.</summary>
    public static NoOpCvAiClassifier Instance { get; } = new NoOpCvAiClassifier();

    /// <summary>Ignores its arguments and returns a completed task whose result is <c>null</c>.</summary>
    /// <param name="block">Unused.</param>
    /// <param name="cancellationToken">Unused.</param>
    /// <returns>A completed task with a <c>null</c> result.</returns>
    public Task<CvBlockClassificationResult?> ClassifyBlockAsync(string block, CancellationToken cancellationToken = default)
    {
        CvBlockClassificationResult? nothing = null;
        return Task.FromResult(nothing);
    }
}
|
||||
@@ -21,6 +21,10 @@ namespace JobTrackerApi.Services
|
||||
string? GpuName,
|
||||
bool? OcrAvailable,
|
||||
string? OcrLanguages,
|
||||
bool? OllamaConfigured,
|
||||
bool? OllamaReachable,
|
||||
string? OllamaModel,
|
||||
bool? OllamaModelAvailable,
|
||||
double? HealthLatencyMs,
|
||||
double? ProbeLatencyMs,
|
||||
DateTimeOffset? LastProbeAt,
|
||||
@@ -310,6 +314,10 @@ namespace JobTrackerApi.Services
|
||||
string? gpuName = null;
|
||||
bool? ocrAvailable = null;
|
||||
string? ocrLanguages = null;
|
||||
bool? ollamaConfigured = null;
|
||||
bool? ollamaReachable = null;
|
||||
string? ollamaModel = null;
|
||||
bool? ollamaModelAvailable = null;
|
||||
double? healthLatencyMs = null;
|
||||
var healthy = false;
|
||||
string? healthError = null;
|
||||
@@ -332,6 +340,10 @@ namespace JobTrackerApi.Services
|
||||
if (doc.RootElement.TryGetProperty("gpu_name", out var gpuNameEl)) gpuName = gpuNameEl.GetString();
|
||||
if (doc.RootElement.TryGetProperty("ocr_available", out var ocrAvailableEl) && ocrAvailableEl.ValueKind is JsonValueKind.True or JsonValueKind.False) ocrAvailable = ocrAvailableEl.GetBoolean();
|
||||
if (doc.RootElement.TryGetProperty("ocr_languages", out var ocrLanguagesEl)) ocrLanguages = ocrLanguagesEl.GetString();
|
||||
if (doc.RootElement.TryGetProperty("ollama_configured", out var ollamaConfiguredEl) && ollamaConfiguredEl.ValueKind is JsonValueKind.True or JsonValueKind.False) ollamaConfigured = ollamaConfiguredEl.GetBoolean();
|
||||
if (doc.RootElement.TryGetProperty("ollama_reachable", out var ollamaReachableEl) && ollamaReachableEl.ValueKind is JsonValueKind.True or JsonValueKind.False) ollamaReachable = ollamaReachableEl.GetBoolean();
|
||||
if (doc.RootElement.TryGetProperty("ollama_model", out var ollamaModelEl)) ollamaModel = ollamaModelEl.GetString();
|
||||
if (doc.RootElement.TryGetProperty("ollama_model_available", out var ollamaModelAvailableEl) && ollamaModelAvailableEl.ValueKind is JsonValueKind.True or JsonValueKind.False) ollamaModelAvailable = ollamaModelAvailableEl.GetBoolean();
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -390,6 +402,10 @@ namespace JobTrackerApi.Services
|
||||
GpuName: gpuName,
|
||||
OcrAvailable: ocrAvailable,
|
||||
OcrLanguages: ocrLanguages,
|
||||
OllamaConfigured: ollamaConfigured,
|
||||
OllamaReachable: ollamaReachable,
|
||||
OllamaModel: ollamaModel,
|
||||
OllamaModelAvailable: ollamaModelAvailable,
|
||||
HealthLatencyMs: healthLatencyMs,
|
||||
ProbeLatencyMs: probeLatencyMs,
|
||||
LastProbeAt: lastProbeAt,
|
||||
|
||||
Reference in New Issue
Block a user