refactor, security updates, cv extraction upgrades

This commit is contained in:
2026-04-11 01:34:32 +02:00
parent 806b200ac5
commit 27fd70a2d7
59 changed files with 6817 additions and 1561 deletions
+670 -72
View File
@@ -52,6 +52,7 @@ public sealed class ProfileCvController : ControllerBase
["certificates"] = "Certifications",
["languages"] = "Languages",
["interests"] = "Interests",
["hobbies"] = "Interests",
};
private const long MaxFileSizeBytes = 5 * 1024 * 1024;
@@ -62,22 +63,34 @@ public sealed class ProfileCvController : ControllerBase
private readonly UserManager<ApplicationUser> _users;
private readonly ISummarizerService _aiService;
private readonly ICvAiClassifier _cvAiClassifier;
private readonly ICvAiNormalizer _cvAiNormalizer;
private readonly JobTrackerContext _db;
private readonly AppPaths _paths;
private readonly ILogger<ProfileCvController> _logger;
private readonly ICvTemplateRenderer _cvTemplateRenderer;
private readonly ICvPdfExporter _cvPdfExporter;
private readonly ICvProcessingQueue _cvProcessingQueue;
private readonly IAppEmailSender _emailSender;
public ProfileCvController(UserManager<ApplicationUser> users, ISummarizerService aiService, JobTrackerContext db, AppPaths paths, ILogger<ProfileCvController>? logger = null, ICvAiClassifier? cvAiClassifier = null, ICvTemplateRenderer? cvTemplateRenderer = null, ICvPdfExporter? cvPdfExporter = null)
public ProfileCvController(UserManager<ApplicationUser> users, ISummarizerService aiService, JobTrackerContext db, AppPaths paths, ILogger<ProfileCvController>? logger = null, ICvAiClassifier? cvAiClassifier = null, ICvAiNormalizer? cvAiNormalizer = null, ICvTemplateRenderer? cvTemplateRenderer = null, ICvPdfExporter? cvPdfExporter = null, ICvProcessingQueue? cvProcessingQueue = null, IAppEmailSender? emailSender = null)
{
_users = users;
_aiService = aiService;
_cvAiClassifier = cvAiClassifier ?? NoOpCvAiClassifier.Instance;
_cvAiNormalizer = cvAiNormalizer ?? NoOpCvAiNormalizer.Instance;
_db = db;
_paths = paths;
_logger = logger ?? Microsoft.Extensions.Logging.Abstractions.NullLogger<ProfileCvController>.Instance;
_cvTemplateRenderer = cvTemplateRenderer ?? new CvTemplateRenderer();
_cvPdfExporter = cvPdfExporter ?? new ThrowingCvPdfExporter();
_cvProcessingQueue = cvProcessingQueue ?? NoOpCvProcessingQueue.Instance;
_emailSender = emailSender ?? NoOpEmailSender.Instance;
}
/// <summary>
/// Fallback <see cref="IAppEmailSender"/> the controller uses when no sender is passed to its
/// constructor. Sending is a no-op that completes immediately.
/// </summary>
private sealed class NoOpEmailSender : IAppEmailSender
{
    /// <summary>Single shared instance of the no-op sender.</summary>
    public static readonly NoOpEmailSender Instance = new NoOpEmailSender();

    /// <summary>Ignores every argument and returns an already-completed task.</summary>
    public Task SendAsync(string toEmail, string subject, string bodyText, CancellationToken cancellationToken = default)
    {
        return Task.CompletedTask;
    }
}
private sealed class ThrowingCvPdfExporter : ICvPdfExporter
@@ -246,26 +259,9 @@ public sealed class ProfileCvController : ControllerBase
return BadRequest("The stored CV artifact could not be found for reprocessing.");
}
await using var stream = System.IO.File.OpenRead(artifact.StoragePath);
var file = new FormFile(stream, 0, stream.Length, "file", artifact.OriginalFileName)
{
Headers = new HeaderDictionary(),
ContentType = artifact.MimeType
};
var extension = Path.GetExtension(artifact.OriginalFileName ?? string.Empty);
var result = await ExtractStructuredCvFromFileAsync(file, extension, HttpContext.RequestAborted);
await ApplyTextExtractionRunAsync(user, "reprocess", result.RawText, result.NormalizedText, result.StructuredCv, artifact.Id, HttpContext.RequestAborted);
return Ok(new
{
reprocessed = true,
artifactId = artifact.Id,
extractionRunId = user.CurrentCvExtractionRunId,
profileVersion = user.CurrentCvProfileVersion,
structuredCv = result.StructuredCv,
sections = result.StructuredCv.Sections,
});
var run = await CreateQueuedRunAsync(user.Id, artifact.Id, "reprocess", HttpContext.RequestAborted);
await _cvProcessingQueue.EnqueueAsync(run.Id, HttpContext.RequestAborted);
return Accepted(new { queued = true, extractionRunId = run.Id, status = run.Status });
}
[HttpPost("rebuild")]
@@ -275,22 +271,9 @@ public sealed class ProfileCvController : ControllerBase
if (user is null) return Unauthorized();
if (string.IsNullOrWhiteSpace(user.ProfileCvText)) return BadRequest("Add or import CV text before rebuilding it.");
var rebuilt = await _aiService.SummarizeSectionAsync(
"Rewrite this CV into a stronger master CV with clear sections such as Professional Summary, Core Skills, Experience Highlights, and Selected Achievements. Preserve only factual claims, avoid inventing employers or metrics, and make the output clean and ready for tailoring to job applications. Return only the rebuilt CV text.",
user.ProfileCvText,
2200,
700);
if (string.IsNullOrWhiteSpace(rebuilt))
{
return BadRequest("The AI service could not rebuild your CV text right now.");
}
user.ProfileCvText = rebuilt.Trim();
var structuredCv = await BuildStructuredCvAsync(user.ProfileCvText, HttpContext.RequestAborted);
await ApplyTextExtractionRunAsync(user, "rebuild", user.ProfileCvText, user.ProfileCvText, structuredCv, user.CurrentCvUploadArtifactId, HttpContext.RequestAborted);
return Ok(new { rebuilt = true, characters = user.ProfileCvText.Length, text = user.ProfileCvText, structuredCv, sections = structuredCv.Sections, extractionRunId = user.CurrentCvExtractionRunId, profileVersion = user.CurrentCvProfileVersion });
var run = await CreateQueuedRunAsync(user.Id, user.CurrentCvUploadArtifactId, "rebuild", HttpContext.RequestAborted);
await _cvProcessingQueue.EnqueueAsync(run.Id, HttpContext.RequestAborted);
return Accepted(new { queued = true, extractionRunId = run.Id, status = run.Status });
}
[HttpPost("rewrite-section")]
@@ -446,14 +429,15 @@ public sealed class ProfileCvController : ControllerBase
var source = string.IsNullOrWhiteSpace(request?.Text) ? user.ProfileCvText : request!.Text;
if (string.IsNullOrWhiteSpace(source)) return BadRequest("Add or import CV text before parsing sections.");
var structuredCv = await BuildStructuredCvAsync(source, HttpContext.RequestAborted);
var normalizedSource = await MaybeReconstructStructuredCvAsync(source, HttpContext.RequestAborted);
var structuredCv = await BuildStructuredCvAsync(normalizedSource, HttpContext.RequestAborted);
if (string.IsNullOrWhiteSpace(request?.Text))
{
user.ProfileCvText = source;
user.ProfileCvText = normalizedSource;
}
await ApplyTextExtractionRunAsync(user, "parse", source, source, structuredCv, user.CurrentCvUploadArtifactId, HttpContext.RequestAborted);
await ApplyTextExtractionRunAsync(user, "parse", source, normalizedSource, structuredCv, user.CurrentCvUploadArtifactId, HttpContext.RequestAborted);
return Ok(new { structuredCv, sections = structuredCv.Sections, totalWords = CountWords(source), extractionRunId = user.CurrentCvExtractionRunId, profileVersion = user.CurrentCvProfileVersion });
return Ok(new { structuredCv, sections = structuredCv.Sections, totalWords = CountWords(normalizedSource), extractionRunId = user.CurrentCvExtractionRunId, profileVersion = user.CurrentCvProfileVersion });
}
[HttpPost("improve")]
@@ -463,22 +447,9 @@ public sealed class ProfileCvController : ControllerBase
if (user is null) return Unauthorized();
if (string.IsNullOrWhiteSpace(user.ProfileCvText)) return BadRequest("Add or import CV text before improving it.");
var improved = await _aiService.SummarizeSectionAsync(
"Rewrite this CV into a cleaner, better-structured master CV profile. Preserve factual claims, employers, skills, and measurable results. Improve clarity, tighten wording, use strong bullet-style phrasing, and keep it ready for further tailoring to specific roles. Return only the improved CV text.",
user.ProfileCvText,
1800,
500);
if (string.IsNullOrWhiteSpace(improved))
{
return BadRequest("The AI service could not improve your CV text right now.");
}
user.ProfileCvText = improved.Trim();
var structuredCv = await BuildStructuredCvAsync(user.ProfileCvText, HttpContext.RequestAborted);
await ApplyTextExtractionRunAsync(user, "improve", user.ProfileCvText, user.ProfileCvText, structuredCv, user.CurrentCvUploadArtifactId, HttpContext.RequestAborted);
return Ok(new { improved = true, characters = user.ProfileCvText.Length, text = user.ProfileCvText, structuredCv, sections = structuredCv.Sections, extractionRunId = user.CurrentCvExtractionRunId, profileVersion = user.CurrentCvProfileVersion });
var run = await CreateQueuedRunAsync(user.Id, user.CurrentCvUploadArtifactId, "improve", HttpContext.RequestAborted);
await _cvProcessingQueue.EnqueueAsync(run.Id, HttpContext.RequestAborted);
return Accepted(new { queued = true, extractionRunId = run.Id, status = run.Status });
}
private static string BuildRewriteSourceText(string? sectionName, string? sourceText, StructuredCvProfile structuredCv)
@@ -671,6 +642,13 @@ public sealed class ProfileCvController : ControllerBase
private async Task<StructuredCvProfile> BuildStructuredCvAsync(string text, CancellationToken cancellationToken)
{
if (LooksLikeNormalizedMarkdownCv(text))
{
var normalized = BuildStructuredCvFromNormalizedMarkdown(text);
AnnotateStructuredCv(normalized, "normalized-markdown", 0.78);
return StructuredCvProfileJson.Normalize(normalized);
}
var parseSource = NormalizeTextForStructuredParsing(text);
var parsedSections = ParseSections(parseSource)
.Select(section => new StructuredCvSection
@@ -711,6 +689,40 @@ public sealed class ProfileCvController : ControllerBase
var extracted = await TryExtractStructuredCvAsync(parseSource, cancellationToken);
var merged = StructuredCvProfileJson.Merge(extracted, fallback);
merged.Contact.FullName ??= GuessFullName(text) ?? GuessFullNameFromEmail(merged.Contact.Email);
if (!IsPlausibleLocationValue(merged.Contact.Location, merged.Contact.FullName))
{
merged.Contact.Location = PreferDetectedLocation(text, null, merged.Contact.FullName);
}
merged.Jobs = merged.Jobs
.Where(job => !LooksLikePersonName(job.Title ?? string.Empty))
.ToList();
var reparsedJobs = ParseJobsHeuristically(text)
.Where(job => !LooksLikePersonName(job.Title ?? string.Empty))
.ToList();
var existingFirstTitle = merged.Jobs.FirstOrDefault()?.Title;
var reparsedFirstTitle = reparsedJobs.FirstOrDefault()?.Title;
if (LooksLikePersonName(existingFirstTitle ?? string.Empty)
&& LooksLikeRoleOrHeadline(reparsedFirstTitle ?? string.Empty)
&& ArePlausibleJobs(reparsedJobs, merged.Contact.FullName))
{
merged.Jobs = reparsedJobs;
}
else if (ArePlausibleJobs(merged.Jobs, merged.Contact.FullName))
{
if (ScoreJobs(reparsedJobs, merged.Contact.FullName) > ScoreJobs(merged.Jobs, merged.Contact.FullName))
{
merged.Jobs = reparsedJobs;
}
}
else if (ArePlausibleJobs(reparsedJobs, merged.Contact.FullName))
{
merged.Jobs = reparsedJobs;
}
return StructuredCvProfileJson.Normalize(merged);
}
@@ -829,6 +841,167 @@ public sealed class ProfileCvController : ControllerBase
await _db.SaveChangesAsync(cancellationToken);
}
/// <summary>
/// Creates a <see cref="CvExtractionRun"/> in the "queued" state, saves it and returns the saved entity.
/// </summary>
/// <param name="ownerUserId">Id of the user the run belongs to.</param>
/// <param name="artifactId">Upload artifact the run refers to, or <c>null</c> when there is none.</param>
/// <param name="trigger">Name of the operation that requested the run (stored on the run as-is).</param>
/// <param name="cancellationToken">Token passed to the database save.</param>
private async Task<CvExtractionRun> CreateQueuedRunAsync(string ownerUserId, int? artifactId, string trigger, CancellationToken cancellationToken)
{
    var queuedAt = DateTimeOffset.UtcNow;

    // The run records the parser/normalizer/prompt versions that were current when it was queued.
    var queuedRun = new CvExtractionRun
    {
        Status = "queued",
        Trigger = trigger,
        OwnerUserId = ownerUserId,
        ArtifactId = artifactId,
        StartedAtUtc = queuedAt,
        ParserVersion = ParserVersion,
        NormalizerVersion = NormalizerVersion,
        LlmPromptVersion = LlmPromptVersion,
    };

    _db.CvExtractionRuns.Add(queuedRun);
    await _db.SaveChangesAsync(cancellationToken);

    return queuedRun;
}
/// <summary>
/// Executes a previously queued CV extraction run. Loads the run and its owner, marks the run
/// "running", performs the work selected by <c>run.Trigger</c> ("rebuild", "improve" or
/// "reprocess"), applies the result and sends a completion e-mail. A failure raised while
/// processing is logged, recorded on the run as "failed" and reported by e-mail; it is not rethrown.
/// </summary>
/// <param name="runId">Id of the run to process. An unknown id is ignored.</param>
/// <param name="cancellationToken">Token passed to database, extraction and e-mail operations.</param>
// NOTE(review): this is a public method on a controller with no HTTP attribute. [NonAction] keeps it
// from being exposed as a routable action that accepts an arbitrary run id; in-process callers are unaffected.
[NonAction]
public async Task ProcessQueuedRunAsync(int runId, CancellationToken cancellationToken)
{
    var run = await _db.CvExtractionRuns.FirstOrDefaultAsync(x => x.Id == runId, cancellationToken);
    if (run is null) return;

    var user = await _users.FindByIdAsync(run.OwnerUserId);
    if (user is null)
    {
        run.Status = "failed";
        run.ErrorMessage = "CV processing user was not found.";
        run.CompletedAtUtc = DateTimeOffset.UtcNow;
        await _db.SaveChangesAsync(cancellationToken);
        return;
    }

    run.Status = "running";
    run.ErrorMessage = null;
    await _db.SaveChangesAsync(cancellationToken);

    try
    {
        switch (run.Trigger)
        {
            case "rebuild":
                await ApplyQueuedAiRewriteAsync(
                    run,
                    user,
                    "Rewrite this CV into a stronger master CV with clear sections such as Professional Summary, Core Skills, Experience Highlights, and Selected Achievements. Preserve only factual claims, avoid inventing employers or metrics, and make the output clean and ready for tailoring to job applications. Return only the rebuilt CV text.",
                    2200,
                    700,
                    "Add or import CV text before rebuilding it.",
                    "The AI service could not rebuild your CV text right now.",
                    cancellationToken);
                break;

            case "improve":
                await ApplyQueuedAiRewriteAsync(
                    run,
                    user,
                    "Rewrite this CV into a cleaner, better-structured master CV profile. Preserve factual claims, employers, skills, and measurable results. Improve clarity, tighten wording, use strong bullet-style phrasing, and keep it ready for further tailoring to specific roles. Return only the improved CV text.",
                    1800,
                    500,
                    "Add or import CV text before improving it.",
                    "The AI service could not improve your CV text right now.",
                    cancellationToken);
                break;

            case "reprocess":
            {
                // The artifact must belong to the run's owner.
                var artifact = await _db.CvUploadArtifacts.FirstOrDefaultAsync(x => x.Id == run.ArtifactId && x.OwnerUserId == user.Id, cancellationToken);
                if (artifact is null) throw new InvalidOperationException("Upload a CV before reprocessing it.");
                if (string.IsNullOrWhiteSpace(artifact.StoragePath) || !System.IO.File.Exists(artifact.StoragePath))
                {
                    throw new InvalidOperationException("The stored CV artifact could not be found for reprocessing.");
                }

                // Wrap the stored file in a FormFile so it goes through the same extraction path as an upload.
                await using var stream = System.IO.File.OpenRead(artifact.StoragePath);
                var file = new FormFile(stream, 0, stream.Length, "file", artifact.OriginalFileName)
                {
                    Headers = new HeaderDictionary(),
                    ContentType = artifact.MimeType
                };
                var extension = Path.GetExtension(artifact.OriginalFileName ?? string.Empty);
                var result = await ExtractStructuredCvFromFileAsync(file, extension, cancellationToken);
                await ApplyQueuedRunResultAsync(run, user, result.RawText, result.NormalizedText, result.StructuredCv, artifact.Id, cancellationToken);
                break;
            }

            default:
                throw new InvalidOperationException($"Unsupported CV processing trigger '{run.Trigger}'.");
        }

        await SendRunCompletionEmailAsync(user, run, true, cancellationToken);
    }
    catch (Exception ex)
    {
        // Log first so the original failure is never lost if persisting the failed state also throws.
        _logger.LogWarning(ex, "CV processing run {RunId} failed for user {UserId}", run.Id, user.Id);

        run.Status = "failed";
        run.ErrorMessage = ex.Message;
        run.CompletedAtUtc = DateTimeOffset.UtcNow;

        // Deliberately not cancellable: when the failure was caused by cancellation, saving with the
        // same token would throw again and leave the run stuck in the "running" state.
        await _db.SaveChangesAsync(CancellationToken.None);

        await SendRunCompletionEmailAsync(user, run, false, cancellationToken);
    }
}

/// <summary>
/// Shared body of the "rebuild" and "improve" triggers: sends the user's stored CV text to the AI
/// service with the given instruction, rebuilds the structured profile from the trimmed answer and
/// applies it to the run.
/// </summary>
/// <param name="run">Run being processed.</param>
/// <param name="user">Owner of the run; supplies the CV text to rewrite.</param>
/// <param name="instruction">Prompt passed to the AI service.</param>
/// <param name="firstLimit">Third argument forwarded unchanged to <c>SummarizeSectionAsync</c>.</param>
/// <param name="secondLimit">Fourth argument forwarded unchanged to <c>SummarizeSectionAsync</c>.</param>
/// <param name="missingTextMessage">Error message used when the user has no CV text.</param>
/// <param name="emptyResultMessage">Error message used when the AI service returns no text.</param>
/// <param name="cancellationToken">Token passed to the structured-profile build and the apply step.</param>
/// <exception cref="InvalidOperationException">The user has no CV text or the AI service returned nothing.</exception>
private async Task ApplyQueuedAiRewriteAsync(
    CvExtractionRun run,
    ApplicationUser user,
    string instruction,
    int firstLimit,
    int secondLimit,
    string missingTextMessage,
    string emptyResultMessage,
    CancellationToken cancellationToken)
{
    if (string.IsNullOrWhiteSpace(user.ProfileCvText)) throw new InvalidOperationException(missingTextMessage);

    var rewritten = await _aiService.SummarizeSectionAsync(instruction, user.ProfileCvText, firstLimit, secondLimit);
    if (string.IsNullOrWhiteSpace(rewritten)) throw new InvalidOperationException(emptyResultMessage);

    // The rewritten text serves as both the raw and the normalized text of the run.
    var normalizedText = rewritten.Trim();
    var structuredCv = await BuildStructuredCvAsync(normalizedText, cancellationToken);
    await ApplyQueuedRunResultAsync(run, user, normalizedText, normalizedText, structuredCv, run.ArtifactId, cancellationToken);
}
/// <summary>
/// Stores the outcome of a processed run and makes it the user's current CV. Stamps the structured
/// profile with the next profile version and the run id, copies texts and serialized profile onto
/// the run, marks the run "applied", updates the user through the user manager and saves.
/// </summary>
/// <param name="run">Run whose result is being applied.</param>
/// <param name="user">User whose CV fields are overwritten with the result.</param>
/// <param name="rawText">Text stored as the run's raw extracted text.</param>
/// <param name="normalizedText">Text stored on the run and as the user's profile CV text.</param>
/// <param name="structuredCv">Structured profile to stamp, serialize and store.</param>
/// <param name="artifactId">When set, becomes the user's current upload artifact id.</param>
/// <param name="cancellationToken">Token passed to the database saves.</param>
/// <exception cref="InvalidOperationException">
/// The user update did not succeed; the run is saved as "failed" with the joined error descriptions.
/// </exception>
private async Task ApplyQueuedRunResultAsync(CvExtractionRun run, ApplicationUser user, string rawText, string normalizedText, StructuredCvProfile structuredCv, int? artifactId, CancellationToken cancellationToken)
{
    structuredCv.Metadata.ProfileVersion = (user.CurrentCvProfileVersion ?? 0) + 1;
    structuredCv.Metadata.AppliedExtractionRunId = run.Id;
    structuredCv.Metadata.UpdatedAtUtc = DateTimeOffset.UtcNow;

    // Serialize after stamping so the stored JSON carries the new version and run id.
    var structuredJson = StructuredCvProfileJson.Serialize(structuredCv);

    run.RawExtractedText = rawText;
    run.NormalizedText = normalizedText;
    run.StructuredProfileJson = structuredJson;
    run.Status = "applied";
    run.CompletedAtUtc = DateTimeOffset.UtcNow;
    run.AppliedAtUtc = run.CompletedAtUtc;

    user.ProfileCvText = normalizedText;
    user.ProfileCvStructureJson = structuredJson;
    user.CurrentCvExtractionRunId = run.Id;
    user.CurrentCvProfileVersion = structuredCv.Metadata.ProfileVersion;
    if (artifactId.HasValue)
    {
        user.CurrentCvUploadArtifactId = artifactId.Value;
    }

    var update = await _users.UpdateAsync(user);
    if (!update.Succeeded)
    {
        run.Status = "failed";
        run.ErrorMessage = string.Join("; ", update.Errors.Select(e => e.Description));
        // The run was stamped as applied above; a failed run must not keep an applied timestamp.
        run.AppliedAtUtc = null;
        await _db.SaveChangesAsync(cancellationToken);
        throw new InvalidOperationException(run.ErrorMessage);
    }

    await _db.SaveChangesAsync(cancellationToken);
}
/// <summary>
/// Sends the run owner a plain-text e-mail describing the outcome of a run. Does nothing when the
/// user has no e-mail address. A failure to send is logged as a warning and not propagated.
/// </summary>
/// <param name="user">Recipient; <c>user.Email</c> is the target address.</param>
/// <param name="run">Run whose trigger, id, status, error and completion time are reported.</param>
/// <param name="success"><c>true</c> for the success wording, <c>false</c> for the failure wording.</param>
/// <param name="cancellationToken">Token passed to the e-mail sender.</param>
private async Task SendRunCompletionEmailAsync(ApplicationUser user, CvExtractionRun run, bool success, CancellationToken cancellationToken)
{
    if (string.IsNullOrWhiteSpace(user.Email))
    {
        return;
    }

    string subject;
    string body;
    if (success)
    {
        subject = $"Your CV {run.Trigger} is complete";
        body = $"Your CV {run.Trigger} request finished successfully.\n\nRun ID: {run.Id}\nStatus: {run.Status}\nCompleted: {run.CompletedAtUtc:O}\n";
    }
    else
    {
        subject = $"Your CV {run.Trigger} failed";
        body = $"Your CV {run.Trigger} request failed.\n\nRun ID: {run.Id}\nStatus: {run.Status}\nError: {run.ErrorMessage}\nCompleted: {run.CompletedAtUtc:O}\n";
    }

    try
    {
        await _emailSender.SendAsync(user.Email, subject, body, cancellationToken);
    }
    catch (Exception sendError)
    {
        // Notification is best-effort: the outcome is already recorded on the run.
        _logger.LogWarning(sendError, "CV processing completion email failed for run {RunId} user {UserId}", run.Id, user.Id);
    }
}
private static void AnnotateStructuredCv(StructuredCvProfile profile, string method, double confidence)
{
var now = DateTimeOffset.UtcNow;
@@ -914,8 +1087,16 @@ public sealed class ProfileCvController : ControllerBase
foreach (var line in normalized.Split('\n', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries).Take(6))
{
var cleaned = line.Trim().TrimStart('#').Trim();
cleaned = Regex.Replace(cleaned, @"(?<=[a-z])(?=[A-Z])", " ");
if (cleaned.Length < 4 || cleaned.Length > 80) continue;
if (cleaned.Contains('@') || Regex.IsMatch(cleaned, @"\d")) continue;
var nameMatch = Regex.Match(cleaned, @"^(?<name>[A-Z][A-Za-z'`.-]+(?:\s+[A-Z][A-Za-z'`.-]+){1,3})(?:\s+(?:Real Estate Agent|Store Manager|Web Developer|Developer|Engineer|Consultant|Specialist|Analyst).*)?$", RegexOptions.IgnoreCase);
if (nameMatch.Success)
{
return nameMatch.Groups["name"].Value.Trim();
}
if (!Regex.IsMatch(cleaned, @"^[A-Z][A-Za-z'`.-]+(?:\s+[A-Z][A-Za-z'`.-]+){1,4}$")) continue;
return cleaned;
}
@@ -981,8 +1162,8 @@ public sealed class ProfileCvController : ControllerBase
var normalized = parseSource.Replace("\r\n", "\n").Trim();
profile.Contact.Email = NullIfWhitespace(Regex.Match(rawSource, @"[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,}", RegexOptions.IgnoreCase).Value);
profile.Contact.Phone = NullIfWhitespace(Regex.Match(rawSource, @"(?<!\w)(?:\+?\d[\d\s().-]{6,}\d)", RegexOptions.IgnoreCase).Value);
profile.Contact.Website = NullIfWhitespace(Regex.Match(rawSource, @"\b(?:https?://)?(?:www\.)?[A-Z0-9.-]+\.[A-Z]{2,}(?:/[A-Z0-9._~:/?#\[\]@!$&'()*+,;=-]*)?", RegexOptions.IgnoreCase).Value);
profile.Contact.Phone = NormalizeDetectedPhone(Regex.Match(rawSource, @"(?<!\w)(?:\+?\d[\d\s().-]{6,}\d)", RegexOptions.IgnoreCase).Value);
profile.Contact.Website = ExtractPreferredWebsite(rawSource, profile.Contact.Email);
profile.Contact.LinkedIn = NullIfWhitespace(Regex.Match(rawSource, @"(?:linkedin(?:\.com)?/[A-Z0-9._~:/?#\[\]@!$&'()*+,;=-]+)", RegexOptions.IgnoreCase).Value);
profile.Contact.FullName = GuessFullName(rawSource) ?? GuessFullNameFromEmail(profile.Contact.Email);
@@ -991,12 +1172,31 @@ public sealed class ProfileCvController : ControllerBase
if (!string.IsNullOrWhiteSpace(contactSection.Content))
{
var contactFallback = StructuredCvProfileJson.FromSections(new[] { new StructuredCvSection { Name = "Contact", Content = contactSection.Content } });
profile.Contact.Location = contactFallback.Contact.Location;
profile.Contact.Headline ??= contactFallback.Contact.Headline;
profile.Contact.Location = PreferDetectedLocation(contactSection.Content, contactFallback.Contact.Location, profile.Contact.FullName);
profile.Contact.Headline ??= CleanHeadline(contactFallback.Contact.Headline, profile.Contact.FullName);
}
else
{
profile.Contact.Location = NullIfWhitespace(Regex.Match(rawSource, @"\b[A-Z][a-z]+(?:[\s-][A-Z][a-z]+)*(?:,\s*[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*){1,2}\b").Value);
profile.Contact.Location = PreferDetectedLocation(rawSource, NullIfWhitespace(Regex.Match(rawSource, @"\b[A-Z][a-z]+(?:[\s-][A-Z][a-z]+)*(?:,\s*[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*){1,2}\b").Value), profile.Contact.FullName);
}
if (string.IsNullOrWhiteSpace(profile.Contact.Location))
{
var firstTenLines = normalized.Split('\n', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries).Take(10).ToList();
profile.Contact.Location = firstTenLines.FirstOrDefault(line =>
!line.Contains('@')
&& !Regex.IsMatch(line, @"https?://|www\.", RegexOptions.IgnoreCase)
&& Regex.IsMatch(line, @"^[A-Z][A-Za-z.' -]+(?:,\s*[A-Z][A-Za-z.' -]+)?$")
&& !line.Contains("Skills", StringComparison.OrdinalIgnoreCase)
&& !line.Contains("Summary", StringComparison.OrdinalIgnoreCase)
&& !line.Contains("Developer", StringComparison.OrdinalIgnoreCase)
&& !line.Contains("Agent", StringComparison.OrdinalIgnoreCase)
&& !string.Equals(line, profile.Contact.FullName, StringComparison.OrdinalIgnoreCase));
}
if (!string.IsNullOrWhiteSpace(profile.Contact.Location))
{
profile.Contact.Location = Regex.Replace(profile.Contact.Location, @"\bSkills\b.*$", string.Empty, RegexOptions.IgnoreCase).Trim(' ', ',');
}
var summarySection = sections.FirstOrDefault(section => section.Name == "Professional Summary");
@@ -1006,11 +1206,15 @@ public sealed class ProfileCvController : ControllerBase
RegexOptions.IgnoreCase | RegexOptions.Singleline);
if (flattenedSummary.Success)
{
profile.Summary = SplitSentences(flattenedSummary.Groups["body"].Value, 5);
profile.Summary = SplitSentences(flattenedSummary.Groups["body"].Value, 5)
.Where(item => !Regex.IsMatch(item, @"^:?\s*https?://", RegexOptions.IgnoreCase))
.ToList();
}
else if (!string.IsNullOrWhiteSpace(summarySection.Content))
{
profile.Summary = SplitSentences(summarySection.Content, 5);
profile.Summary = SplitSentences(summarySection.Content, 5)
.Where(item => !Regex.IsMatch(item, @"^:?\s*https?://", RegexOptions.IgnoreCase))
.ToList();
}
var interestsSection = sections.FirstOrDefault(section => section.Name == "Interests");
@@ -1041,9 +1245,9 @@ public sealed class ProfileCvController : ControllerBase
}
var skills = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
foreach (Match match in Regex.Matches(rawSource, @"(?<![A-Za-z0-9])(?:C#|\.NET|ASP\.NET|SQL|JavaScript|TypeScript|Python|Ruby on Rails|Ruby|React|Azure DevOps|GitHub|CI/CD)(?![A-Za-z0-9])", RegexOptions.IgnoreCase))
foreach (var skill in ExtractSkillsHeuristically(rawSource))
{
skills.Add(match.Value.Trim());
skills.Add(skill);
}
profile.Skills = skills.ToList();
@@ -1070,6 +1274,10 @@ public sealed class ProfileCvController : ControllerBase
{
profile.Jobs = ParseJobsHeuristically(experienceSection.Content);
}
else if (profile.Jobs.Count == 0)
{
profile.Jobs = ParseJobsHeuristically(normalized);
}
if (profile.OtherSections.Count == 0 && sections.Any(section => section.Name == "General"))
{
@@ -1092,17 +1300,219 @@ public sealed class ProfileCvController : ControllerBase
.ToList();
}
/// <summary>
/// Fixed list of skill phrases that <c>ExtractConservativeSkills</c> searches for verbatim
/// (case-insensitively) in CV text.
/// </summary>
private static readonly string[] ConservativeSkillHints = new[]
{
    // Technical terms
    "C#", ".NET", "ASP.NET", "SQL", "JavaScript", "TypeScript", "Python",
    "Ruby on Rails", "Ruby", "React", "Azure", "Azure DevOps", "GitHub", "CI/CD",
    "HTML5", "CSS", "MySQL", "PHP OOP",
    // Business and soft-skill phrases
    "Project management", "Revenue generation", "Business development", "Effective marketing",
    "Organisational capacity", "Operability and commitment", "Attention to Detail",
    "Property Valuation", "Retail Market Analysis", "Client Relationship Management", "Digital Marketing",
};
private static List<string> SplitListLike(string content)
{
return content
.Replace("\r\n", "\n")
.Split(new[] { '\n', ',', ';' }, StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries)
.Split(new[] { '\n', ',', ';', '•', '●' }, StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries)
.SelectMany(item => item.Contains(" ", StringComparison.Ordinal) ? Regex.Split(item, @"\s{2,}") : new[] { item })
.Select(item => item.Trim().TrimStart('-', '•', '*', ' '))
.Where(item => item.Length > 1)
.Distinct(StringComparer.OrdinalIgnoreCase)
.ToList();
}
/// <summary>
/// Lazily yields every entry of <c>ConservativeSkillHints</c> that occurs in <paramref name="content"/>
/// as a standalone term, i.e. not directly preceded or followed by an ASCII letter or digit.
/// Matching is case-insensitive and results keep the order of the hint list.
/// </summary>
/// <param name="content">Text to search.</param>
private static IEnumerable<string> ExtractConservativeSkills(string content)
{
    return ConservativeSkillHints.Where(hint =>
        Regex.IsMatch(
            content,
            $@"(?<![A-Za-z0-9]){Regex.Escape(hint)}(?![A-Za-z0-9])",
            RegexOptions.IgnoreCase));
}
/// <summary>
/// Joins the bullets with newlines and returns the known skill hints found in the combined text,
/// without case-insensitive duplicates and in first-seen order.
/// </summary>
/// <param name="bullets">Bullet lines to scan.</param>
private static List<string> ExtractSkillsFromBullets(IEnumerable<string> bullets)
{
    var combinedText = string.Join("\n", bullets);

    var alreadySeen = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
    var uniqueSkills = new List<string>();
    foreach (var skill in ExtractConservativeSkills(combinedText))
    {
        if (alreadySeen.Add(skill))
        {
            uniqueSkills.Add(skill);
        }
    }

    return uniqueSkills;
}
/// <summary>
/// Lazily yields skills found in <paramref name="content"/>: first the known skill hints, then the
/// list items of the first "Highlights"/"Skills"-style section, which runs up to the next
/// recognised section keyword or the end of the text. Items are de-duplicated case-insensitively
/// across both sources.
/// </summary>
/// <param name="content">CV text to scan.</param>
private static IEnumerable<string> ExtractSkillsHeuristically(string content)
{
    var seen = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

    foreach (var hint in ExtractConservativeSkills(content))
    {
        if (!seen.Add(hint)) continue;
        yield return hint;
    }

    var skillsSection = Regex.Match(content, @"(?:Highlights|Core Skills|Skills|Technical Skills|Skill Highlights|Competencies)\s*(?<body>.*?)(?=(?:Experience|Education|Languages|Interests|Projects|Certifications|$))", RegexOptions.IgnoreCase | RegexOptions.Singleline);
    if (!skillsSection.Success)
    {
        yield break;
    }

    foreach (var entry in SplitListLike(skillsSection.Groups["body"].Value))
    {
        var text = entry.Trim();

        // Keep only short entries that contain at least three letters.
        var acceptable = text.Length >= 3 && text.Length <= 80 && text.Count(char.IsLetter) >= 3;
        if (acceptable && seen.Add(text))
        {
            yield return text;
        }
    }
}
/// <summary>
/// Filters a phone-number candidate. Returns the candidate (after <c>NullIfWhitespace</c>) when it
/// has at least seven digits and does not trip the long-digit-run check below; otherwise <c>null</c>.
/// </summary>
/// <param name="value">Raw candidate text, possibly null or empty.</param>
private static string? NormalizeDetectedPhone(string? value)
{
    var phone = NullIfWhitespace(value);
    if (phone is null)
    {
        return null;
    }

    var digitCount = phone.Count(char.IsDigit);
    if (digitCount < 7)
    {
        return null;
    }

    // More than 18 digits with a " -" separator and no '+' or '(' is treated as non-phone data
    // (the original code labels this case "raw coordinates").
    var hasPhonePunctuation = phone.Contains('+') || phone.Contains('(');
    var resemblesCoordinates = digitCount > 18 && phone.Contains(" -") && !hasPhonePunctuation;

    return resemblesCoordinates ? null : phone;
}
/// <summary>
/// Validates a website candidate. Candidates that already carry an <c>http://</c> or
/// <c>https://</c> scheme are returned unchanged; scheme-less candidates are reduced to the host
/// parsed from them. Returns <c>null</c> for anything that is not a plausible site.
/// </summary>
/// <param name="value">Raw candidate text, possibly null or empty.</param>
/// <param name="email">
/// Detected e-mail address, if any. A candidate equal to its local part (the text before '@') is
/// rejected as an e-mail fragment.
/// </param>
/// <returns>The accepted website text, or <c>null</c> when the candidate is rejected.</returns>
private static string? NormalizeDetectedWebsite(string? value, string? email)
{
    var trimmed = NullIfWhitespace(value);
    if (trimmed is null) return null;
    if (!trimmed.Contains('.', StringComparison.Ordinal)) return null;
    if (trimmed.Contains('@')) return null;
    if (trimmed.Equals("gmail.com", StringComparison.OrdinalIgnoreCase)) return null;

    // "john.doe" taken from "john.doe@example.com" looks like a host name but is not a website.
    if (!string.IsNullOrWhiteSpace(email))
    {
        var atIndex = email.IndexOf('@');
        if (atIndex > 0 && trimmed.Equals(email.Substring(0, atIndex), StringComparison.OrdinalIgnoreCase)) return null;
    }

    // Test for a real scheme, not just the "http" prefix: hosts such as "httpbin.org" start with
    // "http" but still need a scheme prepended before they can be parsed as absolute URIs.
    var hasScheme = trimmed.StartsWith("http://", StringComparison.OrdinalIgnoreCase)
        || trimmed.StartsWith("https://", StringComparison.OrdinalIgnoreCase);

    var candidate = hasScheme ? trimmed : $"https://{trimmed}";
    if (!Uri.TryCreate(candidate, UriKind.Absolute, out var uri)) return null;
    if (string.IsNullOrWhiteSpace(uri.Host) || !uri.Host.Contains('.', StringComparison.Ordinal)) return null;

    return hasScheme ? trimmed : uri.Host;
}
/// <summary>
/// Scans <paramref name="rawSource"/> for URL-like tokens and returns the first one that
/// <c>NormalizeDetectedWebsite</c> accepts and that is not a LinkedIn address.
/// </summary>
/// <param name="rawSource">Unprocessed CV text.</param>
/// <param name="email">Detected e-mail address, forwarded to <c>NormalizeDetectedWebsite</c>.</param>
/// <returns>The chosen website, or <c>null</c> when no candidate qualifies.</returns>
private static string? ExtractPreferredWebsite(string rawSource, string? email)
{
    foreach (Match match in Regex.Matches(rawSource, @"\b(?:https?://)?(?:www\.)?[A-Z0-9.-]+\.[A-Z]{2,}(?:/[A-Z0-9._~:/?#\[\]@!$&'()*+,;=-]*)?", RegexOptions.IgnoreCase))
    {
        // The host pattern also matches both halves of an e-mail address ("john.doe" and
        // "example.com" in "john.doe@example.com"). A token touching '@' is part of an address,
        // not a website, so it is skipped.
        var end = match.Index + match.Length;
        var precededByAt = match.Index > 0 && rawSource[match.Index - 1] == '@';
        var followedByAt = end < rawSource.Length && rawSource[end] == '@';
        if (precededByAt || followedByAt) continue;

        var candidate = NormalizeDetectedWebsite(match.Value, email);
        if (candidate is null) continue;
        if (candidate.Contains("linkedin.com", StringComparison.OrdinalIgnoreCase)) continue;
        return candidate;
    }

    return null;
}
/// <summary>
/// Picks a location. Returns the cleaned <paramref name="fallback"/> when it is plausible;
/// otherwise the first plausible line among the first ten non-empty lines of
/// <paramref name="source"/>; otherwise <c>null</c>. Trailing "Hobbies"/"Education" text is
/// stripped from every candidate before it is checked.
/// </summary>
/// <param name="source">Text whose leading lines are scanned.</param>
/// <param name="fallback">Previously detected location, if any.</param>
/// <param name="fullName">Person's name, used to reject candidates that are really the name.</param>
private static string? PreferDetectedLocation(string source, string? fallback, string? fullName = null)
{
    const string trailingSectionPattern = @",?\s*(Hobbies|Education)\b.*$";

    var cleanedFallback = NullIfWhitespace(fallback);
    if (cleanedFallback is not null)
    {
        cleanedFallback = Regex.Replace(cleanedFallback, trailingSectionPattern, string.Empty, RegexOptions.IgnoreCase).Trim(' ', ',');
    }

    if (IsPlausibleLocationValue(cleanedFallback, fullName))
    {
        return cleanedFallback;
    }

    // The fallback was just found implausible, so when no line qualifies the result is null.
    return source
        .Replace("\r\n", "\n")
        .Split('\n', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries)
        .Take(10)
        .Select(rawLine => Regex.Replace(rawLine, trailingSectionPattern, string.Empty, RegexOptions.IgnoreCase).Trim(' ', ','))
        .FirstOrDefault(stripped => IsPlausibleLocationValue(stripped, fullName));
}
/// <summary>
/// Heuristic check of whether <paramref name="value"/> could be a location. Rejects empty text,
/// role/headline wording, the person's own name, section words, e-mail or URL text and
/// digit-heavy text. Accepts text starting with a number followed by words, "City, Region"
/// shapes, and short capitalised phrases.
/// </summary>
/// <param name="value">Candidate text.</param>
/// <param name="fullName">Person's name; a candidate equal to it or starting with it is rejected.</param>
private static bool IsPlausibleLocationValue(string? value, string? fullName)
{
    var text = NullIfWhitespace(value);
    if (text is null || LooksLikeRoleOrHeadline(text))
    {
        return false;
    }

    if (!string.IsNullOrWhiteSpace(fullName)
        && (text.Equals(fullName, StringComparison.OrdinalIgnoreCase)
            || text.StartsWith(fullName + " ", StringComparison.OrdinalIgnoreCase)))
    {
        return false;
    }

    // Section words mean the candidate has run into neighbouring CV content.
    foreach (var sectionWord in new[] { "Education", "Hobbies", "Skills", "Summary" })
    {
        if (text.Contains(sectionWord, StringComparison.OrdinalIgnoreCase))
        {
            return false;
        }
    }

    var looksLikeContactData = text.Contains('@') || Regex.IsMatch(text, @"https?://|www\.", RegexOptions.IgnoreCase);
    if (looksLikeContactData || text.Count(char.IsDigit) >= 5)
    {
        return false;
    }

    // A leading number followed by text is accepted as-is (fewer than five digits overall).
    if (Regex.IsMatch(text, @"^\d+\s+.+"))
    {
        return true;
    }

    var collapsed = Regex.Replace(text, @"\s+", " ").Trim(' ', ',');
    if (collapsed.Length > 80)
    {
        return false;
    }

    if (Regex.IsMatch(collapsed, @"^[A-Z][A-Za-z.' -]+,\s*[A-Z][A-Za-z.' -]+(?:,\s*[A-Z][A-Za-z.' -]+)?$"))
    {
        return true;
    }

    return Regex.IsMatch(collapsed, @"^[A-Z][A-Za-z.' -]+(?:\s+[A-Z][A-Za-z.' -]+){0,2}$")
        && !LooksLikeRoleOrHeadline(collapsed);
}
/// <summary>
/// Returns <c>true</c> when <paramref name="value"/> contains one of a fixed set of job-role
/// keywords as a whole word, ignoring case.
/// </summary>
/// <param name="value">Text to inspect.</param>
private static bool LooksLikeRoleOrHeadline(string value)
{
    const string roleKeywordPattern = @"\b(real estate agent|developer|engineer|manager|consultant|specialist|analyst|designer|technician|administrator|architect|director|coordinator|assistant|lead|owner|founder|recruiter|teacher|writer|producer|officer|supervisor|sales)\b";

    var containsRoleKeyword = Regex.IsMatch(value, roleKeywordPattern, RegexOptions.IgnoreCase);
    return containsRoleKeyword;
}
/// <summary>
/// Returns <c>true</c> when <paramref name="value"/> consists of two to four capitalised words
/// (letters, apostrophes, backticks, dots and hyphens) and contains no job-role keyword.
/// </summary>
/// <param name="value">Text to inspect.</param>
private static bool LooksLikePersonName(string value)
{
    if (LooksLikeRoleOrHeadline(value))
    {
        return false;
    }

    return Regex.IsMatch(value, @"^[A-Z][A-Za-z'`.-]+(?:\s+[A-Z][A-Za-z'`.-]+){1,3}$");
}
/// <summary>
/// Returns <c>true</c> when the list contains at least one job that <c>IsPlausibleJob</c> accepts.
/// A null or empty list yields <c>false</c>.
/// </summary>
/// <param name="jobs">Jobs to check.</param>
/// <param name="fullName">Person's name, forwarded to <c>IsPlausibleJob</c>.</param>
private static bool ArePlausibleJobs(List<StructuredCvJob>? jobs, string? fullName)
{
    if (jobs is not { Count: > 0 })
    {
        return false;
    }

    foreach (var job in jobs)
    {
        if (IsPlausibleJob(job, fullName))
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// Scores a job list so two parses of the same CV can be compared. Only the first job is
/// inspected in detail; the list size adds up to three points. A null or empty list scores 0.
/// </summary>
/// <param name="jobs">Jobs to score.</param>
/// <param name="fullName">Person's name, forwarded to <c>IsPlausibleJob</c>.</param>
private static int ScoreJobs(List<StructuredCvJob>? jobs, string? fullName)
{
    if (jobs is null || jobs.Count == 0)
    {
        return 0;
    }

    var lead = jobs[0];
    var hasRoleTitle = !string.IsNullOrWhiteSpace(lead.Title) && LooksLikeRoleOrHeadline(lead.Title);
    var hasAnyDate = !string.IsNullOrWhiteSpace(lead.Start) || !string.IsNullOrWhiteSpace(lead.End);

    var total = Math.Min(jobs.Count, 3);        // up to 3 points for the number of jobs
    total += IsPlausibleJob(lead, fullName) ? 5 : 0;
    total += hasRoleTitle ? 4 : 0;
    total += string.IsNullOrWhiteSpace(lead.Company) ? 0 : 2;
    total += hasAnyDate ? 2 : 0;
    total += lead.Bullets.Count > 0 ? 2 : 0;
    return total;
}
/// <summary>
/// Heuristic check of whether a parsed job entry is real. A job without a title needs supporting
/// evidence (company, location, a date or bullets). A titled job is rejected when the title is the
/// person's name, looks like a person name, contains e-mail or URL text, or is just a date range.
/// A title without any evidence must also contain a job-role keyword.
/// </summary>
/// <param name="job">Job to check; <c>null</c> yields <c>false</c>.</param>
/// <param name="fullName">Person's name, compared against the title.</param>
private static bool IsPlausibleJob(StructuredCvJob? job, string? fullName)
{
    if (job is null)
    {
        return false;
    }

    var companyText = NullIfWhitespace(job.Company);
    var locationText = NullIfWhitespace(job.Location);
    var hasDates = !string.IsNullOrWhiteSpace(job.Start) || !string.IsNullOrWhiteSpace(job.End);
    var hasEvidence = !string.IsNullOrWhiteSpace(companyText)
        || !string.IsNullOrWhiteSpace(locationText)
        || hasDates
        || job.Bullets.Count > 0;

    var jobTitle = NullIfWhitespace(job.Title);
    if (jobTitle is null)
    {
        return hasEvidence;
    }

    var titleIsOwnName = !string.IsNullOrWhiteSpace(fullName) && jobTitle.Equals(fullName, StringComparison.OrdinalIgnoreCase);
    if (titleIsOwnName || LooksLikePersonName(jobTitle))
    {
        return false;
    }

    var titleIsContactData = jobTitle.Contains('@') || Regex.IsMatch(jobTitle, @"https?://|www\.", RegexOptions.IgnoreCase);
    if (titleIsContactData)
    {
        return false;
    }

    // A title that is nothing but a date range ("2019 - 2021", "03/2020 to Present").
    var titleIsDateRange = Regex.IsMatch(jobTitle, @"^(?:\d{2}/\d{4}|\d{4})\s*(?:[-]|to)\s*(?:\d{2}/\d{4}|\d{4}|Present|Current)$", RegexOptions.IgnoreCase);
    if (titleIsDateRange)
    {
        return false;
    }

    return hasEvidence || LooksLikeRoleOrHeadline(jobTitle);
}
/// <summary>
/// Filters a headline candidate. Returns <c>null</c> when it is empty, equals the person's name,
/// contains '@', or has more than three digits; otherwise returns the candidate as produced by
/// <c>NullIfWhitespace</c>.
/// </summary>
/// <param name="value">Raw headline text.</param>
/// <param name="fullName">Person's name; a headline equal to it (ignoring case) is dropped.</param>
private static string? CleanHeadline(string? value, string? fullName)
{
    var headline = NullIfWhitespace(value);
    if (headline is null)
    {
        return null;
    }

    var repeatsName = !string.IsNullOrWhiteSpace(fullName)
        && headline.Equals(fullName, StringComparison.OrdinalIgnoreCase);
    if (repeatsName)
    {
        return null;
    }

    var looksLikeContactData = headline.Contains('@') || headline.Count(char.IsDigit) > 3;
    return looksLikeContactData ? null : headline;
}
private static List<StructuredCvLanguage> ParseLanguagesHeuristically(string content)
{
var languages = new List<StructuredCvLanguage>();
@@ -1129,7 +1539,8 @@ public sealed class ProfileCvController : ControllerBase
private static List<StructuredCvEducation> ParseEducationHeuristically(string content)
{
var blocks = Regex.Split(content, @"\n\s*\n")
var normalized = content.Replace("\r\n", "\n").Trim();
var blocks = Regex.Split(normalized, @"\n\s*\n|(?=###\s+)|(?=(?:Bachelor|Master|Doctor|Associate|Diploma|Certificate|BSc|BA|MSc|MA|PhD)\b)", RegexOptions.IgnoreCase)
.Select(block => block.Trim())
.Where(block => block.Length > 0)
.ToList();
@@ -1137,6 +1548,13 @@ public sealed class ProfileCvController : ControllerBase
var items = new List<StructuredCvEducation>();
foreach (var block in blocks)
{
var candidate = StructuredCvProfileJson.FromSections(new[] { new StructuredCvSection { Name = "Education", Content = block } }).Education;
if (candidate.Count > 0)
{
items.AddRange(candidate);
continue;
}
var lines = block.Split('\n', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries).ToList();
if (lines.Count == 0) continue;
@@ -1162,7 +1580,88 @@ public sealed class ProfileCvController : ControllerBase
private static List<StructuredCvJob> ParseJobsHeuristically(string content)
{
var normalized = content.Replace("\r\n", "\n");
var normalized = content.Replace("\r\n", "\n").Trim();
var structured = StructuredCvProfileJson.FromSections(new[] { new StructuredCvSection { Name = "Work Experience", Content = normalized } }).Jobs;
if (ArePlausibleJobs(structured, null))
{
return structured;
}
var simpleLines = normalized.Split('\n', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries);
var inlineDateIndex = Array.FindIndex(simpleLines, line => Regex.IsMatch(line, @".+\d{2}/\d{4}\s+to\s+\d{2}/\d{4}", RegexOptions.IgnoreCase) || Regex.IsMatch(line, @".+\d{4}\s*(?:[-]|to)\s*(?:\d{4}|Present|Current)", RegexOptions.IgnoreCase));
if (inlineDateIndex >= 0)
{
var titleLine = Regex.Replace(simpleLines[inlineDateIndex], @"\s*[-]?\s*\d{2}/\d{4}\s+to\s+\d{2}/\d{4}.*$", string.Empty, RegexOptions.IgnoreCase);
titleLine = Regex.Replace(titleLine, @"\s*[-]?\s*\d{4}\s*[-]\s*(?:\d{4}|Present|Current).*$", string.Empty, RegexOptions.IgnoreCase).Trim();
var companyOrLocation = inlineDateIndex + 1 < simpleLines.Length ? simpleLines[inlineDateIndex + 1] : null;
var datesMatch = Regex.Match(simpleLines[inlineDateIndex], @"(\d{2}/\d{4}|\d{4})\s*(?:to|[-])\s*(\d{2}/\d{4}|\d{4}|Present|Current)", RegexOptions.IgnoreCase);
var bullets = simpleLines.Skip(inlineDateIndex + 2).Where(line => line.Length > 12).ToList();
if (!string.IsNullOrWhiteSpace(titleLine))
{
return new List<StructuredCvJob>
{
new StructuredCvJob
{
Title = titleLine,
Company = companyOrLocation,
Start = datesMatch.Success ? datesMatch.Groups[1].Value : null,
End = datesMatch.Success ? datesMatch.Groups[2].Value : null,
IsCurrent = datesMatch.Success && (string.Equals(datesMatch.Groups[2].Value, "Present", StringComparison.OrdinalIgnoreCase) || string.Equals(datesMatch.Groups[2].Value, "Current", StringComparison.OrdinalIgnoreCase)),
Bullets = bullets,
Skills = ExtractSkillsFromBullets(bullets),
}
};
}
}
var dateIndex = Array.FindIndex(simpleLines, line => Regex.IsMatch(line, @"(?:\d{2}/\d{4}|\d{4})\s*(?:[-]|to)\s*(?:\d{2}/\d{4}|\d{4}|Present|Current)", RegexOptions.IgnoreCase));
if (dateIndex >= 0)
{
if (dateIndex + 2 < simpleLines.Length && LooksLikeRoleOrHeadline(simpleLines[dateIndex + 1]))
{
var datesLine = simpleLines[dateIndex];
var titleLine = simpleLines[dateIndex + 1];
var companyLine = simpleLines[dateIndex + 2];
var bullets = SplitSentences(string.Join(" ", simpleLines.Skip(dateIndex + 3)), 6);
var parts = Regex.Split(datesLine, @"\s*[-]\s*");
return new List<StructuredCvJob>
{
new StructuredCvJob
{
Title = titleLine,
Company = companyLine,
Start = parts.FirstOrDefault(),
End = parts.Skip(1).FirstOrDefault(),
IsCurrent = string.Equals(parts.Skip(1).FirstOrDefault(), "Present", StringComparison.OrdinalIgnoreCase) || string.Equals(parts.Skip(1).FirstOrDefault(), "Current", StringComparison.OrdinalIgnoreCase),
Bullets = bullets,
Skills = ExtractSkillsFromBullets(bullets),
}
};
}
if (dateIndex >= 2)
{
var titleLine = simpleLines[dateIndex - 2];
var locationLine = simpleLines[dateIndex - 1];
var datesLine = simpleLines[dateIndex];
var bullets = simpleLines.Skip(dateIndex + 1).Where(line => line.Length > 12).ToList();
var parts = Regex.Split(datesLine, @"\s*[-]\s*");
return new List<StructuredCvJob>
{
new StructuredCvJob
{
Title = titleLine,
Location = locationLine,
Start = parts.FirstOrDefault(),
End = parts.Skip(1).FirstOrDefault(),
IsCurrent = string.Equals(parts.Skip(1).FirstOrDefault(), "Present", StringComparison.OrdinalIgnoreCase) || string.Equals(parts.Skip(1).FirstOrDefault(), "Current", StringComparison.OrdinalIgnoreCase),
Bullets = bullets,
Skills = ExtractSkillsFromBullets(bullets),
}
};
}
}
var pattern = new Regex(@"(?<title>[A-Z][A-Z\s/&-]{3,})\s*\n(?<dates>\d{4}\s*[-]\s*(?:\d{4}|Present|Current))(?<body>.*?)(?=(?:\n[A-Z][A-Z\s/&-]{3,}\s*\n\d{4}\s*[-]\s*(?:\d{4}|Present|Current))|\z)", RegexOptions.Singleline);
var jobs = new List<StructuredCvJob>();
@@ -1181,7 +1680,7 @@ public sealed class ProfileCvController : ControllerBase
End = NullIfWhitespace(dates.Skip(1).FirstOrDefault()),
IsCurrent = string.Equals(dates.Skip(1).FirstOrDefault(), "present", StringComparison.OrdinalIgnoreCase) || string.Equals(dates.Skip(1).FirstOrDefault(), "current", StringComparison.OrdinalIgnoreCase),
Bullets = bullets,
Skills = bullets.SelectMany(SplitListLike).Where(item => Regex.IsMatch(item, @"^(?:C#|\.NET|ASP\.NET|SQL|JavaScript|TypeScript|Python|Ruby on Rails|Ruby|React|Azure DevOps|GitHub|CI/CD)$", RegexOptions.IgnoreCase)).Distinct(StringComparer.OrdinalIgnoreCase).ToList(),
Skills = ExtractSkillsFromBullets(bullets),
});
}
@@ -1545,7 +2044,20 @@ public sealed class ProfileCvController : ControllerBase
private async Task<string> MaybeReconstructStructuredCvAsync(string text, CancellationToken cancellationToken)
{
var normalized = text.Trim();
if (!LooksLikeFlattenedCvExtraction(normalized))
var forceAiNormalizer = string.Equals(Environment.GetEnvironmentVariable("CV_FORCE_AI_NORMALIZER"), "true", StringComparison.OrdinalIgnoreCase);
if (forceAiNormalizer)
{
var forced = await _cvAiNormalizer.NormalizeAsync(normalized, cancellationToken);
if (!string.IsNullOrWhiteSpace(forced?.NormalizedText))
{
return forced.NormalizedText.Trim();
}
}
var looksFlattened = LooksLikeFlattenedCvExtraction(normalized);
var hasRecoverableSignals = HasRecoverableSectionSignals(normalized);
if (!looksFlattened && hasRecoverableSignals)
{
return normalized;
}
@@ -1556,7 +2068,17 @@ public sealed class ProfileCvController : ControllerBase
2800,
900);
return string.IsNullOrWhiteSpace(reconstructed) ? normalized : reconstructed.Trim();
var candidate = string.IsNullOrWhiteSpace(reconstructed) ? normalized : reconstructed.Trim();
if (LooksLikeFlattenedCvExtraction(candidate) || !HasRecoverableSectionSignals(candidate))
{
var aiNormalized = await _cvAiNormalizer.NormalizeAsync(normalized, cancellationToken);
if (!string.IsNullOrWhiteSpace(aiNormalized?.NormalizedText))
{
return aiNormalized.NormalizedText.Trim();
}
}
return candidate;
}
private static bool LooksLikeFlattenedCvExtraction(string text)
@@ -1575,6 +2097,74 @@ public sealed class ProfileCvController : ControllerBase
|| (normalized.Contains(" + ") && bulletCount > 0 && lineCount <= 10);
}
/// <summary>
/// Detects text that already carries at least one of the expected level-one
/// markdown section headings.
/// </summary>
/// <param name="text">CV text to inspect.</param>
/// <returns>
/// <c>true</c> when some line is exactly <c># </c> followed by one of Contact,
/// Professional Summary, Work Experience, Education, Skills, Languages or Interests
/// (case-insensitive, trailing whitespace allowed); <c>false</c> for blank input or
/// when no such line exists.
/// </returns>
private static bool LooksLikeNormalizedMarkdownCv(string text)
{
    const string SectionHeadingPattern = @"(?im)^#\s+(Contact|Professional Summary|Work Experience|Education|Skills|Languages|Interests)\s*$";

    return !string.IsNullOrWhiteSpace(text) && Regex.IsMatch(text, SectionHeadingPattern);
}
/// <summary>
/// Builds a <c>StructuredCvProfile</c> from CV text that is already organised into
/// sections, then applies a series of clean-up passes to the result.
/// </summary>
/// <param name="text">The sectioned CV text, passed to <c>ParseSections</c>.</param>
/// <returns>
/// The profile produced by <c>StructuredCvProfileJson.FromSections</c>, with its
/// sections attached, a guessed full name when none was parsed, a re-evaluated contact
/// location, a condensed summary, de-duplicated and sorted skills, filtered interests,
/// and job bullets / education details containing "Detail not specified" removed.
/// </returns>
private static StructuredCvProfile BuildStructuredCvFromNormalizedMarkdown(string text)
{
    var sections = ParseSections(text)
        .Select(section => new StructuredCvSection
        {
            Name = section.Name,
            Content = section.Content,
            WordCount = CountWords(section.Content),
        })
        .ToList();

    var profile = StructuredCvProfileJson.FromSections(sections);
    profile.Sections = sections;

    if (string.IsNullOrWhiteSpace(profile.Contact.FullName))
    {
        // Fall back to guessing from the raw text, then from the e-mail address.
        profile.Contact.FullName = GuessFullName(text) ?? GuessFullNameFromEmail(profile.Contact.Email);
    }

    // The text may have no "Contact" section at all, in which case FirstOrDefault
    // yields null. Use the null-conditional operator so location detection falls back
    // to the full text instead of throwing a NullReferenceException.
    var contactSection = sections.FirstOrDefault(section => section.Name == "Contact");
    profile.Contact.Location = PreferDetectedLocation(contactSection?.Content ?? text, profile.Contact.Location, profile.Contact.FullName);

    profile.Summary = CondenseSummary(profile.Summary);
    profile.Skills = OrderSkills(profile.Skills);
    profile.Interests = CleanInterestItems(profile.Interests);

    // Drop entries that carry the literal "Detail not specified" text.
    foreach (var job in profile.Jobs)
    {
        job.Bullets = job.Bullets.Where(bullet => !bullet.Contains("Detail not specified", StringComparison.OrdinalIgnoreCase)).ToList();
    }

    foreach (var education in profile.Education)
    {
        education.Details = education.Details.Where(detail => !detail.Contains("Detail not specified", StringComparison.OrdinalIgnoreCase)).ToList();
    }

    return profile;
}
/// <summary>
/// Collapses a multi-entry summary into a single space-joined paragraph.
/// </summary>
/// <param name="summary">Summary lines as parsed.</param>
/// <returns>
/// The same list instance when it has zero or one entries; otherwise a new list
/// holding the trimmed, space-joined text, or an empty list when that text is blank.
/// </returns>
private static List<string> CondenseSummary(List<string> summary)
{
    if (summary.Count <= 1)
    {
        return summary;
    }

    var paragraph = string.Join(" ", summary).Trim();

    var condensed = new List<string>();
    if (paragraph.Length > 0)
    {
        condensed.Add(paragraph);
    }

    return condensed;
}
/// <summary>
/// Returns the skills de-duplicated and sorted, both case-insensitively (ordinal).
/// </summary>
/// <param name="skills">Skill names as parsed; not modified.</param>
/// <returns>A new list; for duplicates differing only in case, the first occurrence is kept.</returns>
private static List<string> OrderSkills(List<string> skills)
{
    var uniqueSkills = skills.Distinct(StringComparer.OrdinalIgnoreCase).ToList();

    // No two remaining items compare equal, so an in-place sort has a single possible outcome.
    uniqueSkills.Sort(StringComparer.OrdinalIgnoreCase);
    return uniqueSkills;
}
/// <summary>
/// Filters out interest entries that mention "linkedin" or "realtor" (any casing)
/// or that contain a URL marker (<c>http://</c>, <c>https://</c> or <c>www.</c>).
/// </summary>
/// <param name="interests">Interest entries as parsed; not modified.</param>
/// <returns>A new list with the remaining entries in their original order.</returns>
private static List<string> CleanInterestItems(List<string> interests)
{
    var kept = new List<string>();

    foreach (var item in interests)
    {
        if (item.Contains("linkedin", StringComparison.OrdinalIgnoreCase))
        {
            continue;
        }

        if (item.Contains("realtor", StringComparison.OrdinalIgnoreCase))
        {
            continue;
        }

        if (Regex.IsMatch(item, @"https?://|www\.", RegexOptions.IgnoreCase))
        {
            continue;
        }

        kept.Add(item);
    }

    return kept;
}
private static string? CanonicalizeSectionHeading(string line)
{
if (string.IsNullOrWhiteSpace(line)) return null;
@@ -1592,6 +2182,14 @@ public sealed class ProfileCvController : ControllerBase
return SectionAliases.TryGetValue(normalized, out var canonical) ? canonical : null;
}
/// <summary>
/// Checks whether the text shows any sign of CV section structure.
/// </summary>
/// <param name="text">CV text to inspect.</param>
/// <returns>
/// <c>true</c> when <c>ParseSections</c> yields a section not named "General"
/// (ignoring case), or when some line begins with a known section word, either plain
/// or prefixed with <c>#</c>; otherwise <c>false</c>.
/// </returns>
private static bool HasRecoverableSectionSignals(string text)
{
    // NOTE(review): the plain pattern has no end anchor, so it also matches lines that
    // merely start with one of these words (e.g. "Experienced ..."). Confirm this is intended.
    const string PlainHeadingPattern = @"(?im)^\s*(Contact|Professional Summary|Summary|Work Experience|Experience|Education|Skills|Languages|Interests)\s*:?";
    const string MarkdownHeadingPattern = @"(?im)^\s*#\s*(Contact|Professional Summary|Summary|Work Experience|Experience|Education|Skills|Languages|Interests)";

    foreach (var section in ParseSections(text))
    {
        if (!string.Equals(section.Name, "General", StringComparison.OrdinalIgnoreCase))
        {
            return true;
        }
    }

    if (Regex.IsMatch(text, PlainHeadingPattern))
    {
        return true;
    }

    return Regex.IsMatch(text, MarkdownHeadingPattern);
}
private static async Task<string> ExtractTextAsync(IFormFile file, string extension)
{
if (string.Equals(extension, ".txt", StringComparison.OrdinalIgnoreCase) || string.Equals(extension, ".md", StringComparison.OrdinalIgnoreCase))