feat: add server-backed profile CV builder pipeline

This commit is contained in:
2026-04-01 12:25:35 +02:00
parent 22d7dd3573
commit 0551a525a8
7 changed files with 625 additions and 23 deletions
+164
View File
@@ -67,6 +67,8 @@ public static class StructuredCvProfileJson
: primary.Summary.Concat(secondary.Summary).Distinct(StringComparer.OrdinalIgnoreCase).ToList();
if (primary.Jobs.Count == 0) primary.Jobs = secondary.Jobs;
if (primary.Education.Count == 0) primary.Education = secondary.Education;
if (primary.Certifications.Count == 0) primary.Certifications = secondary.Certifications;
if (primary.Projects.Count == 0) primary.Projects = secondary.Projects;
primary.Skills = primary.Skills.Count == 0
? secondary.Skills
: primary.Skills.Concat(secondary.Skills).Distinct(StringComparer.OrdinalIgnoreCase).ToList();
@@ -132,6 +134,14 @@ public static class StructuredCvProfileJson
case "education":
profile.Education = ParseEducation(section.Content);
break;
case "certifications":
case "certificates":
profile.Certifications = ParseCertifications(section.Content);
break;
case "projects":
case "selected projects":
profile.Projects = ParseProjects(section.Content);
break;
default:
profile.OtherSections.Add(new StructuredCvOtherSection
{
@@ -165,6 +175,18 @@ public static class StructuredCvProfileJson
|| !string.IsNullOrWhiteSpace(education.Institution)
|| education.Details.Count > 0)
.ToList();
profile.Certifications = (profile.Certifications ?? new List<StructuredCvCertification>())
.Select(NormalizeCertification)
.Where(certification => !string.IsNullOrWhiteSpace(certification.Name)
|| !string.IsNullOrWhiteSpace(certification.Issuer)
|| certification.Details.Count > 0)
.ToList();
profile.Projects = (profile.Projects ?? new List<StructuredCvProject>())
.Select(NormalizeProject)
.Where(project => !string.IsNullOrWhiteSpace(project.Name)
|| !string.IsNullOrWhiteSpace(project.Role)
|| project.Bullets.Count > 0)
.ToList();
profile.Skills = CleanList(profile.Skills);
profile.Languages = (profile.Languages ?? new List<StructuredCvLanguage>())
.Select(NormalizeLanguage)
@@ -299,6 +321,8 @@ public static class StructuredCvProfileJson
if (trimmed.Any(char.IsDigit) || trimmed.Length > 80) return null;
var normalized = Regex.Replace(trimmed, @"\s+[A-Z](?:\s+[A-Z]){2,}(?:\b.*)?$", string.Empty).Trim();
normalized = Regex.Replace(normalized, @"\b(?:remote|hybrid)\b.*$", string.Empty, RegexOptions.IgnoreCase).Trim();
normalized = Regex.Replace(normalized, @"\b(?:sales representative|developer|engineer|manager|consultant|analyst|designer|specialist|technician)\b.*$", string.Empty, RegexOptions.IgnoreCase).Trim();
normalized = Regex.Replace(normalized, @"\s+", " ").Trim(' ', '|', ';', ':');
var parts = normalized.Split(',', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries);
if (parts.Length == 0 || parts.Length > 4) return null;
@@ -421,10 +445,24 @@ public static class StructuredCvProfileJson
return string.IsNullOrWhiteSpace(trimmed) ? null : trimmed;
}
/// <summary>
/// Maps a qualification onto one of the coarse level labels used by the profile:
/// "PhD", "Master", "Bachelor", "Diploma/Certificate", "Secondary" or "Other".
/// </summary>
/// <param name="explicitValue">A level that was supplied directly; preferred when it is recognisable.</param>
/// <param name="qualificationText">The qualification title, used when the explicit value is missing or not recognisable.</param>
/// <returns>
/// <c>null</c> when both inputs are null/blank; otherwise one of the labels above.
/// "Other" is returned only when neither input matches a known keyword.
/// </returns>
private static string? NormalizeQualificationLevel(string? explicitValue, string? qualificationText)
{
    var explicitCandidate = TrimOrNull(explicitValue);
    var textCandidate = TrimOrNull(qualificationText);
    if (explicitCandidate is null && textCandidate is null) return null;

    // An unrecognised explicit value (including a previously computed "Other") must not mask a
    // qualification text that can be classified, otherwise re-running this after the qualification
    // has changed can never improve the result.
    return Classify(explicitCandidate) ?? Classify(textCandidate) ?? "Other";

    // Returns the level label for a single candidate, or null when no keyword matches.
    // The checks run from the highest level down, so the first hit wins.
    static string? Classify(string? candidate)
    {
        if (candidate is null) return null;
        if (Regex.IsMatch(candidate, @"\b(ph\.?d|doctorate|dphil)\b", RegexOptions.IgnoreCase)) return "PhD";
        // ['\u2019]?s accepts "Master's", "Master’s" and the apostrophe-less "Masters".
        if (Regex.IsMatch(candidate, @"\b(master(?:['\u2019]?s)?|msc|m\.sc|ma|m\.a|mba|meng)\b", RegexOptions.IgnoreCase)) return "Master";
        if (Regex.IsMatch(candidate, @"\b(bachelor(?:['\u2019]?s)?|bsc|b\.sc|ba|b\.a|beng|llb|undergraduate degree)\b", RegexOptions.IgnoreCase)) return "Bachelor";
        if (Regex.IsMatch(candidate, @"\b(diploma|certificate|certification|nvq|btec|level\s*\d+|apprenticeship|associate degree)\b", RegexOptions.IgnoreCase)) return "Diploma/Certificate";
        if (Regex.IsMatch(candidate, @"\b(gcses?|a[-\s]?levels?|secondary|high school|gymnasium)\b", RegexOptions.IgnoreCase)) return "Secondary";
        return null;
    }
}
private static StructuredCvEducation NormalizeEducation(StructuredCvEducation? education)
{
education ??= new StructuredCvEducation();
education.Qualification = NormalizeQualification(education.Qualification);
education.QualificationLevel = NormalizeQualificationLevel(education.QualificationLevel, education.Qualification);
education.Institution = NormalizeInstitution(education.Institution);
education.Location = NormalizeLocationValue(education.Location);
education.Start = NormalizeDateValue(education.Start);
@@ -438,12 +476,41 @@ public static class StructuredCvProfileJson
if (qualificationLooksInstitutional && institutionLooksQualification)
{
(education.Qualification, education.Institution) = (education.Institution, education.Qualification);
education.QualificationLevel = NormalizeQualificationLevel(education.QualificationLevel, education.Qualification);
}
}
return education;
}
/// <summary>
/// Runs every field of a certification entry through its normaliser.
/// The entry is updated in place and returned; a null argument yields a new, normalised empty entry.
/// </summary>
private static StructuredCvCertification NormalizeCertification(StructuredCvCertification? certification)
{
    var entry = certification ?? new StructuredCvCertification();

    // Headline fields.
    entry.Name = NormalizeQualification(entry.Name);
    entry.Issuer = NormalizeInstitution(entry.Issuer);

    // Where and when.
    entry.Location = NormalizeLocationValue(entry.Location);
    entry.Date = NormalizeDateValue(entry.Date);

    // Free-form detail lines.
    entry.Details = CleanList(entry.Details);

    return entry;
}
/// <summary>
/// Runs every field of a project entry through its normaliser.
/// The entry is updated in place and returned; a null argument yields a new, normalised empty entry.
/// Bullets that normalise to null are dropped.
/// </summary>
private static StructuredCvProject NormalizeProject(StructuredCvProject? project)
{
    var entry = project ?? new StructuredCvProject();

    entry.Name = NormalizeQualification(entry.Name);
    entry.Role = NormalizeJobTitle(entry.Role);
    entry.Location = NormalizeLocationValue(entry.Location);
    entry.Start = NormalizeDateValue(entry.Start);
    entry.End = NormalizeDateValue(entry.End);

    // Clean the raw list first, then normalise each bullet and keep only the survivors.
    var keptBullets = new List<string>();
    foreach (var rawBullet in CleanList(entry.Bullets))
    {
        var normalizedBullet = NormalizeBullet(rawBullet);
        if (normalizedBullet is not null)
        {
            keptBullets.Add(normalizedBullet);
        }
    }
    entry.Bullets = keptBullets;

    entry.Skills = CleanList(entry.Skills);
    return entry;
}
private static StructuredCvLanguage NormalizeLanguage(StructuredCvLanguage? language)
{
language ??= new StructuredCvLanguage();
@@ -512,12 +579,42 @@ public static class StructuredCvProfileJson
AddIf(lines, $"### {education.Qualification}".Trim());
var meta = string.Join(" | ", new[] { education.Institution, education.Location, FormatDateRange(education.Start, education.End, false) }.Where(value => !string.IsNullOrWhiteSpace(value)));
AddIf(lines, meta);
if (!string.IsNullOrWhiteSpace(education.QualificationLevel)) AddIf(lines, $"Level: {education.QualificationLevel}");
lines.AddRange(education.Details.Select(detail => $"- {detail}"));
if (lines.Count > 0 && !string.IsNullOrWhiteSpace(lines[^1])) lines.Add(string.Empty);
}
AddSectionIfAny(sections, "Education", lines);
}
if (profile.Certifications.Count > 0)
{
var lines = new List<string>();
foreach (var certification in profile.Certifications)
{
AddIf(lines, $"### {certification.Name}".Trim());
var meta = string.Join(" | ", new[] { certification.Issuer, certification.Location, certification.Date }.Where(value => !string.IsNullOrWhiteSpace(value)));
AddIf(lines, meta);
lines.AddRange(certification.Details.Select(detail => $"- {detail}"));
if (lines.Count > 0 && !string.IsNullOrWhiteSpace(lines[^1])) lines.Add(string.Empty);
}
AddSectionIfAny(sections, "Certifications", lines);
}
if (profile.Projects.Count > 0)
{
var lines = new List<string>();
foreach (var project in profile.Projects)
{
AddIf(lines, $"### {project.Name}".Trim());
var meta = string.Join(" | ", new[] { project.Role, project.Location, FormatDateRange(project.Start, project.End, false) }.Where(value => !string.IsNullOrWhiteSpace(value)));
AddIf(lines, meta);
lines.AddRange(project.Bullets.Select(bullet => $"- {bullet}"));
if (project.Skills.Count > 0) AddIf(lines, $"Skills: {string.Join(", ", project.Skills)}");
if (lines.Count > 0 && !string.IsNullOrWhiteSpace(lines[^1])) lines.Add(string.Empty);
}
AddSectionIfAny(sections, "Projects", lines);
}
AddSectionIfAny(sections, "Skills", profile.Skills);
if (profile.Languages.Count > 0)
@@ -692,9 +789,76 @@ public static class StructuredCvProfileJson
if (metadataWithoutDates.Count > 1) education.Location = metadataWithoutDates[1].NullIfWhitespace();
education.Details = lines.Skip(1).Where(IsBullet).Select(line => line.Trim().TrimStart('-', '•', '*', ' ')).Where(line => !string.IsNullOrWhiteSpace(line)).ToList();
education.QualificationLevel = NormalizeQualificationLevel(null, education.Qualification);
return string.IsNullOrWhiteSpace(education.Qualification) && string.IsNullOrWhiteSpace(education.Institution) && education.Details.Count == 0 ? null : education;
}
/// <summary>
/// Splits the section content into blocks and parses each one as a certification,
/// skipping blocks for which the block parser returns null.
/// </summary>
private static List<StructuredCvCertification> ParseCertifications(string content)
{
    var certifications = new List<StructuredCvCertification>();
    foreach (var block in SplitBlocks(content))
    {
        if (ParseCertificationBlock(block) is { } parsed)
        {
            certifications.Add(parsed);
        }
    }
    return certifications;
}
/// <summary>
/// Parses one certification block: the first line is the name (a leading "###" heading marker is stripped),
/// the non-bullet lines that follow are metadata (issuer, location, date), and bullet lines are details.
/// </summary>
/// <param name="block">Raw text of a single certification block.</param>
/// <returns>
/// The parsed certification, or <c>null</c> when the block is empty or yields no name, issuer or details.
/// </returns>
private static StructuredCvCertification? ParseCertificationBlock(string block)
{
    var lines = block.Replace("\r\n", "\n").Split('\n', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries).ToList();
    if (lines.Count == 0) return null;

    var certification = new StructuredCvCertification();
    if (lines[0].StartsWith("###", StringComparison.Ordinal)) lines[0] = lines[0].TrimStart('#', ' ');
    certification.Name = lines[0].NullIfWhitespace();

    // The Markdown renderer writes issuer, location and date on ONE line joined by " | ", so each
    // metadata line is broken into its pipe-separated segments. Without this a round trip would
    // store "Issuer | Location" as the issuer and never populate the location.
    var metadata = lines.Skip(1)
        .TakeWhile(line => !IsBullet(line))
        .SelectMany(line => line.Split('|', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries))
        .ToList();

    // First date-looking token wins: an optional leading word (e.g. a month) plus a 4-digit year, or Present/Current.
    var date = metadata
        .Select(segment => Regex.Match(segment, @"(?:(?:\w+\s+)?\d{4}|Present|Current)", RegexOptions.IgnoreCase).Value.NullIfWhitespace())
        .FirstOrDefault(value => value is not null);
    certification.Date = date;

    // Remove the date text, then whatever separators are left dangling around the remaining value.
    var metadataWithoutDates = metadata
        .Select(segment => string.IsNullOrWhiteSpace(date) ? segment : segment.Replace(date, string.Empty))
        .Select(segment => segment.Trim(' ', '|', ',', '-'))
        .Where(segment => !string.IsNullOrWhiteSpace(segment))
        .ToList();
    if (metadataWithoutDates.Count > 0) certification.Issuer = metadataWithoutDates[0].NullIfWhitespace();
    if (metadataWithoutDates.Count > 1) certification.Location = metadataWithoutDates[1].NullIfWhitespace();

    certification.Details = lines.Skip(1).Where(IsBullet).Select(line => line.Trim().TrimStart('-', '•', '*', ' ')).Where(line => !string.IsNullOrWhiteSpace(line)).ToList();

    // Keep an entry that only has details, matching the education parser and the normalisation filter.
    var isEmpty = string.IsNullOrWhiteSpace(certification.Name)
        && string.IsNullOrWhiteSpace(certification.Issuer)
        && certification.Details.Count == 0;
    return isEmpty ? null : certification;
}
/// <summary>
/// Splits the section content into blocks and parses each one as a project,
/// skipping blocks for which the block parser returns null.
/// </summary>
private static List<StructuredCvProject> ParseProjects(string content)
{
    var projects = new List<StructuredCvProject>();
    foreach (var block in SplitBlocks(content))
    {
        if (ParseProjectBlock(block) is { } parsed)
        {
            projects.Add(parsed);
        }
    }
    return projects;
}
/// <summary>
/// Parses one project block: the first line is the name (a leading "###" heading marker is stripped),
/// the lines up to the first bullet or "Skills:" line are metadata (role, location, date range),
/// bullet lines become bullets and "Skills:" lines become the skill list.
/// </summary>
/// <param name="block">Raw text of a single project block.</param>
/// <returns>
/// The parsed project, or <c>null</c> when the block is empty or yields no name, role or bullets.
/// </returns>
private static StructuredCvProject? ParseProjectBlock(string block)
{
    // One end of a date range: an optional leading word (e.g. a month) plus a 4-digit year, or Present/Current.
    const string datePoint = @"(?:(?:\w+\s+)?\d{4}|Present|Current)";
    // Ranges may be written with a hyphen, an en dash or an em dash.
    const string rangeSeparator = @"\s*[-\u2013\u2014]\s*";

    var lines = block.Replace("\r\n", "\n").Split('\n', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries).ToList();
    if (lines.Count == 0) return null;

    var project = new StructuredCvProject();
    if (lines[0].StartsWith("###", StringComparison.Ordinal)) lines[0] = lines[0].TrimStart('#', ' ');
    project.Name = lines[0].NullIfWhitespace();

    // The Markdown renderer writes role, location and dates on ONE line joined by " | ", so each
    // metadata line is broken into its pipe-separated segments. Without this a round trip would
    // store "Role | Location" as the role and never populate the location.
    var metadata = lines.Skip(1)
        .TakeWhile(line => !IsBullet(line) && !line.StartsWith("Skills:", StringComparison.OrdinalIgnoreCase))
        .SelectMany(line => line.Split('|', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries))
        .ToList();

    var dateValue = metadata
        .Select(segment => Regex.Match(segment, datePoint + "(?:" + rangeSeparator + datePoint + ")?", RegexOptions.IgnoreCase).Value.NullIfWhitespace())
        .FirstOrDefault(value => value is not null);
    if (!string.IsNullOrWhiteSpace(dateValue))
    {
        var parts = Regex.Split(dateValue, rangeSeparator);
        project.Start = parts.FirstOrDefault().NullIfWhitespace();
        project.End = parts.Skip(1).FirstOrDefault().NullIfWhitespace();
    }

    // Remove the date text, then whatever separators are left dangling around the remaining value.
    var metadataWithoutDates = metadata
        .Select(segment => string.IsNullOrWhiteSpace(dateValue) ? segment : segment.Replace(dateValue, string.Empty))
        .Select(segment => segment.Trim(' ', '|', ',', '-', '\u2013', '\u2014'))
        .Where(segment => !string.IsNullOrWhiteSpace(segment))
        .ToList();
    if (metadataWithoutDates.Count > 0) project.Role = metadataWithoutDates[0].NullIfWhitespace();
    if (metadataWithoutDates.Count > 1) project.Location = metadataWithoutDates[1].NullIfWhitespace();

    project.Bullets = lines.Where(IsBullet).Select(line => line.Trim().TrimStart('-', '•', '*', ' ')).Where(line => !string.IsNullOrWhiteSpace(line)).ToList();
    project.Skills = lines
        .Where(line => line.StartsWith("Skills:", StringComparison.OrdinalIgnoreCase))
        .SelectMany(line => SplitList(line[(line.IndexOf(':') + 1)..]))
        .ToList();

    return string.IsNullOrWhiteSpace(project.Name) && string.IsNullOrWhiteSpace(project.Role) && project.Bullets.Count == 0 ? null : project;
}
private static List<string> SplitBlocks(string content)
{
var normalized = content.Replace("\r\n", "\n").Trim();