using System.Text.Json;
using System.Text.Json.Serialization;
using System.Text.RegularExpressions;

namespace JobTrackerApi.Models;

/// <summary>
/// Helpers for reading, writing, merging and normalizing <see cref="StructuredCvProfile"/> instances.
/// </summary>
public static class StructuredCvProfileJson
{
    /// <summary>
    /// Serializer settings shared by <see cref="Deserialize"/> and <see cref="Serialize"/>:
    /// web defaults, case-insensitive property matching, and null properties omitted on write.
    /// </summary>
    private static readonly JsonSerializerOptions SerializerOptions = new(JsonSerializerDefaults.Web)
    {
        PropertyNameCaseInsensitive = true,
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
    };

    /// <summary>
    /// Technology names that must not be accepted as part of a location (matched case-insensitively).
    /// </summary>
    private static readonly HashSet<string> NonLocationTokens = new(StringComparer.OrdinalIgnoreCase)
    {
        "python", "ruby", "sql", "mysql", "postgresql", "postgres", "sqlite",
        "javascript", "typescript", "react", "node", "node.js", "c#", ".net", "asp.net", "java",
        "azure", "aws", "gcp", "docker", "kubernetes", "terraform",
        "git", "github", "gitlab", "ci/cd", "rest", "graphql", "php", "golang", "go"
    };

    /// <summary>Returns a new, normalized profile with no content.</summary>
    public static StructuredCvProfile Empty() => Normalize(new StructuredCvProfile());

    /// <summary>
    /// Reads a profile from JSON. An object root is deserialized as a <see cref="StructuredCvProfile"/>;
    /// an array root is deserialized as a list of sections and converted with <see cref="FromSections"/>.
    /// </summary>
    /// <param name="json">The JSON text; may be null or blank.</param>
    /// <returns>
    /// A normalized profile. Blank input, any other root kind, and any failure while parsing
    /// all yield <see cref="Empty"/> rather than an exception.
    /// </returns>
    public static StructuredCvProfile Deserialize(string? json)
    {
        if (string.IsNullOrWhiteSpace(json))
        {
            return Empty();
        }

        try
        {
            using var doc = JsonDocument.Parse(json);

            if (doc.RootElement.ValueKind == JsonValueKind.Array)
            {
                var sections = JsonSerializer.Deserialize<List<StructuredCvSection>>(json, SerializerOptions)
                    ?? new List<StructuredCvSection>();
                return FromSections(sections);
            }

            if (doc.RootElement.ValueKind != JsonValueKind.Object)
            {
                return Empty();
            }

            var profile = JsonSerializer.Deserialize<StructuredCvProfile>(json, SerializerOptions)
                ?? new StructuredCvProfile();
            return Normalize(profile);
        }
        catch
        {
            // Deliberately best-effort: unreadable stored JSON degrades to an empty profile.
            return Empty();
        }
    }

    /// <summary>Normalizes <paramref name="profile"/> and writes it as JSON using the shared options.</summary>
    /// <param name="profile">The profile to write; null is serialized as an empty normalized profile.</param>
    /// <returns>The JSON text.</returns>
    public static string Serialize(StructuredCvProfile? profile)
    {
        return JsonSerializer.Serialize(Normalize(profile), SerializerOptions);
    }

    /// <summary>
    /// Combines two profiles, taking values from <paramref name="preferred"/> and filling gaps from
    /// <paramref name="fallback"/>. Scalar contact fields and whole record lists (jobs, education,
    /// certifications, projects, other sections, sections) are only taken from the fallback when the
    /// preferred value is missing or empty; summary, skills, languages and interests are unioned
    /// case-insensitively with the preferred entries first.
    /// </summary>
    /// <param name="preferred">The profile whose values win; normalized and updated in place when non-null.</param>
    /// <param name="fallback">The profile used to fill gaps; normalized in place when non-null.</param>
    /// <returns>The merged, normalized profile.</returns>
    public static StructuredCvProfile Merge(StructuredCvProfile? preferred, StructuredCvProfile?
fallback)
    {
        // Normalize mutates and returns its argument, so a non-null "preferred" is updated in place.
        var primary = Normalize(preferred);
        var secondary = Normalize(fallback);

        // Scalar contact fields: keep the preferred value, fill gaps from the fallback.
        primary.Contact.FullName ??= secondary.Contact.FullName;
        primary.Contact.Headline ??= secondary.Contact.Headline;
        primary.Contact.Email ??= secondary.Contact.Email;
        primary.Contact.Phone ??= secondary.Contact.Phone;
        primary.Contact.Location ??= secondary.Contact.Location;
        primary.Contact.Website ??= secondary.Contact.Website;
        primary.Contact.LinkedIn ??= secondary.Contact.LinkedIn;

        primary.Summary = primary.Summary.Count == 0
            ? secondary.Summary
            : primary.Summary.Concat(secondary.Summary).Distinct(StringComparer.OrdinalIgnoreCase).ToList();

        // Record lists are taken wholesale from the fallback only when the preferred list is empty.
        if (primary.Jobs.Count == 0)
        {
            primary.Jobs = secondary.Jobs;
        }

        if (primary.Education.Count == 0)
        {
            primary.Education = secondary.Education;
        }

        if (primary.Certifications.Count == 0)
        {
            primary.Certifications = secondary.Certifications;
        }

        if (primary.Projects.Count == 0)
        {
            primary.Projects = secondary.Projects;
        }

        primary.Skills = primary.Skills.Count == 0
            ? secondary.Skills
            : primary.Skills.Concat(secondary.Skills).Distinct(StringComparer.OrdinalIgnoreCase).ToList();

        // Languages are de-duplicated by name; the preferred entry wins because it comes first.
        primary.Languages = primary.Languages.Count == 0
            ? secondary.Languages
            : primary.Languages
                .Concat(secondary.Languages)
                .GroupBy(language => language.Name ?? string.Empty, StringComparer.OrdinalIgnoreCase)
                .Select(group => group.First())
                .ToList();

        primary.Interests = primary.Interests.Count == 0
            ? secondary.Interests
            : primary.Interests.Concat(secondary.Interests).Distinct(StringComparer.OrdinalIgnoreCase).ToList();

        if (primary.OtherSections.Count == 0)
        {
            primary.OtherSections = secondary.OtherSections;
        }

        // NOTE(review): Normalize above already populated primary.Sections whenever the preferred
        // profile had any content, so the sections are not regenerated to reflect values merged in
        // from the fallback - confirm this is intended.
        if (primary.Sections.Count == 0)
        {
            primary.Sections = secondary.Sections;
        }

        // Metadata: existing preferred keys win; TryAdd only inserts keys that are absent.
        foreach (var entry in secondary.Metadata.Fields)
        {
            primary.Metadata.Fields.TryAdd(entry.Key, entry.Value);
        }

        return Normalize(primary);
    }

    /// <summary>
    /// Builds a profile from named text sections. Each section is routed by its trimmed,
    /// lower-cased name to the matching parser; unrecognized names become "other" sections.
    /// </summary>
    /// <param name="sections">The sections to interpret; null is treated as empty.</param>
    /// <returns>The normalized profile, which also keeps the normalized input sections.</returns>
    public static StructuredCvProfile FromSections(IEnumerable<StructuredCvSection>?
sections)
    {
        var normalizedSections = NormalizeSections(sections);
        var profile = new StructuredCvProfile
        {
            Sections = normalizedSections,
        };

        foreach (var section in normalizedSections)
        {
            // When the same kind of section appears more than once, the last one replaces the earlier value.
            switch (section.Name.Trim().ToLowerInvariant())
            {
                case "contact":
                    ApplyContact(profile.Contact, section.Content);
                    break;
                case "professional summary":
                case "summary":
                    profile.Summary = SplitList(section.Content);
                    break;
                case "skills":
                case "core skills":
                case "technical skills":
                    profile.Skills = SplitList(section.Content);
                    break;
                case "languages":
                    profile.Languages = ParseLanguages(section.Content);
                    break;
                case "interests":
                    profile.Interests = SplitList(section.Content);
                    break;
                case "work experience":
                case "experience":
                case "employment history":
                    profile.Jobs = ParseJobs(section.Content);
                    break;
                case "education":
                    profile.Education = ParseEducation(section.Content);
                    break;
                case "certifications":
                case "certificates":
                    profile.Certifications = ParseCertifications(section.Content);
                    break;
                case "projects":
                case "selected projects":
                    profile.Projects = ParseProjects(section.Content);
                    break;
                default:
                    profile.OtherSections.Add(new StructuredCvOtherSection
                    {
                        Title = section.Name,
                        Items = SplitList(section.Content),
                    });
                    break;
            }
        }

        return Normalize(profile);
    }

    /// <summary>
    /// Cleans a profile in place: fills in missing containers, normalizes every field and record,
    /// drops records that end up with no identifying content, and generates text sections from the
    /// structured data when the profile carries no non-empty sections of its own.
    /// </summary>
    /// <param name="profile">The profile to clean; null is replaced by a new instance.</param>
    /// <returns>The same (or newly created) instance after normalization.</returns>
    public static StructuredCvProfile Normalize(StructuredCvProfile? profile)
    {
        profile ??= new StructuredCvProfile();
        profile.Version = string.IsNullOrWhiteSpace(profile.Version) ? "1" : profile.Version.Trim();
        profile.Metadata ??= new StructuredCvMetadata();

        // Target-typed so the dictionary's type arguments follow the declared type of Fields.
        profile.Metadata.Fields ??= new();

        profile.Contact = NormalizeContact(profile.Contact);
        profile.Summary = CleanList(profile.Summary);

        // A job is kept only if it still has a title, a company or at least one bullet.
        profile.Jobs = (profile.Jobs ?? new List<StructuredCvJob>())
            .Select(NormalizeJob)
            .Where(job => !string.IsNullOrWhiteSpace(job.Title) || !string.IsNullOrWhiteSpace(job.Company) || job.Bullets.Count > 0)
            .ToList();

        profile.Education = (profile.Education ??
new List<StructuredCvEducation>())
            .Select(NormalizeEducation)
            .Where(education => !string.IsNullOrWhiteSpace(education.Qualification) || !string.IsNullOrWhiteSpace(education.Institution) || education.Details.Count > 0)
            .ToList();

        profile.Certifications = (profile.Certifications ?? new List<StructuredCvCertification>())
            .Select(NormalizeCertification)
            .Where(certification => !string.IsNullOrWhiteSpace(certification.Name) || !string.IsNullOrWhiteSpace(certification.Issuer) || certification.Details.Count > 0)
            .ToList();

        profile.Projects = (profile.Projects ?? new List<StructuredCvProject>())
            .Select(NormalizeProject)
            .Where(project => !string.IsNullOrWhiteSpace(project.Name) || !string.IsNullOrWhiteSpace(project.Role) || project.Bullets.Count > 0)
            .ToList();

        profile.Skills = CleanList(profile.Skills);

        // NormalizeLanguage clears the name of entries it does not accept, so they are dropped here.
        profile.Languages = (profile.Languages ?? new List<StructuredCvLanguage>())
            .Select(NormalizeLanguage)
            .Where(language => !string.IsNullOrWhiteSpace(language.Name))
            .ToList();

        profile.Interests = CleanList(profile.Interests);

        profile.OtherSections = (profile.OtherSections ?? new List<StructuredCvOtherSection>())
            .Select(section => new StructuredCvOtherSection
            {
                Title = TrimOrNull(section?.Title),
                Items = CleanList(section?.Items),
            })
            .Where(section => !string.IsNullOrWhiteSpace(section.Title) || section.Items.Count > 0)
            .ToList();

        // Existing non-empty sections are kept as they are; otherwise they are generated from the fields above.
        var normalizedSections = NormalizeSections(profile.Sections);
        profile.Sections = normalizedSections.Count > 0 ? normalizedSections : BuildSections(profile);
        return profile;
    }

    /// <summary>Trims the free-text contact fields and validates location, website and LinkedIn values.</summary>
    /// <param name="contact">The contact block to clean in place; null is replaced by a new instance.</param>
    /// <returns>The cleaned contact block.</returns>
    private static StructuredCvContact NormalizeContact(StructuredCvContact? contact)
    {
        contact ??= new StructuredCvContact();
        contact.FullName = TrimOrNull(contact.FullName);
        contact.Headline = TrimOrNull(contact.Headline);
        contact.Email = TrimOrNull(contact.Email);
        contact.Phone = TrimOrNull(contact.Phone);
        contact.Location = NormalizeLocationValue(contact.Location);
        contact.Website = NormalizeWebsite(contact.Website);
        contact.LinkedIn = NormalizeLinkedIn(contact.LinkedIn);
        return contact;
    }

    /// <summary>
    /// Cleans a job entry in place: validates title, company, location and dates, repairs title and
    /// company values that appear to be swapped or combined ("Title at Company"), filters bullets and
    /// derives <c>IsCurrent</c> from an end value of "present" or "current".
    /// </summary>
    /// <param name="job">The job to clean; null is replaced by a new instance.</param>
    /// <returns>The cleaned job.</returns>
    private static StructuredCvJob NormalizeJob(StructuredCvJob?
job)
    {
        job ??= new StructuredCvJob();

        var title = NormalizeJobTitle(job.Title);
        var company = NormalizeCompanyName(job.Company);
        var location = NormalizeLocationValue(job.Location);

        // "Engineer at Acme" with no separate company: split on the first " at ".
        if (!string.IsNullOrWhiteSpace(title) && company is null)
        {
            var atSplit = Regex.Match(title, @"^(?<title>.+?)\s+at\s+(?<company>.+)$", RegexOptions.IgnoreCase);
            if (atSplit.Success)
            {
                title = NormalizeJobTitle(atSplit.Groups["title"].Value);
                company = NormalizeCompanyName(atSplit.Groups["company"].Value);
            }
        }

        // Both present but each one only looks like the other kind of value: swap them.
        if (!string.IsNullOrWhiteSpace(title) && !string.IsNullOrWhiteSpace(company))
        {
            var titleLooksLikeCompany = LooksLikeCompanyName(title) && !LooksLikeJobTitle(title);
            var companyLooksLikeTitle = LooksLikeJobTitle(company) && !LooksLikeCompanyName(company);
            if (titleLooksLikeCompany && companyLooksLikeTitle)
            {
                (title, company) = (company, title);
            }
        }

        // A "title" that is really a company name is moved to the company slot when that is free,
        // and is dropped as a title either way.
        if (!string.IsNullOrWhiteSpace(title) && !LooksLikeJobTitle(title) && LooksLikeCompanyName(title))
        {
            if (company is null)
            {
                company = title;
            }

            title = null;
        }

        // The mirror case: a "company" that is really a job title fills an empty title slot.
        if (!string.IsNullOrWhiteSpace(company) && !LooksLikeCompanyName(company) && LooksLikeJobTitle(company) && title is null)
        {
            title = company;
            company = null;
        }

        job.Title = title;
        job.Company = company;
        job.Location = location;
        job.Start = NormalizeDateValue(job.Start);
        job.End = NormalizeDateValue(job.End);
        job.Bullets = CleanList(job.Bullets)
            .Select(NormalizeBullet)
            .Where(bullet => bullet is not null)
            .Select(bullet => bullet!)
            .Where(bullet => IsUsefulJobBullet(bullet, job.Title, job.Company))
            .ToList();
        job.Skills = CleanList(job.Skills);
        job.IsCurrent = job.IsCurrent
            || string.Equals(job.End, "present", StringComparison.OrdinalIgnoreCase)
            || string.Equals(job.End, "current", StringComparison.OrdinalIgnoreCase);
        return job;
    }

    /// <summary>Trims a bullet line and removes leading bullet markers ('-', '•', '*') and spaces.</summary>
    /// <param name="value">The raw bullet text.</param>
    /// <returns>The bullet text, or null when the input is blank or consists only of markers.</returns>
    private static string? NormalizeBullet(string? value)
    {
        if (string.IsNullOrWhiteSpace(value))
        {
            return null;
        }

        var stripped = value.Trim().TrimStart('-', '•', '*', ' ');

        // A line such as "-" leaves nothing behind; report it as "no bullet" instead of an empty string.
        return string.IsNullOrWhiteSpace(stripped) ? null : stripped;
    }

    /// <summary>
    /// Decides whether a bullet carries real content: it must not be a date range, a section heading,
    /// a "Skills:" line, a repeat of the job title or company, or a single token shorter than 12 characters.
    /// </summary>
    /// <param name="value">The bullet text.</param>
    /// <param name="title">The job title to compare against, if any.</param>
    /// <param name="company">The company name to compare against, if any.</param>
    /// <returns>True when the bullet should be kept.</returns>
    private static bool IsUsefulJobBullet(string? value, string? title, string?
company)
    {
        var text = TrimOrNull(value);
        if (text is null)
        {
            return false;
        }

        // Structural lines that ended up among the bullets.
        var isStructural = LooksLikeDateRange(text)
            || LooksLikeSectionHeading(text)
            || text.StartsWith("Skills:", StringComparison.OrdinalIgnoreCase);
        if (isStructural)
        {
            return false;
        }

        // Bullets that merely repeat the heading of the entry.
        var repeatsTitle = title is not null && text.Equals(title, StringComparison.OrdinalIgnoreCase);
        var repeatsCompany = company is not null && text.Equals(company, StringComparison.OrdinalIgnoreCase);
        if (repeatsTitle || repeatsCompany)
        {
            return false;
        }

        // Reject a single short token (fewer than 12 characters and no space).
        return text.Length >= 12 || text.Contains(' ');
    }

    /// <summary>
    /// Validates and tidies a job title: date ranges, section headings and URLs/e-mail addresses are
    /// rejected; whitespace runs are collapsed and surrounding separator characters removed.
    /// </summary>
    /// <param name="value">The raw title.</param>
    /// <returns>The cleaned title, or null when nothing usable remains.</returns>
    private static string? NormalizeJobTitle(string? value)
    {
        if (TrimOrNull(value) is not { } raw)
        {
            return null;
        }

        if (LooksLikeDateRange(raw) || LooksLikeSectionHeading(raw) || LooksLikeUrlOrEmail(raw))
        {
            return null;
        }

        var collapsed = Regex.Replace(raw, @"\s+", " ").Trim(' ', '|', ',', '-', ':');
        return string.IsNullOrWhiteSpace(collapsed) ? null : collapsed;
    }

    /// <summary>
    /// Validates and tidies a company name. In addition to the checks applied to job titles,
    /// "Skills:" lines and values containing both a dot and a space are rejected.
    /// </summary>
    /// <param name="value">The raw company name.</param>
    /// <returns>The cleaned name, or null when nothing usable remains.</returns>
    private static string? NormalizeCompanyName(string? value)
    {
        if (TrimOrNull(value) is not { } raw)
        {
            return null;
        }

        if (LooksLikeDateRange(raw) || LooksLikeSectionHeading(raw) || LooksLikeUrlOrEmail(raw))
        {
            return null;
        }

        if (raw.StartsWith("Skills:", StringComparison.OrdinalIgnoreCase))
        {
            return null;
        }

        var hasDotAndSpace = raw.Contains('.') && raw.Contains(' ');
        if (hasDotAndSpace)
        {
            return null;
        }

        var collapsed = Regex.Replace(raw, @"\s+", " ").Trim(' ', '|', ',', '-', ':');
        return string.IsNullOrWhiteSpace(collapsed) ? null : collapsed;
    }

    /// <summary>
    /// Validates a location and reduces it to at most four comma-separated parts made of letters and
    /// basic punctuation. Values containing digits, values longer than 80 characters, and values
    /// containing a known technology name are rejected; trailing work-mode words and job-title words
    /// are cut off.
    /// </summary>
    /// <param name="value">The raw location.</param>
    /// <returns>The parts joined with ", ", or null when the value is not accepted.</returns>
    private static string? NormalizeLocationValue(string?
value)
    {
        var raw = TrimOrNull(value);
        if (raw is null)
        {
            return null;
        }

        var isOtherKindOfValue = LooksLikeDateRange(raw) || LooksLikeSectionHeading(raw) || LooksLikeUrlOrEmail(raw);
        if (isOtherKindOfValue)
        {
            return null;
        }

        if (raw.Any(char.IsDigit) || raw.Length > 80)
        {
            return null;
        }

        // Each step removes trailing text that is not part of the place name; the order matters.
        // 1. A trailing run of three or more single capital letters separated by whitespace.
        var cleaned = Regex.Replace(raw, @"\s+[A-Z](?:\s+[A-Z]){2,}(?:\b.*)?$", string.Empty).Trim();

        // 2. Everything from "remote" / "hybrid" onwards.
        cleaned = Regex.Replace(cleaned, @"\b(?:remote|hybrid)\b.*$", string.Empty, RegexOptions.IgnoreCase).Trim();

        // 3. Everything from a job-title word onwards.
        cleaned = Regex.Replace(cleaned, @"\b(?:sales representative|developer|engineer|manager|consultant|analyst|designer|specialist|technician)\b.*$", string.Empty, RegexOptions.IgnoreCase).Trim();

        // 4. Collapse whitespace and strip surrounding separators.
        cleaned = Regex.Replace(cleaned, @"\s+", " ").Trim(' ', '|', ';', ':');

        var segments = cleaned.Split(',', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries);
        if (segments.Length is 0 or > 4)
        {
            return null;
        }

        var allSegmentsAreWords = segments.All(segment => Regex.IsMatch(segment, @"^[\p{L}][\p{L}'’\-. ]+$"));
        if (!allSegmentsAreWords)
        {
            return null;
        }

        if (segments.Any(LooksLikeSkillToken))
        {
            return null;
        }

        return string.Join(", ", segments);
    }

    /// <summary>
    /// Reduces a website value to its lower-cased host name. A missing scheme is assumed to be https;
    /// LinkedIn addresses and hosts that do not look like a dotted domain name are rejected.
    /// </summary>
    /// <param name="value">The raw website value.</param>
    /// <returns>The host name only (path and scheme are discarded), or null when not accepted.</returns>
    private static string? NormalizeWebsite(string? value)
    {
        var raw = TrimOrNull(value);
        if (raw is null || raw.Contains("linkedin.com", StringComparison.OrdinalIgnoreCase))
        {
            return null;
        }

        var absolute = raw.Contains("://", StringComparison.Ordinal) ? raw : $"https://{raw}";
        if (!Uri.TryCreate(absolute, UriKind.Absolute, out var uri))
        {
            return null;
        }

        var host = uri.Host.Trim().Trim('.').ToLowerInvariant();
        var isDomainName = !string.IsNullOrWhiteSpace(host)
            && Regex.IsMatch(host, @"^(?:[a-z0-9-]+\.)+[a-z]{2,}$", RegexOptions.IgnoreCase);
        return isDomainName ? host : null;
    }

    /// <summary>
    /// Validates a LinkedIn profile address and rewrites it to the canonical
    /// <c>https://www.linkedin.com/in/…</c> or <c>/pub/…</c> form.
    /// </summary>
    /// <param name="value">The raw LinkedIn value, with or without a scheme.</param>
    /// <returns>The canonical profile URL, or null when the value is not a LinkedIn profile address.</returns>
    private static string? NormalizeLinkedIn(string?
value)
    {
        var trimmed = TrimOrNull(value);
        if (trimmed is null)
        {
            return null;
        }

        var candidate = trimmed.Contains("://", StringComparison.Ordinal) ? trimmed : $"https://{trimmed}";
        if (!Uri.TryCreate(candidate, UriKind.Absolute, out var uri))
        {
            return null;
        }

        // Accept linkedin.com and its subdomains only. A substring test would also accept hosts such
        // as "linkedin.com.example.org" and then present them as genuine LinkedIn profile URLs.
        var host = uri.Host;
        var isLinkedInHost = host.Equals("linkedin.com", StringComparison.OrdinalIgnoreCase)
            || host.EndsWith(".linkedin.com", StringComparison.OrdinalIgnoreCase);
        if (!isLinkedInHost)
        {
            return null;
        }

        // Only /in/… and /pub/… paths with up to two further segments count as profile addresses.
        var path = uri.AbsolutePath.TrimEnd('/');
        if (!Regex.IsMatch(path, @"^/(in|pub)/[^/]+(?:/[^/]+){0,2}$", RegexOptions.IgnoreCase))
        {
            return null;
        }

        return $"https://www.linkedin.com{path}";
    }

    /// <summary>Keeps a date value only when it matches <see cref="LooksLikeDateRange"/>.</summary>
    /// <param name="value">The raw date text.</param>
    /// <returns>The trimmed value, or null when it is blank or not a recognized date form.</returns>
    private static string? NormalizeDateValue(string? value)
    {
        var trimmed = TrimOrNull(value);
        return trimmed is not null && LooksLikeDateRange(trimmed) ? trimmed : null;
    }

    /// <summary>
    /// Tests whether the whole value is a date or a date range. A date is d/m/yyyy, an English month
    /// name or abbreviation followed by a year, a bare four-digit year, "Present" or "Current";
    /// a range is two such dates separated by a hyphen or en dash. Matching ignores case.
    /// </summary>
    /// <param name="value">The text to test.</param>
    /// <returns>True when the entire value is a date or date range.</returns>
    private static bool LooksLikeDateRange(string value)
    {
        return Regex.IsMatch(value, @"^(?:\d{1,2}/\d{1,2}/\d{4}|(?:Jan|January|Feb|February|Mar|March|Apr|April|May|Jun|June|Jul|July|Aug|August|Sep|Sept|September|Oct|October|Nov|November|Dec|December)\s+\d{4}|\d{4}|Present|Current)(?:\s*[-–]\s*(?:\d{1,2}/\d{1,2}/\d{4}|(?:Jan|January|Feb|February|Mar|March|Apr|April|May|Jun|June|Jul|July|Aug|August|Sep|Sept|September|Oct|October|Nov|November|Dec|December)\s+\d{4}|\d{4}|Present|Current))?$", RegexOptions.IgnoreCase);
    }

    /// <summary>Tests whether the value contains an '@', "www.", "http://" or "https://".</summary>
    /// <param name="value">The text to test.</param>
    /// <returns>True when any of those markers is present.</returns>
    private static bool LooksLikeUrlOrEmail(string value)
    {
        return value.Contains('@')
            || value.Contains("www.", StringComparison.OrdinalIgnoreCase)
            || value.Contains("http://", StringComparison.OrdinalIgnoreCase)
            || value.Contains("https://", StringComparison.OrdinalIgnoreCase);
    }

    /// <summary>Tests whether the value is exactly one of the known CV section headings (case-insensitive).</summary>
    /// <param name="value">The text to test.</param>
    /// <returns>True when the value equals a known heading.</returns>
    private static bool LooksLikeSectionHeading(string value)
    {
        return value.Equals("Work Experience", StringComparison.OrdinalIgnoreCase) ||
            value.Equals("Experience", StringComparison.OrdinalIgnoreCase) ||
            value.Equals("Employment History", StringComparison.OrdinalIgnoreCase) ||
            value.Equals("Education", StringComparison.OrdinalIgnoreCase) ||
            value.Equals("Skills", StringComparison.OrdinalIgnoreCase) ||
            value.Equals("Languages", StringComparison.OrdinalIgnoreCase) ||
value.Equals("Interests", StringComparison.OrdinalIgnoreCase) ||
            value.Equals("Contact", StringComparison.OrdinalIgnoreCase) ||
            value.Equals("Professional Summary", StringComparison.OrdinalIgnoreCase) ||
            value.Equals("Summary", StringComparison.OrdinalIgnoreCase);
    }

    /// <summary>
    /// Heuristic: the value contains a common role word, or it has at most six words and does not
    /// look like a company name. Blank values, date ranges and URLs/e-mail addresses never qualify.
    /// </summary>
    /// <param name="value">The text to test.</param>
    /// <returns>True when the value is plausibly a job title.</returns>
    private static bool LooksLikeJobTitle(string value)
    {
        if (string.IsNullOrWhiteSpace(value) || LooksLikeDateRange(value) || LooksLikeUrlOrEmail(value))
        {
            return false;
        }

        return Regex.IsMatch(value, @"\b(developer|engineer|manager|lead|architect|consultant|specialist|analyst|administrator|coordinator|director|designer|intern|officer|owner|founder|teacher|researcher|writer|editor|producer|assistant|technician|supervisor|head)\b", RegexOptions.IgnoreCase)
            || (value.Split(' ', StringSplitOptions.RemoveEmptyEntries).Length <= 6 && !LooksLikeCompanyName(value));
    }

    /// <summary>
    /// Heuristic: the value contains an organization word (ltd, university, bank, …), an ampersand,
    /// or a word of two or more upper-case letters. Blank values, date ranges and URLs/e-mail
    /// addresses never qualify.
    /// </summary>
    /// <param name="value">The text to test.</param>
    /// <returns>True when the value is plausibly a company or organization name.</returns>
    private static bool LooksLikeCompanyName(string value)
    {
        if (string.IsNullOrWhiteSpace(value) || LooksLikeDateRange(value) || LooksLikeUrlOrEmail(value))
        {
            return false;
        }

        return Regex.IsMatch(value, @"\b(inc|llc|ltd|limited|plc|corp|corporation|company|group|university|college|council|municipality|kommune|bank|studio|agency|institute|hospital|school|technologies|technology|systems|solutions|consulting|consultants|partners|foundation|ministry|government)\b", RegexOptions.IgnoreCase)
            || value.Contains('&')
            || Regex.IsMatch(value, @"\b[A-Z]{2,}\b");
    }

    /// <summary>Tests whether the value is one of the known technology names in <c>NonLocationTokens</c>.</summary>
    /// <param name="value">The token to test.</param>
    /// <returns>True when the token, with or without surrounding dots and spaces, is a known technology name.</returns>
    private static bool LooksLikeSkillToken(string value)
    {
        var token = TrimOrNull(value);
        if (token is null)
        {
            return false;
        }

        // Check the token as written first: entries such as ".net" begin with a dot that the
        // punctuation trim below would strip, making them impossible to match.
        if (NonLocationTokens.Contains(token))
        {
            return true;
        }

        return NonLocationTokens.Contains(token.Trim('.', ' '));
    }

    /// <summary>Heuristic: the value contains a qualification word (degree, diploma, BSc, GCSE, "level N", …).</summary>
    /// <param name="value">The text to test.</param>
    /// <returns>True when a qualification word is found.</returns>
    private static bool LooksLikeQualification(string value)
    {
        return Regex.IsMatch(value, @"\b(level\s*\d+|nvq|btec|gcse|a-?level|diploma|certificate|certification|bachelor(?:'s)?|master(?:'s)?|phd|doctorate|mba|ba|bsc|msc|ma|associate|apprenticeship|degree|ict)\b", RegexOptions.IgnoreCase);
    }

    /// <summary>Heuristic: the value contains an educational-institution word (university, college, school, …).</summary>
    /// <param name="value">The text to test.</param>
    /// <returns>True when an institution word is found.</returns>
    private static bool LooksLikeInstitutionName(string value)
    {
        return Regex.IsMatch(value,
@"\b(university|college|school|academy|institute|faculty|campus|council|polytechnic)\b", RegexOptions.IgnoreCase);
    }

    /// <summary>
    /// Validates and tidies a qualification name: date ranges, URLs/e-mail addresses and section
    /// headings are rejected; whitespace runs are collapsed and surrounding separators removed.
    /// </summary>
    /// <param name="value">The raw qualification text.</param>
    /// <returns>The cleaned text, or null when nothing usable remains.</returns>
    private static string? NormalizeQualification(string? value)
    {
        if (TrimOrNull(value) is not { } raw)
        {
            return null;
        }

        var isOtherKindOfValue = LooksLikeDateRange(raw) || LooksLikeUrlOrEmail(raw) || LooksLikeSectionHeading(raw);
        if (isOtherKindOfValue)
        {
            return null;
        }

        var collapsed = Regex.Replace(raw, @"\s+", " ").Trim(' ', '|', ';', ':');
        return string.IsNullOrWhiteSpace(collapsed) ? null : collapsed;
    }

    /// <summary>Validates and tidies an institution name using the same rules as qualification names.</summary>
    /// <param name="value">The raw institution text.</param>
    /// <returns>The cleaned text, or null when nothing usable remains.</returns>
    private static string? NormalizeInstitution(string? value)
    {
        if (TrimOrNull(value) is not { } raw)
        {
            return null;
        }

        var isOtherKindOfValue = LooksLikeDateRange(raw) || LooksLikeUrlOrEmail(raw) || LooksLikeSectionHeading(raw);
        if (isOtherKindOfValue)
        {
            return null;
        }

        var collapsed = Regex.Replace(raw, @"\s+", " ").Trim(' ', '|', ';', ':');
        return string.IsNullOrWhiteSpace(collapsed) ? null : collapsed;
    }

    /// <summary>
    /// Maps free text to one of the fixed level labels "PhD", "Master", "Bachelor",
    /// "Diploma/Certificate", "Secondary" or "Other".
    /// </summary>
    /// <param name="explicitValue">A level supplied directly; used in preference to the qualification text when non-blank.</param>
    /// <param name="qualificationText">The qualification name, used only when no explicit value is given.</param>
    /// <returns>The level label, or null when both inputs are blank.</returns>
    private static string? NormalizeQualificationLevel(string? explicitValue, string? qualificationText)
    {
        var text = TrimOrNull(explicitValue) ?? TrimOrNull(qualificationText);
        if (text is null)
        {
            return null;
        }

        // Checked from the highest level down; the first pattern that matches decides the label.
        (string Pattern, string Level)[] rules =
        {
            (@"\b(phd|doctorate|dphil)\b", "PhD"),
            (@"\b(master(?:'s)?|msc|m\.sc|ma|m\.a|mba|meng|meng)\b", "Master"),
            (@"\b(bachelor(?:'s)?|bsc|b\.sc|ba|b\.a|beng|llb|undergraduate degree)\b", "Bachelor"),
            (@"\b(diploma|certificate|certification|nvq|btec|level\s*\d+|apprenticeship|associate degree)\b", "Diploma/Certificate"),
            (@"\b(gcse|a-?level|secondary|high school|gymnasium)\b", "Secondary"),
        };

        foreach (var (pattern, level) in rules)
        {
            if (Regex.IsMatch(text, pattern, RegexOptions.IgnoreCase))
            {
                return level;
            }
        }

        return "Other";
    }

    /// <summary>
    /// Cleans an education entry in place: validates each field, derives the qualification level,
    /// and swaps qualification and institution when each clearly looks like the other.
    /// </summary>
    /// <param name="education">The entry to clean; null is replaced by a new instance.</param>
    /// <returns>The cleaned entry.</returns>
    private static StructuredCvEducation NormalizeEducation(StructuredCvEducation?
education)
    {
        education ??= new StructuredCvEducation();
        education.Qualification = NormalizeQualification(education.Qualification);
        education.QualificationLevel = NormalizeQualificationLevel(education.QualificationLevel, education.Qualification);
        education.Institution = NormalizeInstitution(education.Institution);
        education.Location = NormalizeLocationValue(education.Location);
        education.Start = NormalizeDateValue(education.Start);
        education.End = NormalizeDateValue(education.End);
        education.Details = CleanList(education.Details);

        if (!string.IsNullOrWhiteSpace(education.Qualification) && !string.IsNullOrWhiteSpace(education.Institution))
        {
            var qualificationLooksInstitutional = LooksLikeInstitutionName(education.Qualification) && !LooksLikeQualification(education.Qualification);
            var institutionLooksQualification = LooksLikeQualification(education.Institution) && !LooksLikeInstitutionName(education.Institution);

            if (qualificationLooksInstitutional && institutionLooksQualification)
            {
                // The level the misplaced, institution-like text produces on its own.
                var levelFromMisplacedText = NormalizeQualificationLevel(null, education.Qualification);

                (education.Qualification, education.Institution) = (education.Institution, education.Qualification);

                // Passing the current level back in as the "explicit" value would return it unchanged,
                // because every level label maps to itself. When the current level is just what the
                // misplaced text yields, derive it again from the corrected qualification; a different
                // (explicitly supplied) level is left alone.
                if (string.Equals(education.QualificationLevel, levelFromMisplacedText, StringComparison.Ordinal))
                {
                    education.QualificationLevel = NormalizeQualificationLevel(null, education.Qualification);
                }
            }
        }

        return education;
    }

    /// <summary>Cleans a certification entry in place by validating name, issuer, location, date and details.</summary>
    /// <param name="certification">The entry to clean; null is replaced by a new instance.</param>
    /// <returns>The cleaned entry.</returns>
    private static StructuredCvCertification NormalizeCertification(StructuredCvCertification? certification)
    {
        certification ??= new StructuredCvCertification();
        certification.Name = NormalizeQualification(certification.Name);
        certification.Issuer = NormalizeInstitution(certification.Issuer);
        certification.Location = NormalizeLocationValue(certification.Location);
        certification.Date = NormalizeDateValue(certification.Date);
        certification.Details = CleanList(certification.Details);
        return certification;
    }

    /// <summary>Cleans a project entry in place by validating name, role, location, dates, bullets and skills.</summary>
    /// <param name="project">The entry to clean; null is replaced by a new instance.</param>
    /// <returns>The cleaned entry.</returns>
    private static StructuredCvProject NormalizeProject(StructuredCvProject?
project)
    {
        project ??= new StructuredCvProject();
        project.Name = NormalizeQualification(project.Name);
        project.Role = NormalizeJobTitle(project.Role);
        project.Location = NormalizeLocationValue(project.Location);
        project.Start = NormalizeDateValue(project.Start);
        project.End = NormalizeDateValue(project.End);

        // Bullets that are null or have nothing left after marker removal (e.g. a lone "-") are dropped.
        project.Bullets = CleanList(project.Bullets)
            .Select(NormalizeBullet)
            .Where(bullet => !string.IsNullOrWhiteSpace(bullet))
            .Select(bullet => bullet!)
            .ToList();
        project.Skills = CleanList(project.Skills);
        return project;
    }

    /// <summary>
    /// Cleans a spoken-language entry in place. The level is taken from the level field or, failing
    /// that, from the name text; the name is kept only when both a name and a level are recognized
    /// by <c>HumanLanguageCatalog</c>, otherwise it is cleared.
    /// </summary>
    /// <param name="language">The entry to clean; null is replaced by a new instance.</param>
    /// <returns>The cleaned entry; a null name marks an entry that was not accepted.</returns>
    private static StructuredCvLanguage NormalizeLanguage(StructuredCvLanguage? language)
    {
        language ??= new StructuredCvLanguage();
        var originalName = TrimOrNull(language.Name);
        var normalizedName = HumanLanguageCatalog.NormalizeLanguageName(originalName);
        var normalizedLevel = HumanLanguageCatalog.ExtractLevel(language.Level)
            ?? HumanLanguageCatalog.ExtractLevel(originalName);

        language.Name = normalizedName is not null && normalizedLevel is not null ? normalizedName : null;
        language.Level = normalizedLevel;
        language.Notes = TrimOrNull(language.Notes);
        return language;
    }

    /// <summary>
    /// Copies sections into new instances with trimmed names and content, drops sections whose
    /// content is blank, names unnamed sections "General", and fills in a word count when the
    /// supplied one is not positive.
    /// </summary>
    /// <param name="sections">The sections to clean; null is treated as empty.</param>
    /// <returns>A new list of cleaned sections.</returns>
    private static List<StructuredCvSection> NormalizeSections(IEnumerable<StructuredCvSection>? sections)
    {
        return (sections ?? Array.Empty<StructuredCvSection>())
            .Select(section => new StructuredCvSection
            {
                Name = string.IsNullOrWhiteSpace(section?.Name) ? "General" : section.Name.Trim(),
                Content = section?.Content?.Trim() ?? string.Empty,
                WordCount = section?.WordCount is > 0 ?
section.WordCount : CountWords(section?.Content),
            })
            .Where(section => !string.IsNullOrWhiteSpace(section.Content))
            .ToList();
    }

    /// <summary>
    /// Renders the structured fields of <paramref name="profile"/> as named text sections, in the
    /// order Contact, Professional Summary, Work Experience, Education, Certifications, Projects,
    /// Skills, Languages, Interests, then any other sections. Record entries are written as a
    /// "### heading" line, a " | "-separated metadata line, "- " bullet lines and, where present,
    /// a "Skills: " line. Sections with no content are omitted.
    /// </summary>
    /// <param name="profile">The profile to render; its lists are read as already normalized (non-null).</param>
    /// <returns>The generated sections, passed through <c>NormalizeSections</c>.</returns>
    private static List<StructuredCvSection> BuildSections(StructuredCvProfile profile)
    {
        var sections = new List<StructuredCvSection>();

        // Contact: one line per available field.
        var contactLines = new List<string>();
        AddIf(contactLines, profile.Contact.FullName);
        AddIf(contactLines, profile.Contact.Headline);
        AddIf(contactLines, profile.Contact.Email);
        AddIf(contactLines, profile.Contact.Phone);
        AddIf(contactLines, profile.Contact.Location);
        AddIf(contactLines, profile.Contact.Website);
        AddIf(contactLines, profile.Contact.LinkedIn);
        AddSectionIfAny(sections, "Contact", contactLines);

        AddSectionIfAny(sections, "Professional Summary", profile.Summary);

        if (profile.Jobs.Count > 0)
        {
            var lines = new List<string>();
            foreach (var job in profile.Jobs)
            {
                // With no title this still yields a bare "###" line, which keeps the entry's heading slot.
                AddIf(lines, $"### {job.Title}".Trim());
                var meta = string.Join(" | ", new[] { job.Company, job.Location, FormatDateRange(job.Start, job.End, job.IsCurrent) }.Where(value => !string.IsNullOrWhiteSpace(value)));
                AddIf(lines, meta);
                lines.AddRange(job.Bullets.Select(bullet => $"- {bullet}"));
                if (job.Skills.Count > 0)
                {
                    lines.Add($"Skills: {string.Join(", ", job.Skills)}");
                }

                // Blank separator between entries; AddSectionIfAny later discards whitespace-only lines.
                if (lines.Count > 0 && !string.IsNullOrWhiteSpace(lines[^1])) lines.Add(string.Empty);
            }

            AddSectionIfAny(sections, "Work Experience", lines);
        }

        if (profile.Education.Count > 0)
        {
            var lines = new List<string>();
            foreach (var education in profile.Education)
            {
                AddIf(lines, $"### {education.Qualification}".Trim());
                var meta = string.Join(" | ", new[] { education.Institution, education.Location, FormatDateRange(education.Start, education.End, false) }.Where(value => !string.IsNullOrWhiteSpace(value)));
                AddIf(lines, meta);
                if (!string.IsNullOrWhiteSpace(education.QualificationLevel)) AddIf(lines, $"Level: {education.QualificationLevel}");
                lines.AddRange(education.Details.Select(detail => $"- {detail}"));

                // Blank separator between entries (see the note in the jobs loop above).
                if (lines.Count > 0 && !string.IsNullOrWhiteSpace(lines[^1]))
lines.Add(string.Empty);
            }

            AddSectionIfAny(sections, "Education", lines);
        }

        if (profile.Certifications.Count > 0)
        {
            var lines = new List<string>();
            foreach (var certification in profile.Certifications)
            {
                // Heading line, then "issuer | location | date" with blank parts left out.
                AddIf(lines, $"### {certification.Name}".Trim());
                var meta = string.Join(" | ", new[] { certification.Issuer, certification.Location, certification.Date }.Where(value => !string.IsNullOrWhiteSpace(value)));
                AddIf(lines, meta);
                lines.AddRange(certification.Details.Select(detail => $"- {detail}"));

                // Blank separator between entries; AddSectionIfAny later discards whitespace-only lines.
                if (lines.Count > 0 && !string.IsNullOrWhiteSpace(lines[^1])) lines.Add(string.Empty);
            }

            AddSectionIfAny(sections, "Certifications", lines);
        }

        if (profile.Projects.Count > 0)
        {
            var lines = new List<string>();
            foreach (var project in profile.Projects)
            {
                // Heading line, then "role | location | date range" with blank parts left out.
                AddIf(lines, $"### {project.Name}".Trim());
                var meta = string.Join(" | ", new[] { project.Role, project.Location, FormatDateRange(project.Start, project.End, false) }.Where(value => !string.IsNullOrWhiteSpace(value)));
                AddIf(lines, meta);
                lines.AddRange(project.Bullets.Select(bullet => $"- {bullet}"));
                if (project.Skills.Count > 0) AddIf(lines, $"Skills: {string.Join(", ", project.Skills)}");
                if (lines.Count > 0 && !string.IsNullOrWhiteSpace(lines[^1])) lines.Add(string.Empty);
            }

            AddSectionIfAny(sections, "Projects", lines);
        }

        AddSectionIfAny(sections, "Skills", profile.Skills);

        if (profile.Languages.Count > 0)
        {
            // Each language is rendered as "Name", "Name: Level" or "Name: Level (Notes)".
            AddSectionIfAny(sections, "Languages", profile.Languages.Select(language =>
            {
                var value = language.Name ?? string.Empty;
                if (!string.IsNullOrWhiteSpace(language.Level)) value += $": {language.Level}";
                if (!string.IsNullOrWhiteSpace(language.Notes)) value += $" ({language.Notes})";
                return value;
            }).ToList());
        }

        AddSectionIfAny(sections, "Interests", profile.Interests);

        // Remaining free-form sections keep their own title, or a default title when they have none.
        foreach (var other in profile.OtherSections)
        {
            AddSectionIfAny(sections, other.Title ??
"Other", other.Items);
        }

        return NormalizeSections(sections);
    }

    /// <summary>
    /// Appends a section built from <paramref name="lines"/> (blank lines removed, each line trimmed,
    /// joined with "\n"). Nothing is added when no content remains.
    /// </summary>
    /// <param name="sections">The list to append to.</param>
    /// <param name="name">The section name.</param>
    /// <param name="lines">The candidate lines; null is treated as empty.</param>
    private static void AddSectionIfAny(List<StructuredCvSection> sections, string name, IEnumerable<string>? lines)
    {
        var keptLines = (lines ?? Array.Empty<string>())
            .Where(line => !string.IsNullOrWhiteSpace(line))
            .Select(line => line.Trim());
        var content = string.Join("\n", keptLines).Trim();
        if (string.IsNullOrWhiteSpace(content))
        {
            return;
        }

        sections.Add(new StructuredCvSection { Name = name, Content = content, WordCount = CountWords(content) });
    }

    /// <summary>Appends the trimmed <paramref name="value"/> to <paramref name="lines"/> unless it is blank.</summary>
    /// <param name="lines">The list to append to.</param>
    /// <param name="value">The candidate line.</param>
    private static void AddIf(List<string> lines, string? value)
    {
        if (!string.IsNullOrWhiteSpace(value))
        {
            lines.Add(value.Trim());
        }
    }

    /// <summary>
    /// Fills empty fields of <paramref name="contact"/> from the text of a contact section. E-mail
    /// and phone are found by pattern anywhere in the text; LinkedIn and website are taken from the
    /// first matching line; the first three remaining lines become full name, headline and location.
    /// Fields that already have a value are left untouched.
    /// </summary>
    /// <param name="contact">The contact block to fill in.</param>
    /// <param name="content">The raw section text, one item per line.</param>
    private static void ApplyContact(StructuredCvContact contact, string content)
    {
        var lines = content
            .Replace("\r\n", "\n")
            .Split('\n', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries);

        contact.Email ??= Regex.Match(content, @"[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,}", RegexOptions.IgnoreCase).Value.NullIfWhitespace();
        contact.Phone ??= Regex.Match(content, @"(?<!\w)(?:\+?\d[\d\s().-]{6,}\d)", RegexOptions.IgnoreCase).Value.NullIfWhitespace();

        foreach (var line in lines)
        {
            if (contact.LinkedIn is null && line.Contains("linkedin", StringComparison.OrdinalIgnoreCase))
            {
                contact.LinkedIn = line.Trim();
                continue;
            }

            if (contact.Website is not null)
            {
                continue;
            }

            var mentionsUrl = line.Contains("http", StringComparison.OrdinalIgnoreCase)
                || line.Contains("www.", StringComparison.OrdinalIgnoreCase);

            // A bare domain such as "janedoe.dev" has a dot but no whitespace and is not the phone
            // number. Without these two checks, lines like "Dr. Jane Doe" or "St. Louis, MO" were
            // claimed as the website and then excluded from the name/headline/location below.
            var looksLikeBareDomain = line.Contains('.')
                && !line.Contains('@')
                && !line.Any(char.IsWhiteSpace)
                && !line.Equals(contact.Phone, StringComparison.OrdinalIgnoreCase);

            if (mentionsUrl || looksLikeBareDomain)
            {
                contact.Website = line.Trim();
            }
        }

        // Whatever is not an e-mail, LinkedIn, website or phone line is read positionally.
        var leftovers = lines
            .Where(line => !line.Contains('@')
                && !line.Contains("linkedin", StringComparison.OrdinalIgnoreCase)
                && !line.Equals(contact.Website, StringComparison.OrdinalIgnoreCase)
                && !line.Equals(contact.Phone, StringComparison.OrdinalIgnoreCase))
            .ToList();

        if (leftovers.Count > 0)
        {
            contact.FullName ??= leftovers[0].Trim();
        }

        if (leftovers.Count > 1)
        {
            contact.Headline ??= leftovers[1].Trim();
        }

        if (leftovers.Count > 2)
        {
            contact.Location ??= leftovers[2].Trim();
        }
    }

    /// <summary>
    /// Parses a languages section into entries. Each item may be "Name", "Name: Level" or
    /// "Name: Level (Notes)"; items for which no level or no language name is recognized are dropped.
    /// </summary>
    /// <param name="content">The raw section text.</param>
    /// <returns>The recognized language entries.</returns>
    private static List<StructuredCvLanguage>
ParseLanguages(string content)
    {
        var languages = new List<StructuredCvLanguage>();

        foreach (var entry in SplitList(content))
        {
            var name = entry;
            string? level = null;
            string? notes = null;

            // "Name: detail" - the detail is either "Level" or "Level (Notes)".
            var separator = entry.IndexOf(':');
            if (separator > 0)
            {
                name = entry[..separator].Trim();
                var detail = entry[(separator + 1)..].Trim();
                var withNotes = Regex.Match(detail, @"^(.*?)\s*\((.*?)\)$");
                if (withNotes.Success)
                {
                    level = withNotes.Groups[1].Value.NullIfWhitespace();
                    notes = withNotes.Groups[2].Value.NullIfWhitespace();
                }
                else
                {
                    level = detail.NullIfWhitespace();
                }
            }

            // Fall back to scanning the whole entry when the level part alone yields nothing.
            var recognizedLevel = HumanLanguageCatalog.ExtractLevel(level) ?? HumanLanguageCatalog.ExtractLevel(entry);
            var recognizedName = recognizedLevel is not null
                ? HumanLanguageCatalog.NormalizeLanguageName(name)
                : null;

            if (string.IsNullOrWhiteSpace(recognizedName))
            {
                continue;
            }

            languages.Add(new StructuredCvLanguage
            {
                Name = recognizedName,
                Level = recognizedLevel,
                Notes = notes,
            });
        }

        return languages;
    }

    /// <summary>Splits a work-experience section into blocks and parses each one as a job.</summary>
    /// <param name="content">The raw section text.</param>
    /// <returns>The jobs that could be parsed; blocks yielding no job are skipped.</returns>
    private static List<StructuredCvJob> ParseJobs(string content)
    {
        return SplitBlocks(content)
            .Select(ParseJobBlock)
            .OfType<StructuredCvJob>()
            .ToList();
    }

    /// <summary>
    /// Parses one job block: the first line is the title (an optional "###" prefix is removed), the
    /// lines up to the first bullet are metadata (company, location, dates), bullet lines become
    /// bullets and "Skills:" lines become skills.
    /// </summary>
    /// <param name="block">The text of a single job entry.</param>
    /// <returns>The parsed job, or null when the block has no title, company or bullets.</returns>
    private static StructuredCvJob?
ParseJobBlock(string block)
    {
        var lines = block
            .Replace("\r\n", "\n")
            .Split('\n', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries)
            .ToList();
        if (lines.Count == 0)
        {
            return null;
        }

        var job = new StructuredCvJob();
        if (lines[0].StartsWith("###", StringComparison.Ordinal))
        {
            lines[0] = lines[0].TrimStart('#', ' ');
        }

        job.Title = lines[0].NullIfWhitespace();

        // Header lines between the title and the first bullet. A "Skills:" line also ends the header
        // (as in ParseProjectBlock); otherwise, in an entry without bullets, it would be scanned for
        // dates and read as the company or location.
        var metadata = lines
            .Skip(1)
            .TakeWhile(line => !IsBullet(line) && !line.StartsWith("Skills:", StringComparison.OrdinalIgnoreCase))
            .ToList();

        // First date or date range found in the header, e.g. "Jan 2020 - Present".
        var dateValue = metadata
            .Select(line => Regex.Match(line, @"(?:(?:\w+\s+)?\d{4}|Present|Current)(?:\s*[-–]\s*(?:(?:\w+\s+)?\d{4}|Present|Current))?", RegexOptions.IgnoreCase).Value.NullIfWhitespace())
            .FirstOrDefault(value => value is not null);

        if (!string.IsNullOrWhiteSpace(dateValue))
        {
            var parts = Regex.Split(dateValue, "\\s*[-–]\\s*");
            job.Start = parts.FirstOrDefault().NullIfWhitespace();
            job.End = parts.Skip(1).FirstOrDefault().NullIfWhitespace();
            job.IsCurrent = string.Equals(job.End, "present", StringComparison.OrdinalIgnoreCase)
                || string.Equals(job.End, "current", StringComparison.OrdinalIgnoreCase);
        }

        // Remove the date text, then split on '|' so that a single "Company | Location | dates" line
        // (the format BuildSections writes) yields separate company and location values.
        var metadataWithoutDates = metadata
            .Select(line => string.IsNullOrWhiteSpace(dateValue) ? line : line.Replace(dateValue, string.Empty))
            .SelectMany(line => line.Split('|'))
            .Select(part => part.Trim(' ', '|', ',', '-'))
            .Where(part => !string.IsNullOrWhiteSpace(part))
            .ToList();

        if (metadataWithoutDates.Count > 0)
        {
            job.Company = metadataWithoutDates[0].NullIfWhitespace();
        }

        if (metadataWithoutDates.Count > 1)
        {
            job.Location = metadataWithoutDates[1].NullIfWhitespace();
        }

        job.Bullets = lines
            .Where(IsBullet)
            .Select(line => line.Trim().TrimStart('-', '•', '*', ' '))
            .Where(line => !string.IsNullOrWhiteSpace(line))
            .ToList();

        job.Skills = lines
            .Where(line => line.StartsWith("Skills:", StringComparison.OrdinalIgnoreCase))
            .SelectMany(line => SplitList(line[(line.IndexOf(':') + 1)..]))
            .ToList();

        // An entry with no title, no company and no bullets carries nothing worth keeping.
        return string.IsNullOrWhiteSpace(job.Title) && string.IsNullOrWhiteSpace(job.Company) && job.Bullets.Count == 0 ?
null : job;
    }

    /// <summary>Splits an education section into blocks and parses each one as an education entry.</summary>
    /// <param name="content">The raw section text.</param>
    /// <returns>The entries that could be parsed; blocks yielding no entry are skipped.</returns>
    private static List<StructuredCvEducation> ParseEducation(string content)
    {
        var blocks = SplitBlocks(content);
        return blocks.Select(ParseEducationBlock).Where(education => education is not null).Select(education => education!).ToList();
    }

    /// <summary>
    /// Parses one education block: the first line is the qualification (an optional "###" prefix is
    /// removed), the lines up to the first bullet are metadata (institution, location, dates) and
    /// bullet lines become details.
    /// </summary>
    /// <param name="block">The text of a single education entry.</param>
    /// <returns>The parsed entry, or null when it has no qualification, institution or details.</returns>
    private static StructuredCvEducation? ParseEducationBlock(string block)
    {
        var lines = block.Replace("\r\n", "\n").Split('\n', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries).ToList();
        if (lines.Count == 0) return null;

        var education = new StructuredCvEducation();
        if (lines[0].StartsWith("###", StringComparison.Ordinal)) lines[0] = lines[0].TrimStart('#', ' ');
        education.Qualification = lines[0].NullIfWhitespace();

        // Header lines between the qualification and the first bullet.
        var metadata = lines.Skip(1).TakeWhile(line => !IsBullet(line)).ToList();

        // First year or year range in the header; unlike the job pattern, it must start with a year
        // ("Present"/"Current" are only accepted as the end of a range).
        var dateValue = metadata.Select(line => Regex.Match(line, @"(?:(?:\w+\s+)?\d{4})(?:\s*[-–]\s*(?:(?:\w+\s+)?\d{4}|Present|Current))?", RegexOptions.IgnoreCase).Value.NullIfWhitespace()).FirstOrDefault(value => value is not null);
        if (!string.IsNullOrWhiteSpace(dateValue))
        {
            var parts = Regex.Split(dateValue, "\\s*[-–]\\s*");
            education.Start = parts.FirstOrDefault().NullIfWhitespace();
            education.End = parts.Skip(1).FirstOrDefault().NullIfWhitespace();
        }

        // Header lines with the date text removed; the first becomes the institution, the second the location.
        var metadataWithoutDates = metadata
            .Select(line => string.IsNullOrWhiteSpace(dateValue) ?
line : line.Replace(dateValue, string.Empty))
            // Split on '|' so a single "Institution | Location | dates" line (the format
            // BuildSections writes) yields separate institution and location values.
            .SelectMany(line => line.Split('|'))
            .Select(part => part.Trim(' ', '|', ',', '-'))
            .Where(part => !string.IsNullOrWhiteSpace(part))
            .ToList();

        if (metadataWithoutDates.Count > 0)
        {
            education.Institution = metadataWithoutDates[0].NullIfWhitespace();
        }

        if (metadataWithoutDates.Count > 1)
        {
            education.Location = metadataWithoutDates[1].NullIfWhitespace();
        }

        education.Details = lines
            .Skip(1)
            .Where(IsBullet)
            .Select(line => line.Trim().TrimStart('-', '•', '*', ' '))
            .Where(line => !string.IsNullOrWhiteSpace(line))
            .ToList();
        education.QualificationLevel = NormalizeQualificationLevel(null, education.Qualification);

        var hasNoContent = string.IsNullOrWhiteSpace(education.Qualification)
            && string.IsNullOrWhiteSpace(education.Institution)
            && education.Details.Count == 0;
        return hasNoContent ? null : education;
    }

    /// <summary>Splits a certifications section into blocks and parses each one as a certification.</summary>
    /// <param name="content">The raw section text.</param>
    /// <returns>The certifications that could be parsed; blocks yielding none are skipped.</returns>
    private static List<StructuredCvCertification> ParseCertifications(string content)
    {
        return SplitBlocks(content)
            .Select(ParseCertificationBlock)
            .Where(certification => certification is not null)
            .Select(certification => certification!)
            .ToList();
    }

    /// <summary>
    /// Parses one certification block: the first line is the name (an optional "###" prefix is
    /// removed), the lines up to the first bullet are metadata (issuer, location, date) and bullet
    /// lines become details.
    /// </summary>
    /// <param name="block">The text of a single certification entry.</param>
    /// <returns>The parsed certification, or null when it has neither a name nor an issuer.</returns>
    private static StructuredCvCertification? ParseCertificationBlock(string block)
    {
        var lines = block
            .Replace("\r\n", "\n")
            .Split('\n', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries)
            .ToList();
        if (lines.Count == 0)
        {
            return null;
        }

        var certification = new StructuredCvCertification();
        if (lines[0].StartsWith("###", StringComparison.Ordinal))
        {
            lines[0] = lines[0].TrimStart('#', ' ');
        }

        certification.Name = lines[0].NullIfWhitespace();

        // Header lines between the name and the first bullet.
        var metadata = lines.Skip(1).TakeWhile(line => !IsBullet(line)).ToList();

        // A single date (not a range): an optional word followed by a year, or "Present"/"Current".
        certification.Date = metadata
            .Select(line => Regex.Match(line, @"(?:(?:\w+\s+)?\d{4}|Present|Current)", RegexOptions.IgnoreCase).Value.NullIfWhitespace())
            .FirstOrDefault(value => value is not null);

        var metadataWithoutDates = metadata
            .Select(line => string.IsNullOrWhiteSpace(certification.Date) ?
line : line.Replace(certification.Date, string.Empty)) .Select(line => line.Trim(' ', '|', ',', '-')) .Where(line => !string.IsNullOrWhiteSpace(line)) .ToList(); if (metadataWithoutDates.Count > 0) certification.Issuer = metadataWithoutDates[0].NullIfWhitespace(); if (metadataWithoutDates.Count > 1) certification.Location = metadataWithoutDates[1].NullIfWhitespace(); certification.Details = lines.Skip(1).Where(IsBullet).Select(line => line.Trim().TrimStart('-', '•', '*', ' ')).Where(line => !string.IsNullOrWhiteSpace(line)).ToList(); return string.IsNullOrWhiteSpace(certification.Name) && string.IsNullOrWhiteSpace(certification.Issuer) ? null : certification; } private static List<StructuredCvProject> ParseProjects(string content) { var blocks = SplitBlocks(content); return blocks.Select(ParseProjectBlock).Where(project => project is not null).Select(project => project!).ToList(); } private static StructuredCvProject? ParseProjectBlock(string block) { var lines = block.Replace("\r\n", "\n").Split('\n', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries).ToList(); if (lines.Count == 0) return null; var project = new StructuredCvProject(); if (lines[0].StartsWith("###", StringComparison.Ordinal)) lines[0] = lines[0].TrimStart('#', ' '); project.Name = lines[0].NullIfWhitespace(); var metadata = lines.Skip(1).TakeWhile(line => !IsBullet(line) && !line.StartsWith("Skills:", StringComparison.OrdinalIgnoreCase)).ToList(); var dateValue = metadata.Select(line => Regex.Match(line, @"(?:(?:\w+\s+)?\d{4}|Present|Current)(?:\s*[-–]\s*(?:(?:\w+\s+)?\d{4}|Present|Current))?", RegexOptions.IgnoreCase).Value.NullIfWhitespace()).FirstOrDefault(value => value is not null); if (!string.IsNullOrWhiteSpace(dateValue)) { var parts = Regex.Split(dateValue, "\\s*[-–]\\s*"); project.Start = parts.FirstOrDefault().NullIfWhitespace(); project.End = parts.Skip(1).FirstOrDefault().NullIfWhitespace(); } var metadataWithoutDates = metadata .Select(line => 
string.IsNullOrWhiteSpace(dateValue) ? line : line.Replace(dateValue, string.Empty)) .Select(line => line.Trim(' ', '|', ',', '-')) .Where(line => !string.IsNullOrWhiteSpace(line)) .ToList(); if (metadataWithoutDates.Count > 0) project.Role = metadataWithoutDates[0].NullIfWhitespace(); if (metadataWithoutDates.Count > 1) project.Location = metadataWithoutDates[1].NullIfWhitespace(); project.Bullets = lines.Where(IsBullet).Select(line => line.Trim().TrimStart('-', '•', '*', ' ')).Where(line => !string.IsNullOrWhiteSpace(line)).ToList(); project.Skills = lines .Where(line => line.StartsWith("Skills:", StringComparison.OrdinalIgnoreCase)) .SelectMany(line => SplitList(line[(line.IndexOf(':') + 1)..])) .ToList(); return string.IsNullOrWhiteSpace(project.Name) && string.IsNullOrWhiteSpace(project.Role) && project.Bullets.Count == 0 ? null : project; } private static List<string> SplitBlocks(string content) { var normalized = content.Replace("\r\n", "\n").Trim(); if (string.IsNullOrWhiteSpace(normalized)) return new List<string>(); if (normalized.Contains("### ", StringComparison.Ordinal)) { return Regex.Split(normalized, @"(?=^###\s+)" , RegexOptions.Multiline) .Select(block => block.Trim()) .Where(block => !string.IsNullOrWhiteSpace(block)) .ToList(); } return Regex.Split(normalized, @"\n\s*\n") .Select(block => block.Trim()) .Where(block => !string.IsNullOrWhiteSpace(block)) .ToList(); } private static bool IsBullet(string value) { var trimmed = value.TrimStart(); return trimmed.StartsWith("-", StringComparison.Ordinal) || trimmed.StartsWith("•", StringComparison.Ordinal) || trimmed.StartsWith("*", StringComparison.Ordinal); } private static List<string> SplitList(string? content) { if (string.IsNullOrWhiteSpace(content)) return new List<string>(); return content .Replace("\r\n", "\n") .Split('\n', StringSplitOptions.RemoveEmptyEntries) .SelectMany(line => line.Contains(',') && !line.TrimStart().StartsWith("-", StringComparison.Ordinal) ? 
line.Split(',', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries) : new[] { line }) .Select(item => item.Trim().TrimStart('-', '•', '*', ' ')) .Where(item => !string.IsNullOrWhiteSpace(item)) .Distinct(StringComparer.OrdinalIgnoreCase) .ToList(); } private static List<string> CleanList(IEnumerable<string>? values) { return (values ?? Array.Empty<string>()) .Select(value => value?.Trim() ?? string.Empty) .Where(value => !string.IsNullOrWhiteSpace(value)) .Distinct(StringComparer.OrdinalIgnoreCase) .ToList(); } private static int CountWords(string? content) { if (string.IsNullOrWhiteSpace(content)) return 0; return content.Trim().Split((char[]?)null, StringSplitOptions.RemoveEmptyEntries).Length; } private static string? TrimOrNull(string? value) { return string.IsNullOrWhiteSpace(value) ? null : value.Trim(); } private static string? FormatDateRange(string? start, string? end, bool isCurrent) { if (string.IsNullOrWhiteSpace(start) && string.IsNullOrWhiteSpace(end)) return null; if (string.IsNullOrWhiteSpace(start)) return end; return $"{start} - {(isCurrent ? "Present" : end ?? "Present")}"; } private static string? NullIfWhitespace(this string? value) { return string.IsNullOrWhiteSpace(value) ? null : value.Trim(); } }