@@ -67,6 +67,8 @@ public static class StructuredCvProfileJson
: primary . Summary . Concat ( secondary . Summary ) . Distinct ( StringComparer . OrdinalIgnoreCase ) . ToList ( ) ;
if ( primary . Jobs . Count = = 0 ) primary . Jobs = secondary . Jobs ;
if ( primary . Education . Count = = 0 ) primary . Education = secondary . Education ;
if ( primary . Certifications . Count = = 0 ) primary . Certifications = secondary . Certifications ;
if ( primary . Projects . Count = = 0 ) primary . Projects = secondary . Projects ;
primary . Skills = primary . Skills . Count = = 0
? secondary . Skills
: primary . Skills . Concat ( secondary . Skills ) . Distinct ( StringComparer . OrdinalIgnoreCase ) . ToList ( ) ;
@@ -132,6 +134,14 @@ public static class StructuredCvProfileJson
case "education" :
profile . Education = ParseEducation ( section . Content ) ;
break ;
case "certifications" :
case "certificates" :
profile . Certifications = ParseCertifications ( section . Content ) ;
break ;
case "projects" :
case "selected projects" :
profile . Projects = ParseProjects ( section . Content ) ;
break ;
default :
profile . OtherSections . Add ( new StructuredCvOtherSection
{
@@ -165,6 +175,18 @@ public static class StructuredCvProfileJson
| | ! string . IsNullOrWhiteSpace ( education . Institution )
| | education . Details . Count > 0 )
. ToList ( ) ;
profile . Certifications = ( profile . Certifications ? ? new List < StructuredCvCertification > ( ) )
. Select ( NormalizeCertification )
. Where ( certification = > ! string . IsNullOrWhiteSpace ( certification . Name )
| | ! string . IsNullOrWhiteSpace ( certification . Issuer )
| | certification . Details . Count > 0 )
. ToList ( ) ;
profile . Projects = ( profile . Projects ? ? new List < StructuredCvProject > ( ) )
. Select ( NormalizeProject )
. Where ( project = > ! string . IsNullOrWhiteSpace ( project . Name )
| | ! string . IsNullOrWhiteSpace ( project . Role )
| | project . Bullets . Count > 0 )
. ToList ( ) ;
profile . Skills = CleanList ( profile . Skills ) ;
profile . Languages = ( profile . Languages ? ? new List < StructuredCvLanguage > ( ) )
. Select ( NormalizeLanguage )
@@ -299,6 +321,8 @@ public static class StructuredCvProfileJson
if ( trimmed . Any ( char . IsDigit ) | | trimmed . Length > 80 ) return null ;
var normalized = Regex . Replace ( trimmed , @"\s+[A-Z](?:\s+[A-Z]){2,}(?:\b.*)?$" , string . Empty ) . Trim ( ) ;
normalized = Regex . Replace ( normalized , @"\b(?:remote|hybrid)\b.*$" , string . Empty , RegexOptions . IgnoreCase ) . Trim ( ) ;
normalized = Regex . Replace ( normalized , @"\b(?:sales representative|developer|engineer|manager|consultant|analyst|designer|specialist|technician)\b.*$" , string . Empty , RegexOptions . IgnoreCase ) . Trim ( ) ;
normalized = Regex . Replace ( normalized , @"\s+" , " " ) . Trim ( ' ' , '|' , ';' , ':' ) ;
var parts = normalized . Split ( ',' , StringSplitOptions . RemoveEmptyEntries | StringSplitOptions . TrimEntries ) ;
if ( parts . Length = = 0 | | parts . Length > 4 ) return null ;
@@ -421,10 +445,24 @@ public static class StructuredCvProfileJson
return string . IsNullOrWhiteSpace ( trimmed ) ? null : trimmed ;
}
/// <summary>
/// Maps an education entry onto a coarse qualification level label.
/// </summary>
/// <param name="explicitValue">
/// Level already stored on the entry, if any. It is preferred whenever it matches one of the known level rules.
/// </param>
/// <param name="qualificationText">
/// Free-text qualification, consulted when <paramref name="explicitValue"/> is blank or matches no rule.
/// </param>
/// <returns>
/// <c>null</c> when both inputs are blank; otherwise one of "PhD", "Master", "Bachelor",
/// "Diploma/Certificate", "Secondary" or "Other".
/// </returns>
private static string? NormalizeQualificationLevel(string? explicitValue, string? qualificationText)
{
    var explicitCandidate = TrimOrNull(explicitValue);
    var textCandidate = TrimOrNull(qualificationText);
    if (explicitCandidate is null && textCandidate is null)
    {
        return null;
    }

    // An explicit value that matches no rule (for example a previously derived "Other") must not
    // mask the qualification text; otherwise re-running this method after the qualification text
    // has changed can never improve on the stored level.
    return MatchQualificationLevel(explicitCandidate)
        ?? MatchQualificationLevel(textCandidate)
        ?? "Other";
}

/// <summary>
/// Returns the level label for the first rule that matches <paramref name="candidate"/>,
/// or <c>null</c> when the candidate is <c>null</c> or matches no rule.
/// Rules are checked from the highest level to the lowest, case-insensitively.
/// </summary>
private static string? MatchQualificationLevel(string? candidate)
{
    if (candidate is null)
    {
        return null;
    }

    bool Matches(string pattern) => Regex.IsMatch(candidate, pattern, RegexOptions.IgnoreCase);

    // "ph\.d" additionally covers the dotted spelling "Ph.D".
    if (Matches(@"\b(phd|ph\.d|doctorate|dphil)\b"))
    {
        return "PhD";
    }

    if (Matches(@"\b(master(?:'s)?|msc|m\.sc|ma|m\.a|mba|meng)\b"))
    {
        return "Master";
    }

    if (Matches(@"\b(bachelor(?:'s)?|bsc|b\.sc|ba|b\.a|beng|llb|undergraduate degree)\b"))
    {
        return "Bachelor";
    }

    if (Matches(@"\b(diploma|certificate|certification|nvq|btec|level\s*\d+|apprenticeship|associate degree)\b"))
    {
        return "Diploma/Certificate";
    }

    if (Matches(@"\b(gcse|a-?level|secondary|high school|gymnasium)\b"))
    {
        return "Secondary";
    }

    return null;
}
private static StructuredCvEducation NormalizeEducation ( StructuredCvEducation ? education )
{
education ? ? = new StructuredCvEducation ( ) ;
education . Qualification = NormalizeQualification ( education . Qualification ) ;
education . QualificationLevel = NormalizeQualificationLevel ( education . QualificationLevel , education . Qualification ) ;
education . Institution = NormalizeInstitution ( education . Institution ) ;
education . Location = NormalizeLocationValue ( education . Location ) ;
education . Start = NormalizeDateValue ( education . Start ) ;
@@ -438,12 +476,41 @@ public static class StructuredCvProfileJson
if ( qualificationLooksInstitutional & & institutionLooksQualification )
{
( education . Qualification , education . Institution ) = ( education . Institution , education . Qualification ) ;
education . QualificationLevel = NormalizeQualificationLevel ( education . QualificationLevel , education . Qualification ) ;
}
}
return education ;
}
/// <summary>
/// Cleans every field of a certification entry in place and returns it.
/// A <c>null</c> entry is replaced by a fresh, empty <see cref="StructuredCvCertification"/> first.
/// </summary>
/// <param name="certification">The entry to normalise; may be <c>null</c>.</param>
/// <returns>The normalised entry (the same instance when one was supplied).</returns>
private static StructuredCvCertification NormalizeCertification(StructuredCvCertification? certification)
{
    var normalized = certification ?? new StructuredCvCertification();

    // Name and issuer go through the same helpers that education entries use for
    // their qualification and institution fields.
    normalized.Name = NormalizeQualification(normalized.Name);
    normalized.Issuer = NormalizeInstitution(normalized.Issuer);
    normalized.Location = NormalizeLocationValue(normalized.Location);
    normalized.Date = NormalizeDateValue(normalized.Date);
    normalized.Details = CleanList(normalized.Details);

    return normalized;
}
/// <summary>
/// Cleans every field of a project entry in place and returns it.
/// A <c>null</c> entry is replaced by a fresh, empty <see cref="StructuredCvProject"/> first.
/// </summary>
/// <param name="project">The entry to normalise; may be <c>null</c>.</param>
/// <returns>The normalised entry (the same instance when one was supplied).</returns>
private static StructuredCvProject NormalizeProject(StructuredCvProject? project)
{
    var normalized = project ?? new StructuredCvProject();

    normalized.Name = NormalizeQualification(normalized.Name);
    normalized.Role = NormalizeJobTitle(normalized.Role);
    normalized.Location = NormalizeLocationValue(normalized.Location);
    normalized.Start = NormalizeDateValue(normalized.Start);
    normalized.End = NormalizeDateValue(normalized.End);

    // Clean the list first, then normalise each bullet, dropping any that normalise to null.
    var bullets = new List<string>();
    foreach (var rawBullet in CleanList(normalized.Bullets))
    {
        if (NormalizeBullet(rawBullet) is { } cleanedBullet)
        {
            bullets.Add(cleanedBullet);
        }
    }

    normalized.Bullets = bullets;
    normalized.Skills = CleanList(normalized.Skills);

    return normalized;
}
private static StructuredCvLanguage NormalizeLanguage ( StructuredCvLanguage ? language )
{
language ? ? = new StructuredCvLanguage ( ) ;
@@ -512,12 +579,42 @@ public static class StructuredCvProfileJson
AddIf ( lines , $"### {education.Qualification}" . Trim ( ) ) ;
var meta = string . Join ( " | " , new [ ] { education . Institution , education . Location , FormatDateRange ( education . Start , education . End , false ) } . Where ( value = > ! string . IsNullOrWhiteSpace ( value ) ) ) ;
AddIf ( lines , meta ) ;
if ( ! string . IsNullOrWhiteSpace ( education . QualificationLevel ) ) AddIf ( lines , $"Level: {education.QualificationLevel}" ) ;
lines . AddRange ( education . Details . Select ( detail = > $"- {detail}" ) ) ;
if ( lines . Count > 0 & & ! string . IsNullOrWhiteSpace ( lines [ ^ 1 ] ) ) lines . Add ( string . Empty ) ;
}
AddSectionIfAny ( sections , "Education" , lines ) ;
}
if ( profile . Certifications . Count > 0 )
{
var lines = new List < string > ( ) ;
foreach ( var certification in profile . Certifications )
{
AddIf ( lines , $"### {certification.Name}" . Trim ( ) ) ;
var meta = string . Join ( " | " , new [ ] { certification . Issuer , certification . Location , certification . Date } . Where ( value = > ! string . IsNullOrWhiteSpace ( value ) ) ) ;
AddIf ( lines , meta ) ;
lines . AddRange ( certification . Details . Select ( detail = > $"- {detail}" ) ) ;
if ( lines . Count > 0 & & ! string . IsNullOrWhiteSpace ( lines [ ^ 1 ] ) ) lines . Add ( string . Empty ) ;
}
AddSectionIfAny ( sections , "Certifications" , lines ) ;
}
if ( profile . Projects . Count > 0 )
{
var lines = new List < string > ( ) ;
foreach ( var project in profile . Projects )
{
AddIf ( lines , $"### {project.Name}" . Trim ( ) ) ;
var meta = string . Join ( " | " , new [ ] { project . Role , project . Location , FormatDateRange ( project . Start , project . End , false ) } . Where ( value = > ! string . IsNullOrWhiteSpace ( value ) ) ) ;
AddIf ( lines , meta ) ;
lines . AddRange ( project . Bullets . Select ( bullet = > $"- {bullet}" ) ) ;
if ( project . Skills . Count > 0 ) AddIf ( lines , $"Skills: {string.Join(" , ", project.Skills)}" ) ;
if ( lines . Count > 0 & & ! string . IsNullOrWhiteSpace ( lines [ ^ 1 ] ) ) lines . Add ( string . Empty ) ;
}
AddSectionIfAny ( sections , "Projects" , lines ) ;
}
AddSectionIfAny ( sections , "Skills" , profile . Skills ) ;
if ( profile . Languages . Count > 0 )
@@ -692,9 +789,76 @@ public static class StructuredCvProfileJson
if ( metadataWithoutDates . Count > 1 ) education . Location = metadataWithoutDates [ 1 ] . NullIfWhitespace ( ) ;
education . Details = lines . Skip ( 1 ) . Where ( IsBullet ) . Select ( line = > line . Trim ( ) . TrimStart ( '-' , '•' , '*' , ' ' ) ) . Where ( line = > ! string . IsNullOrWhiteSpace ( line ) ) . ToList ( ) ;
education . QualificationLevel = NormalizeQualificationLevel ( null , education . Qualification ) ;
return string . IsNullOrWhiteSpace ( education . Qualification ) & & string . IsNullOrWhiteSpace ( education . Institution ) & & education . Details . Count = = 0 ? null : education ;
}
/// <summary>
/// Parses the text of a certifications section into certification entries.
/// The content is divided with <c>SplitBlocks</c>; blocks that
/// <c>ParseCertificationBlock</c> rejects (returns <c>null</c> for) are omitted.
/// </summary>
/// <param name="content">Raw text of the section.</param>
/// <returns>The parsed certifications, in block order.</returns>
private static List<StructuredCvCertification> ParseCertifications(string content)
{
    var certifications = new List<StructuredCvCertification>();
    foreach (var block in SplitBlocks(content))
    {
        if (ParseCertificationBlock(block) is { } parsed)
        {
            certifications.Add(parsed);
        }
    }

    return certifications;
}
/// <summary>
/// Parses one certification block: a name line (optionally prefixed with <c>###</c>),
/// followed by metadata lines up to the first bullet, followed by bullet lines that become details.
/// </summary>
/// <param name="block">Text of a single block.</param>
/// <returns>
/// The parsed certification, or <c>null</c> when the block has no lines or yields
/// no name, no issuer and no details.
/// </returns>
private static StructuredCvCertification? ParseCertificationBlock(string block)
{
    var lines = block
        .Replace("\r\n", "\n")
        .Split('\n', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries)
        .ToList();
    if (lines.Count == 0)
    {
        return null;
    }

    var certification = new StructuredCvCertification();
    if (lines[0].StartsWith("###", StringComparison.Ordinal))
    {
        lines[0] = lines[0].TrimStart('#', ' ');
    }

    certification.Name = lines[0].NullIfWhitespace();

    // Everything between the name line and the first bullet is treated as metadata.
    var metadata = lines.Skip(1).TakeWhile(line => !IsBullet(line)).ToList();

    // The first date-like token found on any metadata line becomes the certification date.
    var date = metadata
        .Select(line => Regex.Match(line, @"(?:(?:\w+\s+)?\d{4}|Present|Current)", RegexOptions.IgnoreCase).Value.NullIfWhitespace())
        .FirstOrDefault(value => value is not null);
    certification.Date = date;

    // Metadata may arrive one field per line or as a single "Issuer | Location | Date" line
    // (the profile renderer in this file joins these fields with " | "), so after removing the
    // date each line is also split on '|' to recover the individual fields.
    var metadataFields = metadata
        .Select(line => string.IsNullOrWhiteSpace(date) ? line : line.Replace(date, string.Empty))
        .SelectMany(line => line.Split('|'))
        .Select(field => field.Trim(' ', '|', ',', '-'))
        .Where(field => !string.IsNullOrWhiteSpace(field))
        .ToList();

    if (metadataFields.Count > 0)
    {
        certification.Issuer = metadataFields[0].NullIfWhitespace();
    }

    if (metadataFields.Count > 1)
    {
        certification.Location = metadataFields[1].NullIfWhitespace();
    }

    certification.Details = lines
        .Skip(1)
        .Where(IsBullet)
        .Select(line => line.Trim().TrimStart('-', '•', '*', ' '))
        .Where(line => !string.IsNullOrWhiteSpace(line))
        .ToList();

    // Keep the entry when it carries details even without a name or issuer, matching the
    // rule the education parser and the profile normalisation step apply.
    var isEmpty = string.IsNullOrWhiteSpace(certification.Name)
        && string.IsNullOrWhiteSpace(certification.Issuer)
        && certification.Details.Count == 0;
    return isEmpty ? null : certification;
}
/// <summary>
/// Parses the text of a projects section into project entries.
/// The content is divided with <c>SplitBlocks</c>; blocks that
/// <c>ParseProjectBlock</c> rejects (returns <c>null</c> for) are omitted.
/// </summary>
/// <param name="content">Raw text of the section.</param>
/// <returns>The parsed projects, in block order.</returns>
private static List<StructuredCvProject> ParseProjects(string content)
{
    var projects = new List<StructuredCvProject>();
    foreach (var block in SplitBlocks(content))
    {
        if (ParseProjectBlock(block) is { } parsed)
        {
            projects.Add(parsed);
        }
    }

    return projects;
}
/// <summary>
/// Parses one project block: a name line (optionally prefixed with <c>###</c>), followed by
/// metadata lines up to the first bullet or "Skills:" line, plus bullet lines and "Skills:" lines.
/// </summary>
/// <param name="block">Text of a single block.</param>
/// <returns>
/// The parsed project, or <c>null</c> when the block has no lines or yields
/// no name, no role and no bullets.
/// </returns>
private static StructuredCvProject? ParseProjectBlock(string block)
{
    var lines = block
        .Replace("\r\n", "\n")
        .Split('\n', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries)
        .ToList();
    if (lines.Count == 0)
    {
        return null;
    }

    var project = new StructuredCvProject();
    if (lines[0].StartsWith("###", StringComparison.Ordinal))
    {
        lines[0] = lines[0].TrimStart('#', ' ');
    }

    project.Name = lines[0].NullIfWhitespace();

    var metadata = lines
        .Skip(1)
        .TakeWhile(line => !IsBullet(line) && !line.StartsWith("Skills:", StringComparison.OrdinalIgnoreCase))
        .ToList();

    // Start and end are captured by the match itself. Re-splitting the matched text on the
    // separator class (which contains a space) would cut "Jan 2020 - Mar 2021" into
    // "Jan" / "2020" instead of "Jan 2020" / "Mar 2021".
    var dateMatch = metadata
        .Select(line => Regex.Match(
            line,
            @"(?<start>(?:\w+\s+)?\d{4}|Present|Current)(?:\s*[-– ]\s*(?<end>(?:\w+\s+)?\d{4}|Present|Current))?",
            RegexOptions.IgnoreCase))
        .FirstOrDefault(match => match.Success && !string.IsNullOrWhiteSpace(match.Value));
    var dateValue = dateMatch?.Value;
    if (dateMatch is not null)
    {
        project.Start = dateMatch.Groups["start"].Value.NullIfWhitespace();
        var endGroup = dateMatch.Groups["end"];
        project.End = endGroup.Success ? endGroup.Value.NullIfWhitespace() : null;
    }

    // Metadata may arrive one field per line or as a single "Role | Location | Dates" line
    // (the profile renderer in this file joins these fields with " | "), so after removing the
    // date text each line is also split on '|' to recover the individual fields.
    var metadataFields = metadata
        .Select(line => string.IsNullOrWhiteSpace(dateValue) ? line : line.Replace(dateValue, string.Empty))
        .SelectMany(line => line.Split('|'))
        .Select(field => field.Trim(' ', '|', ',', '-'))
        .Where(field => !string.IsNullOrWhiteSpace(field))
        .ToList();

    if (metadataFields.Count > 0)
    {
        project.Role = metadataFields[0].NullIfWhitespace();
    }

    if (metadataFields.Count > 1)
    {
        project.Location = metadataFields[1].NullIfWhitespace();
    }

    project.Bullets = lines
        .Where(IsBullet)
        .Select(line => line.Trim().TrimStart('-', '•', '*', ' '))
        .Where(line => !string.IsNullOrWhiteSpace(line))
        .ToList();

    // Every "Skills:" line contributes the list that follows its first colon.
    project.Skills = lines
        .Where(line => line.StartsWith("Skills:", StringComparison.OrdinalIgnoreCase))
        .SelectMany(line => SplitList(line[(line.IndexOf(':') + 1)..]))
        .ToList();

    var isEmpty = string.IsNullOrWhiteSpace(project.Name)
        && string.IsNullOrWhiteSpace(project.Role)
        && project.Bullets.Count == 0;
    return isEmpty ? null : project;
}
private static List < string > SplitBlocks ( string content )
{
var normalized = content . Replace ( "\r\n" , "\n" ) . Trim ( ) ;