// File-viewer header (extraction residue): 308 lines, 10 KiB, TypeScript
/**
 * One named section of a parsed CV.
 */
export type ParsedCvSection = {
  /** Section heading. */
  name: string;
  /** Text of the section. */
  content: string;
  /** Word count associated with `content`. */
  wordCount: number;
};

/**
 * Per-field metadata attached to a structured CV profile.
 *
 * Every property is optional. The value ranges and vocabularies (for example
 * the scale of `confidence` or the allowed `reviewState` values) are not
 * defined in this file and must be confirmed against the producer.
 */
export type StructuredCvFieldMetadata = {
  /** Numeric confidence score for the field value. */
  confidence?: number;
  /** Name of the method that produced the value. */
  method?: string;
  /** Snippet of source text associated with the value. */
  sourceSnippet?: string;
  /** Page number associated with the value. */
  sourcePage?: number;
  /** Identifier of the source block associated with the value. */
  sourceBlockId?: string;
  /** Review state of the value. */
  reviewState?: string;
  /** Timestamp of the last update (presumably UTC, per the name — confirm format). */
  lastUpdatedAtUtc?: string;
};

/**
 * Profile-level metadata plus the per-field metadata map.
 */
export type StructuredCvMetadata = {
  /** Numeric version of the profile. */
  profileVersion?: number;
  /** Identifier of the extraction run that was applied to the profile. */
  appliedExtractionRunId?: number;
  /** Timestamp of the last profile update (presumably UTC, per the name — confirm format). */
  updatedAtUtc?: string;
  /** Per-field metadata, keyed by a field key string. */
  fields: Record<string, StructuredCvFieldMetadata>;
};

/**
 * Contact details of the CV owner. Every property is optional.
 */
export type StructuredCvContact = {
  /** Full name of the person. */
  fullName?: string;
  /** Short professional headline. */
  headline?: string;
  /** E-mail address. */
  email?: string;
  /** Phone number. */
  phone?: string;
  /** Location of the person. */
  location?: string;
  /** Personal or professional website. */
  website?: string;
  /** LinkedIn reference. */
  linkedIn?: string;
};

/**
 * One employment entry of a structured CV.
 *
 * `start` and `end` are free-form strings; this file does not constrain
 * their format.
 */
export type StructuredCvJob = {
  /** Job title. */
  title?: string;
  /** Employer name. */
  company?: string;
  /** Location of the job. */
  location?: string;
  /** Start of the employment. */
  start?: string;
  /** End of the employment. */
  end?: string;
  /** Whether this is the current job. */
  isCurrent?: boolean;
  /** Bullet points describing the job. */
  bullets: string[];
  /** Skills associated with the job. */
  skills: string[];
};

/**
 * One education entry of a structured CV.
 *
 * `start` and `end` are free-form strings; this file does not constrain
 * their format.
 */
export type StructuredCvEducation = {
  /** Name of the qualification. */
  qualification?: string;
  /** Level of the qualification. */
  qualificationLevel?: string;
  /** Institution that awarded the qualification. */
  institution?: string;
  /** Location of the institution or study. */
  location?: string;
  /** Start of the study period. */
  start?: string;
  /** End of the study period. */
  end?: string;
  /** Additional detail lines. */
  details: string[];
};

/**
 * One certification entry of a structured CV.
 */
export type StructuredCvCertification = {
  /** Name of the certification. */
  name?: string;
  /** Issuing organisation. */
  issuer?: string;
  /** Location associated with the certification. */
  location?: string;
  /** Date of the certification (free-form string). */
  date?: string;
  /** Additional detail lines. */
  details: string[];
};

/**
 * One project entry of a structured CV.
 *
 * `start` and `end` are free-form strings; this file does not constrain
 * their format.
 */
export type StructuredCvProject = {
  /** Project name. */
  name?: string;
  /** Role held on the project. */
  role?: string;
  /** Location of the project. */
  location?: string;
  /** Start of the project. */
  start?: string;
  /** End of the project. */
  end?: string;
  /** Bullet points describing the project. */
  bullets: string[];
  /** Skills associated with the project. */
  skills: string[];
};

/**
 * One spoken/written language entry of a structured CV.
 */
export type StructuredCvLanguage = {
  /** Name of the language. */
  name?: string;
  /** Proficiency level (free-form string). */
  level?: string;
  /** Free-form notes. */
  notes?: string;
};

/**
 * A CV section that does not map onto one of the dedicated profile fields.
 */
export type StructuredCvOtherSection = {
  /** Section title. */
  title?: string;
  /** Lines/items of the section. */
  items: string[];
};

/**
 * Complete structured representation of a CV.
 *
 * All collection properties are required, so an "empty" profile carries
 * empty arrays rather than missing keys.
 */
export type StructuredCvProfile = {
  /** Schema version of the profile, as a string. */
  version: string;
  /** Profile-level and per-field metadata. */
  metadata: StructuredCvMetadata;
  /** Contact details. */
  contact: StructuredCvContact;
  /** Summary lines. */
  summary: string[];
  /** Employment entries. */
  jobs: StructuredCvJob[];
  /** Education entries. */
  education: StructuredCvEducation[];
  /** Certification entries. */
  certifications: StructuredCvCertification[];
  /** Project entries. */
  projects: StructuredCvProject[];
  /** Skill names. */
  skills: string[];
  /** Language entries. */
  languages: StructuredCvLanguage[];
  /** Interest lines. */
  interests: string[];
  /** Sections without a dedicated field. */
  otherSections: StructuredCvOtherSection[];
  /** Parsed sections the profile carries alongside the structured fields. */
  sections: ParsedCvSection[];
};

/**
 * Splits text into its non-empty lines.
 *
 * Recognises `\r\n`, `\n` and a lone `\r` as line breaks, trims every line,
 * and drops lines that are empty after trimming.
 *
 * @param value Text to split.
 * @returns The trimmed, non-empty lines in their original order.
 */
export function splitLines(value: string): string[] {
  const lines: string[] = [];
  // `\r\n` is listed first so a Windows line break is consumed as one separator.
  for (const rawLine of value.split(/\r\n|\r|\n/)) {
    const line = rawLine.trim();
    if (line) lines.push(line);
  }
  return lines;
}

/**
 * Joins lines into a single string separated by `\n`.
 *
 * @param values Lines to join; the array is not modified.
 * @returns The joined text (empty string for an empty array).
 */
export function joinLines(values: readonly string[]): string {
  return values.join("\n");
}

/**
 * Creates a blank profile: version "1", no metadata fields, no contact
 * details, and every collection empty.
 *
 * A fresh object with fresh arrays is built on every call, so callers may
 * mutate the result without affecting other profiles.
 *
 * @returns A new, empty structured CV profile.
 */
export function emptyStructuredCv(): StructuredCvProfile {
  return {
    version: "1",
    metadata: { fields: {} },
    contact: {},
    summary: [],
    jobs: [],
    education: [],
    certifications: [],
    projects: [],
    skills: [],
    languages: [],
    interests: [],
    otherSections: [],
    sections: [],
  };
}

/**
 * Normalizes an untrusted value to a trimmed, non-empty string.
 *
 * @param value Any value.
 * @returns The trimmed string, or `undefined` when `value` is not a string
 *   or is empty after trimming.
 */
function normalizeString(value: unknown): string | undefined {
  if (typeof value !== "string") return undefined;
  const trimmed = value.trim();
  return trimmed ? trimmed : undefined;
}

/**
 * Normalizes an untrusted value to a list of trimmed, non-empty strings.
 *
 * @param value Any value.
 * @returns The string elements of `value`, trimmed, with non-string and
 *   blank elements dropped; `[]` when `value` is not an array.
 */
function normalizeList(value: unknown): string[] {
  if (!Array.isArray(value)) return [];
  const items: string[] = [];
  for (const item of value as unknown[]) {
    if (typeof item !== "string") continue;
    const trimmed = item.trim();
    if (trimmed) items.push(trimmed);
  }
  return items;
}

/**
 * Normalizes an untrusted value to a list of parsed CV sections.
 *
 * For every array element:
 * - `content` is the trimmed string content; elements without content are dropped;
 * - `name` is the trimmed name, or "General" when missing or blank;
 * - `wordCount` is the supplied count when it is a finite number or a
 *   non-blank numeric string, otherwise the number of whitespace-separated
 *   words in `content`.
 *
 * @param value Any value.
 * @returns The normalized sections; `[]` when `value` is not an array.
 */
function normalizeParsedSections(value: unknown): ParsedCvSection[] {
  if (!Array.isArray(value)) return [];

  const sections: ParsedCvSection[] = [];
  for (const raw of value as unknown[]) {
    const record = (raw !== null && typeof raw === "object" ? raw : {}) as Record<string, unknown>;

    const content = typeof record.content === "string" ? record.content.trim() : "";
    if (!content) continue;

    const trimmedName = typeof record.name === "string" ? record.name.trim() : "";
    const name = trimmedName || "General";

    // Only real numbers and non-blank numeric strings count as a supplied
    // value: Number(null), Number("") and Number(false) are all 0, which
    // would otherwise masquerade as a word count of zero.
    const rawCount = record.wordCount;
    const suppliedCount =
      typeof rawCount === "number"
        ? rawCount
        : typeof rawCount === "string" && rawCount.trim()
          ? Number(rawCount)
          : Number.NaN;
    const wordCount = Number.isFinite(suppliedCount) ? suppliedCount : content.split(/\s+/).length;

    sections.push({ name, content, wordCount });
  }
  return sections;
}

/**
 * Returns the lines of the first section whose name matches one of `names`.
 *
 * Section names are lower-cased before comparison while `names` are used as
 * given, so callers must pass lower-case names. "First" refers to the order
 * of `sections`, not the order of `names`.
 *
 * @param sections Sections to search.
 * @param names Lower-case section names to accept.
 * @returns The matching section's content split via `splitLines`, or `[]`
 *   when no section matches.
 */
function linesFromSection(sections: ParsedCvSection[], names: string[]): string[] {
  const match = sections.find((section) => names.includes(section.name.toLowerCase()));
  return match ? splitLines(match.content) : [];
}

/**
 * Builds a structured profile from a bare list of parsed sections by
 * applying simple text heuristics to well-known section names.
 *
 * - summary: lines of the "professional summary" / "summary" section;
 * - skills: lines of the "skills" / "core skills" / "technical skills"
 *   section, additionally split on commas;
 * - interests: lines of the "interests" section;
 * - languages: lines of the "languages" section, read as `name: level`;
 * - contact: picked line-by-line from the "contact" section (see below).
 *
 * Every other profile field keeps the value from `emptyStructuredCv()`.
 *
 * @param sections Already-normalized sections; stored on the result as-is.
 * @returns The derived profile.
 */
function buildLegacyStructuredCv(sections: ParsedCvSection[]): StructuredCvProfile {
  const summary = linesFromSection(sections, ["professional summary", "summary"]);

  // split(",") returns the whole line as a single item when there is no comma.
  const skills = linesFromSection(sections, ["skills", "core skills", "technical skills"])
    .flatMap((line) => line.split(","))
    .map((item) => item.trim())
    .filter(Boolean);

  const interests = linesFromSection(sections, ["interests"]);

  const languages = linesFromSection(sections, ["languages"]).map((line) => {
    // Everything after the first ":" is the level, so levels may contain colons.
    const [name, ...rest] = line.split(":");
    return {
      name: name?.trim() || undefined,
      level: rest.join(":").trim() || undefined,
      notes: undefined,
    };
  });

  const contactLines = linesFromSection(sections, ["contact"]);
  // Two to five capitalised words made of letters, apostrophes, dots and hyphens.
  const fullName = contactLines.find((line) => /^[A-Z][A-Za-z'`.-]+(?:\s+[A-Z][A-Za-z'`.-]+){1,4}$/.test(line));
  const email = contactLines.find((line) => line.includes("@"));
  const phone = contactLines.find((line) => /\+?\d[\d\s().-]{6,}\d/.test(line));
  const linkedIn = contactLines.find((line) => line.toLowerCase().includes("linkedin"));

  // A website is the first dotted, non-e-mail line that has not already been
  // recognised as something else; otherwise a name with an initial
  // ("John A. Smith"), a dotted phone number or the LinkedIn URL would be
  // reported as the website.
  const claimedLines = new Set<string | undefined>([fullName, phone, linkedIn]);
  const website = contactLines.find(
    (line) => !line.includes("@") && line.includes(".") && !claimedLines.has(line),
  );

  const contact: StructuredCvContact = { fullName, email, phone, linkedIn, website };

  return {
    ...emptyStructuredCv(),
    contact,
    summary,
    skills,
    languages,
    interests,
    sections,
  };
}

/** Returns `value` as a key/value record, or an empty record when it is not a non-null object. */
function asRecord(value: unknown): Record<string, unknown> {
  return value !== null && typeof value === "object" ? (value as Record<string, unknown>) : {};
}

/** Projects every element of `value` (viewed as a record); yields `[]` when `value` is not an array. */
function mapRecords<T>(value: unknown, project: (record: Record<string, unknown>) => T): T[] {
  return Array.isArray(value) ? value.map((item: unknown) => project(asRecord(item))) : [];
}

/**
 * Accepts finite numbers and non-blank numeric strings; anything else is `undefined`.
 * `Number()` is deliberately not applied to arbitrary values, because
 * Number(null), Number("") and Number(false) are all 0.
 */
function normalizeOptionalNumber(value: unknown): number | undefined {
  const candidate =
    typeof value === "number"
      ? value
      : typeof value === "string" && value.trim()
        ? Number(value)
        : Number.NaN;
  return Number.isFinite(candidate) ? candidate : undefined;
}

/**
 * Normalizes an untrusted value (typically parsed JSON) into a complete
 * structured CV profile.
 *
 * - An array is treated as a bare list of parsed sections and converted with
 *   `buildLegacyStructuredCv`.
 * - Any other value is read as a profile object: strings are trimmed (blank
 *   becomes `undefined`), string lists drop non-string and blank items,
 *   numbers must be finite, and every missing collection becomes `[]`.
 *   `version` defaults to "1".
 *
 * @param value Any value.
 * @returns A profile in which every required property is present.
 */
export function normalizeStructuredCv(value: unknown): StructuredCvProfile {
  if (Array.isArray(value)) {
    return buildLegacyStructuredCv(normalizeParsedSections(value));
  }

  const source = asRecord(value);
  const metadata = asRecord(source.metadata);
  const contact = asRecord(source.contact);

  // Only a plain (non-array) object is a valid field map; an array would be
  // enumerated by index and produce meaningless keys.
  const rawFields = metadata.fields;
  const fields =
    rawFields !== null && typeof rawFields === "object" && !Array.isArray(rawFields)
      ? Object.fromEntries(
          Object.entries(rawFields as Record<string, unknown>).map(([key, rawField]) => {
            const field = asRecord(rawField);
            return [
              key,
              {
                confidence: normalizeOptionalNumber(field.confidence),
                method: normalizeString(field.method),
                sourceSnippet: normalizeString(field.sourceSnippet),
                sourcePage: normalizeOptionalNumber(field.sourcePage),
                sourceBlockId: normalizeString(field.sourceBlockId),
                reviewState: normalizeString(field.reviewState),
                lastUpdatedAtUtc: normalizeString(field.lastUpdatedAtUtc),
              },
            ];
          }),
        )
      : {};

  return {
    version: normalizeString(source.version) ?? "1",
    metadata: {
      profileVersion: normalizeOptionalNumber(metadata.profileVersion),
      appliedExtractionRunId: normalizeOptionalNumber(metadata.appliedExtractionRunId),
      updatedAtUtc: normalizeString(metadata.updatedAtUtc),
      fields,
    },
    contact: {
      fullName: normalizeString(contact.fullName),
      headline: normalizeString(contact.headline),
      email: normalizeString(contact.email),
      phone: normalizeString(contact.phone),
      location: normalizeString(contact.location),
      website: normalizeString(contact.website),
      linkedIn: normalizeString(contact.linkedIn),
    },
    summary: normalizeList(source.summary),
    jobs: mapRecords(source.jobs, (job) => ({
      title: normalizeString(job.title),
      company: normalizeString(job.company),
      location: normalizeString(job.location),
      start: normalizeString(job.start),
      end: normalizeString(job.end),
      // Plain truthiness, so a missing flag becomes false.
      isCurrent: Boolean(job.isCurrent),
      bullets: normalizeList(job.bullets),
      skills: normalizeList(job.skills),
    })),
    education: mapRecords(source.education, (education) => ({
      qualification: normalizeString(education.qualification),
      qualificationLevel: normalizeString(education.qualificationLevel),
      institution: normalizeString(education.institution),
      location: normalizeString(education.location),
      start: normalizeString(education.start),
      end: normalizeString(education.end),
      details: normalizeList(education.details),
    })),
    certifications: mapRecords(source.certifications, (certification) => ({
      name: normalizeString(certification.name),
      issuer: normalizeString(certification.issuer),
      location: normalizeString(certification.location),
      date: normalizeString(certification.date),
      details: normalizeList(certification.details),
    })),
    projects: mapRecords(source.projects, (project) => ({
      name: normalizeString(project.name),
      role: normalizeString(project.role),
      location: normalizeString(project.location),
      start: normalizeString(project.start),
      end: normalizeString(project.end),
      bullets: normalizeList(project.bullets),
      skills: normalizeList(project.skills),
    })),
    skills: normalizeList(source.skills),
    languages: mapRecords(source.languages, (language) => ({
      name: normalizeString(language.name),
      level: normalizeString(language.level),
      notes: normalizeString(language.notes),
    })),
    interests: normalizeList(source.interests),
    otherSections: mapRecords(source.otherSections, (section) => ({
      title: normalizeString(section.title),
      items: normalizeList(section.items),
    })),
    sections: normalizeParsedSections(source.sections),
  };
}

/**
 * Parses a JSON string into a normalized structured CV profile.
 *
 * This is a best-effort reader: a missing, `null` or blank input, and any
 * input that is not valid JSON, all yield an empty profile instead of
 * throwing.
 *
 * @param value JSON text of a stored profile, if any.
 * @returns The normalized profile, or `emptyStructuredCv()` when there is
 *   nothing usable to parse.
 */
export function parseStructuredCvJson(value?: string | null): StructuredCvProfile {
  if (!value?.trim()) return emptyStructuredCv();

  try {
    const parsed: unknown = JSON.parse(value);
    return normalizeStructuredCv(parsed);
  } catch {
    // Malformed JSON is deliberately treated the same as "no profile stored".
    return emptyStructuredCv();
  }
}

/**
 * Looks up the metadata recorded for a single profile field.
 *
 * Only own properties of the field map are considered, so keys that happen
 * to match inherited object members (for example "constructor" or
 * "toString") are reported as absent.
 *
 * @param profile Profile to read from; a missing `metadata` or `fields` is tolerated.
 * @param key Field key to look up.
 * @returns The field's metadata, or `undefined` when none is recorded.
 */
export function getStructuredCvFieldMetadata(profile: StructuredCvProfile, key: string): StructuredCvFieldMetadata | undefined {
  const fields = profile.metadata?.fields;
  if (!fields || !Object.prototype.hasOwnProperty.call(fields, key)) return undefined;
  return fields[key];
}