/** A named chunk of CV text together with its word count. */
export type ParsedCvSection = {
  name: string;
  content: string;
  wordCount: number;
};

/** Per-field provenance/review information stored under `metadata.fields`. */
export type StructuredCvFieldMetadata = {
  confidence?: number;
  method?: string;
  sourceSnippet?: string;
  sourcePage?: number;
  sourceBlockId?: string;
  reviewState?: string;
  lastUpdatedAtUtc?: string;
};

/** Profile-level metadata plus a map of per-field metadata keyed by field key. */
export type StructuredCvMetadata = {
  profileVersion?: number;
  appliedExtractionRunId?: number;
  updatedAtUtc?: string;
  fields: Record<string, StructuredCvFieldMetadata>;
};

export type StructuredCvContact = {
  fullName?: string;
  headline?: string;
  email?: string;
  phone?: string;
  location?: string;
  website?: string;
  linkedIn?: string;
};

export type StructuredCvJob = {
  title?: string;
  company?: string;
  location?: string;
  start?: string;
  end?: string;
  isCurrent?: boolean;
  bullets: string[];
  skills: string[];
};

export type StructuredCvEducation = {
  qualification?: string;
  qualificationLevel?: string;
  institution?: string;
  location?: string;
  start?: string;
  end?: string;
  details: string[];
};

export type StructuredCvCertification = {
  name?: string;
  issuer?: string;
  location?: string;
  date?: string;
  details: string[];
};

export type StructuredCvProject = {
  name?: string;
  role?: string;
  location?: string;
  start?: string;
  end?: string;
  bullets: string[];
  skills: string[];
};

export type StructuredCvLanguage = {
  name?: string;
  level?: string;
  notes?: string;
};

export type StructuredCvOtherSection = {
  title?: string;
  items: string[];
};

/** The fully normalized CV profile produced by {@link normalizeStructuredCv}. */
export type StructuredCvProfile = {
  version: string;
  metadata: StructuredCvMetadata;
  contact: StructuredCvContact;
  summary: string[];
  jobs: StructuredCvJob[];
  education: StructuredCvEducation[];
  certifications: StructuredCvCertification[];
  projects: StructuredCvProject[];
  skills: string[];
  languages: StructuredCvLanguage[];
  interests: string[];
  otherSections: StructuredCvOtherSection[];
  sections: ParsedCvSection[];
};

/**
 * Splits text on LF / CRLF line breaks, trims every line and drops the ones
 * that are empty after trimming.
 */
export function splitLines(value: string): string[] {
  return value
    .split(/\r?\n/)
    .map((item) => item.trim())
    .filter(Boolean);
}

/** Joins the given values with a single `\n` between them. */
export function joinLines(values: string[]): string {
  return values.join("\n");
}

/** Returns a fresh profile with version `"1"` and every collection empty. */
export function emptyStructuredCv(): StructuredCvProfile {
  return {
    version: "1",
    metadata: { fields: {} },
    contact: {},
    summary: [],
    jobs: [],
    education: [],
    certifications: [],
    projects: [],
    skills: [],
    languages: [],
    interests: [],
    otherSections: [],
    sections: [],
  };
}

/**
 * Narrows an unknown value to a plain key/value record.
 * Anything that is not a non-null, non-array object yields an empty record,
 * so callers can read properties without further guards.
 */
function asRecord(value: unknown): Record<string, unknown> {
  return value !== null && typeof value === "object" && !Array.isArray(value)
    ? (value as Record<string, unknown>)
    : {};
}

/** Returns the trimmed string, or `undefined` for non-strings and blank strings. */
function normalizeString(value: unknown): string | undefined {
  return typeof value === "string" && value.trim() ? value.trim() : undefined;
}

/**
 * Returns a finite number, or `undefined` when the value carries none.
 *
 * Only real numbers and non-blank numeric strings are accepted. `Number()`
 * is deliberately not applied to arbitrary input because it coerces `null`,
 * `""` and `[]` to `0` (and `true` to `1`), which would turn "missing" into 0.
 */
function normalizeNumber(value: unknown): number | undefined {
  if (typeof value === "number") {
    return Number.isFinite(value) ? value : undefined;
  }
  if (typeof value === "string" && value.trim()) {
    const parsed = Number(value);
    return Number.isFinite(parsed) ? parsed : undefined;
  }
  return undefined;
}

/** Returns the trimmed, non-empty string entries of an array; `[]` for non-arrays. */
function normalizeList(value: unknown): string[] {
  if (!Array.isArray(value)) return [];
  return value
    .map((item: unknown) => (typeof item === "string" ? item.trim() : ""))
    .filter(Boolean);
}

/**
 * Maps every entry of an array through `normalize` after narrowing it to a
 * record; returns `[]` when `value` is not an array.
 */
function mapRecords<T>(
  value: unknown,
  normalize: (item: Record<string, unknown>) => T,
): T[] {
  return Array.isArray(value)
    ? value.map((item: unknown) => normalize(asRecord(item)))
    : [];
}

/**
 * Normalizes raw section entries.
 *
 * - `content` is trimmed; sections whose content ends up empty are dropped.
 * - `name` falls back to `"General"` when missing or blank.
 * - `wordCount` is taken from the entry when it is a usable number, otherwise
 *   it is computed by splitting the trimmed content on whitespace.
 */
function normalizeParsedSections(value: unknown): ParsedCvSection[] {
  return mapRecords(value, (section): ParsedCvSection => {
    const content =
      typeof section.content === "string" ? section.content.trim() : "";
    const name = normalizeString(section.name) ?? "General";
    const computedWordCount = content ? content.split(/\s+/).length : 0;
    const wordCount = normalizeNumber(section.wordCount) ?? computedWordCount;
    return { name, content, wordCount };
  }).filter((section) => section.content.length > 0);
}

/**
 * Returns the non-empty lines of the first section whose lower-cased name is
 * one of `names` (which are expected to be lower-case), or `[]` if none match.
 */
function linesFromSection(sections: ParsedCvSection[], names: string[]): string[] {
  const match = sections.find((section) =>
    names.includes(section.name.toLowerCase()),
  );
  return match ? splitLines(match.content) : [];
}

/**
 * Builds a profile from a bare list of sections (the legacy array payload)
 * using simple text heuristics on well-known section names.
 */
function buildLegacyStructuredCv(sections: ParsedCvSection[]): StructuredCvProfile {
  const summary = linesFromSection(sections, ["professional summary", "summary"]);

  // A skills line may hold several comma-separated skills.
  const skills = linesFromSection(sections, ["skills", "core skills", "technical skills"])
    .flatMap((line) => (line.includes(",") ? line.split(",") : [line]))
    .map((item) => item.trim())
    .filter(Boolean);

  const interests = linesFromSection(sections, ["interests"]);

  // "Name: level" — everything after the first colon is the level.
  const languages = linesFromSection(sections, ["languages"]).map(
    (line): StructuredCvLanguage => {
      const [name, ...rest] = line.split(":");
      return {
        name: normalizeString(name),
        level: rest.join(":").trim() || undefined,
        notes: undefined,
      };
    },
  );

  const contactLines = linesFromSection(sections, ["contact"]);
  const contact: StructuredCvContact = {
    // Two to five capitalised words.
    fullName: contactLines.find((line) =>
      /^[A-Z][A-Za-z'`.-]+(?:\s+[A-Z][A-Za-z'`.-]+){1,4}$/.test(line),
    ),
    email: contactLines.find((line) => line.includes("@")),
    phone: contactLines.find((line) => /\+?\d[\d\s().-]{6,}\d/.test(line)),
    linkedIn: contactLines.find((line) => line.toLowerCase().includes("linkedin")),
    // First line that contains a dot and is not an e-mail line.
    website: contactLines.find((line) => !line.includes("@") && /\./.test(line)),
  };

  return {
    ...emptyStructuredCv(),
    contact,
    summary,
    skills,
    languages,
    interests,
    sections,
  };
}

/**
 * Normalizes the `metadata.fields` map. Non-object (or array) input yields an
 * empty map; each entry is reduced to the known metadata properties.
 */
function normalizeFieldMetadataMap(
  value: unknown,
): Record<string, StructuredCvFieldMetadata> {
  return Object.fromEntries(
    Object.entries(asRecord(value)).map(
      ([key, raw]): [string, StructuredCvFieldMetadata] => {
        const field = asRecord(raw);
        return [
          key,
          {
            confidence: normalizeNumber(field.confidence),
            method: normalizeString(field.method),
            sourceSnippet: normalizeString(field.sourceSnippet),
            sourcePage: normalizeNumber(field.sourcePage),
            sourceBlockId: normalizeString(field.sourceBlockId),
            reviewState: normalizeString(field.reviewState),
            lastUpdatedAtUtc: normalizeString(field.lastUpdatedAtUtc),
          },
        ];
      },
    ),
  );
}

/**
 * Converts an arbitrary (typically JSON-decoded) value into a well-formed
 * {@link StructuredCvProfile}.
 *
 * - An array is treated as the legacy payload: a list of sections from which
 *   a profile is derived heuristically.
 * - Any other value is read as a structured profile object; missing or
 *   malformed parts are replaced by empty/undefined values. Non-objects
 *   (including `null`) produce an otherwise empty profile.
 *
 * @param value The untrusted input.
 * @returns A profile in which every array property is present and every
 *   string is trimmed.
 */
export function normalizeStructuredCv(value: unknown): StructuredCvProfile {
  if (Array.isArray(value)) {
    return buildLegacyStructuredCv(normalizeParsedSections(value));
  }

  const source = asRecord(value);
  const metadata = asRecord(source.metadata);
  const contact = asRecord(source.contact);

  return {
    version: normalizeString(source.version) ?? "1",
    metadata: {
      profileVersion: normalizeNumber(metadata.profileVersion),
      appliedExtractionRunId: normalizeNumber(metadata.appliedExtractionRunId),
      updatedAtUtc: normalizeString(metadata.updatedAtUtc),
      fields: normalizeFieldMetadataMap(metadata.fields),
    },
    contact: {
      fullName: normalizeString(contact.fullName),
      headline: normalizeString(contact.headline),
      email: normalizeString(contact.email),
      phone: normalizeString(contact.phone),
      location: normalizeString(contact.location),
      website: normalizeString(contact.website),
      linkedIn: normalizeString(contact.linkedIn),
    },
    summary: normalizeList(source.summary),
    jobs: mapRecords(source.jobs, (job): StructuredCvJob => ({
      title: normalizeString(job.title),
      company: normalizeString(job.company),
      location: normalizeString(job.location),
      start: normalizeString(job.start),
      end: normalizeString(job.end),
      isCurrent: Boolean(job.isCurrent),
      bullets: normalizeList(job.bullets),
      skills: normalizeList(job.skills),
    })),
    education: mapRecords(source.education, (education): StructuredCvEducation => ({
      qualification: normalizeString(education.qualification),
      qualificationLevel: normalizeString(education.qualificationLevel),
      institution: normalizeString(education.institution),
      location: normalizeString(education.location),
      start: normalizeString(education.start),
      end: normalizeString(education.end),
      details: normalizeList(education.details),
    })),
    certifications: mapRecords(
      source.certifications,
      (certification): StructuredCvCertification => ({
        name: normalizeString(certification.name),
        issuer: normalizeString(certification.issuer),
        location: normalizeString(certification.location),
        date: normalizeString(certification.date),
        details: normalizeList(certification.details),
      }),
    ),
    projects: mapRecords(source.projects, (project): StructuredCvProject => ({
      name: normalizeString(project.name),
      role: normalizeString(project.role),
      location: normalizeString(project.location),
      start: normalizeString(project.start),
      end: normalizeString(project.end),
      bullets: normalizeList(project.bullets),
      skills: normalizeList(project.skills),
    })),
    skills: normalizeList(source.skills),
    languages: mapRecords(source.languages, (language): StructuredCvLanguage => ({
      name: normalizeString(language.name),
      level: normalizeString(language.level),
      notes: normalizeString(language.notes),
    })),
    interests: normalizeList(source.interests),
    otherSections: mapRecords(
      source.otherSections,
      (section): StructuredCvOtherSection => ({
        title: normalizeString(section.title),
        items: normalizeList(section.items),
      }),
    ),
    sections: normalizeParsedSections(source.sections),
  };
}

/**
 * Parses a JSON string into a normalized profile.
 *
 * Best-effort by design: missing, blank or syntactically invalid input yields
 * an empty profile instead of throwing.
 */
export function parseStructuredCvJson(value?: string): StructuredCvProfile {
  if (!value?.trim()) return emptyStructuredCv();
  try {
    return normalizeStructuredCv(JSON.parse(value));
  } catch {
    return emptyStructuredCv();
  }
}

/**
 * Looks up the metadata stored for a field key.
 *
 * Only the map's own entries are consulted, so keys such as `"toString"` or
 * `"constructor"` never resolve to members inherited from `Object.prototype`.
 *
 * @returns The metadata for `key`, or `undefined` when there is none.
 */
export function getStructuredCvFieldMetadata(
  profile: StructuredCvProfile,
  key: string,
): StructuredCvFieldMetadata | undefined {
  const fields = profile.metadata?.fields;
  if (!fields || !Object.prototype.hasOwnProperty.call(fields, key)) {
    return undefined;
  }
  return fields[key];
}