Files
jobtrackingapp/scripts/prepare-cv-approved-fixtures.py

135 lines
4.1 KiB
Python

#!/usr/bin/env python3
import argparse
import json
import pathlib
import re
import sys
from typing import Any
# Maps each accepted lower-case spelling of a qualification level to the
# canonical label used by normalize_qualification_level. The table is grouped
# by canonical label; the comprehension flattens it into alias -> label.
QUALIFICATION_LEVEL_MAP = {
    alias: canonical
    for canonical, aliases in (
        ("Secondary", ("secondary",)),
        ("Diploma/Certificate", ("diploma/certificate", "diploma", "certificate")),
        ("Bachelor", ("bachelor", "bachelor's", "bachelors")),
        ("Master", ("master", "master's", "masters")),
        ("PhD", ("phd", "doctorate")),
        ("Other", ("other",)),
    )
    for alias in aliases
}
# Top-level fixture keys that normalize_fixture keeps; every other top-level
# key of a source fixture is discarded.
TOP_LEVEL_KEYS = {"version", "contact", "summary"} | {
    "jobs",
    "education",
    "certifications",
    "projects",
    "skills",
    "languages",
    "interests",
    "otherSections",
}
# Single-purpose placeholder patterns. load_relaxed_json no longer applies
# them directly (they also fire inside string values); they are kept so that
# any code referring to these names keeps working.
BARE_QUESTION_RE = re.compile(r'(:\s*)\?(\s*[,}\]])')
BARE_NULL_RE = re.compile(r'(:\s*)Null(\s*[,}\]])')
# First alternative: a complete JSON string literal (escapes included), which
# is matched only so that its contents are skipped over untouched.
# Second alternative: a bare `?` or `Null` value that follows a colon and is
# followed by `,`, `}` or `]`; group 1 holds the colon and its whitespace.
_STRING_OR_PLACEHOLDER_RE = re.compile(
    r'"(?:\\[\s\S]|[^"\\])*"'
    r'|(:\s*)(?:\?|Null)(?=\s*[,}\]])'
)


def _placeholder_to_null(match: re.Match) -> str:
    """Return the replacement text for one _STRING_OR_PLACEHOLDER_RE match."""
    prefix = match.group(1)
    if prefix is None:
        # The string-literal alternative matched: keep it byte-for-byte.
        return match.group(0)
    return prefix + "null"


def load_relaxed_json(path: pathlib.Path) -> Any:
    """Parse a hand-edited JSON file that may contain `?` or `Null` values.

    A bare `?` or `Null` used as an object value (after a colon, before
    `,`, `}` or `]`) is rewritten to `null` before parsing. Text inside
    string literals is left alone, so a value such as "ratio: ?, unknown"
    is not altered.

    Args:
        path: File to read.

    Returns:
        The decoded JSON document.

    Raises:
        json.JSONDecodeError: The text is still not valid JSON after the
            placeholder rewrite.
        OSError: The file cannot be read.
    """
    # utf-8-sig drops a leading byte-order mark, which json.loads rejects.
    # errors="replace" keeps going on undecodable bytes instead of raising.
    raw = path.read_text(encoding="utf-8-sig", errors="replace")
    relaxed = _STRING_OR_PLACEHOLDER_RE.sub(_placeholder_to_null, raw)
    return json.loads(relaxed)
def normalize_qualification_level(value: Any) -> Any:
    """Map a qualification level string onto its canonical label.

    The string is stripped and lower-cased before being looked up in
    QUALIFICATION_LEVEL_MAP. Unrecognised strings are returned exactly as
    given, and non-string values (including None) pass through unchanged.
    """
    if not isinstance(value, str):
        return value
    lookup_key = value.strip().lower()
    canonical = QUALIFICATION_LEVEL_MAP.get(lookup_key)
    if canonical is None:
        return value
    return canonical
def normalize_value(value: Any) -> Any:
    """Rebuild nested dicts and lists without "metadata"/"sections" keys.

    Dicts and lists are reconstructed recursively as new containers; the
    keys "metadata" and "sections" are dropped from every dict at any
    depth. Any other value is returned as-is.
    """
    if isinstance(value, list):
        return [normalize_value(element) for element in value]
    if isinstance(value, dict):
        return {
            name: normalize_value(child)
            for name, child in value.items()
            if name not in {"metadata", "sections"}
        }
    return value
def normalize_fixture(payload: dict[str, Any]) -> dict[str, Any]:
    """Reduce a parsed fixture to the known top-level keys with defaults.

    Only keys listed in TOP_LEVEL_KEYS are kept, each passed through
    normalize_value. Missing keys are filled in: "version" with "1",
    "contact" with an empty dict and every other section with an empty
    list. Each dict inside the "education" list gets its
    "qualificationLevel" replaced by the result of
    normalize_qualification_level (the key is added as None when absent).

    Args:
        payload: The decoded fixture object.

    Returns:
        A new dict; `payload` itself is not modified.
    """
    cleaned = {
        key: normalize_value(value)
        for key, value in payload.items()
        if key in TOP_LEVEL_KEYS
    }
    cleaned.setdefault("version", "1")
    cleaned.setdefault("contact", {})
    for section in (
        "summary",
        "jobs",
        "education",
        "certifications",
        "projects",
        "skills",
        "languages",
        "interests",
        "otherSections",
    ):
        # A new list per key, so the sections never share one object.
        cleaned.setdefault(section, [])
    entries = cleaned["education"]
    # A hand-edited fixture may hold null or a scalar here (the relaxed loader
    # turns `?` into null); iterating that would raise TypeError, so only a
    # real list is walked and anything else is left as it is.
    if isinstance(entries, list):
        for entry in entries:
            if isinstance(entry, dict):
                entry["qualificationLevel"] = normalize_qualification_level(
                    entry.get("qualificationLevel")
                )
    return cleaned
def main() -> int:
    """Clean every *.json fixture in the source directory into the output one.

    Command-line options:
        --source: directory containing the hand-edited JSON fixtures.
        --output: directory that receives the cleaned fixtures (created,
            with parents, if it does not exist).

    Each file is parsed with load_relaxed_json, reduced with
    normalize_fixture and written under the same file name. A file that
    cannot be read, is not valid JSON, or does not contain a JSON object is
    reported on stderr and skipped; the remaining files are still processed.

    Returns:
        1 if the source directory is missing or contains no *.json files,
        otherwise 0 (also when individual files were skipped).
    """
    parser = argparse.ArgumentParser(
        description="Prepare hand-edited CV JSON fixtures for benchmark comparison."
    )
    parser.add_argument(
        "--source",
        default="/home/pi/cvs/jsons",
        help="Directory containing hand-edited JSON fixtures",
    )
    parser.add_argument(
        "--output",
        default="/home/pi/cvs/approved-jsons",
        help="Directory to write cleaned approved fixtures",
    )
    args = parser.parse_args()
    source = pathlib.Path(args.source)
    output = pathlib.Path(args.output)
    if not source.is_dir():
        print(f"Source directory not found: {source}", file=sys.stderr)
        return 1
    output.mkdir(parents=True, exist_ok=True)
    # Sorted so files are processed and reported in a stable order.
    files = sorted(source.glob("*.json"))
    if not files:
        print(f"No JSON files found in {source}", file=sys.stderr)
        return 1
    for path in files:
        try:
            payload = load_relaxed_json(path)
        except json.JSONDecodeError as exc:
            print(
                f"Invalid JSON in {path.name}: line {exc.lineno}, column {exc.colno}: {exc.msg}",
                file=sys.stderr,
            )
            continue
        except OSError as exc:
            # e.g. a directory whose name ends in .json, or a permission
            # problem: skip this entry instead of aborting the whole batch.
            print(f"Cannot read {path.name}: {exc}", file=sys.stderr)
            continue
        if not isinstance(payload, dict):
            print(f"Skipping non-object JSON fixture: {path.name}", file=sys.stderr)
            continue
        normalized = normalize_fixture(payload)
        destination = output / path.name
        destination.write_text(
            json.dumps(normalized, indent=2, ensure_ascii=False) + "\n",
            encoding="utf-8",
        )
        print(f"Prepared {destination}")
    return 0
# Run as a script: use main()'s return value as the process exit status.
if __name__ == "__main__":
    sys.exit(main())