refactor, security updates, cv extraction upgrades

2026-04-11 01:34:32 +02:00
parent 806b200ac5
commit 27fd70a2d7
59 changed files with 6817 additions and 1561 deletions
@@ -27,6 +27,17 @@ IMAGE_EXTENSIONS = {".png", ".jpg", ".jpeg", ".webp"}
 OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "http://127.0.0.1:11434").rstrip("/")
 OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "")
 SKIP_MODEL_LOAD = os.getenv("AI_SERVICE_SKIP_MODEL_LOAD", "") == "1"
+EAGER_MODEL_LOAD = os.getenv("AI_SERVICE_EAGER_MODEL_LOAD", "") == "1"  # opt-in: only the exact value "1" enables loading at import time
+
+
+tokenizer = None  # placeholder; assigned by _ensure_runtime_loaded() from _load_runtime()
+model = None  # placeholder; assigned by _ensure_runtime_loaded() from _load_runtime()
+device = torch.device("cpu")  # CPU default until _load_runtime() reports the real device
+GPU_AVAILABLE = False  # overwritten on a successful load
+GPU_NAME = None  # overwritten on a successful load
+MODEL_LOAD_ERROR = "Model loading is disabled by AI_SERVICE_SKIP_MODEL_LOAD." if SKIP_MODEL_LOAD else None  # reason the model is unavailable, or None
+MODEL_LOADED = False  # set to True by _ensure_runtime_loaded() after _load_runtime() succeeds
+MODEL_DISABLED = SKIP_MODEL_LOAD  # when True, _ensure_runtime_loaded() returns False without attempting a load


 def _load_runtime():
@@ -40,10 +51,31 @@ def _load_runtime():
    return tokenizer, model, device, has_cuda, gpu_name


-if SKIP_MODEL_LOAD:
-    tokenizer, model, device, GPU_AVAILABLE, GPU_NAME = None, None, torch.device("cpu"), False, None
-else:
-    tokenizer, model, device, GPU_AVAILABLE, GPU_NAME = _load_runtime()
+def _ensure_runtime_loaded():
+    """Load the summarizer runtime on first use and report whether it is usable.
+
+    Populates the module-level ``tokenizer``, ``model``, ``device``,
+    ``GPU_AVAILABLE`` and ``GPU_NAME`` from ``_load_runtime()`` and keeps
+    ``MODEL_LOADED`` / ``MODEL_LOAD_ERROR`` in sync with the outcome.
+
+    Returns:
+        True when the tokenizer and model are available. False when loading
+        is disabled or the load attempt failed; the reason is stored in
+        ``MODEL_LOAD_ERROR``. A failed load is attempted again on the next
+        call because ``MODEL_LOADED`` stays False.
+    """
+    import logging
+
+    global tokenizer, model, device, GPU_AVAILABLE, GPU_NAME, MODEL_LOAD_ERROR, MODEL_LOADED
+    if MODEL_DISABLED:
+        MODEL_LOAD_ERROR = "Model loading is disabled by AI_SERVICE_SKIP_MODEL_LOAD."
+        return False
+    if MODEL_LOADED and tokenizer is not None and model is not None:
+        return True
+    # NOTE(review): nothing here serializes concurrent first calls; if callers
+    # can run on several threads, each may invoke _load_runtime() - confirm.
+    try:
+        # Only the load itself is guarded so unrelated errors are not masked.
+        tokenizer, model, device, GPU_AVAILABLE, GPU_NAME = _load_runtime()
+    except Exception as exc:
+        # Broad catch is deliberate: any load failure must degrade the service
+        # instead of crashing it, but the traceback is kept in the log.
+        logging.getLogger(__name__).exception("Summarizer model failed to load.")
+        tokenizer, model = None, None
+        device = torch.device("cpu")
+        GPU_AVAILABLE = False
+        GPU_NAME = None
+        MODEL_LOADED = False
+        # Some exceptions stringify to ""; fall back to the class name so the
+        # recorded error is never empty while the model is unavailable.
+        MODEL_LOAD_ERROR = str(exc) or type(exc).__name__
+        return False
+    MODEL_LOAD_ERROR = None
+    MODEL_LOADED = True
+    return True
+
+
+# Optional warm-up: load the model while the module is imported so the first
+# summarize call does not pay the load cost. Skipped when loading is disabled.
+if not SKIP_MODEL_LOAD and EAGER_MODEL_LOAD:
+    _ensure_runtime_loaded()
+
 cache = TTLCache(maxsize=1024, ttl=60 * 60)  # at most 1024 entries; ttl presumably in seconds (one hour) - confirm


@@ -54,6 +86,10 @@ class SummarizeRequest(BaseModel):
    top_skills: int = Field(default=8, ge=3, le=12)


+# Request body for POST /cv/normalize: the raw CV text, 1 to 50000 characters.
+class CvNormalizeRequest(BaseModel):
+    text: str = Field(max_length=50000, min_length=1)
+
+
 class CvClassifyBlockRequest(BaseModel):
    block: str = Field(min_length=1, max_length=6000)

@@ -127,6 +163,10 @@ async def health():
        "gpu_name": GPU_NAME,
        "ocr_available": True,
        "ocr_languages": OCR_LANGUAGES,
+        "model_loaded": MODEL_LOADED,
+        "model_disabled": MODEL_DISABLED,
+        "summarize_available": MODEL_LOADED and not MODEL_DISABLED,
+        "model_load_error": MODEL_LOAD_ERROR,
        **_ollama_status(),
    }

@@ -324,8 +364,8 @@ def _role_focused_excerpt(text: str) -> dict:


 def _model_summarize(text: str, max_length: int, min_length: int) -> str:
-    if tokenizer is None or model is None:
-        raise HTTPException(status_code=503, detail="Summarizer model is not loaded.")
+    if not _ensure_runtime_loaded() or tokenizer is None or model is None:
+        raise HTTPException(status_code=503, detail=MODEL_LOAD_ERROR or "Summarizer model is not loaded.")
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=1024)
    input_ids = inputs.input_ids.to(device)
    attention_mask = inputs.attention_mask.to(device) if hasattr(inputs, "attention_mask") else None
@@ -363,7 +403,7 @@ def _ollama_generate_json(prompt: str):
    )

    try:
-        with urllib_request.urlopen(req, timeout=30) as response:
+        with urllib_request.urlopen(req, timeout=120) as response:
            body = json.loads(response.read().decode("utf-8"))
    except HTTPError as ex:
        raise HTTPException(status_code=502, detail=f"Ollama request failed with {ex.code}.")
@@ -384,6 +424,64 @@ def _ollama_generate_json(prompt: str):
        raise HTTPException(status_code=502, detail="Ollama did not return valid JSON.")


+@app.post("/cv/normalize")
+async def normalize_cv(req: CvNormalizeRequest):
+    """Normalize messy CV text into parser-friendly master-CV text.
+
+    Returns a JSON object with ``confidence``, ``reason`` and
+    ``normalized_text`` taken from the model reply (keys the model omitted
+    are returned as null). Responds with 422 when the submitted text contains
+    only whitespace and with 502 when the model reply is not a JSON object.
+    """
+    # min_length=1 on the request model still admits whitespace-only input;
+    # reject it here so the model is never asked to normalize an empty CV.
+    cv_text = req.text.strip()
+    if not cv_text:
+        raise HTTPException(status_code=422, detail="CV text must not be empty.")
+
+    prompt = f"""
+You normalize messy CV text into parser-friendly master-CV text.
+Return ONLY valid JSON with this exact shape:
+{{
+  "confidence": 0.0,
+  "reason": "short reason",
+  "normalized_text": "string"
+}}
+
+Rules for normalized_text:
+- Preserve facts only. Do not invent.
+- Use markdown section headings exactly like these when data exists:
+  # Contact
+  # Professional Summary
+  # Work Experience
+  # Education
+  # Skills
+  # Languages
+  # Interests
+- Under # Contact, put one plain value per line, no labels unless unavoidable:
+  Full name line
+  email line
+  phone line
+  website line
+  location line
+- Under # Professional Summary, write 1-3 plain sentences or bullet lines.
+- Preserve explicitly mentioned technologies, tools, and methods as skills when they appear in the source.
+- Never output helper words like "line", "value", "field", or "item".
+- Under # Work Experience, for each job use this exact shape:
+  Job title only
+  Company, Location
+  2019 - Present
+  - bullet
+  - bullet
+- Under # Education, for each entry use this exact shape:
+  Qualification line
+  Institution, Location line
+  2016 - 2019 line
+  - detail
+- Under # Skills and # Languages, use one bullet per item.
+- Remove OCR/layout noise.
+- Do not output placeholders like Not specified.
+- If uncertain, omit the field/line rather than invent.
+
+CV text:
+{cv_text}
+""".strip()
+
+    # NOTE(review): _ollama_generate_json issues a blocking urlopen (120 s
+    # timeout) and nothing is awaited here, so the event loop appears to be
+    # held for the whole call - consider offloading it to a worker thread.
+    parsed = _ollama_generate_json(prompt)
+    # Valid JSON is not necessarily an object (it may be a list or a scalar);
+    # without this check .get() would surface as an unhandled 500.
+    if not isinstance(parsed, dict):
+        raise HTTPException(status_code=502, detail="Ollama did not return a JSON object.")
+    return {
+        "confidence": parsed.get("confidence"),
+        "reason": parsed.get("reason"),
+        "normalized_text": parsed.get("normalized_text"),
+    }
+
+
@app.post("/cv/classify-block")
 async def classify_cv_block(req: CvClassifyBlockRequest):
    prompt = f"""