refactor, security updates, cv extraction upgrades

This commit is contained in:
2026-04-11 01:34:32 +02:00
parent 806b200ac5
commit 27fd70a2d7
59 changed files with 6817 additions and 1561 deletions
+105 -7
View File
@@ -27,6 +27,17 @@ IMAGE_EXTENSIONS = {".png", ".jpg", ".jpeg", ".webp"}
# Base URL of the Ollama server; defaults to the local instance and has any
# trailing "/" removed.
OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "http://127.0.0.1:11434").rstrip("/")
# Ollama model name; an empty string when the variable is unset.
OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "")
# Both switches are on only when their variable is exactly "1".
SKIP_MODEL_LOAD = os.getenv("AI_SERVICE_SKIP_MODEL_LOAD", "") == "1"
EAGER_MODEL_LOAD = os.getenv("AI_SERVICE_EAGER_MODEL_LOAD", "") == "1"
# Summarizer runtime state. These start as "nothing loaded, CPU only"
# placeholders and are overwritten by _ensure_runtime_loaded().
tokenizer = None
model = None
device = torch.device("cpu")
GPU_AVAILABLE = False
GPU_NAME = None
# Message of the last load failure, or None; pre-filled when loading is disabled.
MODEL_LOAD_ERROR = "Model loading is disabled by AI_SERVICE_SKIP_MODEL_LOAD." if SKIP_MODEL_LOAD else None
# True once a load attempt has succeeded.
MODEL_LOADED = False
# Mirrors SKIP_MODEL_LOAD; checked by _ensure_runtime_loaded() before any load.
MODEL_DISABLED = SKIP_MODEL_LOAD
def _load_runtime():
@@ -40,10 +51,31 @@ def _load_runtime():
return tokenizer, model, device, has_cuda, gpu_name
# Import-time load: fills the runtime globals immediately unless loading is
# disabled by SKIP_MODEL_LOAD.
# NOTE(review): this assigns the same globals as the module-level defaults and
# competes with the lazy _ensure_runtime_loaded() path; it looks like the
# pre-change side of the diff hunk -- confirm it is gone from the final file.
if SKIP_MODEL_LOAD:
    tokenizer, model, device, GPU_AVAILABLE, GPU_NAME = None, None, torch.device("cpu"), False, None
else:
    tokenizer, model, device, GPU_AVAILABLE, GPU_NAME = _load_runtime()
def _ensure_runtime_loaded():
    """Load the summarizer runtime on first use and report whether it is usable.

    Updates the module-level runtime state (``tokenizer``, ``model``,
    ``device``, ``GPU_AVAILABLE``, ``GPU_NAME``, ``MODEL_LOADED``,
    ``MODEL_LOAD_ERROR``) as a side effect.

    Returns:
        True when a tokenizer and model are available, False when loading is
        disabled or the load attempt failed. On False, ``MODEL_LOAD_ERROR``
        holds a non-empty explanation.
    """
    global tokenizer, model, device, GPU_AVAILABLE, GPU_NAME, MODEL_LOAD_ERROR, MODEL_LOADED

    if MODEL_DISABLED:
        MODEL_LOAD_ERROR = "Model loading is disabled by AI_SERVICE_SKIP_MODEL_LOAD."
        return False

    if MODEL_LOADED and tokenizer is not None and model is not None:
        return True

    try:
        tokenizer, model, device, GPU_AVAILABLE, GPU_NAME = _load_runtime()
    except Exception as exc:
        # Deliberately broad: any load failure is turned into state that the
        # caller can report, instead of propagating out of the request.
        tokenizer, model = None, None
        device = torch.device("cpu")
        GPU_AVAILABLE = False
        GPU_NAME = None
        MODEL_LOADED = False
        # str(exc) is "" for exceptions raised without a message; fall back to
        # the class name so the recorded error is never blank.
        MODEL_LOAD_ERROR = str(exc) or type(exc).__name__
        return False

    MODEL_LOAD_ERROR = None
    MODEL_LOADED = True
    return True
# Optional warm-up: load the model at import time only when explicitly
# requested and loading is not disabled; otherwise the first call to
# _ensure_runtime_loaded() does it.
if EAGER_MODEL_LOAD and not SKIP_MODEL_LOAD:
    _ensure_runtime_loaded()
# Shared cache capped at 1024 entries with ttl=3600.
# NOTE(review): presumably cachetools.TTLCache with the ttl in seconds (one
# hour) -- the import is outside this view, confirm.
cache = TTLCache(maxsize=1024, ttl=60 * 60)
@@ -54,6 +86,10 @@ class SummarizeRequest(BaseModel):
top_skills: int = Field(default=8, ge=3, le=12)
# Request body for POST /cv/normalize.
class CvNormalizeRequest(BaseModel):
    # Raw CV text to normalize; between 1 and 50000 characters.
    text: str = Field(min_length=1, max_length=50000)
# Request body for POST /cv/classify-block.
class CvClassifyBlockRequest(BaseModel):
    # A single block of CV text to classify; between 1 and 6000 characters.
    block: str = Field(min_length=1, max_length=6000)
@@ -127,6 +163,10 @@ async def health():
"gpu_name": GPU_NAME,
"ocr_available": True,
"ocr_languages": OCR_LANGUAGES,
"model_loaded": MODEL_LOADED,
"model_disabled": MODEL_DISABLED,
"summarize_available": MODEL_LOADED and not MODEL_DISABLED,
"model_load_error": MODEL_LOAD_ERROR,
**_ollama_status(),
}
@@ -324,8 +364,8 @@ def _role_focused_excerpt(text: str) -> dict:
def _model_summarize(text: str, max_length: int, min_length: int) -> str:
if tokenizer is None or model is None:
raise HTTPException(status_code=503, detail="Summarizer model is not loaded.")
if not _ensure_runtime_loaded() or tokenizer is None or model is None:
raise HTTPException(status_code=503, detail=MODEL_LOAD_ERROR or "Summarizer model is not loaded.")
inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=1024)
input_ids = inputs.input_ids.to(device)
attention_mask = inputs.attention_mask.to(device) if hasattr(inputs, "attention_mask") else None
@@ -363,7 +403,7 @@ def _ollama_generate_json(prompt: str):
)
try:
with urllib_request.urlopen(req, timeout=30) as response:
with urllib_request.urlopen(req, timeout=120) as response:
body = json.loads(response.read().decode("utf-8"))
except HTTPError as ex:
raise HTTPException(status_code=502, detail=f"Ollama request failed with {ex.code}.")
@@ -384,6 +424,64 @@ def _ollama_generate_json(prompt: str):
raise HTTPException(status_code=502, detail="Ollama did not return valid JSON.")
@app.post("/cv/normalize")
async def normalize_cv(req: CvNormalizeRequest):
    """Ask Ollama to rewrite raw CV text into sectioned, parser-friendly text.

    Args:
        req: Request body carrying the raw CV text.

    Returns:
        A dict with the ``confidence``, ``reason`` and ``normalized_text``
        values from the model's JSON reply; a key the model left out is None.

    Raises:
        HTTPException: 422 when the text is blank after trimming, 502 when
            the reply is not a JSON object, plus any error raised by
            ``_ollama_generate_json``.
    """
    cv_text = req.text.strip()
    if not cv_text:
        # min_length=1 on the request model still accepts whitespace-only
        # text, which would leave the prompt with no CV content at all.
        raise HTTPException(status_code=422, detail="CV text must not be blank.")

    prompt = f"""
You normalize messy CV text into parser-friendly master-CV text.
Return ONLY valid JSON with this exact shape:
{{
"confidence": 0.0,
"reason": "short reason",
"normalized_text": "string"
}}
Rules for normalized_text:
- Preserve facts only. Do not invent.
- Use markdown section headings exactly like these when data exists:
# Contact
# Professional Summary
# Work Experience
# Education
# Skills
# Languages
# Interests
- Under # Contact, put one plain value per line, no labels unless unavoidable:
Full name line
email line
phone line
website line
location line
- Under # Professional Summary, write 1-3 plain sentences or bullet lines.
- Preserve explicitly mentioned technologies, tools, and methods as skills when they appear in the source.
- Never output helper words like "line", "value", "field", or "item".
- Under # Work Experience, for each job use this exact shape:
Job title only
Company, Location
2019 - Present
- bullet
- bullet
- Under # Education, for each entry use this exact shape:
Qualification line
Institution, Location line
2016 - 2019 line
- detail
- Under # Skills and # Languages, use one bullet per item.
- Remove OCR/layout noise.
- Do not output placeholders like Not specified.
- If uncertain, omit the field/line rather than invent.
CV text:
{cv_text}
""".strip()

    # NOTE(review): _ollama_generate_json performs a synchronous urlopen with
    # a 120 s timeout; inside this async handler the event loop waits for it.
    # Consider offloading it to a worker thread once its thread-safety is
    # confirmed.
    parsed = _ollama_generate_json(prompt)
    if not isinstance(parsed, dict):
        # A top-level JSON array or scalar is valid JSON but has no .get().
        raise HTTPException(status_code=502, detail="Ollama did not return a JSON object.")

    return {key: parsed.get(key) for key in ("confidence", "reason", "normalized_text")}
@app.post("/cv/classify-block")
async def classify_cv_block(req: CvClassifyBlockRequest):
prompt = f"""