refactor, security updates, cv extraction upgrades

This commit is contained in:
2026-04-11 01:34:32 +02:00
parent 806b200ac5
commit 27fd70a2d7
59 changed files with 6817 additions and 1561 deletions
+10
View File
@@ -0,0 +1,10 @@
.venv
__pycache__
.pytest_cache
tmp
*.out
*.err
*.pyc
*.pyo
*.pyd
.pytest_cache/
+1 -1
View File
@@ -54,7 +54,7 @@ The script:
- writes pytest cache under `tmp/pytest-cache` to avoid stale root-owned `.pytest_cache` directories
## API
- `GET /health` — health check and runtime capabilities, including Ollama version/model metadata when configured
- `GET /health` — health check and runtime capabilities, including lazy model state (`model_loaded`, `model_disabled`, `summarize_available`, `model_load_error`) plus Ollama version/model metadata when configured
- `POST /summarize` — JSON body `{ "text": "...", "max_length": 150, "min_length": 30 }`
- `POST /extract-text` — multipart file upload, returns extracted text and OCR metadata
- `POST /cv/classify-block` — JSON body `{ "block": "..." }`, uses Ollama when `OLLAMA_MODEL` is configured
+105 -7
View File
@@ -27,6 +27,17 @@ IMAGE_EXTENSIONS = {".png", ".jpg", ".jpeg", ".webp"}
# Base URL of the Ollama server; any trailing "/" is stripped from the configured value.
OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "http://127.0.0.1:11434").rstrip("/")
# Name of the Ollama model to use; empty string when OLLAMA_MODEL is not set.
OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "")
# Both flags are enabled only when the environment variable is exactly "1".
SKIP_MODEL_LOAD = os.getenv("AI_SERVICE_SKIP_MODEL_LOAD", "") == "1"
EAGER_MODEL_LOAD = os.getenv("AI_SERVICE_EAGER_MODEL_LOAD", "") == "1"
# Summarizer runtime state. It starts out "not loaded" on CPU and is filled in
# by _ensure_runtime_loaded() when a load attempt succeeds.
tokenizer = None
model = None
device = torch.device("cpu")
GPU_AVAILABLE = False
GPU_NAME = None
# Human-readable reason why the model is unavailable; None when no problem is known.
MODEL_LOAD_ERROR = "Model loading is disabled by AI_SERVICE_SKIP_MODEL_LOAD." if SKIP_MODEL_LOAD else None
# True only after a successful load.
MODEL_LOADED = False
# Mirrors SKIP_MODEL_LOAD; when True the model is never loaded.
MODEL_DISABLED = SKIP_MODEL_LOAD
def _load_runtime():
@@ -40,10 +51,31 @@ def _load_runtime():
return tokenizer, model, device, has_cuda, gpu_name
if SKIP_MODEL_LOAD:
tokenizer, model, device, GPU_AVAILABLE, GPU_NAME = None, None, torch.device("cpu"), False, None
else:
tokenizer, model, device, GPU_AVAILABLE, GPU_NAME = _load_runtime()
def _ensure_runtime_loaded():
    """Load the summarizer runtime on demand and report whether it is usable.

    Returns:
        bool: True when the tokenizer and model are available. False when
        loading is disabled or the load attempt raised; in both cases the
        reason is stored in the module-level MODEL_LOAD_ERROR.
    """
    global tokenizer, model, device, GPU_AVAILABLE, GPU_NAME, MODEL_LOAD_ERROR, MODEL_LOADED

    if MODEL_DISABLED:
        MODEL_LOAD_ERROR = "Model loading is disabled by AI_SERVICE_SKIP_MODEL_LOAD."
        return False

    runtime_ready = MODEL_LOADED and tokenizer is not None and model is not None
    if runtime_ready:
        return True

    try:
        tokenizer, model, device, GPU_AVAILABLE, GPU_NAME = _load_runtime()
    except Exception as exc:
        # Fall back to the pristine "not loaded" CPU state and remember why,
        # so callers can surface the failure instead of crashing.
        tokenizer, model = None, None
        device = torch.device("cpu")
        GPU_AVAILABLE = False
        GPU_NAME = None
        MODEL_LOADED = False
        MODEL_LOAD_ERROR = str(exc)
        return False
    else:
        MODEL_LOAD_ERROR = None
        MODEL_LOADED = True
        return True
if EAGER_MODEL_LOAD and not SKIP_MODEL_LOAD:
_ensure_runtime_loaded()
cache = TTLCache(maxsize=1024, ttl=60 * 60)
@@ -54,6 +86,10 @@ class SummarizeRequest(BaseModel):
top_skills: int = Field(default=8, ge=3, le=12)
class CvNormalizeRequest(BaseModel):
    # Request body for POST /cv/normalize.
    # `text` carries the raw CV text; it must be non-empty and at most 50,000 characters.
    text: str = Field(min_length=1, max_length=50_000)
class CvClassifyBlockRequest(BaseModel):
block: str = Field(min_length=1, max_length=6000)
@@ -127,6 +163,10 @@ async def health():
"gpu_name": GPU_NAME,
"ocr_available": True,
"ocr_languages": OCR_LANGUAGES,
"model_loaded": MODEL_LOADED,
"model_disabled": MODEL_DISABLED,
"summarize_available": MODEL_LOADED and not MODEL_DISABLED,
"model_load_error": MODEL_LOAD_ERROR,
**_ollama_status(),
}
@@ -324,8 +364,8 @@ def _role_focused_excerpt(text: str) -> dict:
def _model_summarize(text: str, max_length: int, min_length: int) -> str:
if tokenizer is None or model is None:
raise HTTPException(status_code=503, detail="Summarizer model is not loaded.")
if not _ensure_runtime_loaded() or tokenizer is None or model is None:
raise HTTPException(status_code=503, detail=MODEL_LOAD_ERROR or "Summarizer model is not loaded.")
inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=1024)
input_ids = inputs.input_ids.to(device)
attention_mask = inputs.attention_mask.to(device) if hasattr(inputs, "attention_mask") else None
@@ -363,7 +403,7 @@ def _ollama_generate_json(prompt: str):
)
try:
with urllib_request.urlopen(req, timeout=30) as response:
with urllib_request.urlopen(req, timeout=120) as response:
body = json.loads(response.read().decode("utf-8"))
except HTTPError as ex:
raise HTTPException(status_code=502, detail=f"Ollama request failed with {ex.code}.")
@@ -384,6 +424,64 @@ def _ollama_generate_json(prompt: str):
raise HTTPException(status_code=502, detail="Ollama did not return valid JSON.")
@app.post("/cv/normalize")
async def normalize_cv(req: CvNormalizeRequest):
    """Normalize messy CV text into parser-friendly, markdown-sectioned text via Ollama.

    Returns a JSON object with the keys ``confidence``, ``reason`` and
    ``normalized_text``; a key the model omitted is returned as ``null``.

    Responds with 502 when the model's reply is not a JSON object. Errors
    raised by the Ollama request helper are passed through unchanged.
    """
    # Local import keeps this block self-contained regardless of the file's import header.
    import asyncio

    prompt = f"""
You normalize messy CV text into parser-friendly master-CV text.
Return ONLY valid JSON with this exact shape:
{{
"confidence": 0.0,
"reason": "short reason",
"normalized_text": "string"
}}
Rules for normalized_text:
- Preserve facts only. Do not invent.
- Use markdown section headings exactly like these when data exists:
# Contact
# Professional Summary
# Work Experience
# Education
# Skills
# Languages
# Interests
- Under # Contact, put one plain value per line, no labels unless unavoidable:
Full name line
email line
phone line
website line
location line
- Under # Professional Summary, write 1-3 plain sentences or bullet lines.
- Preserve explicitly mentioned technologies, tools, and methods as skills when they appear in the source.
- Never output helper words like "line", "value", "field", or "item".
- Under # Work Experience, for each job use this exact shape:
Job title only
Company, Location
2019 - Present
- bullet
- bullet
- Under # Education, for each entry use this exact shape:
Qualification line
Institution, Location line
2016 - 2019 line
- detail
- Under # Skills and # Languages, use one bullet per item.
- Remove OCR/layout noise.
- Do not output placeholders like Not specified.
- If uncertain, omit the field/line rather than invent.
CV text:
{req.text.strip()}
""".strip()
    # _ollama_generate_json issues a blocking urllib request. Running it in a
    # worker thread keeps the event loop free to serve other requests (for
    # example /health) while the model is generating.
    parsed = await asyncio.to_thread(_ollama_generate_json, prompt)
    # A model can reply with valid JSON that is not an object (list, string,
    # number); report that as an upstream failure instead of crashing on .get().
    if not isinstance(parsed, dict):
        raise HTTPException(status_code=502, detail="Ollama did not return a JSON object.")
    return {
        "confidence": parsed.get("confidence"),
        "reason": parsed.get("reason"),
        "normalized_text": parsed.get("normalized_text"),
    }
@app.post("/cv/classify-block")
async def classify_cv_block(req: CvClassifyBlockRequest):
prompt = f"""
+45 -5
View File
@@ -1,5 +1,4 @@
import importlib
import os
import sys
from pathlib import Path
@@ -11,16 +10,23 @@ if str(ROOT) not in sys.path:
sys.path.insert(0, str(ROOT))
def load_app_module(monkeypatch):
monkeypatch.setenv("AI_SERVICE_SKIP_MODEL_LOAD", "1")
monkeypatch.delenv("OLLAMA_MODEL", raising=False)
def load_app_module(monkeypatch, *, skip_model_load=True, ollama_model=None):
    """Import a fresh copy of the ``app`` module under a controlled environment.

    Args:
        monkeypatch: pytest ``monkeypatch`` fixture (only ``setenv`` and
            ``delenv`` are used), so the environment is restored after the test.
        skip_model_load: When True, set ``AI_SERVICE_SKIP_MODEL_LOAD=1``;
            otherwise remove that variable.
        ollama_model: Value for ``OLLAMA_MODEL``; ``None`` removes the variable.

    Returns:
        The newly imported ``app`` module.
    """
    if skip_model_load:
        monkeypatch.setenv("AI_SERVICE_SKIP_MODEL_LOAD", "1")
    else:
        monkeypatch.delenv("AI_SERVICE_SKIP_MODEL_LOAD", raising=False)
    # An eager-load flag inherited from the developer's shell must not leak into the import.
    monkeypatch.delenv("AI_SERVICE_EAGER_MODEL_LOAD", raising=False)
    if ollama_model is None:
        monkeypatch.delenv("OLLAMA_MODEL", raising=False)
    else:
        monkeypatch.setenv("OLLAMA_MODEL", ollama_model)
    # Evicting the cached module makes import_module() execute the module's
    # top-level code exactly once with the environment prepared above. A
    # follow-up importlib.reload() would only run that code a second time.
    sys.modules.pop("app", None)
    return importlib.import_module("app")
def test_health_reports_runtime_without_ollama(monkeypatch):
def test_health_reports_runtime_without_ollama_and_without_forcing_model_load(monkeypatch):
module = load_app_module(monkeypatch)
client = TestClient(module.app)
@@ -30,12 +36,46 @@ def test_health_reports_runtime_without_ollama(monkeypatch):
payload = response.json()
assert payload["ok"] is True
assert payload["device"] == "cpu"
assert payload["model_loaded"] is False
assert payload["model_disabled"] is True
assert payload["summarize_available"] is False
assert "disabled" in payload["model_load_error"].lower()
assert payload["ollama_configured"] is False
assert payload["ollama_model"] is None
assert payload["ollama_installed_models"] == []
assert payload["ollama_loaded_models"] == []
def test_summarize_returns_503_with_explicit_reason_when_model_loading_is_disabled(monkeypatch):
    """With model loading disabled, /summarize answers 503 and says why."""
    app_module = load_app_module(monkeypatch)
    http = TestClient(app_module.app)
    request_body = {"text": "Platform engineering role with APIs and Python experience."}

    result = http.post("/summarize", json=request_body)

    assert result.status_code == 503
    detail = result.json()["detail"]
    assert "disabled" in detail.lower()
def test_health_reports_ollama_unreachable_when_configured_but_not_available(monkeypatch):
    """/health still answers 200 and flags Ollama as unreachable when its API call fails."""
    app_module = load_app_module(monkeypatch, ollama_model="qwen2.5:7b")

    def refuse_connection(path: str):
        # Stand-in for the Ollama HTTP helper: every call fails at the socket level.
        raise OSError("connection refused")

    monkeypatch.setattr(app_module, "_ollama_json", refuse_connection)

    result = TestClient(app_module.app).get("/health")

    assert result.status_code == 200
    body = result.json()
    assert body["ollama_configured"] is True
    assert body["ollama_reachable"] is False
    assert body["ollama_model"] == "qwen2.5:7b"
    assert body["ollama_model_available"] is False
def test_classify_block_returns_structured_json(monkeypatch):
module = load_app_module(monkeypatch)