refactor, security updates, cv extraction upgrades
This commit is contained in:
@@ -0,0 +1,10 @@
|
||||
.venv
|
||||
__pycache__
|
||||
.pytest_cache
|
||||
tmp
|
||||
*.out
|
||||
*.err
|
||||
*.pyc
|
||||
*.pyo
|
||||
*.pyd
|
||||
.pytest_cache/
|
||||
@@ -54,7 +54,7 @@ The script:
|
||||
- writes pytest cache under `tmp/pytest-cache` to avoid stale root-owned `.pytest_cache` directories
|
||||
|
||||
## API
|
||||
- `GET /health` — health check and runtime capabilities, including Ollama version/model metadata when configured
|
||||
- `GET /health` — health check and runtime capabilities, including lazy model state (`model_loaded`, `model_disabled`, `summarize_available`, `model_load_error`) plus Ollama version/model metadata when configured
|
||||
- `POST /summarize` — JSON body `{ "text": "...", "max_length": 150, "min_length": 30 }`
|
||||
- `POST /extract-text` — multipart file upload, returns extracted text and OCR metadata
|
||||
- `POST /cv/classify-block` — JSON body `{ "block": "..." }`, uses Ollama when `OLLAMA_MODEL` is configured
|
||||
|
||||
+105
-7
@@ -27,6 +27,17 @@ IMAGE_EXTENSIONS = {".png", ".jpg", ".jpeg", ".webp"}
|
||||
# --- Ollama endpoint settings, read once at import time -----------------------
# The base URL is stored without a trailing slash.
OLLAMA_BASE_URL = os.environ.get("OLLAMA_BASE_URL", "http://127.0.0.1:11434").rstrip("/")
OLLAMA_MODEL = os.environ.get("OLLAMA_MODEL", "")

# --- Summarizer loading switches ----------------------------------------------
# A switch is on only when its environment variable is exactly "1".
SKIP_MODEL_LOAD = os.environ.get("AI_SERVICE_SKIP_MODEL_LOAD", "") == "1"
EAGER_MODEL_LOAD = os.environ.get("AI_SERVICE_EAGER_MODEL_LOAD", "") == "1"

# --- Summarizer runtime state ---------------------------------------------------
# Nothing is loaded at this point: no tokenizer/model, CPU device, no GPU info.
tokenizer = None
model = None
device = torch.device("cpu")
GPU_AVAILABLE, GPU_NAME = False, None
MODEL_LOADED = False
MODEL_DISABLED = SKIP_MODEL_LOAD

# The error slot is pre-filled only when loading has been switched off.
MODEL_LOAD_ERROR = None
if SKIP_MODEL_LOAD:
    MODEL_LOAD_ERROR = "Model loading is disabled by AI_SERVICE_SKIP_MODEL_LOAD."
|
||||
|
||||
|
||||
def _load_runtime():
|
||||
@@ -40,10 +51,31 @@ def _load_runtime():
|
||||
return tokenizer, model, device, has_cuda, gpu_name
|
||||
|
||||
|
||||
def _ensure_runtime_loaded():
    """Load the summarizer runtime on demand and report whether it is usable.

    Updates the module-level runtime state (``tokenizer``, ``model``,
    ``device``, ``GPU_AVAILABLE``, ``GPU_NAME``, ``MODEL_LOADED`` and
    ``MODEL_LOAD_ERROR``) as a side effect.

    Returns:
        True when a tokenizer and model are available, False when loading is
        disabled or ``_load_runtime()`` raised.
    """
    global tokenizer, model, device, GPU_AVAILABLE, GPU_NAME, MODEL_LOAD_ERROR, MODEL_LOADED

    # Loading switched off: refresh the explanatory message and give up.
    if MODEL_DISABLED:
        MODEL_LOAD_ERROR = "Model loading is disabled by AI_SERVICE_SKIP_MODEL_LOAD."
        return False

    # A previous call already produced a usable runtime.
    already_loaded = MODEL_LOADED and tokenizer is not None and model is not None
    if already_loaded:
        return True

    try:
        tokenizer, model, device, GPU_AVAILABLE, GPU_NAME = _load_runtime()
    except Exception as exc:
        # Fall back to the "nothing loaded" state and keep the failure text so
        # it can be reported to callers.
        tokenizer = model = None
        device = torch.device("cpu")
        GPU_AVAILABLE, GPU_NAME = False, None
        MODEL_LOADED = False
        MODEL_LOAD_ERROR = str(exc)
        return False
    else:
        MODEL_LOAD_ERROR = None
        MODEL_LOADED = True
        return True


# Optional warm-up at import time; skipped whenever loading is disabled.
if not SKIP_MODEL_LOAD and EAGER_MODEL_LOAD:
    _ensure_runtime_loaded()
|
||||
|
||||
cache = TTLCache(maxsize=1024, ttl=60 * 60)
|
||||
|
||||
|
||||
@@ -54,6 +86,10 @@ class SummarizeRequest(BaseModel):
|
||||
top_skills: int = Field(default=8, ge=3, le=12)
|
||||
|
||||
|
||||
class CvNormalizeRequest(BaseModel):
    """Request body for ``POST /cv/normalize``."""

    # Raw CV text to normalize; must be 1 to 50000 characters long.
    text: str = Field(max_length=50000, min_length=1)
|
||||
|
||||
|
||||
class CvClassifyBlockRequest(BaseModel):
    """Request body for ``POST /cv/classify-block``."""

    # One block of CV text to classify; must be 1 to 6000 characters long.
    block: str = Field(max_length=6000, min_length=1)
|
||||
|
||||
@@ -127,6 +163,10 @@ async def health():
|
||||
"gpu_name": GPU_NAME,
|
||||
"ocr_available": True,
|
||||
"ocr_languages": OCR_LANGUAGES,
|
||||
"model_loaded": MODEL_LOADED,
|
||||
"model_disabled": MODEL_DISABLED,
|
||||
"summarize_available": MODEL_LOADED and not MODEL_DISABLED,
|
||||
"model_load_error": MODEL_LOAD_ERROR,
|
||||
**_ollama_status(),
|
||||
}
|
||||
|
||||
@@ -324,8 +364,8 @@ def _role_focused_excerpt(text: str) -> dict:
|
||||
|
||||
|
||||
def _model_summarize(text: str, max_length: int, min_length: int) -> str:
|
||||
if tokenizer is None or model is None:
|
||||
raise HTTPException(status_code=503, detail="Summarizer model is not loaded.")
|
||||
if not _ensure_runtime_loaded() or tokenizer is None or model is None:
|
||||
raise HTTPException(status_code=503, detail=MODEL_LOAD_ERROR or "Summarizer model is not loaded.")
|
||||
inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=1024)
|
||||
input_ids = inputs.input_ids.to(device)
|
||||
attention_mask = inputs.attention_mask.to(device) if hasattr(inputs, "attention_mask") else None
|
||||
@@ -363,7 +403,7 @@ def _ollama_generate_json(prompt: str):
|
||||
)
|
||||
|
||||
try:
|
||||
with urllib_request.urlopen(req, timeout=30) as response:
|
||||
with urllib_request.urlopen(req, timeout=120) as response:
|
||||
body = json.loads(response.read().decode("utf-8"))
|
||||
except HTTPError as ex:
|
||||
raise HTTPException(status_code=502, detail=f"Ollama request failed with {ex.code}.")
|
||||
@@ -384,6 +424,64 @@ def _ollama_generate_json(prompt: str):
|
||||
raise HTTPException(status_code=502, detail="Ollama did not return valid JSON.")
|
||||
|
||||
|
||||
@app.post("/cv/normalize")
async def normalize_cv(req: CvNormalizeRequest):
    """Normalize messy CV text into parser-friendly text via Ollama.

    Builds a prompt around the stripped ``req.text``, sends it through
    ``_ollama_generate_json`` and returns the ``confidence``, ``reason`` and
    ``normalized_text`` fields of the model's JSON reply (``None`` for any
    field the reply omits).

    Raises:
        HTTPException: 502 when the reply is not a JSON object; any
            ``HTTPException`` raised by ``_ollama_generate_json`` propagates
            unchanged.
    """
    import asyncio  # local import keeps this handler self-contained

    prompt = f"""
You normalize messy CV text into parser-friendly master-CV text.
Return ONLY valid JSON with this exact shape:
{{
"confidence": 0.0,
"reason": "short reason",
"normalized_text": "string"
}}

Rules for normalized_text:
- Preserve facts only. Do not invent.
- Use markdown section headings exactly like these when data exists:
# Contact
# Professional Summary
# Work Experience
# Education
# Skills
# Languages
# Interests
- Under # Contact, put one plain value per line, no labels unless unavoidable:
Full name line
email line
phone line
website line
location line
- Under # Professional Summary, write 1-3 plain sentences or bullet lines.
- Preserve explicitly mentioned technologies, tools, and methods as skills when they appear in the source.
- Never output helper words like "line", "value", "field", or "item".
- Under # Work Experience, for each job use this exact shape:
Job title only
Company, Location
2019 - Present
- bullet
- bullet
- Under # Education, for each entry use this exact shape:
Qualification line
Institution, Location line
2016 - 2019 line
- detail
- Under # Skills and # Languages, use one bullet per item.
- Remove OCR/layout noise.
- Do not output placeholders like Not specified.
- If uncertain, omit the field/line rather than invent.

CV text:
{req.text.strip()}
""".strip()

    # The Ollama helper performs a blocking HTTP request; run it in the default
    # executor so the event loop keeps serving other requests while it waits.
    loop = asyncio.get_running_loop()
    parsed = await loop.run_in_executor(None, _ollama_generate_json, prompt)

    # Valid JSON is not necessarily an object; reject anything without keys
    # instead of failing with an AttributeError on ``.get``.
    if not isinstance(parsed, dict):
        raise HTTPException(status_code=502, detail="Ollama did not return a JSON object.")

    return {
        "confidence": parsed.get("confidence"),
        "reason": parsed.get("reason"),
        "normalized_text": parsed.get("normalized_text"),
    }
|
||||
|
||||
|
||||
@app.post("/cv/classify-block")
|
||||
async def classify_cv_block(req: CvClassifyBlockRequest):
|
||||
prompt = f"""
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
import importlib
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
@@ -11,16 +10,23 @@ if str(ROOT) not in sys.path:
|
||||
sys.path.insert(0, str(ROOT))
|
||||
|
||||
|
||||
def load_app_module(monkeypatch, *, skip_model_load=True, ollama_model=None):
    """Import a fresh copy of the ``app`` module under a controlled environment.

    Args:
        monkeypatch: pytest ``monkeypatch`` fixture used to set and clear
            environment variables for the duration of the test.
        skip_model_load: when true, set ``AI_SERVICE_SKIP_MODEL_LOAD=1``;
            otherwise remove that variable.
        ollama_model: value for ``OLLAMA_MODEL``; ``None`` removes the variable.

    Returns:
        The newly imported ``app`` module.
    """
    if skip_model_load:
        monkeypatch.setenv("AI_SERVICE_SKIP_MODEL_LOAD", "1")
    else:
        monkeypatch.delenv("AI_SERVICE_SKIP_MODEL_LOAD", raising=False)
    # Always clear the eager-load switch so the import never triggers a load.
    monkeypatch.delenv("AI_SERVICE_EAGER_MODEL_LOAD", raising=False)

    if ollama_model is None:
        monkeypatch.delenv("OLLAMA_MODEL", raising=False)
    else:
        monkeypatch.setenv("OLLAMA_MODEL", ollama_model)

    # Evicting the cached module makes the import below execute the module
    # from scratch, so it picks up the environment prepared above. A reload
    # afterwards would only run the module-level code a second time.
    sys.modules.pop("app", None)
    return importlib.import_module("app")
|
||||
|
||||
|
||||
def test_health_reports_runtime_without_ollama(monkeypatch):
|
||||
def test_health_reports_runtime_without_ollama_and_without_forcing_model_load(monkeypatch):
|
||||
module = load_app_module(monkeypatch)
|
||||
client = TestClient(module.app)
|
||||
|
||||
@@ -30,12 +36,46 @@ def test_health_reports_runtime_without_ollama(monkeypatch):
|
||||
payload = response.json()
|
||||
assert payload["ok"] is True
|
||||
assert payload["device"] == "cpu"
|
||||
assert payload["model_loaded"] is False
|
||||
assert payload["model_disabled"] is True
|
||||
assert payload["summarize_available"] is False
|
||||
assert "disabled" in payload["model_load_error"].lower()
|
||||
assert payload["ollama_configured"] is False
|
||||
assert payload["ollama_model"] is None
|
||||
assert payload["ollama_installed_models"] == []
|
||||
assert payload["ollama_loaded_models"] == []
|
||||
|
||||
|
||||
def test_summarize_returns_503_with_explicit_reason_when_model_loading_is_disabled(monkeypatch):
    """With model loading disabled, /summarize answers 503 and says why."""
    app_module = load_app_module(monkeypatch)
    request_body = {"text": "Platform engineering role with APIs and Python experience."}

    response = TestClient(app_module.app).post("/summarize", json=request_body)

    assert response.status_code == 503
    detail = response.json()["detail"]
    assert "disabled" in detail.lower()
|
||||
|
||||
|
||||
def test_health_reports_ollama_unreachable_when_configured_but_not_available(monkeypatch):
    """/health stays 200 and flags Ollama as unreachable when its lookup fails."""
    app_module = load_app_module(monkeypatch, ollama_model="qwen2.5:7b")

    def refuse_connection(path: str):
        # Stand-in for the Ollama JSON helper that always fails to connect.
        raise OSError("connection refused")

    monkeypatch.setattr(app_module, "_ollama_json", refuse_connection)

    response = TestClient(app_module.app).get("/health")

    assert response.status_code == 200
    body = response.json()
    assert body["ollama_configured"] is True
    assert body["ollama_reachable"] is False
    assert body["ollama_model"] == "qwen2.5:7b"
    assert body["ollama_model_available"] is False
|
||||
|
||||
|
||||
def test_classify_block_returns_structured_json(monkeypatch):
|
||||
module = load_app_module(monkeypatch)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user