Refactor backend project and tighten CV test coverage

This commit is contained in:
2026-04-01 10:42:55 +02:00
parent 44000f96f2
commit 18d1de45cb
9 changed files with 246 additions and 19 deletions
+7 -1
View File
@@ -26,6 +26,7 @@ OCR_LANGUAGES = "eng"
# Set of lower-case, dot-prefixed image file suffixes.
IMAGE_EXTENSIONS = {".png", ".jpg", ".jpeg", ".webp"}
# Ollama endpoint; overridable via the OLLAMA_BASE_URL environment variable.
# Trailing "/" characters are stripped from whichever value is used.
OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "http://127.0.0.1:11434").rstrip("/")
# Ollama model name from the environment; empty string when the variable is unset.
OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "")
# True only when AI_SERVICE_SKIP_MODEL_LOAD is exactly "1"; unset or any other
# value (e.g. "true", "yes") leaves it False.
SKIP_MODEL_LOAD = os.getenv("AI_SERVICE_SKIP_MODEL_LOAD", "") == "1"
def _load_runtime():
@@ -39,7 +40,10 @@ def _load_runtime():
return tokenizer, model, device, has_cuda, gpu_name
tokenizer, model, device, GPU_AVAILABLE, GPU_NAME = _load_runtime()
# Module-level runtime initialisation. When SKIP_MODEL_LOAD is set, bind
# placeholders (no tokenizer, no model, CPU device, GPU reported as unavailable)
# instead of calling _load_runtime(); otherwise unpack its five return values.
if SKIP_MODEL_LOAD:
tokenizer, model, device, GPU_AVAILABLE, GPU_NAME = None, None, torch.device("cpu"), False, None
else:
tokenizer, model, device, GPU_AVAILABLE, GPU_NAME = _load_runtime()
# Cache bounded to 1024 entries with ttl = 60 * 60.
# NOTE(review): presumably cachetools.TTLCache with ttl in seconds (one hour) — confirm against the import.
cache = TTLCache(maxsize=1024, ttl=60 * 60)
@@ -298,6 +302,8 @@ def _role_focused_excerpt(text: str) -> dict:
def _model_summarize(text: str, max_length: int, min_length: int) -> str:
if tokenizer is None or model is None:
raise HTTPException(status_code=503, detail="Summarizer model is not loaded.")
inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=1024)
input_ids = inputs.input_ids.to(device)
attention_mask = inputs.attention_mask.to(device) if hasattr(inputs, "attention_mask") else None