Refactor backend project and tighten CV test coverage
This commit is contained in:
@@ -26,6 +26,7 @@ OCR_LANGUAGES = "eng"
|
||||
IMAGE_EXTENSIONS = {".png", ".jpg", ".jpeg", ".webp"}
|
||||
OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "http://127.0.0.1:11434").rstrip("/")
|
||||
OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "")
|
||||
+SKIP_MODEL_LOAD = os.getenv("AI_SERVICE_SKIP_MODEL_LOAD", "") == "1"
|
||||
|
||||
|
||||
def _load_runtime():
|
||||
@@ -39,7 +40,10 @@ def _load_runtime():
|
||||
return tokenizer, model, device, has_cuda, gpu_name
|
||||
|
||||
|
||||
-tokenizer, model, device, GPU_AVAILABLE, GPU_NAME = _load_runtime()
|
||||
+if SKIP_MODEL_LOAD:
|
||||
+    tokenizer, model, device, GPU_AVAILABLE, GPU_NAME = None, None, torch.device("cpu"), False, None
|
||||
+else:
|
||||
+    tokenizer, model, device, GPU_AVAILABLE, GPU_NAME = _load_runtime()
|
||||
cache = TTLCache(maxsize=1024, ttl=60 * 60)
|
||||
|
||||
|
||||
@@ -298,6 +302,8 @@ def _role_focused_excerpt(text: str) -> dict:
|
||||
|
||||
|
||||
def _model_summarize(text: str, max_length: int, min_length: int) -> str:
|
||||
+    if tokenizer is None or model is None:
|
||||
+        raise HTTPException(status_code=503, detail="Summarizer model is not loaded.")
|
||||
inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=1024)
|
||||
input_ids = inputs.input_ids.to(device)
|
||||
attention_mask = inputs.attention_mask.to(device) if hasattr(inputs, "attention_mask") else None
|
||||
|
||||
Reference in New Issue
Block a user