"""Local FastAPI service that condenses a job posting into a structured brief.

The posting text is cleaned of HTML, split into responsibilities /
requirements / nice-to-have sections with simple heuristics, summarized by a
seq2seq model, and returned together with the detected skills and keywords.
"""

from collections import Counter
import hashlib
import re

from cachetools import TTLCache
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel, Field
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

app = FastAPI(title="Local Summarizer")

MODEL_NAME = "sshleifer/distilbart-cnn-12-6"
MAX_INPUT_CHARS = 20000
MAX_CONTEXT_CHARS = 2200


def _load_runtime():
    """Load tokenizer and model, move the model to GPU when one is available.

    Returns:
        Tuple ``(tokenizer, model, device, has_cuda, gpu_name)`` where
        ``gpu_name`` is ``None`` when CUDA is not available.
    """
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
    model.eval()
    has_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if has_cuda else "cpu")
    model.to(device)
    gpu_name = torch.cuda.get_device_name(0) if has_cuda else None
    return tokenizer, model, device, has_cuda, gpu_name


# Loaded once at import time so every request reuses the same model instance.
tokenizer, model, device, GPU_AVAILABLE, GPU_NAME = _load_runtime()

# Finished summaries are kept for one hour, keyed by text hash + parameters.
cache = TTLCache(maxsize=1024, ttl=60 * 60)


class SummarizeRequest(BaseModel):
    """Request body for ``POST /summarize``."""

    # Raw posting text; may contain HTML.
    text: str = Field(min_length=1, max_length=MAX_INPUT_CHARS)
    # Generation length bounds passed to ``model.generate``.
    max_length: int = Field(default=160, ge=24, le=256)
    min_length: int = Field(default=45, ge=8, le=180)
    # Maximum number of hard skills listed in the response.
    top_skills: int = Field(default=8, ge=3, le=12)


def _key(text: str, max_length: int, min_length: int, top_skills: int) -> str:
    """Build the cache key from a SHA-256 of the text plus the parameters."""
    h = hashlib.sha256(text.encode("utf-8")).hexdigest()
    return f"{h}:{max_length}:{min_length}:{top_skills}"


@app.get("/health")
async def health():
    """Report the loaded model and the device it runs on."""
    return {
        "ok": True,
        "model": MODEL_NAME,
        "device": str(device),
        "gpu_available": GPU_AVAILABLE,
        "gpu_name": GPU_NAME,
    }


_TECH = [
    "python", "c#", "dotnet", ".net", "java", "javascript", "typescript",
    "react", "node", "sql", "postgres", "postgresql", "mysql", "sqlite",
    "mongodb", "redis", "aws", "azure", "gcp", "docker", "kubernetes",
    "terraform", "linux", "git", "ci/cd", "graphql", "rest",
]

_SOFT = [
    "communication", "collaboration", "teamwork", "problem solving",
    "leadership", "mentoring", "ownership", "initiative", "adaptability",
    "stakeholder management", "detail oriented",
]

_TECH_PRIORITY = [
    "python", "c#", ".net", "dotnet", "typescript", "javascript", "react",
    "node", "sql", "postgresql", "postgres", "mysql", "sqlite", "docker",
    "kubernetes", "aws", "azure", "gcp", "terraform", "graphql", "rest",
    "git",
]

_MUST_HAVE_HINTS = [
    "must have", "required", "requirements", "you have", "you bring",
    "essential", "we are looking for",
]

_NICE_TO_HAVE_HINTS = [
    "nice to have", "bonus", "preferred", "advantageous", "extra plus",
]

_SCREENING_HINTS = [
    "experience with", "hands-on", "demonstrated", "proven", "track record",
    "delivered",
]

_SECTION_HEADINGS = {
    "responsibilities": [
        "responsibilities", "what you will do", "what you'll do", "the role",
        "your role", "you will",
    ],
    "requirements": [
        "requirements", "what we are looking for", "what we're looking for",
        "skills", "experience", "must have",
    ],
    "nice": ["nice to have", "bonus", "preferred"],
}

_KEYWORD_STOPWORDS = frozenset({
    "with", "from", "that", "this", "will", "have", "your", "their", "about",
    "role", "team", "work", "experience", "skills", "requirements",
    "responsibilities", "company", "using", "ability", "years", "looking",
    "candidate", "position", "working", "across", "strong", "building",
    "support",
})
_KNOWN_SKILLS = frozenset(_TECH) | frozenset(_SOFT)

_BR_TAG_RE = re.compile(r"<\s*br\s*/?>", re.IGNORECASE)
_BLOCK_CLOSE_RE = re.compile(r"</\s*(?:p|div|li|h[1-6])\s*>", re.IGNORECASE)
_ANY_TAG_RE = re.compile(r"<[^>]+>")
_BLANK_RUN_RE = re.compile(r"\n{3,}")
_BULLET_RE = re.compile(r"^([-*]|\u2022)\s+")
_WORD_RE = re.compile(r"[a-zA-Z][a-zA-Z+#./-]{2,}")
_SENTENCE_SPLIT_RE = re.compile(r"(?<=[.!?])\s+")
_WHITESPACE_RE = re.compile(r"\s+")


def _term_pattern(term: str):
    """Compile a pattern matching *term* as a whole token in lowercase text.

    A boundary is enforced only on the sides where the term itself starts or
    ends with an alphanumeric character, so ".net" still matches inside
    "asp.net" and "c#" matches before punctuation, while "java" no longer
    matches inside "javascript" nor "sql" inside "mysql".
    """
    prefix = r"(?<![a-z0-9])" if term[0].isalnum() else ""
    suffix = r"(?![a-z0-9])" if term[-1].isalnum() else ""
    return re.compile(prefix + re.escape(term) + suffix)


_TECH_PATTERNS = [(term, _term_pattern(term)) for term in _TECH]


def _rank_tech_skills(skills):
    """Order *skills* by ``_TECH_PRIORITY``; unknown skills keep input order.

    Duplicates are removed. Skills not present in the priority list are
    appended after the prioritized ones.
    """
    present = set(skills)
    ordered = [skill for skill in _TECH_PRIORITY if skill in present]
    seen = set(ordered)
    for skill in skills:
        if skill not in seen:
            ordered.append(skill)
            seen.add(skill)
    return ordered


def _strip_html(text: str) -> str:
    """Convert HTML-ish text to plain text, keeping line structure.

    ``<br>`` and closing block tags become newlines, every other tag becomes
    a space, and runs of three or more newlines collapse to a blank line.
    """
    text = _BR_TAG_RE.sub("\n", text)
    # The previous pattern here was empty, which matches at every position
    # and put a newline between every character of the input.
    text = _BLOCK_CLOSE_RE.sub("\n", text)
    text = _ANY_TAG_RE.sub(" ", text)
    return _BLANK_RUN_RE.sub("\n\n", text).strip()


def _extract_bullets(lines, max_items=8):
    """Collect up to *max_items* usable lines, stripping bullet markers.

    A leading ``-``, ``*`` or bullet character is removed when present; lines
    without a marker are kept as well. Only lines of 3..220 characters are
    accepted.
    """
    out = []
    for ln in lines:
        s = ln.strip()
        if not s:
            continue
        if _BULLET_RE.match(s):
            s = _BULLET_RE.sub("", s).strip()
        if 3 <= len(s) <= 220:
            out.append(s)
        if len(out) >= max_items:
            break
    return out


def _top_keywords(text: str, limit=6):
    """Return the *limit* most frequent non-trivial words in *text*.

    Stop words and terms already reported as tech/soft skills are excluded.
    Ties are broken alphabetically.
    """
    counts = Counter()
    for raw in _WORD_RE.findall(text.lower()):
        # The token pattern allows '.', '/' and '-' inside words (node.js,
        # ci/cd), so sentence punctuation can trail a word; drop it so that
        # "python." is filtered like "python" and counts are not split.
        word = raw.rstrip("./-")
        if len(word) < 3:
            continue
        if word in _KEYWORD_STOPWORDS or word in _KNOWN_SKILLS:
            continue
        counts[word] += 1
    ordered = sorted(counts.items(), key=lambda item: (-item[1], item[0]))
    return [word for word, _ in ordered[:limit]]


def _first_matching_sentences(text: str, hints, limit=3):
    """Return up to *limit* sentences (20..220 chars) containing any hint."""
    found = []
    for sentence in _SENTENCE_SPLIT_RE.split(text):
        low = sentence.lower()
        if not any(hint in low for hint in hints):
            continue
        cleaned = sentence.strip()
        if 20 <= len(cleaned) <= 220:
            found.append(cleaned)
            if len(found) >= limit:
                break
    return found


def _trim_line(text: str, max_len: int = 140) -> str:
    """Collapse whitespace and cut *text* to *max_len* chars with an ellipsis."""
    text = _WHITESPACE_RE.sub(" ", text).strip(" -•\t")
    if len(text) <= max_len:
        return text
    return text[: max_len - 1].rstrip() + "…"


def _match_heading(line: str):
    """Return the section key whose heading phrase starts *line*, else None."""
    sl = line.lower().strip(":-\x7f ")
    for key, words in _SECTION_HEADINGS.items():
        for word in words:
            if sl == word or sl.startswith(word + " "):
                return key
    return None


def _role_focused_excerpt(text: str) -> dict:
    """Extract the structured pieces of a posting used to build the reply.

    Returns:
        Dict with keys ``cleaned``, ``focused_input`` (text fed to the model),
        ``responsibilities``, ``requirements``, ``nice``, ``tech``, ``soft``,
        ``keywords`` and ``screen_focus``.
    """
    cleaned = _strip_html(text)
    lines = [ln.strip() for ln in cleaned.splitlines()]

    # Assign each non-heading line to the most recent recognized section;
    # lines before the first heading are not assigned to any section.
    buckets = {"responsibilities": [], "requirements": [], "nice": []}
    section = None
    for ln in lines:
        if not ln:
            continue
        heading = _match_heading(ln)
        if heading:
            section = heading
            continue
        if section is not None:
            buckets[section].append(ln)

    responsibilities = _extract_bullets(buckets["responsibilities"], max_items=7)
    requirements = _extract_bullets(buckets["requirements"], max_items=7)
    nice = _extract_bullets(buckets["nice"], max_items=5)

    low = cleaned.lower()
    # Whole-token matching avoids false hits such as "java" in "javascript"
    # or "rest" in "interest".
    tech_found = [term for term, pattern in _TECH_PATTERNS if pattern.search(low)]
    soft_found = [term for term in _SOFT if term in low]

    # No recognizable sections: fall back to the first usable lines overall.
    if not responsibilities and not requirements:
        any_bullets = _extract_bullets(lines, max_items=10)
        responsibilities = any_bullets[:6]
        requirements = any_bullets[6:10]

    if not requirements:
        requirements = [
            _trim_line(x)
            for x in _first_matching_sentences(cleaned, _MUST_HAVE_HINTS, limit=4)
        ]
    if not nice:
        nice = [
            _trim_line(x)
            for x in _first_matching_sentences(cleaned, _NICE_TO_HAVE_HINTS, limit=3)
        ]

    focused_parts = []
    if responsibilities:
        focused_parts.append("Responsibilities:\n- " + "\n- ".join(responsibilities))
    if requirements:
        focused_parts.append("Requirements:\n- " + "\n- ".join(requirements))
    if nice:
        focused_parts.append("Nice to have:\n- " + "\n- ".join(nice))
    focused_parts.append("Context:\n" + cleaned[:MAX_CONTEXT_CHARS])

    # The first two requirements are always kept; later ones only when they
    # contain a screening hint.
    screen_focus = []
    for item in requirements[:4]:
        hinted = any(hint in item.lower() for hint in _SCREENING_HINTS)
        if hinted or len(screen_focus) < 2:
            screen_focus.append(_trim_line(item))
    if not screen_focus:
        screen_focus = [
            _trim_line(x)
            for x in _first_matching_sentences(cleaned, _SCREENING_HINTS, limit=3)
        ]

    return {
        "cleaned": cleaned,
        "focused_input": "\n\n".join(focused_parts),
        "responsibilities": responsibilities,
        "requirements": requirements,
        "nice": nice,
        "tech": tech_found,
        "soft": soft_found,
        "keywords": _top_keywords(cleaned),
        "screen_focus": screen_focus[:3],
    }


def _model_summarize(text: str, max_length: int, min_length: int) -> str:
    """Run beam-search generation on *text* and return the decoded summary.

    The input is truncated to 1024 tokens by the tokenizer call.
    """
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=1024)
    input_ids = inputs.input_ids.to(device)
    attention_mask = (
        inputs.attention_mask.to(device) if hasattr(inputs, "attention_mask") else None
    )
    with torch.no_grad():
        outputs = model.generate(
            input_ids,
            attention_mask=attention_mask,
            max_length=max_length,
            min_length=min_length,
            num_beams=3,
            length_penalty=1.0,
            no_repeat_ngram_size=3,
            early_stopping=True,
        )
    return tokenizer.decode(outputs[0], skip_special_tokens=True).strip()


def _section(title: str, items) -> list:
    """Return the output lines for one titled bullet section."""
    return ["", title, *(f"- {item}" for item in items)]


@app.post("/summarize")
async def summarize(req: SummarizeRequest):
    """Summarize a posting and return the formatted brief.

    Returns:
        ``{"summary": str, "cached": bool}``.

    Raises:
        HTTPException: 400 when ``min_length`` is not smaller than
            ``max_length``.
    """
    if req.min_length >= req.max_length:
        raise HTTPException(
            status_code=400, detail="min_length must be smaller than max_length."
        )

    key = _key(req.text, req.max_length, req.min_length, req.top_skills)
    # Single lookup: a separate `in` test followed by indexing can raise
    # KeyError if the TTL entry expires between the two calls.
    cached = cache.get(key)
    if cached is not None:
        return {"summary": cached, "cached": True}

    info = _role_focused_excerpt(req.text)
    # NOTE(review): this is a blocking model call inside an async handler, so
    # the event loop is stalled for the duration of generation.
    summary = _model_summarize(info["focused_input"], req.max_length, req.min_length)

    ranked_tech = _rank_tech_skills(info["tech"])
    uniq_soft = list(dict.fromkeys(info["soft"]))

    lines = ["Role summary:", summary]
    if info["requirements"]:
        lines += _section(
            "What the company wants most:",
            [_trim_line(x) for x in info["requirements"][:5]],
        )
    if ranked_tech:
        lines += _section("Top hard skills:", ranked_tech[: req.top_skills])
    if info["keywords"]:
        lines += _section("Keywords to mirror:", info["keywords"][:5])
    if info["responsibilities"]:
        lines += _section(
            "What you would be doing:",
            [_trim_line(x) for x in info["responsibilities"][:4]],
        )
    if info["nice"]:
        lines += _section("Nice to have:", [_trim_line(x) for x in info["nice"][:3]])
    if uniq_soft:
        lines += _section("Relevant soft skills:", uniq_soft[:5])

    lines.append("")
    lines.append("Interview focus:")
    if info["screen_focus"]:
        for x in info["screen_focus"]:
            lines.append(f"- Be ready to prove: {_trim_line(x)}")
    elif info["requirements"]:
        for x in info["requirements"][:3]:
            lines.append(f"- Prepare examples that demonstrate: {_trim_line(x)}")
    elif ranked_tech:
        for x in ranked_tech[:3]:
            lines.append(f"- Be ready to explain your hands-on experience with {x}")
    else:
        lines.append(
            "- Prepare examples showing relevant impact, collaboration, and delivery."
        )

    out = "\n".join(lines).strip()
    cache[key] = out
    return {"summary": out, "cached": False}