feat: improve admin observability and translation-first summaries
This commit is contained in:
+111
-37
@@ -10,12 +10,21 @@ app = FastAPI(title="Local Summarizer")
|
||||
|
||||
# Checkpoint identifier handed to ``from_pretrained`` for both the tokenizer
# and the seq2seq model.
MODEL_NAME = "sshleifer/distilbart-cnn-12-6"

# Upper bound on input size in characters.
# NOTE(review): the enforcement site is not visible in this section — confirm.
MAX_INPUT_CHARS = 20000

# Number of leading characters of the cleaned text that are appended as the
# "Context" part of the model input.
MAX_CONTEXT_CHARS = 2200

# The tokenizer, model and device are created exactly once, further down, by
# ``_load_runtime()``. Loading them here as well would instantiate the model
# twice at import time, only for the first copy to be rebound and discarded.
|
||||
|
||||
def _load_runtime():
    """Load the tokenizer and model and move the model to the chosen device.

    The model is switched to eval mode and placed on CUDA when
    ``torch.cuda.is_available()`` reports a GPU, otherwise on the CPU.

    Returns:
        A 5-tuple ``(tokenizer, model, device, has_cuda, gpu_name)``.
        ``gpu_name`` is the name of CUDA device 0, or ``None`` when CUDA is
        not available.
    """
    tok = AutoTokenizer.from_pretrained(MODEL_NAME)
    net = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
    net.eval()

    cuda_ok = torch.cuda.is_available()
    target = torch.device("cuda") if cuda_ok else torch.device("cpu")
    net.to(target)

    # Only query the device name when a GPU is actually present.
    card = None
    if cuda_ok:
        card = torch.cuda.get_device_name(0)

    return tok, net, target, cuda_ok, card
|
||||
|
||||
|
||||
# Module-level runtime state: loaded once at import and shared by every
# request handler. GPU_AVAILABLE / GPU_NAME are surfaced by the health check.
(tokenizer, model, device, GPU_AVAILABLE, GPU_NAME) = _load_runtime()

# Cache holding at most 1024 entries, each expiring after one hour (60 * 60 s).
cache = TTLCache(ttl=60 * 60, maxsize=1024)
|
||||
|
||||
|
||||
@@ -33,7 +42,13 @@ def _key(text: str, max_length: int, min_length: int, top_skills: int) -> str:
|
||||
|
||||
@app.get("/health")
async def health():
    """Report liveness together with the model and device currently in use.

    Returns:
        A dict with the keys ``ok`` (always ``True``), ``model`` (the
        configured checkpoint name), ``device`` (string form of the torch
        device), ``gpu_available`` and ``gpu_name``.
    """
    # Single return point: an earlier short ``{"ok", "model"}`` return placed
    # ahead of this payload would make the device/GPU fields unreachable.
    return {
        "ok": True,
        "model": MODEL_NAME,
        "device": str(device),
        "gpu_available": GPU_AVAILABLE,
        "gpu_name": GPU_NAME,
    }
|
||||
|
||||
|
||||
_TECH = [
|
||||
@@ -54,6 +69,17 @@ _TECH_PRIORITY = [
|
||||
]
|
||||
|
||||
|
||||
_MUST_HAVE_HINTS = [
|
||||
"must have", "required", "requirements", "you have", "you bring", "essential", "we are looking for",
|
||||
]
|
||||
_NICE_TO_HAVE_HINTS = [
|
||||
"nice to have", "bonus", "preferred", "advantageous", "extra plus",
|
||||
]
|
||||
_SCREENING_HINTS = [
|
||||
"experience with", "hands-on", "demonstrated", "proven", "track record", "delivered",
|
||||
]
|
||||
|
||||
|
||||
def _rank_tech_skills(skills):
|
||||
ordered = []
|
||||
seen = set()
|
||||
@@ -84,7 +110,7 @@ def _extract_bullets(lines, max_items=8):
|
||||
continue
|
||||
if re.match(r"^([-*]|\u2022)\s+", s):
|
||||
s = re.sub(r"^([-*]|\u2022)\s+", "", s).strip()
|
||||
if 3 <= len(s) <= 200:
|
||||
if 3 <= len(s) <= 220:
|
||||
out.append(s)
|
||||
if len(out) >= max_items:
|
||||
break
|
||||
@@ -96,6 +122,7 @@ def _top_keywords(text: str, limit=6):
|
||||
stop = {
|
||||
"with", "from", "that", "this", "will", "have", "your", "their", "about", "role", "team", "work",
|
||||
"experience", "skills", "requirements", "responsibilities", "company", "using", "ability", "years",
|
||||
"looking", "candidate", "position", "working", "across", "strong", "building", "support",
|
||||
}
|
||||
counts = {}
|
||||
for word in words:
|
||||
@@ -106,6 +133,27 @@ def _top_keywords(text: str, limit=6):
|
||||
return [word for word, _ in ordered[:limit]]
|
||||
|
||||
|
||||
def _first_matching_sentences(text: str, hints, limit=3):
|
||||
sentences = re.split(r"(?<=[.!?])\s+", text)
|
||||
found = []
|
||||
for sentence in sentences:
|
||||
low = sentence.lower()
|
||||
if any(hint in low for hint in hints):
|
||||
cleaned = sentence.strip()
|
||||
if 20 <= len(cleaned) <= 220:
|
||||
found.append(cleaned)
|
||||
if len(found) >= limit:
|
||||
break
|
||||
return found
|
||||
|
||||
|
||||
def _trim_line(text: str, max_len: int = 140) -> str:
|
||||
text = re.sub(r"\s+", " ", text).strip(" -•\t")
|
||||
if len(text) <= max_len:
|
||||
return text
|
||||
return text[: max_len - 1].rstrip() + "…"
|
||||
|
||||
|
||||
def _role_focused_excerpt(text: str) -> dict:
|
||||
cleaned = _strip_html(text)
|
||||
lines = [ln.strip() for ln in cleaned.splitlines()]
|
||||
@@ -162,6 +210,11 @@ def _role_focused_excerpt(text: str) -> dict:
|
||||
responsibilities = any_bullets[:6]
|
||||
requirements = any_bullets[6:10]
|
||||
|
||||
if not requirements:
|
||||
requirements = [_trim_line(x) for x in _first_matching_sentences(cleaned, _MUST_HAVE_HINTS, limit=4)]
|
||||
if not nice:
|
||||
nice = [_trim_line(x) for x in _first_matching_sentences(cleaned, _NICE_TO_HAVE_HINTS, limit=3)]
|
||||
|
||||
focused_parts = []
|
||||
if responsibilities:
|
||||
focused_parts.append("Responsibilities:\n- " + "\n- ".join(responsibilities))
|
||||
@@ -169,7 +222,14 @@ def _role_focused_excerpt(text: str) -> dict:
|
||||
focused_parts.append("Requirements:\n- " + "\n- ".join(requirements))
|
||||
if nice:
|
||||
focused_parts.append("Nice to have:\n- " + "\n- ".join(nice))
|
||||
focused_parts.append("Context:\n" + cleaned[:1500])
|
||||
focused_parts.append("Context:\n" + cleaned[:MAX_CONTEXT_CHARS])
|
||||
|
||||
screen_focus = []
|
||||
for item in requirements[:4]:
|
||||
if any(hint in item.lower() for hint in _SCREENING_HINTS) or len(screen_focus) < 2:
|
||||
screen_focus.append(_trim_line(item))
|
||||
if not screen_focus:
|
||||
screen_focus = [_trim_line(x) for x in _first_matching_sentences(cleaned, _SCREENING_HINTS, limit=3)]
|
||||
|
||||
return {
|
||||
"cleaned": cleaned,
|
||||
@@ -180,6 +240,7 @@ def _role_focused_excerpt(text: str) -> dict:
|
||||
"tech": tech_found,
|
||||
"soft": soft_found,
|
||||
"keywords": _top_keywords(cleaned),
|
||||
"screen_focus": screen_focus[:3],
|
||||
}
|
||||
|
||||
|
||||
@@ -193,7 +254,9 @@ def _model_summarize(text: str, max_length: int, min_length: int) -> str:
|
||||
attention_mask=attention_mask,
|
||||
max_length=max_length,
|
||||
min_length=min_length,
|
||||
num_beams=4,
|
||||
num_beams=3,
|
||||
length_penalty=1.0,
|
||||
no_repeat_ngram_size=3,
|
||||
early_stopping=True,
|
||||
)
|
||||
return tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
|
||||
@@ -211,53 +274,64 @@ async def summarize(req: SummarizeRequest):
|
||||
info = _role_focused_excerpt(req.text)
|
||||
summary = _model_summarize(info["focused_input"], req.max_length, req.min_length)
|
||||
|
||||
ranked_tech = []
|
||||
for t in _rank_tech_skills(info["tech"]):
|
||||
if t not in ranked_tech:
|
||||
ranked_tech.append(t)
|
||||
|
||||
uniq_soft = []
|
||||
for s in info["soft"]:
|
||||
if s not in uniq_soft:
|
||||
uniq_soft.append(s)
|
||||
|
||||
lines = ["Role summary:", summary]
|
||||
|
||||
if info["requirements"]:
|
||||
lines.append("")
|
||||
lines.append("What the company wants most:")
|
||||
for x in info["requirements"][:7]:
|
||||
lines.append(f"- {x}")
|
||||
for x in info["requirements"][:5]:
|
||||
lines.append(f"- {_trim_line(x)}")
|
||||
|
||||
if ranked_tech:
|
||||
lines.append("")
|
||||
lines.append("Top hard skills:")
|
||||
for skill in ranked_tech[: req.top_skills]:
|
||||
lines.append(f"- {skill}")
|
||||
|
||||
if info["keywords"]:
|
||||
lines.append("")
|
||||
lines.append("Keywords to mirror:")
|
||||
for keyword in info["keywords"][:5]:
|
||||
lines.append(f"- {keyword}")
|
||||
|
||||
if info["responsibilities"]:
|
||||
lines.append("")
|
||||
lines.append("What you would be doing:")
|
||||
for x in info["responsibilities"][:6]:
|
||||
lines.append(f"- {x}")
|
||||
for x in info["responsibilities"][:4]:
|
||||
lines.append(f"- {_trim_line(x)}")
|
||||
|
||||
if info["nice"]:
|
||||
lines.append("")
|
||||
lines.append("Nice to have:")
|
||||
for x in info["nice"][:5]:
|
||||
lines.append(f"- {x}")
|
||||
for x in info["nice"][:3]:
|
||||
lines.append(f"- {_trim_line(x)}")
|
||||
|
||||
if info["tech"]:
|
||||
uniq = []
|
||||
for t in _rank_tech_skills(info["tech"]):
|
||||
if t not in uniq:
|
||||
uniq.append(t)
|
||||
if uniq_soft:
|
||||
lines.append("")
|
||||
lines.append("Top hard skills: " + ", ".join(uniq[: req.top_skills]))
|
||||
|
||||
if info["soft"]:
|
||||
uniq_soft = []
|
||||
for s in info["soft"]:
|
||||
if s not in uniq_soft:
|
||||
uniq_soft.append(s)
|
||||
lines.append("")
|
||||
lines.append("Relevant soft skills: " + ", ".join(uniq_soft[:8]))
|
||||
|
||||
if info["keywords"]:
|
||||
lines.append("")
|
||||
lines.append("Key themes: " + ", ".join(info["keywords"][:6]))
|
||||
lines.append("Relevant soft skills:")
|
||||
for soft in uniq_soft[:5]:
|
||||
lines.append(f"- {soft}")
|
||||
|
||||
lines.append("")
|
||||
lines.append("Interview focus:")
|
||||
if info["requirements"]:
|
||||
if info["screen_focus"]:
|
||||
for x in info["screen_focus"]:
|
||||
lines.append(f"- Be ready to prove: {_trim_line(x)}")
|
||||
elif info["requirements"]:
|
||||
for x in info["requirements"][:3]:
|
||||
lines.append(f"- Prepare examples that demonstrate: {x}")
|
||||
elif info["tech"]:
|
||||
for x in _rank_tech_skills(info["tech"])[:3]:
|
||||
lines.append(f"- Prepare examples that demonstrate: {_trim_line(x)}")
|
||||
elif ranked_tech:
|
||||
for x in ranked_tech[:3]:
|
||||
lines.append(f"- Be ready to explain your hands-on experience with {x}")
|
||||
else:
|
||||
lines.append("- Prepare examples showing relevant impact, collaboration, and delivery.")
|
||||
|
||||
Reference in New Issue
Block a user