feat: add application package generation and grouped readiness workflows
This commit is contained in:
+69
-71
@@ -11,25 +11,24 @@ app = FastAPI(title="Local Summarizer")
|
||||
# Checkpoint name handed to ``from_pretrained`` for both the tokenizer and the model.
MODEL_NAME = "sshleifer/distilbart-cnn-12-6"

# The local summarizer is intentionally simple, but we still validate request sizes
# so accidental giant pastes do not cause avoidable latency or memory spikes.
MAX_INPUT_CHARS = 20000

# Load the tokenizer and model once at import time so every request reuses them.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
model.eval()  # switch the model to evaluation mode

# Use CUDA when torch reports it as available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# In-memory result cache: at most 1024 entries, each kept for one hour.
cache = TTLCache(maxsize=1024, ttl=60 * 60)
|
||||
|
||||
|
||||
class SummarizeRequest(BaseModel):
    """Request body accepted by the summarize endpoint.

    Each field carries its own bounds via ``Field``. The handler additionally
    rejects requests where ``min_length`` is not smaller than ``max_length``.
    """

    # Text to summarize; must be non-empty and at most MAX_INPUT_CHARS characters.
    text: str = Field(min_length=1, max_length=MAX_INPUT_CHARS)
    # Upper length bound forwarded to the model summarizer (24-256, default 160).
    max_length: int = Field(default=160, ge=24, le=256)
    # Lower length bound forwarded to the model summarizer (8-180, default 45).
    min_length: int = Field(default=45, ge=8, le=180)
    # Number of entries listed on the "Top hard skills" line (3-12, default 8).
    top_skills: int = Field(default=8, ge=3, le=12)
|
||||
|
||||
|
||||
def _key(text: str, max_length: int, min_length: int) -> str:
|
||||
def _key(text: str, max_length: int, min_length: int, top_skills: int) -> str:
|
||||
h = hashlib.sha256(text.encode("utf-8")).hexdigest()
|
||||
return f"{h}:{max_length}:{min_length}"
|
||||
return f"{h}:{max_length}:{min_length}:{top_skills}"
|
||||
|
||||
|
||||
@app.get("/health")
|
||||
@@ -38,52 +37,39 @@ async def health():
|
||||
|
||||
|
||||
_TECH = [
|
||||
"python",
|
||||
"c#",
|
||||
"dotnet",
|
||||
".net",
|
||||
"java",
|
||||
"javascript",
|
||||
"typescript",
|
||||
"react",
|
||||
"node",
|
||||
"sql",
|
||||
"postgres",
|
||||
"postgresql",
|
||||
"mysql",
|
||||
"sqlite",
|
||||
"mongodb",
|
||||
"redis",
|
||||
"aws",
|
||||
"azure",
|
||||
"gcp",
|
||||
"docker",
|
||||
"kubernetes",
|
||||
"terraform",
|
||||
"linux",
|
||||
"git",
|
||||
"ci/cd",
|
||||
"graphql",
|
||||
"rest",
|
||||
"python", "c#", "dotnet", ".net", "java", "javascript", "typescript", "react", "node", "sql",
|
||||
"postgres", "postgresql", "mysql", "sqlite", "mongodb", "redis", "aws", "azure", "gcp",
|
||||
"docker", "kubernetes", "terraform", "linux", "git", "ci/cd", "graphql", "rest",
|
||||
]
|
||||
|
||||
_SOFT = [
|
||||
"communication",
|
||||
"collaboration",
|
||||
"teamwork",
|
||||
"problem solving",
|
||||
"leadership",
|
||||
"mentoring",
|
||||
"ownership",
|
||||
"initiative",
|
||||
"adaptability",
|
||||
"stakeholder management",
|
||||
"detail oriented",
|
||||
"communication", "collaboration", "teamwork", "problem solving", "leadership", "mentoring",
|
||||
"ownership", "initiative", "adaptability", "stakeholder management", "detail oriented",
|
||||
]
|
||||
|
||||
_TECH_PRIORITY = [
|
||||
"python", "c#", ".net", "dotnet", "typescript", "javascript", "react", "node",
|
||||
"sql", "postgresql", "postgres", "mysql", "sqlite", "docker", "kubernetes",
|
||||
"aws", "azure", "gcp", "terraform", "graphql", "rest", "git",
|
||||
]
|
||||
|
||||
|
||||
def _rank_tech_skills(skills):
|
||||
ordered = []
|
||||
seen = set()
|
||||
for preferred in _TECH_PRIORITY:
|
||||
for skill in skills:
|
||||
if skill == preferred and skill not in seen:
|
||||
ordered.append(skill)
|
||||
seen.add(skill)
|
||||
for skill in skills:
|
||||
if skill not in seen:
|
||||
ordered.append(skill)
|
||||
seen.add(skill)
|
||||
return ordered
|
||||
|
||||
|
||||
def _strip_html(text: str) -> str:
|
||||
# Good enough for job descriptions pasted from the web.
|
||||
text = re.sub(r"<\s*br\s*/?>", "\n", text, flags=re.IGNORECASE)
|
||||
text = re.sub(r"</p\s*>", "\n", text, flags=re.IGNORECASE)
|
||||
text = re.sub(r"<[^>]+>", " ", text)
|
||||
@@ -105,6 +91,21 @@ def _extract_bullets(lines, max_items=8):
|
||||
return out
|
||||
|
||||
|
||||
def _top_keywords(text: str, limit=6):
    """Return the most frequent non-trivial words in ``text``.

    A word is a lower-cased token that starts with a letter and continues with
    letters or any of ``+ # . / -``. Trailing ``.``, ``/`` and ``-`` are
    trimmed so that a sentence-final token such as ``"python."`` is filtered
    and counted the same way as ``"python"``. Words shorter than three
    characters, generic stop words, and anything listed in ``_TECH`` or
    ``_SOFT`` are ignored.

    Args:
        text: Text to scan.
        limit: Maximum number of keywords to return.

    Returns:
        Up to ``limit`` words, most frequent first; ties are broken
        alphabetically.
    """
    stop = {
        "with", "from", "that", "this", "will", "have", "your", "their", "about", "role", "team", "work",
        "experience", "skills", "requirements", "responsibilities", "company", "using", "ability", "years",
    }
    # Build the exclusion set once instead of scanning two lists per word.
    excluded = stop.union(_TECH, _SOFT)

    counts = {}
    for token in re.findall(r"[a-zA-Z][a-zA-Z+#./-]{2,}", text.lower()):
        # The pattern allows punctuation inside words (e.g. "node.js"), which
        # also lets it swallow a trailing full stop, slash or dash.
        word = token.rstrip("./-")
        if len(word) < 3 or word in excluded:
            continue
        counts[word] = counts.get(word, 0) + 1

    ordered = sorted(counts.items(), key=lambda item: (-item[1], item[0]))
    return [word for word, _ in ordered[:limit]]
|
||||
|
||||
|
||||
def _role_focused_excerpt(text: str) -> dict:
|
||||
cleaned = _strip_html(text)
|
||||
lines = [ln.strip() for ln in cleaned.splitlines()]
|
||||
@@ -117,10 +118,10 @@ def _role_focused_excerpt(text: str) -> dict:
|
||||
|
||||
def match_heading(s: str):
|
||||
sl = s.lower().strip(":-\x7f ")
|
||||
for k, words in headings.items():
|
||||
for w in words:
|
||||
if sl == w or sl.startswith(w + " "):
|
||||
return k
|
||||
for key, words in headings.items():
|
||||
for word in words:
|
||||
if sl == word or sl.startswith(word + " "):
|
||||
return key
|
||||
return None
|
||||
|
||||
section = None
|
||||
@@ -131,11 +132,10 @@ def _role_focused_excerpt(text: str) -> dict:
|
||||
for ln in lines:
|
||||
if not ln:
|
||||
continue
|
||||
h = match_heading(ln)
|
||||
if h:
|
||||
section = h
|
||||
heading = match_heading(ln)
|
||||
if heading:
|
||||
section = heading
|
||||
continue
|
||||
|
||||
if section == "responsibilities":
|
||||
resp_lines.append(ln)
|
||||
elif section == "requirements":
|
||||
@@ -157,7 +157,6 @@ def _role_focused_excerpt(text: str) -> dict:
|
||||
if s in low:
|
||||
soft_found.append(s)
|
||||
|
||||
# Fallback: pick bullet-like lines anywhere if sections are missing.
|
||||
if not responsibilities and not requirements:
|
||||
any_bullets = _extract_bullets(lines, max_items=10)
|
||||
responsibilities = any_bullets[:6]
|
||||
@@ -170,8 +169,6 @@ def _role_focused_excerpt(text: str) -> dict:
|
||||
focused_parts.append("Requirements:\n- " + "\n- ".join(requirements))
|
||||
if nice:
|
||||
focused_parts.append("Nice to have:\n- " + "\n- ".join(nice))
|
||||
|
||||
# Always include a small slice of the original for context.
|
||||
focused_parts.append("Context:\n" + cleaned[:1500])
|
||||
|
||||
return {
|
||||
@@ -182,6 +179,7 @@ def _role_focused_excerpt(text: str) -> dict:
|
||||
"nice": nice,
|
||||
"tech": tech_found,
|
||||
"soft": soft_found,
|
||||
"keywords": _top_keywords(cleaned),
|
||||
}
|
||||
|
||||
|
||||
@@ -206,20 +204,18 @@ async def summarize(req: SummarizeRequest):
|
||||
if req.min_length >= req.max_length:
|
||||
raise HTTPException(status_code=400, detail="min_length must be smaller than max_length.")
|
||||
|
||||
key = _key(req.text, req.max_length, req.min_length)
|
||||
key = _key(req.text, req.max_length, req.min_length, req.top_skills)
|
||||
if key in cache:
|
||||
return {"summary": cache[key], "cached": True}
|
||||
|
||||
info = _role_focused_excerpt(req.text)
|
||||
|
||||
# Summarize the role-focused excerpt instead of the whole job post.
|
||||
summary = _model_summarize(info["focused_input"], req.max_length, req.min_length)
|
||||
|
||||
lines = ["Role summary:", summary]
|
||||
|
||||
if info["requirements"]:
|
||||
lines.append("")
|
||||
lines.append("What they need from you:")
|
||||
lines.append("What the company wants most:")
|
||||
for x in info["requirements"][:7]:
|
||||
lines.append(f"- {x}")
|
||||
|
||||
@@ -241,7 +237,7 @@ async def summarize(req: SummarizeRequest):
|
||||
if t not in uniq:
|
||||
uniq.append(t)
|
||||
lines.append("")
|
||||
lines.append("Top hard skills: " + ", ".join(uniq[:10]))
|
||||
lines.append("Top hard skills: " + ", ".join(uniq[: req.top_skills]))
|
||||
|
||||
if info["soft"]:
|
||||
uniq_soft = []
|
||||
@@ -251,18 +247,20 @@ async def summarize(req: SummarizeRequest):
|
||||
lines.append("")
|
||||
lines.append("Relevant soft skills: " + ", ".join(uniq_soft[:8]))
|
||||
|
||||
out = "\n".join(lines).strip()
|
||||
cache[key] = out
|
||||
return {"summary": out, "cached": False}
|
||||
skills: " + ", ".join(uniq[:14]))
|
||||
|
||||
if info["soft"]:
|
||||
uniq_soft = []
|
||||
for s in info["soft"]:
|
||||
if s not in uniq_soft:
|
||||
uniq_soft.append(s)
|
||||
if info["keywords"]:
|
||||
lines.append("")
|
||||
lines.append("Relevant soft skills: " + ", ".join(uniq_soft[:8]))
|
||||
lines.append("Key themes: " + ", ".join(info["keywords"][:6]))
|
||||
|
||||
lines.append("")
|
||||
lines.append("Interview focus:")
|
||||
if info["requirements"]:
|
||||
for x in info["requirements"][:3]:
|
||||
lines.append(f"- Prepare examples that demonstrate: {x}")
|
||||
elif info["tech"]:
|
||||
for x in _rank_tech_skills(info["tech"])[:3]:
|
||||
lines.append(f"- Be ready to explain your hands-on experience with {x}")
|
||||
else:
|
||||
lines.append("- Prepare examples showing relevant impact, collaboration, and delivery.")
|
||||
|
||||
out = "\n".join(lines).strip()
|
||||
cache[key] = out
|
||||
|
||||
Reference in New Issue
Block a user