First Commit
This commit is contained in:
@@ -0,0 +1,7 @@
|
||||
FROM python:3.11-slim
|
||||
WORKDIR /app
|
||||
COPY requirements.txt ./
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
COPY . .
|
||||
EXPOSE 8001
|
||||
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8001"]
|
||||
@@ -0,0 +1,32 @@
|
||||
# Local Hugging Face Summarizer
|
||||
|
||||
This small service runs a Hugging Face summarization model locally and exposes a simple HTTP API.
|
||||
|
||||
Install (recommended: virtualenv)
|
||||
|
||||
Windows (a CPU-only PyTorch wheel may be required):
|
||||
|
||||
```powershell
|
||||
python -m venv .venv
|
||||
.\.venv\Scripts\Activate.ps1
|
||||
pip install -r requirements.txt
|
||||
# If torch wheel installation is needed, follow instructions at https://pytorch.org
|
||||
python -m uvicorn app:app --host 127.0.0.1 --port 8001 --workers 1
|
||||
```
|
||||
|
||||
Linux / macOS:
|
||||
|
||||
```bash
|
||||
python3 -m venv .venv
|
||||
source .venv/bin/activate
|
||||
pip install -r requirements.txt
|
||||
python -m uvicorn app:app --host 127.0.0.1 --port 8001 --workers 1
|
||||
```
|
||||
|
||||
API
|
||||
- `GET /health` — health check
|
||||
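- `POST /summarize` — JSON body `{ "text": "...", "max_length": 150, "min_length": 30 }` returns `{ "summary": "...", "cached": false }`. `max_length` and `min_length` are optional and default to 160 and 45; `cached` is `true` when the response was served from the one-hour in-memory cache.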
|
||||
|
||||
Notes
|
||||
- The model is downloaded on first run and can be several hundred MB, so the first startup may take a while.
|
||||
- For lower memory usage, consider `sshleifer/tiny-distilbart-cnn-6-6` or `t5-small`.
|
||||
@@ -0,0 +1,218 @@
|
||||
from fastapi import FastAPI
|
||||
from pydantic import BaseModel
|
||||
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
||||
from cachetools import TTLCache
|
||||
import hashlib
|
||||
import re
|
||||
import torch
|
||||
|
||||
# ASGI application object; the route handlers in this module register on it.
app = FastAPI(title="Local Summarizer")

# Hugging Face checkpoint shared by the tokenizer and the seq2seq model.
MODEL_NAME = "sshleifer/distilbart-cnn-12-6"

# Pick the GPU when torch can see one, otherwise stay on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Tokenizer and model are loaded once, at import time, and reused by every
# request. The model is moved to the chosen device and put in eval mode.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
model.to(device)
model.eval()

# In-memory response cache: at most 1024 entries, each kept for one hour.
cache = TTLCache(maxsize=1024, ttl=3600)
|
||||
|
||||
|
||||
class SummarizeRequest(BaseModel):
    """JSON body accepted by ``POST /summarize``."""

    # Raw text to summarize; HTML tags are stripped before it reaches the model.
    text: str

    # Passed to generation as ``max_length`` and used in the cache key.
    max_length: int = 160

    # Passed to generation as ``min_length`` and used in the cache key.
    min_length: int = 45
|
||||
|
||||
|
||||
def _key(text: str, max_length: int, min_length: int) -> str:
    """Build the cache key for one summarization request.

    The key is the SHA-256 hex digest of the UTF-8 encoded text followed by
    the two length settings, joined with colons, so the same text requested
    with different lengths is cached separately.
    """
    digest = hashlib.sha256(text.encode("utf-8")).hexdigest()
    return ":".join((digest, str(max_length), str(min_length)))
|
||||
|
||||
|
||||
@app.get("/health")
async def health():
    """Health check: report that the service is up and which model it serves."""
    status = {"ok": True, "model": MODEL_NAME}
    return status
|
||||
|
||||
|
||||
# Lower-case technology keywords searched for in the cleaned job-post text.
# Order matters: matches are reported in this order.
_TECH = [
    # Languages, runtimes and frameworks
    "python", "c#", "dotnet", ".net", "java", "javascript", "typescript",
    "react", "node",
    # Data stores
    "sql", "postgres", "postgresql", "mysql", "sqlite", "mongodb", "redis",
    # Cloud and infrastructure
    "aws", "azure", "gcp", "docker", "kubernetes", "terraform", "linux",
    # Tooling and API styles
    "git", "ci/cd", "graphql", "rest",
]
|
||||
|
||||
|
||||
def _strip_html(text: str) -> str:
    """Convert an HTML fragment to plain text, keeping its line structure.

    This is a regex-based approximation, not an HTML parser; it is meant to
    be good enough for job descriptions pasted from the web.

    * ``<li>`` starts a new line prefixed with ``"- "`` so list items are
      still recognisable as bullets after the tags are gone.
    * ``<br>`` and the closing tags of common block elements become newlines.
    * Every remaining tag is replaced by a single space.
    * Character references (``&amp;``, ``&nbsp;`` ...) are decoded and
      non-breaking spaces are turned into ordinary spaces.
    * Runs of three or more newlines collapse to one blank line and the
      result is stripped of leading/trailing whitespace.

    Args:
        text: Raw text that may contain HTML markup.

    Returns:
        The plain-text rendering described above.
    """
    import html  # local import: only this function needs it

    # Give each list item its own bullet line before the generic tag removal,
    # otherwise "<li>a</li><li>b</li>" would end up as one line "a  b".
    text = re.sub(r"<\s*li\b[^>]*>", "\n- ", text, flags=re.IGNORECASE)
    text = re.sub(r"<\s*br\s*/?>", "\n", text, flags=re.IGNORECASE)
    text = re.sub(
        r"</\s*(?:p|div|li|ul|ol|h[1-6]|tr)\s*>", "\n", text, flags=re.IGNORECASE
    )
    text = re.sub(r"<[^>]+>", " ", text)

    # Decode entities only after tags are removed so escaped markup such as
    # "&lt;b&gt;" is kept as literal text instead of being stripped as a tag.
    text = html.unescape(text).replace("\xa0", " ")

    return re.sub(r"\n{3,}", "\n\n", text).strip()
|
||||
|
||||
|
||||
# Leading bullet marker ("-", "*" or U+2022) followed by whitespace.
_BULLET_PREFIX_RE = re.compile(r"^([-*]|\u2022)\s+")


def _extract_bullets(lines, max_items=8):
    """Collect the text of bullet-style lines.

    A line counts as a bullet when, after stripping surrounding whitespace,
    it starts with ``-``, ``*`` or ``\u2022`` followed by whitespace. The
    marker is removed and the remaining text is kept only if it is between
    3 and 200 characters long. Blank lines and non-bullet lines are ignored.

    Args:
        lines: Iterable of strings to scan, in order.
        max_items: Maximum number of bullets to return. Values of zero or
            less yield an empty list.

    Returns:
        A list of bullet texts (without markers), at most ``max_items`` long.
    """
    out = []
    # Guard up front: the limit is otherwise only checked after an append,
    # which would let one item through for max_items <= 0.
    if max_items <= 0:
        return out

    for ln in lines:
        s = ln.strip()
        marker = _BULLET_PREFIX_RE.match(s)
        if marker is None:
            # Covers blank lines as well as ordinary prose lines.
            continue
        item = s[marker.end():].strip()
        if 3 <= len(item) <= 200:
            out.append(item)
            if len(out) >= max_items:
                break
    return out
|
||||
|
||||
|
||||
def _mentions_keyword(low_text: str, keyword: str) -> bool:
    """Return True if *keyword* occurs in *low_text* as a standalone token.

    A boundary is required only on the sides where the keyword itself begins
    or ends with a letter or digit. That keeps "java" from matching inside
    "javascript" and "git" inside "digital", while keywords such as ".net"
    or "c#" still match inside "asp.net" or "c#/.net". Both arguments are
    expected to be lower-case already.
    """
    if not keyword:
        return False
    pattern = re.escape(keyword)
    if keyword[0].isalnum():
        pattern = r"(?<![a-z0-9])" + pattern
    if keyword[-1].isalnum():
        pattern += r"(?![a-z0-9])"
    return re.search(pattern, low_text) is not None


def _role_focused_excerpt(text: str) -> dict:
    """Pull the role-relevant parts out of a job post.

    The text is stripped of HTML, split into lines and walked once. A line
    that equals, or starts with, one of the known heading phrases switches
    the current section (responsibilities / requirements / nice-to-have);
    every other non-empty line is filed under the current section, and lines
    seen before the first heading are not filed anywhere. Bullets are then
    extracted per section. If neither responsibilities nor requirements
    produced any bullets, bullets from the whole text are used instead
    (first six as responsibilities, the next four as requirements).

    Args:
        text: Raw job-post text, possibly containing HTML.

    Returns:
        A dict with keys:
        ``cleaned`` (HTML-stripped text), ``focused_input`` (the sections
        found plus the first 1500 characters of the cleaned text, used as
        the model input), ``responsibilities``, ``requirements``, ``nice``
        (lists of bullet strings) and ``tech`` (keywords from ``_TECH``
        mentioned in the text, in ``_TECH`` order).
    """
    cleaned = _strip_html(text)
    lines = [ln.strip() for ln in cleaned.splitlines()]

    headings = {
        "responsibilities": ["responsibilities", "what you will do", "what you'll do", "the role", "your role", "you will"],
        "requirements": ["requirements", "what we are looking for", "what we're looking for", "skills", "experience", "must have"],
        "nice": ["nice to have", "bonus", "preferred"],
    }

    def match_heading(s: str):
        # Ignore case and decoration such as "Requirements:" or "- Skills -".
        sl = s.lower().strip(":- ")
        for k, words in headings.items():
            for w in words:
                if sl == w or sl.startswith(w + " "):
                    return k
        return None

    # One bucket of raw lines per section.
    section_lines = {name: [] for name in headings}
    section = None
    for ln in lines:
        if not ln:
            continue
        h = match_heading(ln)
        if h:
            # Heading lines only switch the section; they are not collected.
            section = h
            continue
        if section is not None:
            section_lines[section].append(ln)

    responsibilities = _extract_bullets(section_lines["responsibilities"], max_items=7)
    requirements = _extract_bullets(section_lines["requirements"], max_items=7)
    nice = _extract_bullets(section_lines["nice"], max_items=5)

    # Token-aware keyword search; plain substring search reported "java" for
    # "javascript", "git" for "digital", "rest" for "interest", and so on.
    low = cleaned.lower()
    tech_found = [t for t in _TECH if _mentions_keyword(low, t)]

    # Fallback: pick bullet-like lines anywhere if sections are missing.
    if not responsibilities and not requirements:
        any_bullets = _extract_bullets(lines, max_items=10)
        responsibilities = any_bullets[:6]
        requirements = any_bullets[6:10]

    focused_parts = []
    if responsibilities:
        focused_parts.append("Responsibilities:\n- " + "\n- ".join(responsibilities))
    if requirements:
        focused_parts.append("Requirements:\n- " + "\n- ".join(requirements))
    if nice:
        focused_parts.append("Nice to have:\n- " + "\n- ".join(nice))

    # Always include a small slice of the original for context.
    focused_parts.append("Context:\n" + cleaned[:1500])

    return {
        "cleaned": cleaned,
        "focused_input": "\n\n".join(focused_parts),
        "responsibilities": responsibilities,
        "requirements": requirements,
        "nice": nice,
        "tech": tech_found,
    }
|
||||
|
||||
|
||||
def _model_summarize(text: str, max_length: int, min_length: int) -> str:
    """Run the loaded seq2seq model on *text* and return the decoded summary.

    The input is tokenized with truncation at 1024 tokens, moved to the
    module-level ``device`` and decoded with 4-beam search under
    ``torch.no_grad()``.

    Args:
        text: Text to summarize.
        max_length: Passed to ``model.generate`` as ``max_length``.
        min_length: Passed to ``model.generate`` as ``min_length``.

    Returns:
        The first generated sequence, decoded without special tokens and
        stripped of surrounding whitespace.
    """
    encoded = tokenizer(text, return_tensors="pt", truncation=True, max_length=1024)
    ids = encoded.input_ids.to(device)

    # Only forward an attention mask when the tokenizer produced one.
    mask = None
    if hasattr(encoded, "attention_mask"):
        mask = encoded.attention_mask.to(device)

    generation_args = {
        "attention_mask": mask,
        "max_length": max_length,
        "min_length": min_length,
        "num_beams": 4,
        "early_stopping": True,
    }
    with torch.no_grad():
        generated = model.generate(ids, **generation_args)

    best = generated[0]
    return tokenizer.decode(best, skip_special_tokens=True).strip()
|
||||
|
||||
|
||||
@app.post("/summarize")
async def summarize(req: SummarizeRequest):
    """Summarize a job post and return the formatted result.

    The response text has a model-generated "Role summary:" paragraph,
    followed (when available) by up to six key responsibilities, up to six
    key requirements and up to fourteen matched tech keywords. Results are
    cached per (text, max_length, min_length).

    Args:
        req: Request body with the text and the generation length limits.

    Returns:
        ``{"summary": <str>, "cached": <bool>}``; ``cached`` is True when
        the text came from the cache rather than a fresh model run.
    """
    key = _key(req.text, req.max_length, req.min_length)

    # Read the cache in one step. Entries expire on a timer, so a separate
    # "key in cache" check followed by "cache[key]" can raise KeyError when
    # the entry times out between the two calls.
    try:
        cached_summary = cache[key]
    except KeyError:
        pass
    else:
        return {"summary": cached_summary, "cached": True}

    info = _role_focused_excerpt(req.text)

    # Summarize the role-focused excerpt instead of the whole job post.
    # NOTE(review): _model_summarize is a synchronous call made directly from
    # this coroutine, so the event loop is busy for as long as generation runs.
    summary = _model_summarize(info["focused_input"], req.max_length, req.min_length)

    lines = ["Role summary:", summary]

    labelled_sections = (
        ("Key responsibilities:", info["responsibilities"]),
        ("Key requirements:", info["requirements"]),
    )
    for title, items in labelled_sections:
        if items:
            lines.append("")
            lines.append(title)
            lines.extend(f"- {x}" for x in items[:6])

    if info["tech"]:
        # Keep this short; it's just a hint based on keyword matches.
        # dict.fromkeys drops duplicates while preserving first-seen order.
        uniq = list(dict.fromkeys(info["tech"]))
        lines.append("")
        lines.append("Tech keywords: " + ", ".join(uniq[:14]))

    out = "\n".join(lines).strip()
    cache[key] = out
    return {"summary": out, "cached": False}
|
||||
@@ -0,0 +1,6 @@
|
||||
fastapi>=0.85
|
||||
uvicorn[standard]>=0.18
|
||||
transformers>=4.30
|
||||
torch>=1.13
|
||||
cachetools>=5.0
|
||||
pydantic>=1.10
|
||||
@@ -0,0 +1,12 @@
|
||||
Warning: You are sending unauthenticated requests to the HF Hub. Please set a HF_TOKEN to enable higher rate limits and faster downloads.
|
||||
D:\Job tracker\.venv\Lib\site-packages\huggingface_hub\file_download.py:129: UserWarning: `huggingface_hub` cache-system uses symlinks by default to efficiently store duplicated files but your machine does not support them in C:\Users\Cesnimda\.cache\huggingface\hub\models--sshleifer--distilbart-cnn-12-6. Caching files will still work but in a degraded version that might require more space on your disk. This warning can be disabled by setting the `HF_HUB_DISABLE_SYMLINKS_WARNING` environment variable. For more details, see https://huggingface.co/docs/huggingface_hub/how-to-cache#limitations.
|
||||
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
|
||||
warnings.warn(message)
|
||||
Please make sure the generation config includes `forced_bos_token_id=0`.
|
||||
|
||||
Loading weights: 0%| | 0/358 [00:00<?, ?it/s]
|
||||
Loading weights: 100%|##########| 358/358 [00:00<00:00, 24690.63it/s]
|
||||
The tied weights mapping and config for this model specifies to tie model.shared.weight to model.decoder.embed_tokens.weight, but both are present in the checkpoints, so we will NOT tie them. You should update the config with `tie_word_embeddings=False` to silence this warning
|
||||
The tied weights mapping and config for this model specifies to tie model.shared.weight to model.encoder.embed_tokens.weight, but both are present in the checkpoints, so we will NOT tie them. You should update the config with `tie_word_embeddings=False` to silence this warning
|
||||
INFO: Started server process [30848]
|
||||
INFO: Waiting for application startup.
|
||||
@@ -0,0 +1,8 @@
|
||||
INFO: 127.0.0.1:61287 - "GET /health HTTP/1.1" 200 OK
|
||||
INFO: 127.0.0.1:56126 - "POST /summarize HTTP/1.1" 200 OK
|
||||
INFO: 127.0.0.1:55075 - "POST /summarize HTTP/1.1" 200 OK
|
||||
INFO: 127.0.0.1:55075 - "POST /summarize HTTP/1.1" 200 OK
|
||||
INFO: 127.0.0.1:55075 - "POST /summarize HTTP/1.1" 200 OK
|
||||
INFO: 127.0.0.1:55075 - "POST /summarize HTTP/1.1" 200 OK
|
||||
INFO: 127.0.0.1:55075 - "POST /summarize HTTP/1.1" 200 OK
|
||||
INFO: 127.0.0.1:51005 - "POST /summarize HTTP/1.1" 200 OK
|
||||
Reference in New Issue
Block a user