First Commit

This commit is contained in:
cesnimda
2026-03-21 11:55:27 +01:00
commit 2e8a29b4d0
1757 changed files with 166084 additions and 0 deletions
+7
View File
@@ -0,0 +1,7 @@
# Build on the slim Python 3.11 base image.
FROM python:3.11-slim
# All following relative paths resolve against /app inside the image.
WORKDIR /app
# Copy only the dependency list first, then install from it.
COPY requirements.txt ./
# --no-cache-dir tells pip not to keep its download cache.
RUN pip install --no-cache-dir -r requirements.txt
# Copy the rest of the build context (the application code) into /app.
COPY . .
# Port the uvicorn server started below listens on.
EXPOSE 8001
# Serve the ASGI object `app` from module `app` on all interfaces, port 8001.
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8001"]
+32
View File
@@ -0,0 +1,32 @@
# Local Hugging Face Summarizer
This small service runs a Hugging Face summarization model locally and exposes a simple HTTP API.
Install (recommended: virtualenv)
Windows (CPU PyTorch wheel may be required):
```powershell
python -m venv .venv
.\.venv\Scripts\Activate.ps1
pip install -r requirements.txt
# If torch wheel installation is needed, follow instructions at https://pytorch.org
python -m uvicorn app:app --host 127.0.0.1 --port 8001 --workers 1
```
Linux / macOS:
```bash
python3 -m venv .venv
source .venv/bin/activate
pip install -r requirements.txt
python -m uvicorn app:app --host 127.0.0.1 --port 8001 --workers 1
```
API
- `GET /health` — health check
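- `POST /summarize` — JSON body `{ "text": "...", "max_length": 150, "min_length": 30 }` returns `{ "summary": "...", "cached": false }`. `max_length` and `min_length` are optional and default to 160 and 45; `cached` is `true` when the response was served from the in-memory cache (entries expire after one hour).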
Notes
- The model (`sshleifer/distilbart-cnn-12-6`) is downloaded on first run and can be several hundred MB.
- For lower memory usage, consider `sshleifer/tiny-distilbart-cnn-6-6` or `t5-small`.
+218
View File
@@ -0,0 +1,218 @@
import asyncio
import hashlib
import html
import re
import threading

import torch
from cachetools import TTLCache
from fastapi import FastAPI
from pydantic import BaseModel
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
# FastAPI application object; the route handlers below are registered on it.
app = FastAPI(title="Local Summarizer")
# Hugging Face model identifier used for both the tokenizer and the model.
MODEL_NAME = "sshleifer/distilbart-cnn-12-6"
# Tokenizer and seq2seq model are loaded once, when this module is imported.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
# Put the model in evaluation mode; this file only calls generate() on it.
model.eval()
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
# In-memory cache of finished summaries, keyed by the string built in _key().
cache = TTLCache(maxsize=1024, ttl=60 * 60) # 1 hour cache
# Request body accepted by POST /summarize.
class SummarizeRequest(BaseModel):
    # Raw job-post text; HTML tags are stripped before summarization.
    text: str
    # Forwarded to model.generate() as max_length.
    max_length: int = 160
    # Forwarded to model.generate() as min_length.
    min_length: int = 45
def _key(text: str, max_length: int, min_length: int) -> str:
h = hashlib.sha256(text.encode("utf-8")).hexdigest()
return f"{h}:{max_length}:{min_length}"
@app.get("/health")
async def health():
    # Liveness probe: reports that the service is up and which model it serves.
    status = {"ok": True, "model": MODEL_NAME}
    return status
# Lower-case technology keywords that _role_focused_excerpt() looks for in the
# lower-cased job text; hits are reported as "Tech keywords" in the summary.
_TECH = [
    "python",
    "c#",
    "dotnet",
    ".net",
    "java",
    "javascript",
    "typescript",
    "react",
    "node",
    "sql",
    "postgres",
    "postgresql",
    "mysql",
    "sqlite",
    "mongodb",
    "redis",
    "aws",
    "azure",
    "gcp",
    "docker",
    "kubernetes",
    "terraform",
    "linux",
    "git",
    "ci/cd",
    "graphql",
    "rest",
]
def _strip_html(text: str) -> str:
# Good enough for job descriptions pasted from the web.
text = re.sub(r"<\s*br\s*/?>", "\n", text, flags=re.IGNORECASE)
text = re.sub(r"</p\s*>", "\n", text, flags=re.IGNORECASE)
text = re.sub(r"<[^>]+>", " ", text)
return re.sub(r"\n{3,}", "\n\n", text).strip()
def _extract_bullets(lines, max_items=8):
out = []
for ln in lines:
s = ln.strip()
if not s:
continue
if re.match(r"^([-*]|\u2022)\s+", s):
s = re.sub(r"^([-*]|\u2022)\s+", "", s).strip()
if 3 <= len(s) <= 200:
out.append(s)
if len(out) >= max_items:
break
return out
# Section key -> lower-case phrases that introduce that section in a job post.
_SECTION_HEADINGS = {
    "responsibilities": ["responsibilities", "what you will do", "what you'll do", "the role", "your role", "you will"],
    "requirements": ["requirements", "what we are looking for", "what we're looking for", "skills", "experience", "must have"],
    "nice": ["nice to have", "bonus", "preferred"],
}

# Lines starting with a list marker are content, never section headings.
_HEADING_BULLET_RE = re.compile(r"^(?:[-*]|\u2022)\s+")


def _match_heading(line: str):
    """Return the section key whose heading phrase starts *line*, else None."""
    # Without this guard "- Experience with Python" would have its "- " removed
    # by the strip below, match the "experience" heading, and be dropped.
    if _HEADING_BULLET_RE.match(line):
        return None
    normalized = line.lower().strip(":- ")
    for section, phrases in _SECTION_HEADINGS.items():
        for phrase in phrases:
            if normalized == phrase or normalized.startswith(phrase + " "):
                return section
    return None


def _find_tech(lowered: str) -> list:
    """Return the ``_TECH`` keywords that occur in *lowered* as whole terms."""
    found = []
    for term in _TECH:
        # Require a non-alphanumeric neighbour on each alphanumeric edge so
        # "java" does not fire on "javascript" nor "git" on "digital". Edges
        # that are punctuation (".net", "c#") need no guard on that side.
        # Trade-off: derived words such as "restful" no longer count as "rest".
        head = r"(?<![a-z0-9])" if term[0].isalnum() else ""
        tail = r"(?![a-z0-9])" if term[-1].isalnum() else ""
        if re.search(head + re.escape(term) + tail, lowered):
            found.append(term)
    return found


def _role_focused_excerpt(text: str) -> dict:
    """Extract the role-relevant parts of a job post.

    The text is cleaned with ``_strip_html``, split into lines, and walked
    once: a heading line switches the current section, and every other
    non-empty line is collected under that section. Bullet lines are then
    pulled out of each section with ``_extract_bullets``. If neither
    responsibilities nor requirements were found, bullet lines from anywhere
    in the text are used instead (first 6 as responsibilities, next 4 as
    requirements).

    Args:
        text: Raw job description, possibly containing HTML.

    Returns:
        A dict with keys ``cleaned`` (the cleaned text), ``focused_input``
        (the condensed text handed to the model, always ending with the first
        1500 characters of the cleaned text as context), ``responsibilities``,
        ``requirements``, ``nice`` (lists of bullet texts) and ``tech``
        (matched ``_TECH`` keywords in list order).
    """
    cleaned = _strip_html(text)
    lines = [ln.strip() for ln in cleaned.splitlines()]

    collected = {"responsibilities": [], "requirements": [], "nice": []}
    section = None
    for ln in lines:
        if not ln:
            continue
        heading = _match_heading(ln)
        if heading:
            section = heading
            continue
        # Lines before the first recognised heading belong to no section.
        if section is not None:
            collected[section].append(ln)

    responsibilities = _extract_bullets(collected["responsibilities"], max_items=7)
    requirements = _extract_bullets(collected["requirements"], max_items=7)
    nice = _extract_bullets(collected["nice"], max_items=5)

    tech_found = _find_tech(cleaned.lower())

    # Fallback: pick bullet-like lines anywhere if sections are missing.
    if not responsibilities and not requirements:
        any_bullets = _extract_bullets(lines, max_items=10)
        responsibilities = any_bullets[:6]
        requirements = any_bullets[6:10]

    focused_parts = []
    if responsibilities:
        focused_parts.append("Responsibilities:\n- " + "\n- ".join(responsibilities))
    if requirements:
        focused_parts.append("Requirements:\n- " + "\n- ".join(requirements))
    if nice:
        focused_parts.append("Nice to have:\n- " + "\n- ".join(nice))
    # Always include a small slice of the original for context.
    focused_parts.append("Context:\n" + cleaned[:1500])

    return {
        "cleaned": cleaned,
        "focused_input": "\n\n".join(focused_parts),
        "responsibilities": responsibilities,
        "requirements": requirements,
        "nice": nice,
        "tech": tech_found,
    }
def _model_summarize(text: str, max_length: int, min_length: int) -> str:
    """Summarize *text* with the loaded seq2seq model using beam search.

    The input is tokenized (truncated to at most 1024 tokens), moved to the
    module-level ``device`` and passed to ``model.generate`` with 4 beams and
    early stopping. Returns the first generated sequence, decoded without
    special tokens and stripped of surrounding whitespace.
    """
    encoded = tokenizer(text, return_tensors="pt", truncation=True, max_length=1024)
    ids = encoded.input_ids.to(device)
    mask = None
    if hasattr(encoded, "attention_mask"):
        mask = encoded.attention_mask.to(device)

    generation_options = {
        "max_length": max_length,
        "min_length": min_length,
        "num_beams": 4,
        "early_stopping": True,
    }
    # Inference only: no gradients are needed.
    with torch.no_grad():
        generated = model.generate(ids, attention_mask=mask, **generation_options)

    best = generated[0]
    return tokenizer.decode(best, skip_special_tokens=True).strip()
# Only one generate() call runs at a time, matching the effective behaviour
# when inference ran directly on the event loop. A threading lock (not an
# asyncio one) is used because it is held inside the worker thread.
_INFERENCE_LOCK = threading.Lock()


def _summarize_serialized(text: str, max_length: int, min_length: int) -> str:
    """Run ``_model_summarize`` while holding the inference lock."""
    with _INFERENCE_LOCK:
        return _model_summarize(text, max_length, min_length)


@app.post("/summarize")
async def summarize(req: SummarizeRequest):
    # Summarize a job post and return {"summary": <text>, "cached": <bool>}.
    #
    # The summary text consists of a model-written "Role summary", followed by
    # up to 6 extracted responsibilities, up to 6 requirements and up to 14
    # matched tech keywords (each part only when non-empty). Results are cached
    # per (text, max_length, min_length).
    key = _key(req.text, req.max_length, req.min_length)
    # Single lookup: with a TTL cache, "key in cache" followed by "cache[key]"
    # can raise KeyError if the entry expires between the two calls.
    cached = cache.get(key)
    if cached is not None:
        return {"summary": cached, "cached": True}

    info = _role_focused_excerpt(req.text)
    # Summarize the role-focused excerpt instead of the whole job post.
    # Inference is blocking and slow; running it in a worker thread keeps the
    # event loop free to answer /health and cache hits in the meantime.
    summary = await asyncio.to_thread(
        _summarize_serialized, info["focused_input"], req.max_length, req.min_length
    )

    lines = ["Role summary:", summary]
    if info["responsibilities"]:
        lines.append("")
        lines.append("Key responsibilities:")
        lines.extend(f"- {x}" for x in info["responsibilities"][:6])
    if info["requirements"]:
        lines.append("")
        lines.append("Key requirements:")
        lines.extend(f"- {x}" for x in info["requirements"][:6])
    if info["tech"]:
        # Keep this short; it's just a hint based on keyword matches.
        # dict.fromkeys de-duplicates while preserving first-seen order.
        uniq = list(dict.fromkeys(info["tech"]))
        lines.append("")
        lines.append("Tech keywords: " + ", ".join(uniq[:14]))

    out = "\n".join(lines).strip()
    cache[key] = out
    return {"summary": out, "cached": False}
+6
View File
@@ -0,0 +1,6 @@
fastapi>=0.85
uvicorn[standard]>=0.18
transformers>=4.30
torch>=1.13
cachetools>=5.0
pydantic>=1.10
+12
View File
@@ -0,0 +1,12 @@
Warning: You are sending unauthenticated requests to the HF Hub. Please set a HF_TOKEN to enable higher rate limits and faster downloads.
D:\Job tracker\.venv\Lib\site-packages\huggingface_hub\file_download.py:129: UserWarning: `huggingface_hub` cache-system uses symlinks by default to efficiently store duplicated files but your machine does not support them in C:\Users\Cesnimda\.cache\huggingface\hub\models--sshleifer--distilbart-cnn-12-6. Caching files will still work but in a degraded version that might require more space on your disk. This warning can be disabled by setting the `HF_HUB_DISABLE_SYMLINKS_WARNING` environment variable. For more details, see https://huggingface.co/docs/huggingface_hub/how-to-cache#limitations.
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
warnings.warn(message)
Please make sure the generation config includes `forced_bos_token_id=0`.
Loading weights: 0%| | 0/358 [00:00<?, ?it/s]
Loading weights: 100%|##########| 358/358 [00:00<00:00, 24690.63it/s]
The tied weights mapping and config for this model specifies to tie model.shared.weight to model.decoder.embed_tokens.weight, but both are present in the checkpoints, so we will NOT tie them. You should update the config with `tie_word_embeddings=False` to silence this warning
The tied weights mapping and config for this model specifies to tie model.shared.weight to model.encoder.embed_tokens.weight, but both are present in the checkpoints, so we will NOT tie them. You should update the config with `tie_word_embeddings=False` to silence this warning
INFO: Started server process [30848]
INFO: Waiting for application startup.
+8
View File
@@ -0,0 +1,8 @@
INFO: 127.0.0.1:61287 - "GET /health HTTP/1.1" 200 OK
INFO: 127.0.0.1:56126 - "POST /summarize HTTP/1.1" 200 OK
INFO: 127.0.0.1:55075 - "POST /summarize HTTP/1.1" 200 OK
INFO: 127.0.0.1:55075 - "POST /summarize HTTP/1.1" 200 OK
INFO: 127.0.0.1:55075 - "POST /summarize HTTP/1.1" 200 OK
INFO: 127.0.0.1:55075 - "POST /summarize HTTP/1.1" 200 OK
INFO: 127.0.0.1:55075 - "POST /summarize HTTP/1.1" 200 OK
INFO: 127.0.0.1:51005 - "POST /summarize HTTP/1.1" 200 OK