fix: harden admin system fallback and benchmark review

This commit is contained in:
2026-04-01 13:38:22 +02:00
parent cc55fc0cf8
commit 9191e4cc5b
4 changed files with 264 additions and 7 deletions
@@ -96,6 +96,39 @@ describe('AdminSystemPage', () => {
rootPath: '/data/CvBenchmarks/latest',
lastUpdatedAtUtc: '2026-03-23T10:10:00Z',
reportMarkdown: '# CV benchmark report\n\n- Files: 4',
indexJson: JSON.stringify({
CorpusRoot: '/home/pi/cvs',
OutputRoot: '/data/CvBenchmarks/latest',
GeneratedAtUtc: '2026-03-23T10:10:00Z',
TotalFiles: 4,
AverageCoverage: 0.72,
AverageConfidence: 0.66,
AverageConsistency: 0.94,
FilesWithSuspiciousLocations: 1,
MissingApprovedFixtures: 4,
Entries: [
{
FileName: 'cv.txt',
Slug: 'cv-txt',
Extension: '.txt',
Characters: 2000,
OutputPath: '/data/CvBenchmarks/latest/outputs/cv-txt.json',
ApprovedFixturePath: null,
CandidateFixturePath: '/data/CvBenchmarks/latest/candidate-fixtures/cv-txt.json',
ContactLocation: 'San Francisco, Hobbies',
FirstJob: '* July',
FirstJobLocation: null,
FirstEducation: '* September',
FirstEducationLocation: null,
QualificationLevels: ['Other'],
SuspiciousLocations: [],
CoverageScore: 0.5,
ConfidenceScore: 0.65,
ConsistencyScore: 0.8,
DiffSummary: 'No approved fixture yet — candidate fixture written.',
},
],
}),
},
} as any);
}
@@ -138,6 +171,8 @@ describe('AdminSystemPage', () => {
expect(screen.getByText(/ollama version/i)).toBeTruthy();
expect(screen.getByText(/model · qwen2.5:7b/i)).toBeTruthy();
expect(screen.getByText(/cv benchmark review/i)).toBeTruthy();
expect(screen.getByText(/top parser findings/i)).toBeTruthy();
expect(screen.getByText(/suspicious contact location: san francisco, hobbies/i)).toBeTruthy();
expect(screen.getByText('OCR avg latency')).toBeTruthy();
expect(screen.getByText('88.4 ms')).toBeTruthy();
});
+139 -5
View File
@@ -68,6 +68,40 @@ type EditableEmailSettings = {
hasPassword: boolean;
};
/**
 * One element of `CvBenchmarkIndex.Entries`.
 *
 * Property names are PascalCase because the object is consumed as it comes out of
 * `JSON.parse` in `parseBenchmarkIndex`; nothing validates or remaps individual fields.
 */
type CvBenchmarkEntry = {
// File label shown next to each finding and in the "Weakest files" list.
FileName: string;
// Used as the React key for rows in the "Weakest files" list.
Slug: string;
Extension: string;
Characters: number;
OutputPath: string;
ApprovedFixturePath?: string | null;
CandidateFixturePath?: string | null;
// Reported as a suspicious contact location when it matches the keyword pattern in `benchmarkFindings`.
ContactLocation?: string | null;
// Reported as over-captured when longer than 120 characters.
FirstJob?: string | null;
FirstJobLocation?: string | null;
// Reported as over-captured when longer than 120 characters.
FirstEducation?: string | null;
FirstEducationLocation?: string | null;
// A finding is raised when this list contains "Other".
QualificationLevels: string[];
SuspiciousLocations: string[];
// The three scores below are rendered with `formatPercent` (value * 100) and summed to rank the weakest files.
// NOTE(review): presumably fractions in the 0–1 range — confirm against the benchmark generator.
CoverageScore: number;
ConfidenceScore: number;
ConsistencyScore: number;
// Caption shown under each weakest-file row; "-" is displayed when it is empty.
DiffSummary?: string | null;
};
/**
 * Shape the page expects from the benchmark `indexJson` payload once `parseBenchmarkIndex`
 * has parsed it. Run-level totals feed the summary cards; `Entries` feeds the findings and
 * weakest-files lists.
 */
type CvBenchmarkIndex = {
// Shown in the "Corpus root" detail row.
CorpusRoot: string;
OutputRoot: string;
GeneratedAtUtc: string;
// Rendered as the "Files" summary card.
TotalFiles: number;
// Run averages, rendered via `formatPercent` and coloured via `benchmarkTone`.
AverageCoverage: number;
AverageConfidence: number;
AverageConsistency: number;
FilesWithSuspiciousLocations: number;
// "Missing approved" summary card; any value above 0 is shown with the warning tone.
MissingApprovedFixtures: number;
Entries: CvBenchmarkEntry[];
};
type CvBenchmarkStatus = {
indexJson?: string | null;
reportMarkdown?: string | null;
@@ -136,6 +170,26 @@ function formatDate(value?: string | null) {
return value ? new Date(value).toLocaleString() : "-";
}
/**
 * Formats a fractional score (e.g. `0.72`) as a whole-number percentage string (`"72%"`).
 *
 * @param value - Fraction to format. `null`, `undefined`, and non-finite numbers are treated as missing.
 * @returns The rounded percentage followed by `%`, or `"-"` when there is no usable number.
 */
function formatPercent(value?: number | null): string {
  // Number.isFinite rejects NaN and ±Infinity, so the UI never shows "NaN%" or "Infinity%".
  if (typeof value !== "number" || !Number.isFinite(value)) return "-";
  return `${Math.round(value * 100)}%`;
}
/**
 * Parses the raw benchmark index JSON into a `CvBenchmarkIndex`.
 *
 * The payload is only accepted when it is a plain JSON object whose `Entries` property is an
 * array; anything else (blank input, malformed JSON, `null`, primitives, arrays, objects
 * without `Entries`) yields `null` so callers can fall back to the markdown-only view instead
 * of crashing while iterating `Entries`. Entries that are not objects are dropped. Individual
 * fields are NOT validated beyond that.
 *
 * @param indexJson - Raw JSON text, or `null`/`undefined` when no index is available.
 * @returns The parsed index, or `null` when the input is missing or not a usable index.
 */
function parseBenchmarkIndex(indexJson?: string | null): CvBenchmarkIndex | null {
  if (!indexJson?.trim()) return null;

  let parsed: unknown;
  try {
    parsed = JSON.parse(indexJson);
  } catch {
    // Malformed JSON is treated the same as "no index available".
    return null;
  }

  // JSON.parse happily returns null, numbers, strings and arrays — none of those is an index.
  if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) return null;

  const rawEntries = (parsed as { Entries?: unknown }).Entries;
  if (!Array.isArray(rawEntries)) return null;

  // Consumers dereference every entry (`entry.FileName`, score fields, …); a null or
  // primitive element would throw there, so keep only object elements.
  const objectEntries = rawEntries.filter(
    (candidate: unknown) => typeof candidate === "object" && candidate !== null && !Array.isArray(candidate),
  );

  return { ...parsed, Entries: objectEntries } as CvBenchmarkIndex;
}
/**
 * Maps a score to the colour tone used by the summary cards and chips.
 *
 * Non-number input (including `null`/`undefined`) maps to `"default"`; otherwise
 * `>= 0.8` is `"success"`, `>= 0.6` is `"warning"`, and everything else is `"error"`.
 */
function benchmarkTone(value?: number | null): "default" | "success" | "warning" | "error" {
  if (typeof value === "number") {
    if (value >= 0.8) {
      return "success";
    }
    return value >= 0.6 ? "warning" : "error";
  }
  return "default";
}
function SummaryCard({ title, value, subtitle, tone = "default" }: { title: string; value: string; subtitle?: string; tone?: "default" | "success" | "warning" | "error" }) {
const color = tone === "success" ? "success.main" : tone === "warning" ? "warning.main" : tone === "error" ? "error.main" : "text.primary";
return (
@@ -210,6 +264,34 @@ export default function AdminSystemPage() {
return "success" as const;
}, [status]);
// Parsed benchmark index, recomputed only when the raw `indexJson` string changes;
// `null` when `parseBenchmarkIndex` rejects the payload (blank or unparseable).
const benchmarkIndex = useMemo(() => parseBenchmarkIndex(benchmarkStatus?.indexJson), [benchmarkStatus?.indexJson]);
// Up to six entries with the lowest combined coverage + confidence + consistency score,
// weakest first. Empty when there is no parsed index. The index's own array is never mutated.
const weakestEntries = useMemo(() => {
  if (!benchmarkIndex) {
    const none: CvBenchmarkEntry[] = [];
    return none;
  }
  const combinedScore = (entry: CvBenchmarkEntry) =>
    entry.CoverageScore + entry.ConfidenceScore + entry.ConsistencyScore;
  const ranked = [...benchmarkIndex.Entries];
  ranked.sort((left, right) => combinedScore(left) - combinedScore(right));
  return ranked.slice(0, 6);
}, [benchmarkIndex]);
// First ten heuristic findings across all entries, in entry order. Per entry the checks run
// in a fixed order: suspicious contact location, over-long education, over-long job title,
// qualification level "Other". Empty when there is no parsed index.
const benchmarkFindings = useMemo(() => {
  const collected: Array<{ file: string; issue: string }> = [];
  if (!benchmarkIndex) return collected;

  // Keywords that indicate the contact location captured unrelated CV text.
  const suspiciousLocation = /(culture|education|arial|hobbies|cooperate|ag, ni|bold)/i;
  const overCaptureLength = 120;

  benchmarkIndex.Entries.forEach((entry) => {
    const file = entry.FileName;
    const report = (issue: string) => {
      collected.push({ file, issue });
    };

    const location = entry.ContactLocation;
    if (location && suspiciousLocation.test(location)) {
      report(`Suspicious contact location: ${location}`);
    }

    const education = entry.FirstEducation;
    if (education && education.length > overCaptureLength) {
      report("Education qualification looks over-captured.");
    }

    const jobTitle = entry.FirstJob ?? "";
    if (jobTitle.length > overCaptureLength) {
      report("Work title looks over-captured.");
    }

    const levels = entry.QualificationLevels ?? [];
    if (levels.includes("Other")) {
      report("Qualification level fell back to Other.");
    }
  });

  return collected.slice(0, 10);
}, [benchmarkIndex]);
const sendTestEmail = async () => {
setSendingTestEmail(true);
try {
@@ -430,12 +512,64 @@ export default function AdminSystemPage() {
<Stack spacing={0.75}>
<DetailRow label="Benchmark root" value={benchmarkStatus?.rootPath || "-"} />
<DetailRow label="Last benchmark update" value={formatDate(benchmarkStatus?.lastUpdatedAtUtc)} />
<DetailRow label="Corpus root" value={benchmarkIndex?.CorpusRoot || "-"} />
</Stack>
<Box sx={{ mt: 1.5, p: 1.5, borderRadius: 2, backgroundColor: "background.default", border: "1px solid", borderColor: "divider", maxHeight: 260, overflow: "auto" }}>
<Typography variant="body2" sx={{ whiteSpace: "pre-wrap", fontFamily: "ui-monospace, SFMono-Regular, monospace" }}>
{benchmarkStatus?.reportMarkdown || "Run scripts/run-cv-benchmark.sh to generate the latest corpus report and fixture candidates."}
</Typography>
</Box>
{benchmarkIndex ? (
<>
<Box sx={{ mt: 2, display: "grid", gridTemplateColumns: { xs: "1fr 1fr", md: "repeat(5, 1fr)" }, gap: 1.25 }}>
<SummaryCard title="Files" value={String(benchmarkIndex.TotalFiles)} subtitle="Corpus inputs" />
<SummaryCard title="Coverage" value={formatPercent(benchmarkIndex.AverageCoverage)} subtitle="Structured field coverage" tone={benchmarkTone(benchmarkIndex.AverageCoverage)} />
<SummaryCard title="Confidence" value={formatPercent(benchmarkIndex.AverageConfidence)} subtitle="Field metadata confidence" tone={benchmarkTone(benchmarkIndex.AverageConfidence)} />
<SummaryCard title="Consistency" value={formatPercent(benchmarkIndex.AverageConsistency)} subtitle="Normalization consistency" tone={benchmarkTone(benchmarkIndex.AverageConsistency)} />
<SummaryCard title="Missing approved" value={String(benchmarkIndex.MissingApprovedFixtures)} subtitle="Needs fixture review" tone={benchmarkIndex.MissingApprovedFixtures > 0 ? "warning" : "success"} />
</Box>
<Box sx={{ mt: 2.5, display: "grid", gridTemplateColumns: { xs: "1fr", xl: "1.1fr 0.9fr" }, gap: 2 }}>
<Paper variant="outlined" sx={{ p: 1.5, borderRadius: 3 }}>
<Typography variant="subtitle1" sx={{ fontWeight: 900, mb: 1 }}>Top parser findings</Typography>
<Stack spacing={1}>
{benchmarkFindings.length > 0 ? benchmarkFindings.map((finding) => (
<Box key={`${finding.file}:${finding.issue}`} sx={{ p: 1.25, borderRadius: 2, backgroundColor: "background.default", border: "1px solid", borderColor: "divider" }}>
<Typography variant="caption" sx={{ color: "text.secondary" }}>{finding.file}</Typography>
<Typography variant="body2">{finding.issue}</Typography>
</Box>
)) : <Typography variant="body2" sx={{ color: "text.secondary" }}>No standout benchmark anomalies in the current run.</Typography>}
</Stack>
</Paper>
<Paper variant="outlined" sx={{ p: 1.5, borderRadius: 3 }}>
<Typography variant="subtitle1" sx={{ fontWeight: 900, mb: 1 }}>Weakest files in current run</Typography>
<Stack spacing={1}>
{weakestEntries.map((entry) => (
<Box key={entry.Slug} sx={{ p: 1.25, borderRadius: 2, backgroundColor: "background.default", border: "1px solid", borderColor: "divider" }}>
<Typography variant="body2" sx={{ fontWeight: 800 }}>{entry.FileName}</Typography>
<Box sx={{ display: "flex", gap: 0.75, flexWrap: "wrap", mt: 0.75 }}>
<Chip size="small" label={`Coverage ${formatPercent(entry.CoverageScore)}`} color={benchmarkTone(entry.CoverageScore)} />
<Chip size="small" label={`Confidence ${formatPercent(entry.ConfidenceScore)}`} color={benchmarkTone(entry.ConfidenceScore)} />
<Chip size="small" label={`Consistency ${formatPercent(entry.ConsistencyScore)}`} color={benchmarkTone(entry.ConsistencyScore)} />
</Box>
<Typography variant="caption" sx={{ display: "block", color: "text.secondary", mt: 0.75 }}>{entry.DiffSummary || "-"}</Typography>
</Box>
))}
</Stack>
</Paper>
</Box>
<Box sx={{ mt: 2, p: 1.5, borderRadius: 2, backgroundColor: "background.default", border: "1px solid", borderColor: "divider", maxHeight: 280, overflow: "auto" }}>
<Typography variant="subtitle2" sx={{ fontWeight: 800, mb: 1 }}>Latest markdown summary</Typography>
<Typography variant="body2" sx={{ whiteSpace: "pre-wrap", fontFamily: "ui-monospace, SFMono-Regular, monospace" }}>
{benchmarkStatus?.reportMarkdown || "-"}
</Typography>
</Box>
</>
) : (
<Box sx={{ mt: 1.5, p: 1.5, borderRadius: 2, backgroundColor: "background.default", border: "1px solid", borderColor: "divider" }}>
<Typography variant="body2" sx={{ whiteSpace: "pre-wrap", fontFamily: "ui-monospace, SFMono-Regular, monospace" }}>
{benchmarkStatus?.reportMarkdown || "Run scripts/run-cv-benchmark.sh to generate the latest corpus report and fixture candidates."}
</Typography>
</Box>
)}
</Paper>
</>
) : (