fix: harden admin system fallback and benchmark review

2026-04-01 13:38:22 +02:00
parent cc55fc0cf8
commit 9191e4cc5b
4 changed files with 264 additions and 7 deletions
@@ -96,6 +96,39 @@ describe('AdminSystemPage', () => {
            rootPath: '/data/CvBenchmarks/latest',
            lastUpdatedAtUtc: '2026-03-23T10:10:00Z',
            reportMarkdown: '# CV benchmark report\n\n- Files: 4',
+            indexJson: JSON.stringify({
+              CorpusRoot: '/home/pi/cvs',
+              OutputRoot: '/data/CvBenchmarks/latest',
+              GeneratedAtUtc: '2026-03-23T10:10:00Z',
+              TotalFiles: 4,
+              AverageCoverage: 0.72,
+              AverageConfidence: 0.66,
+              AverageConsistency: 0.94,
+              FilesWithSuspiciousLocations: 1,
+              MissingApprovedFixtures: 4,
+              Entries: [
+                {
+                  FileName: 'cv.txt',
+                  Slug: 'cv-txt',
+                  Extension: '.txt',
+                  Characters: 2000,
+                  OutputPath: '/data/CvBenchmarks/latest/outputs/cv-txt.json',
+                  ApprovedFixturePath: null,
+                  CandidateFixturePath: '/data/CvBenchmarks/latest/candidate-fixtures/cv-txt.json',
+                  ContactLocation: 'San Francisco, Hobbies',
+                  FirstJob: '* July',
+                  FirstJobLocation: null,
+                  FirstEducation: '* September',
+                  FirstEducationLocation: null,
+                  QualificationLevels: ['Other'],
+                  SuspiciousLocations: [],
+                  CoverageScore: 0.5,
+                  ConfidenceScore: 0.65,
+                  ConsistencyScore: 0.8,
+                  DiffSummary: 'No approved fixture yet — candidate fixture written.',
+                },
+              ],
+            }),
          },
        } as any);
      }
@@ -138,6 +171,8 @@ describe('AdminSystemPage', () => {
    expect(screen.getByText(/ollama version/i)).toBeTruthy();
    expect(screen.getByText(/model · qwen2.5:7b/i)).toBeTruthy();
    expect(screen.getByText(/cv benchmark review/i)).toBeTruthy();
+    expect(screen.getByText(/top parser findings/i)).toBeTruthy();
+    expect(screen.getByText(/suspicious contact location: san francisco, hobbies/i)).toBeTruthy();
    expect(screen.getByText('OCR avg latency')).toBeTruthy();
    expect(screen.getByText('88.4 ms')).toBeTruthy();
  });
@@ -68,6 +68,40 @@ type EditableEmailSettings = {
  hasPassword: boolean;
 };

+/**
+ * One element of `CvBenchmarkIndex.Entries`: the per-file result of a CV benchmark run.
+ * Property names are PascalCase because they mirror the keys of the JSON index payload.
+ */
+type CvBenchmarkEntry = {
+  // Shown as the file label in the findings and "weakest files" lists.
+  FileName: string;
+  // Used as the React list key for the "weakest files" list — assumed unique per run; TODO confirm.
+  Slug: string;
+  Extension: string;
+  Characters: number;
+  OutputPath: string;
+  ApprovedFixturePath?: string | null;
+  CandidateFixturePath?: string | null;
+  // Tested against a keyword pattern by the page to flag suspicious contact locations.
+  ContactLocation?: string | null;
+  // Flagged as over-captured by the page when longer than 120 characters.
+  FirstJob?: string | null;
+  FirstJobLocation?: string | null;
+  // Flagged as over-captured by the page when longer than 120 characters.
+  FirstEducation?: string | null;
+  FirstEducationLocation?: string | null;
+  // The page reports a finding when this list contains "Other".
+  QualificationLevels: string[];
+  SuspiciousLocations: string[];
+  // The three scores are rendered as percentages and colour-coded with thresholds
+  // at 0.6 and 0.8, so they are presumably fractions in the 0..1 range — TODO confirm.
+  CoverageScore: number;
+  ConfidenceScore: number;
+  ConsistencyScore: number;
+  // Rendered as a caption under the entry; "-" is shown when empty.
+  DiffSummary?: string | null;
+};
+
+/**
+ * Decoded form of `CvBenchmarkStatus.indexJson`: run-level summary plus per-file entries.
+ * Property names are PascalCase because they mirror the keys of the JSON index payload.
+ */
+type CvBenchmarkIndex = {
+  // Shown in the "Corpus root" detail row.
+  CorpusRoot: string;
+  OutputRoot: string;
+  // Presumably an ISO-8601 UTC timestamp (the test fixture uses "2026-03-23T10:10:00Z") — TODO confirm.
+  GeneratedAtUtc: string;
+  // Shown in the "Files" summary card.
+  TotalFiles: number;
+  // The three averages are rendered as percentages and colour-coded with thresholds
+  // at 0.6 and 0.8, so they are presumably fractions in the 0..1 range — TODO confirm.
+  AverageCoverage: number;
+  AverageConfidence: number;
+  AverageConsistency: number;
+  FilesWithSuspiciousLocations: number;
+  // The "Missing approved" card uses the warning tone when this is greater than 0.
+  MissingApprovedFixtures: number;
+  // Per-file results; the page sorts and scans these to build its lists.
+  Entries: CvBenchmarkEntry[];
+};
+
 type CvBenchmarkStatus = {
  indexJson?: string | null;
  reportMarkdown?: string | null;
@@ -136,6 +170,26 @@ function formatDate(value?: string | null) {
  return value ? new Date(value).toLocaleString() : "-";
 }

+/**
+ * Formats a fractional value (e.g. 0.72) as a whole-number percentage string.
+ *
+ * @param value - Fraction to format; may be missing.
+ * @returns The value times 100, rounded to the nearest integer and suffixed with "%",
+ *   or "-" when the value is not a finite number (undefined, null, NaN, ±Infinity).
+ */
+function formatPercent(value?: number | null) {
+  // typeof alone lets NaN and ±Infinity through, which would render "NaN%" / "Infinity%".
+  return typeof value === "number" && Number.isFinite(value) ? `${Math.round(value * 100)}%` : "-";
+}
+
+/**
+ * Decodes the benchmark index JSON text into a `CvBenchmarkIndex`.
+ *
+ * @param indexJson - Raw JSON text; may be missing or blank.
+ * @returns The decoded index, or `null` when the input is blank, is not valid JSON,
+ *   or does not decode to an object that carries an `Entries` array.
+ */
+function parseBenchmarkIndex(indexJson?: string | null): CvBenchmarkIndex | null {
+  if (!indexJson?.trim()) return null;
+  let parsed: unknown;
+  try {
+    parsed = JSON.parse(indexJson);
+  } catch {
+    return null;
+  }
+  // Valid JSON is not necessarily a valid index: "null", "42", "[]" or an object
+  // without Entries would otherwise be cast through and throw as soon as a caller
+  // spreads or iterates Entries.
+  if (typeof parsed !== "object" || parsed === null) return null;
+  if (!Array.isArray((parsed as { Entries?: unknown }).Entries)) return null;
+  return parsed as CvBenchmarkIndex;
+}
+
+/**
+ * Maps a score to the tone used to colour summary cards and chips.
+ *
+ * @param value - Score to classify; may be missing.
+ * @returns "default" when the value is not a number, "success" at 0.8 or above,
+ *   "warning" at 0.6 or above, and "error" otherwise.
+ */
+function benchmarkTone(value?: number | null) {
+  if (typeof value !== "number") {
+    return "default" as const;
+  }
+  // Thresholds are checked from highest to lowest, so the first match wins.
+  return value >= 0.8
+    ? ("success" as const)
+    : value >= 0.6
+      ? ("warning" as const)
+      : ("error" as const);
+}
+
 function SummaryCard({ title, value, subtitle, tone = "default" }: { title: string; value: string; subtitle?: string; tone?: "default" | "success" | "warning" | "error" }) {
  const color = tone === "success" ? "success.main" : tone === "warning" ? "warning.main" : tone === "error" ? "error.main" : "text.primary";
  return (
@@ -210,6 +264,34 @@ export default function AdminSystemPage() {
    return "success" as const;
  }, [status]);

+  const benchmarkIndex = useMemo(() => parseBenchmarkIndex(benchmarkStatus?.indexJson), [benchmarkStatus?.indexJson]);
+  const weakestEntries = useMemo(() => {
+    if (!benchmarkIndex) return [] as CvBenchmarkEntry[];
+    return [...benchmarkIndex.Entries]
+      .sort((a, b) => (a.CoverageScore + a.ConfidenceScore + a.ConsistencyScore) - (b.CoverageScore + b.ConfidenceScore + b.ConsistencyScore))
+      .slice(0, 6);
+  }, [benchmarkIndex]);
+
+  const benchmarkFindings = useMemo(() => {
+    if (!benchmarkIndex) return [] as Array<{ file: string; issue: string }>;
+    return benchmarkIndex.Entries.flatMap((entry) => {
+      const findings: Array<{ file: string; issue: string }> = [];
+      if (entry.ContactLocation && /(culture|education|arial|hobbies|cooperate|ag, ni|bold)/i.test(entry.ContactLocation)) {
+        findings.push({ file: entry.FileName, issue: `Suspicious contact location: ${entry.ContactLocation}` });
+      }
+      if (entry.FirstEducation && entry.FirstEducation.length > 120) {
+        findings.push({ file: entry.FileName, issue: "Education qualification looks over-captured." });
+      }
+      if ((entry.FirstJob ?? "").length > 120) {
+        findings.push({ file: entry.FileName, issue: "Work title looks over-captured." });
+      }
+      if ((entry.QualificationLevels ?? []).includes("Other")) {
+        findings.push({ file: entry.FileName, issue: "Qualification level fell back to Other." });
+      }
+      return findings;
+    }).slice(0, 10);
+  }, [benchmarkIndex]);
+
  const sendTestEmail = async () => {
    setSendingTestEmail(true);
    try {
@@ -430,12 +512,64 @@ export default function AdminSystemPage() {
            <Stack spacing={0.75}>
              <DetailRow label="Benchmark root" value={benchmarkStatus?.rootPath || "-"} />
              <DetailRow label="Last benchmark update" value={formatDate(benchmarkStatus?.lastUpdatedAtUtc)} />
+              <DetailRow label="Corpus root" value={benchmarkIndex?.CorpusRoot || "-"} />
            </Stack>
-            <Box sx={{ mt: 1.5, p: 1.5, borderRadius: 2, backgroundColor: "background.default", border: "1px solid", borderColor: "divider", maxHeight: 260, overflow: "auto" }}>
-              <Typography variant="body2" sx={{ whiteSpace: "pre-wrap", fontFamily: "ui-monospace, SFMono-Regular, monospace" }}>
-                {benchmarkStatus?.reportMarkdown || "Run scripts/run-cv-benchmark.sh to generate the latest corpus report and fixture candidates."}
-              </Typography>
-            </Box>
+
+            {benchmarkIndex ? (
+              <>
+                <Box sx={{ mt: 2, display: "grid", gridTemplateColumns: { xs: "1fr 1fr", md: "repeat(5, 1fr)" }, gap: 1.25 }}>
+                  <SummaryCard title="Files" value={String(benchmarkIndex.TotalFiles)} subtitle="Corpus inputs" />
+                  <SummaryCard title="Coverage" value={formatPercent(benchmarkIndex.AverageCoverage)} subtitle="Structured field coverage" tone={benchmarkTone(benchmarkIndex.AverageCoverage)} />
+                  <SummaryCard title="Confidence" value={formatPercent(benchmarkIndex.AverageConfidence)} subtitle="Field metadata confidence" tone={benchmarkTone(benchmarkIndex.AverageConfidence)} />
+                  <SummaryCard title="Consistency" value={formatPercent(benchmarkIndex.AverageConsistency)} subtitle="Normalization consistency" tone={benchmarkTone(benchmarkIndex.AverageConsistency)} />
+                  <SummaryCard title="Missing approved" value={String(benchmarkIndex.MissingApprovedFixtures)} subtitle="Needs fixture review" tone={benchmarkIndex.MissingApprovedFixtures > 0 ? "warning" : "success"} />
+                </Box>
+
+                <Box sx={{ mt: 2.5, display: "grid", gridTemplateColumns: { xs: "1fr", xl: "1.1fr 0.9fr" }, gap: 2 }}>
+                  <Paper variant="outlined" sx={{ p: 1.5, borderRadius: 3 }}>
+                    <Typography variant="subtitle1" sx={{ fontWeight: 900, mb: 1 }}>Top parser findings</Typography>
+                    <Stack spacing={1}>
+                      {benchmarkFindings.length > 0 ? benchmarkFindings.map((finding) => (
+                        <Box key={`${finding.file}:${finding.issue}`} sx={{ p: 1.25, borderRadius: 2, backgroundColor: "background.default", border: "1px solid", borderColor: "divider" }}>
+                          <Typography variant="caption" sx={{ color: "text.secondary" }}>{finding.file}</Typography>
+                          <Typography variant="body2">{finding.issue}</Typography>
+                        </Box>
+                      )) : <Typography variant="body2" sx={{ color: "text.secondary" }}>No standout benchmark anomalies in the current run.</Typography>}
+                    </Stack>
+                  </Paper>
+
+                  <Paper variant="outlined" sx={{ p: 1.5, borderRadius: 3 }}>
+                    <Typography variant="subtitle1" sx={{ fontWeight: 900, mb: 1 }}>Weakest files in current run</Typography>
+                    <Stack spacing={1}>
+                      {weakestEntries.map((entry) => (
+                        <Box key={entry.Slug} sx={{ p: 1.25, borderRadius: 2, backgroundColor: "background.default", border: "1px solid", borderColor: "divider" }}>
+                          <Typography variant="body2" sx={{ fontWeight: 800 }}>{entry.FileName}</Typography>
+                          <Box sx={{ display: "flex", gap: 0.75, flexWrap: "wrap", mt: 0.75 }}>
+                            <Chip size="small" label={`Coverage ${formatPercent(entry.CoverageScore)}`} color={benchmarkTone(entry.CoverageScore)} />
+                            <Chip size="small" label={`Confidence ${formatPercent(entry.ConfidenceScore)}`} color={benchmarkTone(entry.ConfidenceScore)} />
+                            <Chip size="small" label={`Consistency ${formatPercent(entry.ConsistencyScore)}`} color={benchmarkTone(entry.ConsistencyScore)} />
+                          </Box>
+                          <Typography variant="caption" sx={{ display: "block", color: "text.secondary", mt: 0.75 }}>{entry.DiffSummary || "-"}</Typography>
+                        </Box>
+                      ))}
+                    </Stack>
+                  </Paper>
+                </Box>
+
+                <Box sx={{ mt: 2, p: 1.5, borderRadius: 2, backgroundColor: "background.default", border: "1px solid", borderColor: "divider", maxHeight: 280, overflow: "auto" }}>
+                  <Typography variant="subtitle2" sx={{ fontWeight: 800, mb: 1 }}>Latest markdown summary</Typography>
+                  <Typography variant="body2" sx={{ whiteSpace: "pre-wrap", fontFamily: "ui-monospace, SFMono-Regular, monospace" }}>
+                    {benchmarkStatus?.reportMarkdown || "-"}
+                  </Typography>
+                </Box>
+              </>
+            ) : (
+              <Box sx={{ mt: 1.5, p: 1.5, borderRadius: 2, backgroundColor: "background.default", border: "1px solid", borderColor: "divider" }}>
+                <Typography variant="body2" sx={{ whiteSpace: "pre-wrap", fontFamily: "ui-monospace, SFMono-Regular, monospace" }}>
+                  {benchmarkStatus?.reportMarkdown || "Run scripts/run-cv-benchmark.sh to generate the latest corpus report and fixture candidates."}
+                </Typography>
+              </Box>
+            )}
          </Paper>
        </>
      ) : (