refactor, security updates, cv extraction upgrades
This commit is contained in:
Executable
+91
@@ -0,0 +1,91 @@
|
||||
#!/usr/bin/env bash
#
# Runs the CV corpus benchmark harness (a dotnet test filter) and collects the
# structured JSON it writes into a per-run export directory.
#
# Environment overrides:
#   CV_CORPUS_DIR        Directory holding the CV corpus (default: $HOME/cvs).
#   CV_JSON_EXPORT_DIR   Export root for this run
#                        (default: <repo>/tmp/cv-json-export/<timestamp>).
#   CV_BENCHMARK_IGNORE  Comma-separated file patterns to ignore
#                        (default: DEFAULT_IGNORE_PATTERNS below).
set -euo pipefail

# Parent of the directory that contains this script.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
CORPUS_DIR="${CV_CORPUS_DIR:-$HOME/cvs}"

# A second-resolution timestamp keeps the default export roots of separate
# runs apart.
TIMESTAMP="$(date +%Y%m%d-%H%M%S)"
EXPORT_ROOT="${CV_JSON_EXPORT_DIR:-$ROOT_DIR/tmp/cv-json-export/$TIMESTAMP}"

# Sub-directories of the export root used by the rest of the script.
OUTPUT_ROOT="$EXPORT_ROOT/benchmark"
APPROVED_DIR="$EXPORT_ROOT/approved-fixtures"
EDITABLE_DIR="$EXPORT_ROOT/editable-json"

DEFAULT_IGNORE_PATTERNS="cv-template.pdf,Resume.en.pdf,EPS-*.pdf"
IGNORE_PATTERNS="${CV_BENCHMARK_IGNORE:-$DEFAULT_IGNORE_PATTERNS}"

#######################################
# Locate the dotnet CLI.
# Tries PATH first, then a fixed list of fallback install locations.
# Globals:   HOME (read)
# Arguments: none
# Outputs:   writes the resolved dotnet location to stdout
# Returns:   0 if a dotnet executable was found, 1 otherwise
#######################################
resolve_dotnet() {
  local on_path
  # Look PATH up once and reuse the result instead of calling `command -v` twice.
  if on_path="$(command -v dotnet 2>/dev/null)"; then
    printf '%s\n' "$on_path"
    return 0
  fi

  local candidates=(
    "$HOME/.gsd/agent/bin/dotnet"
    "$HOME/.dotnet/dotnet"
    "/usr/bin/dotnet"
    "/usr/local/bin/dotnet"
  )

  local candidate
  for candidate in "${candidates[@]}"; do
    # -x alone is also true for searchable directories; require a regular file.
    if [[ -f "$candidate" && -x "$candidate" ]]; then
      printf '%s\n' "$candidate"
      return 0
    fi
  done

  return 1
}

# Resolve dotnet up front. `|| true` keeps `set -e` from aborting here so the
# friendlier "not found" message below can be printed instead.
DOTNET_BIN="$(resolve_dotnet || true)"

if [[ ! -d "$CORPUS_DIR" ]]; then
  echo "CV corpus directory not found: $CORPUS_DIR" >&2
  exit 1
fi

if [[ -z "$DOTNET_BIN" ]]; then
  echo "dotnet not found on PATH or known fallback locations." >&2
  echo "Checked: ~/.gsd/agent/bin/dotnet, ~/.dotnet/dotnet, /usr/bin/dotnet, /usr/local/bin/dotnet" >&2
  exit 1
fi

# Per the message below, the harness scans a fixed corpus location, so any
# other directory is rejected. One trailing slash is ignored so that
# CV_CORPUS_DIR=/home/pi/cvs/ is accepted as the same directory.
if [[ "${CORPUS_DIR%/}" != "/home/pi/cvs" ]]; then
  echo "This wrapper currently relies on the existing benchmark harness, which scans /home/pi/cvs." >&2
  echo "Set up a symlink or move the corpus there, or rerun with CV_CORPUS_DIR=/home/pi/cvs." >&2
  exit 1
fi

# Create every output location before the harness runs.
mkdir -p "$OUTPUT_ROOT" "$APPROVED_DIR" "$EDITABLE_DIR"

echo "Exporting structured CV JSON from: $CORPUS_DIR"
echo "Working directory: $EXPORT_ROOT"
echo "Ignoring files: $IGNORE_PATTERNS"

# The CV_BENCHMARK_* assignments are scoped to this single dotnet invocation.
# Under `set -e`, a failing test run aborts the script here.
CV_BENCHMARK_OUTPUT_DIR="$OUTPUT_ROOT" \
CV_BENCHMARK_APPROVED_DIR="$APPROVED_DIR" \
CV_BENCHMARK_IGNORE="$IGNORE_PATTERNS" \
  "$DOTNET_BIN" test "$ROOT_DIR/JobTrackerApi.Tests/JobTrackerApi.Tests.csproj" \
  --filter CvCorpusHarnessTests \
  /p:DisableSourceControlManagerQueries=true

# Copy each top-level *.json in the outputs directory into the editable
# directory under the same file name.
if [[ -d "$OUTPUT_ROOT/outputs" ]]; then
  find "$OUTPUT_ROOT/outputs" -maxdepth 1 -name '*.json' -print0 \
    | while IFS= read -r -d '' file; do
      # Parameter expansion instead of $(basename …): no fork per file, and
      # names ending in a newline are preserved.
      cp -- "$file" "$EDITABLE_DIR/${file##*/}"
    done
fi

# Final summary. The heredoc delimiter is unquoted so the path variables expand.
cat <<EOF

Done.

Generated files:
- Latest parser output: $OUTPUT_ROOT/outputs
- Editable copies: $EDITABLE_DIR
- Candidate fixtures: $OUTPUT_ROOT/candidate-fixtures
- Summary report: $OUTPUT_ROOT/report.md
- Machine index: $OUTPUT_ROOT/index.json

Recommended workflow:
1. Edit files in: $EDITABLE_DIR
2. Keep only the fields you really want the extractor to produce.
3. Once reviewed, copy the corrected files into an approved fixtures directory.
4. Re-run the benchmark and compare actual vs approved output.
EOF
Reference in New Issue
Block a user