#!/usr/bin/env bash
#
# Wrapper that runs the CvCorpusHarnessTests benchmark and collects the
# structured CV JSON it produces into a per-run working directory.
#
# Environment overrides:
#   CV_CORPUS_DIR        Directory holding the CV corpus (default: $HOME/cvs).
#   CV_JSON_EXPORT_DIR   Working directory for this export
#                        (default: $ROOT_DIR/tmp/cv-json-export/<timestamp>).
#   CV_BENCHMARK_IGNORE  Ignore patterns handed to the harness unchanged
#                        (default: cv-template.pdf,Resume.en.pdf,EPS-*.pdf).

set -euo pipefail

# Parent of the directory that contains this script.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
CORPUS_DIR="${CV_CORPUS_DIR:-$HOME/cvs}"

# The timestamp keeps successive default exports in separate directories.
TIMESTAMP="$(date +%Y%m%d-%H%M%S)"
EXPORT_ROOT="${CV_JSON_EXPORT_DIR:-$ROOT_DIR/tmp/cv-json-export/$TIMESTAMP}"
OUTPUT_ROOT="$EXPORT_ROOT/benchmark"
APPROVED_DIR="$EXPORT_ROOT/approved-fixtures"
EDITABLE_DIR="$EXPORT_ROOT/editable-json"

DEFAULT_IGNORE_PATTERNS="cv-template.pdf,Resume.en.pdf,EPS-*.pdf"
IGNORE_PATTERNS="${CV_BENCHMARK_IGNORE:-$DEFAULT_IGNORE_PATTERNS}"

# Declared read-only in a separate statement so a failing command
# substitution above still aborts the script under `set -e`.
readonly ROOT_DIR CORPUS_DIR TIMESTAMP EXPORT_ROOT OUTPUT_ROOT APPROVED_DIR \
  EDITABLE_DIR DEFAULT_IGNORE_PATTERNS IGNORE_PATTERNS

#######################################
# Locate a usable dotnet executable.
# Prefers whatever `command -v dotnet` reports for the current PATH, then
# falls back to a fixed list of install locations, in order.
# Globals:   HOME (read), PATH (read)
# Arguments: none
# Outputs:   writes the resolved dotnet location to stdout
# Returns:   0 if one was found, 1 otherwise
#######################################
resolve_dotnet() {
  local on_path
  if on_path="$(command -v dotnet 2>/dev/null)"; then
    printf '%s\n' "$on_path"
    return 0
  fi

  local candidates=(
    "$HOME/.gsd/agent/bin/dotnet"
    "$HOME/.dotnet/dotnet"
    "/usr/bin/dotnet"
    "/usr/local/bin/dotnet"
  )

  local candidate
  for candidate in "${candidates[@]}"; do
    if [[ -x "$candidate" ]]; then
      printf '%s\n' "$candidate"
      return 0
    fi
  done

  return 1
}

# Resolve dotnet up front. `|| true` keeps a failed lookup from tripping
# `set -e`; the empty result is reported below, after the corpus check.
DOTNET_BIN="$(resolve_dotnet || true)"

if [[ ! -d "$CORPUS_DIR" ]]; then
  echo "CV corpus directory not found: $CORPUS_DIR" >&2
  exit 1
fi

if [[ -z "$DOTNET_BIN" ]]; then
  echo "dotnet not found on PATH or known fallback locations." >&2
  echo "Checked: ~/.gsd/agent/bin/dotnet, ~/.dotnet/dotnet, /usr/bin/dotnet, /usr/local/bin/dotnet" >&2
  exit 1
fi

# Per the message below, the harness scans a fixed location, so any other
# corpus directory is rejected rather than silently ignored.
if [[ "$CORPUS_DIR" != "/home/pi/cvs" ]]; then
  echo "This wrapper currently relies on the existing benchmark harness, which scans /home/pi/cvs." >&2
  echo "Set up a symlink or move the corpus there, or rerun with CV_CORPUS_DIR=/home/pi/cvs." >&2
  exit 1
fi

mkdir -p "$OUTPUT_ROOT" "$APPROVED_DIR" "$EDITABLE_DIR"

echo "Exporting structured CV JSON from: $CORPUS_DIR"
echo "Working directory: $EXPORT_ROOT"
echo "Ignoring files: $IGNORE_PATTERNS"

# The harness receives its directories and ignore list through environment
# variables scoped to this single command.
CV_BENCHMARK_OUTPUT_DIR="$OUTPUT_ROOT" \
CV_BENCHMARK_APPROVED_DIR="$APPROVED_DIR" \
CV_BENCHMARK_IGNORE="$IGNORE_PATTERNS" \
"$DOTNET_BIN" test "$ROOT_DIR/JobTrackerApi.Tests/JobTrackerApi.Tests.csproj" --filter CvCorpusHarnessTests /p:DisableSourceControlManagerQueries=true

# Copy the top-level JSON outputs into the editable directory; NUL-delimited
# so file names with spaces or newlines survive.
if [[ -d "$OUTPUT_ROOT/outputs" ]]; then
  find "$OUTPUT_ROOT/outputs" -maxdepth 1 -name '*.json' -print0 |
    while IFS= read -r -d '' file; do
      cp -- "$file" "$EDITABLE_DIR/${file##*/}"
    done
fi

cat <<EOF

Done.

Generated files:
- Latest parser output: $OUTPUT_ROOT/outputs
- Editable copies: $EDITABLE_DIR
- Candidate fixtures: $OUTPUT_ROOT/candidate-fixtures
- Summary report: $OUTPUT_ROOT/report.md
- Machine index: $OUTPUT_ROOT/index.json

Recommended workflow:
1. Edit files in: $EDITABLE_DIR
2. Keep only the fields you really want the extractor to produce.
3. Once reviewed, copy the corrected files into an approved fixtures directory.
4. Re-run the benchmark and compare actual vs approved output.
EOF