This commit is contained in:
109
.task/workflows/benchmark.yml
Normal file
109
.task/workflows/benchmark.yml
Normal file
@@ -0,0 +1,109 @@
|
||||
# Taskfile schema version.
version: '3'

# Shared locations referenced by the tasks in this file.
vars:
  # Passed to the harness as --fixtures.
  FIXTURES_DIR: 'tools/benchmark-harness/fixtures'
  # Binary invoked by the `run` task.
  HARNESS_PATH: './target/release/benchmark-harness'
  # Parent directory for `run` output; removed by `clean-results`.
  BENCHMARK_RESULTS_DIR: 'benchmark-results'
  # Parent directory for `profile` output; removed by `clean-results`.
  FLAMEGRAPH_DIR: 'flamegraphs'
|
||||
|
||||
tasks:
|
||||
run:
  desc: 'Run benchmark harness with profiling support'
  # FRAMEWORK and MODE are declared as required variables for this task.
  requires:
    vars: [FRAMEWORK, MODE]
  env:
    RUST_BACKTRACE: 'short'
  vars:
    # Use the incoming value when one is set, otherwise the default shown.
    ITERATIONS: '{{ .ITERATIONS | default "1" }}'
    TIMEOUT: '{{ .TIMEOUT | default "900" }}'
    # MODE "single-file" yields 1; any other MODE yields 4.
    MAX_CONCURRENT: '{{ if eq .MODE "single-file" }}1{{ else }}4{{ end }}'
  cmds:
    # Create the per-framework, per-mode output directory first.
    - mkdir -p "{{.BENCHMARK_RESULTS_DIR}}/{{.FRAMEWORK}}-{{.MODE}}"
    # Invoke the harness binary at HARNESS_PATH; this task does not build it.
    - |
      {{.HARNESS_PATH}} \
        run \
        --fixtures "{{.FIXTURES_DIR}}" \
        --frameworks "{{.FRAMEWORK}}" \
        --output "{{.BENCHMARK_RESULTS_DIR}}/{{.FRAMEWORK}}-{{.MODE}}" \
        --iterations "{{.ITERATIONS}}" \
        --timeout "{{.TIMEOUT}}" \
        --mode "{{.MODE}}" \
        --max-concurrent "{{.MAX_CONCURRENT}}"
|
||||
|
||||
profile:
  desc: >-
    Run pipeline-benchmark with flamegraph profiling.
    Builds with --profile profiling so Rust symbols are resolved.
  env:
    RUST_BACKTRACE: 'short'
  vars:
    PIPELINE: '{{ .PIPELINE | default "baseline" }}'
    DOC_FILTER: '{{ .DOC_FILTER | default "pdf" }}'
    # Short hash of the current commit; names the output subdirectory.
    SHA:
      sh: git rev-parse --short HEAD
  cmds:
    # Both crates are compiled with the `profiling` cargo profile, which
    # inherits from release but retains debug info. A plain --release
    # build strips Rust symbols, so the flamegraph ends up full of
    # __mh_execute_header / raw addresses and cannot surface
    # kreuzberg::* hotspots. See docs/perf/profiling.md.
    - cargo build --profile profiling -p kreuzberg-cli --features all
    - cargo build --profile profiling -p benchmark-harness --features profiling
    - mkdir -p "{{.FLAMEGRAPH_DIR}}/{{.SHA}}"
    # Run the profiling-profile binary built above.
    - |
      target/profiling/benchmark-harness pipeline-benchmark \
        --fixtures "{{.FIXTURES_DIR}}" \
        --paths "{{.PIPELINE}}" \
        --doc "{{.DOC_FILTER}}" \
        --profile-dir "{{.FLAMEGRAPH_DIR}}/{{.SHA}}"
    # Print where the output went and the suggested follow-up command.
    - >-
      echo
      "Flamegraph SVGs in {{.FLAMEGRAPH_DIR}}/{{.SHA}}/.
      Run: python3 tools/perf/extract_top_symbols.py
      {{.FLAMEGRAPH_DIR}}/{{.SHA}}/{{.PIPELINE}}.svg"
|
||||
|
||||
compare:
  desc: 'Framework comparison with quality guardrails (baseline vs layout)'
  cmds:
    # Goes through `cargo run` rather than a prebuilt harness binary.
    - >-
      cargo run -p benchmark-harness -- compare
      --fixtures "{{.FIXTURES_DIR}}"
      --guardrails
|
||||
|
||||
pipeline:quick:
  desc: 'Pipeline benchmark — native paths only (P1+P2)'
  cmds:
    # --paths restricts the run to the baseline and layout paths.
    - >-
      cargo run -p benchmark-harness -- pipeline-benchmark
      --fixtures "{{.FIXTURES_DIR}}"
      --paths baseline,layout
|
||||
|
||||
pipeline:all:
  desc: 'Pipeline benchmark — all 6 extraction paths'
  cmds:
    # No --paths filter is passed here, unlike pipeline:quick.
    - >-
      cargo run -p benchmark-harness -- pipeline-benchmark
      --fixtures "{{.FIXTURES_DIR}}"
|
||||
|
||||
survey:
  desc: 'Corpus-wide extraction stats for all PDFs'
  cmds:
    # --types pdf limits the survey to PDF fixtures.
    - >-
      cargo run -p benchmark-harness -- survey
      --fixtures "{{.FIXTURES_DIR}}"
      --types pdf
|
||||
|
||||
models:
  desc: 'Layout model A/B comparison (fast vs accurate)'
  cmds:
    # Runs the harness's model-benchmark subcommand over the shared fixtures.
    - >-
      cargo run -p benchmark-harness -- model-benchmark
      --fixtures "{{.FIXTURES_DIR}}"
|
||||
|
||||
generate-gt:
  desc: 'Generate markdown ground truth from PDFs using Gemini'
  cmds:
    # Runs the generator script through uv with --no-sync.
    - >-
      uv run --no-sync
      tools/benchmark-harness/scripts/generate_markdown_gt.py
|
||||
|
||||
download:omnidocbench:
  desc: 'Download OmniDocBench dataset from HuggingFace (~1.3 GB)'
  # Treated as up to date once the dataset's JSON file exists, so the
  # download is skipped on later runs.
  status:
    - 'test -f tools/benchmark-harness/datasets/omnidocbench/OmniDocBench.json'
  cmds:
    - 'bash tools/benchmark-harness/scripts/download_omnidocbench.sh'
|
||||
|
||||
import:omnidocbench:
  desc: "Import OmniDocBench into benchmark fixtures (run download:omnidocbench first)"
  # The download task is declared as a dependency, so it is invoked (or
  # skipped by its own status check) before the import starts.
  deps: ["download:omnidocbench"]
  cmds:
    - python3 tools/benchmark-harness/scripts/import_omnidocbench.py tools/benchmark-harness/datasets/omnidocbench .
  status:
    # Up to date when at least one imported fixture exists. The glob is
    # expanded into the positional parameters and only the first entry is
    # tested: `test -f` accepts a single operand, so handing it the raw
    # glob errors out as soon as more than one fixture matches, and the
    # import would then re-run on every invocation. With no match the
    # pattern stays literal, the test fails, and the import runs.
    - 'set -- tools/benchmark-harness/fixtures/pdf/omnidoc_*.json && test -f "$1"'
|
||||
|
||||
clean-results:
  desc: 'Clean up benchmark results and profiles'
  cmds:
    # Removes the output trees written by the `run` and `profile` tasks.
    - 'rm -rf "{{.BENCHMARK_RESULTS_DIR}}"'
    - 'rm -rf "{{.FLAMEGRAPH_DIR}}"'
|
||||
Reference in New Issue
Block a user