# Taskfile (schema v3) for the benchmark harness: running benchmarks,
# flamegraph profiling, pipeline/model comparisons, and dataset management.
version: "3"

# Shared paths used by the tasks below (relative to the directory Task runs in).
vars:
  FIXTURES_DIR: "tools/benchmark-harness/fixtures"
  HARNESS_PATH: "./target/release/benchmark-harness"
  BENCHMARK_RESULTS_DIR: "benchmark-results"
  FLAMEGRAPH_DIR: "flamegraphs"

tasks:
  # Invokes the harness binary at HARNESS_PATH; this task does not build it.
  # Required vars: FRAMEWORK, MODE.
  # Optional vars: ITERATIONS (default "1"), TIMEOUT (default "900").
  run:
    desc: "Run benchmark harness with profiling support"
    requires:
      vars:
        - FRAMEWORK
        - MODE
    vars:
      ITERATIONS: '{{ .ITERATIONS | default "1" }}'
      TIMEOUT: '{{ .TIMEOUT | default "900" }}'
      # "single-file" mode is passed --max-concurrent 1; every other mode 4.
      MAX_CONCURRENT: '{{ if eq .MODE "single-file" }}1{{ else }}4{{ end }}'
    env:
      RUST_BACKTRACE: short
    cmds:
      # Results are written to one directory per framework/mode pair.
      - mkdir -p "{{.BENCHMARK_RESULTS_DIR}}/{{.FRAMEWORK}}-{{.MODE}}"
      - |
        {{.HARNESS_PATH}} \
          run \
          --fixtures "{{.FIXTURES_DIR}}" \
          --frameworks "{{.FRAMEWORK}}" \
          --output "{{.BENCHMARK_RESULTS_DIR}}/{{.FRAMEWORK}}-{{.MODE}}" \
          --iterations "{{.ITERATIONS}}" \
          --timeout "{{.TIMEOUT}}" \
          --mode "{{.MODE}}" \
          --max-concurrent "{{.MAX_CONCURRENT}}"

  # Optional vars: PIPELINE (default "baseline"), DOC_FILTER (default "pdf").
  # Output goes to FLAMEGRAPH_DIR/<short git SHA of HEAD>.
  profile:
    desc: "Run pipeline-benchmark with flamegraph profiling. Builds with --profile profiling so Rust symbols are resolved."
    vars:
      PIPELINE: '{{ .PIPELINE | default "baseline" }}'
      DOC_FILTER: '{{ .DOC_FILTER | default "pdf" }}'
      SHA:
        sh: git rev-parse --short HEAD
    env:
      RUST_BACKTRACE: short
    cmds:
      # Build with the `profiling` profile (inherits release, retains debug
      # info). A plain --release build strips Rust symbols, leaving the
      # flamegraph full of __mh_execute_header / raw addresses and unable
      # to surface kreuzberg::* hotspots. See docs/perf/profiling.md.
      - cargo build --profile profiling -p kreuzberg-cli --features all
      - cargo build --profile profiling -p benchmark-harness --features profiling
      - mkdir -p "{{.FLAMEGRAPH_DIR}}/{{.SHA}}"
      - |
        target/profiling/benchmark-harness pipeline-benchmark \
          --fixtures "{{.FIXTURES_DIR}}" \
          --paths "{{.PIPELINE}}" \
          --doc "{{.DOC_FILTER}}" \
          --profile-dir "{{.FLAMEGRAPH_DIR}}/{{.SHA}}"
      # Folded scalar: the two lines below are joined with a single space,
      # so the shell receives one `echo "..."` command.
      - >-
        echo "Flamegraph SVGs in {{.FLAMEGRAPH_DIR}}/{{.SHA}}/.
        Run: python3 tools/perf/extract_top_symbols.py {{.FLAMEGRAPH_DIR}}/{{.SHA}}/{{.PIPELINE}}.svg"

  compare:
    desc: "Framework comparison with quality guardrails (baseline vs layout)"
    cmds:
      - cargo run -p benchmark-harness -- compare --fixtures "{{.FIXTURES_DIR}}" --guardrails

  # Restricts the pipeline benchmark to the `baseline` and `layout` paths.
  pipeline:quick:
    desc: "Pipeline benchmark — native paths only (P1+P2)"
    cmds:
      - cargo run -p benchmark-harness -- pipeline-benchmark --fixtures "{{.FIXTURES_DIR}}" --paths baseline,layout

  # Same subcommand as pipeline:quick, but without a --paths filter.
  pipeline:all:
    desc: "Pipeline benchmark — all 6 extraction paths"
    cmds:
      - cargo run -p benchmark-harness -- pipeline-benchmark --fixtures "{{.FIXTURES_DIR}}"

  survey:
    desc: "Corpus-wide extraction stats for all PDFs"
    cmds:
      - cargo run -p benchmark-harness -- survey --fixtures "{{.FIXTURES_DIR}}" --types pdf

  models:
    desc: "Layout model A/B comparison (fast vs accurate)"
    cmds:
      - cargo run -p benchmark-harness -- model-benchmark --fixtures "{{.FIXTURES_DIR}}"

  generate-gt:
    desc: "Generate markdown ground truth from PDFs using Gemini"
    cmds:
      - uv run --no-sync tools/benchmark-harness/scripts/generate_markdown_gt.py

  download:omnidocbench:
    desc: "Download OmniDocBench dataset from HuggingFace (~1.3 GB)"
    cmds:
      - bash tools/benchmark-harness/scripts/download_omnidocbench.sh
    # Up to date (download skipped) once the dataset manifest file exists.
    status:
      - test -f tools/benchmark-harness/datasets/omnidocbench/OmniDocBench.json

  import:omnidocbench:
    desc: "Import OmniDocBench into benchmark fixtures (run download:omnidocbench first)"
    deps: ["download:omnidocbench"]
    cmds:
      - python3 tools/benchmark-harness/scripts/import_omnidocbench.py tools/benchmark-harness/datasets/omnidocbench .
    # Up to date once at least one imported fixture exists. The glob can
    # expand to many files, and `test -f a b c` is a usage error (non-zero
    # exit) that would make the task re-run forever. Load the matches into
    # the positional parameters and test only the first one; with no match
    # the pattern stays literal and `test -f` fails as intended.
    status:
      - 'set -- tools/benchmark-harness/fixtures/pdf/omnidoc_*.json && test -f "$1"'

  # Deletes the result and flamegraph directories named in the top-level vars.
  clean-results:
    desc: "Clean up benchmark results and profiles"
    cmds:
      - rm -rf "{{.BENCHMARK_RESULTS_DIR}}"
      - rm -rf "{{.FLAMEGRAPH_DIR}}"