Nomad changes
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s

This commit is contained in:
Henrik Jess Nielsen
2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions

View File

@@ -0,0 +1,109 @@
# Taskfile schema version.
version: "3"

# Global variables shared by every task below.
vars:
  # Fixture corpus handed to the harness through --fixtures.
  FIXTURES_DIR: "tools/benchmark-harness/fixtures"
  # Release-profile harness binary invoked by the `run` task.
  HARNESS_PATH: "./target/release/benchmark-harness"
  # Root directory for `run` output; removed by `clean-results`.
  BENCHMARK_RESULTS_DIR: "benchmark-results"
  # Root directory for `profile` output; removed by `clean-results`.
  FLAMEGRAPH_DIR: "flamegraphs"
tasks:
  # Invokes the harness binary at HARNESS_PATH for one framework/mode pair and
  # writes results to BENCHMARK_RESULTS_DIR/<FRAMEWORK>-<MODE>.
  # FRAMEWORK and MODE are mandatory. ITERATIONS (default "1") and TIMEOUT
  # (default "900") may be overridden by the caller. This task does not build
  # the binary itself.
  run:
    desc: "Run benchmark harness with profiling support"
    requires:
      vars:
        - FRAMEWORK
        - MODE
    vars:
      ITERATIONS: '{{ .ITERATIONS | default "1" }}'
      TIMEOUT: '{{ .TIMEOUT | default "900" }}'
      # "single-file" mode is run with one worker; every other mode with four.
      MAX_CONCURRENT: '{{ if eq .MODE "single-file" }}1{{ else }}4{{ end }}'
    env:
      RUST_BACKTRACE: short
    cmds:
      - mkdir -p "{{.BENCHMARK_RESULTS_DIR}}/{{.FRAMEWORK}}-{{.MODE}}"
      - |
        {{.HARNESS_PATH}} \
          run \
          --fixtures "{{.FIXTURES_DIR}}" \
          --frameworks "{{.FRAMEWORK}}" \
          --output "{{.BENCHMARK_RESULTS_DIR}}/{{.FRAMEWORK}}-{{.MODE}}" \
          --iterations "{{.ITERATIONS}}" \
          --timeout "{{.TIMEOUT}}" \
          --mode "{{.MODE}}" \
          --max-concurrent "{{.MAX_CONCURRENT}}"

  # Builds the CLI and the harness with the `profiling` cargo profile, then runs
  # pipeline-benchmark with --profile-dir pointing at FLAMEGRAPH_DIR/<short SHA>.
  # PIPELINE (default "baseline") is passed as --paths and DOC_FILTER
  # (default "pdf") as --doc.
  profile:
    desc: "Run pipeline-benchmark with flamegraph profiling. Builds with --profile profiling so Rust symbols are resolved."
    vars:
      PIPELINE: '{{ .PIPELINE | default "baseline" }}'
      DOC_FILTER: '{{ .DOC_FILTER | default "pdf" }}'
      # Short hash of the current HEAD; used as the output sub-directory name.
      SHA:
        sh: git rev-parse --short HEAD
    env:
      RUST_BACKTRACE: short
    cmds:
      # Build with the `profiling` profile (inherits release, retains debug
      # info). A plain --release build strips Rust symbols, leaving the
      # flamegraph full of __mh_execute_header / raw addresses and unable
      # to surface kreuzberg::* hotspots. See docs/perf/profiling.md.
      - cargo build --profile profiling -p kreuzberg-cli --features all
      - cargo build --profile profiling -p benchmark-harness --features profiling
      - mkdir -p "{{.FLAMEGRAPH_DIR}}/{{.SHA}}"
      - |
        target/profiling/benchmark-harness pipeline-benchmark \
          --fixtures "{{.FIXTURES_DIR}}" \
          --paths "{{.PIPELINE}}" \
          --doc "{{.DOC_FILTER}}" \
          --profile-dir "{{.FLAMEGRAPH_DIR}}/{{.SHA}}"
      - 'echo "Flamegraph SVGs in {{.FLAMEGRAPH_DIR}}/{{.SHA}}/. Run: python3 tools/perf/extract_top_symbols.py {{.FLAMEGRAPH_DIR}}/{{.SHA}}/{{.PIPELINE}}.svg"'

  # Runs the harness `compare` subcommand over the fixtures with --guardrails.
  compare:
    desc: "Framework comparison with quality guardrails (baseline vs layout)"
    cmds:
      - cargo run -p benchmark-harness -- compare --fixtures "{{.FIXTURES_DIR}}" --guardrails

  # Runs pipeline-benchmark restricted to the `baseline` and `layout` paths.
  pipeline:quick:
    desc: "Pipeline benchmark — native paths only (P1+P2)"
    cmds:
      - cargo run -p benchmark-harness -- pipeline-benchmark --fixtures "{{.FIXTURES_DIR}}" --paths baseline,layout

  # Runs pipeline-benchmark without a --paths filter.
  pipeline:all:
    desc: "Pipeline benchmark — all 6 extraction paths"
    cmds:
      - cargo run -p benchmark-harness -- pipeline-benchmark --fixtures "{{.FIXTURES_DIR}}"

  # Runs the harness `survey` subcommand limited to --types pdf.
  survey:
    desc: "Corpus-wide extraction stats for all PDFs"
    cmds:
      - cargo run -p benchmark-harness -- survey --fixtures "{{.FIXTURES_DIR}}" --types pdf

  # Runs the harness `model-benchmark` subcommand over the fixtures.
  models:
    desc: "Layout model A/B comparison (fast vs accurate)"
    cmds:
      - cargo run -p benchmark-harness -- model-benchmark --fixtures "{{.FIXTURES_DIR}}"

  # Runs the ground-truth generation script through `uv run --no-sync`.
  generate-gt:
    desc: "Generate markdown ground truth from PDFs using Gemini"
    cmds:
      - uv run --no-sync tools/benchmark-harness/scripts/generate_markdown_gt.py

  # Runs the download script. Considered up to date once OmniDocBench.json
  # exists in the datasets directory.
  download:omnidocbench:
    desc: "Download OmniDocBench dataset from HuggingFace (~1.3 GB)"
    cmds:
      - bash tools/benchmark-harness/scripts/download_omnidocbench.sh
    status:
      - test -f tools/benchmark-harness/datasets/omnidocbench/OmniDocBench.json

  # Runs the import script after `download:omnidocbench` (declared as a dep).
  # Considered up to date once at least one omnidoc_*.json fixture exists.
  import:omnidocbench:
    desc: "Import OmniDocBench into benchmark fixtures (run download:omnidocbench first)"
    deps: ["download:omnidocbench"]
    cmds:
      - python3 tools/benchmark-harness/scripts/import_omnidocbench.py tools/benchmark-harness/datasets/omnidocbench .
    status:
      # `test -f` takes a single operand, so it errors as soon as the glob
      # matches more than one file and the task would re-run every time.
      # `ls` succeeds for any number of matches and fails when none exist.
      - ls tools/benchmark-harness/fixtures/pdf/omnidoc_*.json >/dev/null 2>&1

  # Deletes the benchmark results and flamegraph directories.
  clean-results:
    desc: "Clean up benchmark results and profiles"
    cmds:
      - rm -rf "{{.BENCHMARK_RESULTS_DIR}}"
      - rm -rf "{{.FLAMEGRAPH_DIR}}"