Nomad changes

2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions
--- a/.task/workflows/benchmark.yml
+++ b/.task/workflows/benchmark.yml
@@ -0,0 +1,109 @@
+# Taskfile schema version.
+version: '3'
+
+# Shared locations, interpolated into the tasks below as {{.NAME}}.
+vars:
+  # Passed as --fixtures to the benchmark-harness subcommands.
+  FIXTURES_DIR: 'tools/benchmark-harness/fixtures'
+  # Harness binary executed by the `run` task.
+  HARNESS_PATH: './target/release/benchmark-harness'
+  # Parent of the per-run output directories; removed by `clean-results`.
+  BENCHMARK_RESULTS_DIR: 'benchmark-results'
+  # Parent of the per-commit profile directories; removed by `clean-results`.
+  FLAMEGRAPH_DIR: 'flamegraphs'
+
+tasks:
+  run:
+    # Executes the harness binary at HARNESS_PATH for one framework in one
+    # mode. This task does not build that binary itself.
+    # FRAMEWORK and MODE have no defaults and must be supplied by the caller;
+    # ITERATIONS and TIMEOUT are optional overrides.
+    desc: 'Run benchmark harness with profiling support'
+    requires:
+      vars: [FRAMEWORK, MODE]
+    env:
+      RUST_BACKTRACE: short
+    vars:
+      ITERATIONS: '{{ .ITERATIONS | default "1" }}'
+      TIMEOUT: '{{ .TIMEOUT | default "900" }}'
+      # MODE "single-file" yields 1; every other MODE yields 4.
+      MAX_CONCURRENT: '{{ if eq .MODE "single-file" }}1{{ else }}4{{ end }}'
+    cmds:
+      # Results land in a per-framework, per-mode directory; create it first.
+      - 'mkdir -p "{{.BENCHMARK_RESULTS_DIR}}/{{.FRAMEWORK}}-{{.MODE}}"'
+      - |
+        {{.HARNESS_PATH}} \
+          run \
+          --fixtures "{{.FIXTURES_DIR}}" \
+          --frameworks "{{.FRAMEWORK}}" \
+          --output "{{.BENCHMARK_RESULTS_DIR}}/{{.FRAMEWORK}}-{{.MODE}}" \
+          --iterations "{{.ITERATIONS}}" \
+          --timeout "{{.TIMEOUT}}" \
+          --mode "{{.MODE}}" \
+          --max-concurrent "{{.MAX_CONCURRENT}}"
+
+  profile:
+    # PIPELINE and DOC_FILTER are optional overrides (defaults "baseline" and
+    # "pdf"). SHA is the short hash of the current git HEAD and names the
+    # output directory under FLAMEGRAPH_DIR.
+    desc: 'Run pipeline-benchmark with flamegraph profiling. Builds with --profile profiling so Rust symbols are resolved.'
+    env:
+      RUST_BACKTRACE: short
+    vars:
+      PIPELINE: '{{ .PIPELINE | default "baseline" }}'
+      DOC_FILTER: '{{ .DOC_FILTER | default "pdf" }}'
+      SHA:
+        sh: git rev-parse --short HEAD
+    cmds:
+      # Both crates are compiled with the `profiling` cargo profile, which
+      # inherits from release but keeps debug info. With a plain --release
+      # build the Rust symbols are stripped, so the flamegraph shows
+      # __mh_execute_header / raw addresses instead of kreuzberg::* hotspots.
+      # See docs/perf/profiling.md.
+      - >-
+        cargo build --profile profiling
+        -p kreuzberg-cli --features all
+      - >-
+        cargo build --profile profiling
+        -p benchmark-harness --features profiling
+      - 'mkdir -p "{{.FLAMEGRAPH_DIR}}/{{.SHA}}"'
+      # Run the binary produced by the profiling build above.
+      - |
+        target/profiling/benchmark-harness pipeline-benchmark \
+          --fixtures "{{.FIXTURES_DIR}}" \
+          --paths "{{.PIPELINE}}" \
+          --doc "{{.DOC_FILTER}}" \
+          --profile-dir "{{.FLAMEGRAPH_DIR}}/{{.SHA}}"
+      # Print where the SVGs were written and a follow-up command to run.
+      - >-
+        echo "Flamegraph SVGs in {{.FLAMEGRAPH_DIR}}/{{.SHA}}/.
+        Run: python3 tools/perf/extract_top_symbols.py
+        {{.FLAMEGRAPH_DIR}}/{{.SHA}}/{{.PIPELINE}}.svg"
+
+  compare:
+    # NOTE(review): `cargo run` is invoked without --release/--profile, so
+    # cargo uses its default dev profile — confirm that is intended here.
+    desc: 'Framework comparison with quality guardrails (baseline vs layout)'
+    cmds:
+      - >-
+        cargo run -p benchmark-harness --
+        compare
+        --fixtures "{{.FIXTURES_DIR}}"
+        --guardrails
+
+  pipeline:quick:
+    # Restricts pipeline-benchmark to the `baseline` and `layout` paths.
+    # NOTE(review): `cargo run` is invoked without --release/--profile, so
+    # cargo uses its default dev profile — confirm that is intended here.
+    desc: 'Pipeline benchmark — native paths only (P1+P2)'
+    cmds:
+      - >-
+        cargo run -p benchmark-harness --
+        pipeline-benchmark
+        --fixtures "{{.FIXTURES_DIR}}"
+        --paths baseline,layout
+
+  pipeline:all:
+    # Same subcommand as pipeline:quick, but with no --paths filter.
+    # NOTE(review): `cargo run` is invoked without --release/--profile, so
+    # cargo uses its default dev profile — confirm that is intended here.
+    desc: 'Pipeline benchmark — all 6 extraction paths'
+    cmds:
+      - >-
+        cargo run -p benchmark-harness --
+        pipeline-benchmark
+        --fixtures "{{.FIXTURES_DIR}}"
+
+  survey:
+    # Runs the harness `survey` subcommand over FIXTURES_DIR with --types pdf.
+    desc: 'Corpus-wide extraction stats for all PDFs'
+    cmds:
+      - >-
+        cargo run -p benchmark-harness --
+        survey
+        --fixtures "{{.FIXTURES_DIR}}"
+        --types pdf
+
+  models:
+    # Runs the harness `model-benchmark` subcommand over FIXTURES_DIR.
+    # NOTE(review): `cargo run` is invoked without --release/--profile, so
+    # cargo uses its default dev profile — confirm that is intended here.
+    desc: 'Layout model A/B comparison (fast vs accurate)'
+    cmds:
+      - >-
+        cargo run -p benchmark-harness --
+        model-benchmark
+        --fixtures "{{.FIXTURES_DIR}}"
+
+  generate-gt:
+    # Launches the ground-truth generator script through `uv run --no-sync`.
+    desc: 'Generate markdown ground truth from PDFs using Gemini'
+    cmds:
+      - >-
+        uv run --no-sync
+        tools/benchmark-harness/scripts/generate_markdown_gt.py
+
+  download:omnidocbench:
+    desc: 'Download OmniDocBench dataset from HuggingFace (~1.3 GB)'
+    # Up-to-date check: when OmniDocBench.json already exists the task is
+    # considered done and the download script is not run again.
+    status:
+      - 'test -f tools/benchmark-harness/datasets/omnidocbench/OmniDocBench.json'
+    cmds:
+      - 'bash tools/benchmark-harness/scripts/download_omnidocbench.sh'
+
+  import:omnidocbench:
+    # Converts the downloaded OmniDocBench dataset into benchmark fixtures.
+    # The `deps` entry runs download:omnidocbench automatically before the
+    # import script, so the dataset is fetched first when it is missing.
+    desc: "Import OmniDocBench into benchmark fixtures (run download:omnidocbench first)"
+    deps: ["download:omnidocbench"]
+    cmds:
+      - python3 tools/benchmark-harness/scripts/import_omnidocbench.py tools/benchmark-harness/datasets/omnidocbench .
+    status:
+      # Up-to-date check: succeed when at least one imported fixture exists.
+      # `test -f <glob>` only works while the glob matches a single file; with
+      # several matches `test` receives extra operands and errors out, which
+      # made this task re-run on every invocation. Expanding the glob into the
+      # positional parameters and testing only the first one handles zero, one
+      # or many matches (an unmatched glob stays literal, so the test fails).
+      - 'set -- tools/benchmark-harness/fixtures/pdf/omnidoc_*.json; test -f "$1"'
+
+  clean-results:
+    # Deletes both output trees: the benchmark results directory and the
+    # flamegraph directory.
+    desc: 'Clean up benchmark results and profiles'
+    cmds:
+      - 'rm -rf "{{.BENCHMARK_RESULTS_DIR}}"'
+      - 'rm -rf "{{.FLAMEGRAPH_DIR}}"'