Nomad changes

2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions
--- a/tools/benchmark-harness/Cargo.toml
+++ b/tools/benchmark-harness/Cargo.toml
@@ -0,0 +1,55 @@
+[package]
+name = "benchmark-harness"
+version.workspace = true
+edition.workspace = true
+rust-version.workspace = true
+authors.workspace = true
+license.workspace = true
+repository.workspace = true
+homepage = "https://kreuzberg.dev"
+default-run = "benchmark-harness"
+
+[lib]
+name = "benchmark_harness"
+path = "src/lib.rs"
+
+[[bin]]
+name = "benchmark-harness"
+path = "src/main.rs"
+
+[features]
+default = []
+profiling = ["pprof"]
+memory-profiling = ["tikv-jemallocator", "tikv-jemalloc-ctl"]
+
+[dependencies]
+
+ahash = { workspace = true }
+
+async-trait = { workspace = true }
+chrono = { workspace = true }
+
+clap = { workspace = true }
+futures = { workspace = true }
+kreuzberg = { path = "../../crates/kreuzberg", features = ["full"] }
+num_cpus = { workspace = true }
+
+pprof = { version = "0.15", features = ["flamegraph", "criterion"], optional = true }
+pulldown-cmark = "0.13"
+rayon = { workspace = true }
+regex = "1"
+serde = { workspace = true }
+serde_json = { workspace = true }
+shellexpand = "3"
+
+sysinfo = "0.38"
+tempfile = { workspace = true }
+thiserror = { workspace = true }
+tikv-jemalloc-ctl = { version = "0.7", features = ["stats"], optional = true }
+tikv-jemallocator = { version = "0.7", optional = true }
+tokio = { workspace = true, features = ["full"] }
+tracing = { workspace = true }
+tracing-subscriber = { version = "0.3", features = ["env-filter"] }
+which = "8"
+
+[dev-dependencies]
--- a/tools/benchmark-harness/README.md
+++ b/tools/benchmark-harness/README.md
@@ -0,0 +1,394 @@
+# Benchmark Harness
+
+Rust CLI tool for comparative benchmarking of document extraction across 13 Kreuzberg language bindings and 12 reference frameworks. Measures performance (latency, throughput, memory) and extraction quality (TF1, SF1), with quality scored against ground truth.
+
+## Overview
+
+The benchmark harness serves two distinct workflows:
+
+- **CI benchmarking** -- automated cross-framework comparison triggered via GitHub Actions, producing aggregated results published as GitHub Releases.
+- **Local quality assessment** -- developer-facing pipeline comparison against ground truth for extraction quality triage and regression detection.
+
+## Architecture
+
+```text
+CLI (clap)
+ |
+ +-- run              --> AdapterRegistry --> BenchmarkRunner --> results.json
+ |                         |
+ |                         +-- NativeAdapter (in-process Kreuzberg)
+ |                         +-- SubprocessAdapter (persistent child process)
+ |                         +-- BatchSubprocessAdapter (batch API)
+ |
+ +-- compare          --> ComparisonConfig --> Pipeline extraction --> Quality scoring
+ +-- pipeline-benchmark --> 6-path matrix --> TF1/SF1 scoring --> Triage tables
+ +-- consolidate      --> Load multi-job results --> Aggregate percentiles
+ +-- validate-gt      --> Fixture scan --> HTML cleanup --> Integrity report
+ +-- survey           --> Corpus-wide extraction stats
+ +-- model-benchmark  --> Layout model A/B comparison
+ +-- embed-benchmark  --> Embedding throughput measurement
+```
+
+### Module Structure
+
+| Module                              | Purpose                                                                                                                    |
+| ----------------------------------- | -------------------------------------------------------------------------------------------------------------------------- |
+| `main.rs`                           | CLI entry point (clap subcommands)                                                                                         |
+| `adapter.rs`                        | `FrameworkAdapter` trait definition                                                                                        |
+| `adapters/`                         | Adapter implementations: subprocess (persistent/batch), native (in-process), kreuzberg factory functions for all languages |
+| `runner.rs`                         | Benchmark orchestration, iteration control, resource monitoring                                                            |
+| `quality.rs`                        | TF1: token-level bag-of-words F1 scoring                                                                                   |
+| `markdown_quality.rs`               | SF1: structural block-level F1 scoring                                                                                     |
+| `comparison.rs`                     | Multi-pipeline extraction with quality guardrails                                                                          |
+| `pipeline_benchmark.rs`             | 6-path extraction matrix benchmark                                                                                         |
+| `corpus.rs`, `fixture.rs`           | Fixture loading, filtering, validation                                                                                     |
+| `aggregate.rs`, `consolidate.rs`    | Multi-job result merging and percentile aggregation                                                                        |
+| `output.rs`, `stats.rs`             | Result serialization and statistical analysis                                                                              |
+| `validate_gt.rs`                    | Ground truth integrity checks and HTML-to-GFM cleanup                                                                      |
+| `monitoring.rs`                     | CPU and memory sampling during benchmarks                                                                                  |
+| `profiling.rs`, `profile_report.rs` | Flamegraph generation (requires `profiling` feature)                                                                       |
+| `survey.rs`                         | Corpus-wide extraction statistics                                                                                          |
+| `model_benchmark.rs`                | Layout model A/B comparison                                                                                                |
+| `embed_benchmark.rs`                | Embedding throughput benchmarks                                                                                            |
+| `sizes.rs`                          | Framework installation footprint measurement                                                                               |
+
+## Quality Scoring
+
+### TF1 (Text F1)
+
+Token-level bag-of-words F1 between extracted text and ground truth.
+
+- Tokenization: lowercase, split on whitespace, keep alphanumeric tokens plus `.` and `,`
+- Separate numeric-token F1 for number-heavy documents (financial, scientific)
+- Combined score when only text ground truth is available: `quality_score = 0.6 * f1_text + 0.4 * f1_numeric`
+
+### SF1 (Structural F1)
+
+Block-level matching between extracted markdown and ground truth markdown.
+
+- **Block types:** Heading1-6, Paragraph, CodeBlock, Formula, Table, ListItem, Image
+- **Type weights:** Headings = 2.0, Code/Formula/Table = 1.5, ListItem = 1.0, Paragraph/Image = 0.5
+- **Matching:** Greedy 1:1 with fuzzy cross-type compatibility (e.g., bold paragraph matched to heading gets 0.4 compatibility score)
+- **Adjacent concatenation:** Consecutive blocks of the same type are merged before matching
+- **Order score:** Longest Increasing Subsequence (LIS) on matched block indices
+
+### Combined Score
+
+When markdown ground truth is available, the text, numeric, and layout F1 scores are combined with these weights:
+
+```text
+quality_score = 0.5 * f1_text + 0.2 * f1_numeric + 0.3 * f1_layout
+```
+
+## Fixture Format
+
+Fixtures are JSON files organized by format directory under `fixtures/`:
+
+```json
+{
+  "document": "relative/path/to/file.pdf",
+  "file_type": "pdf",
+  "file_size": 123456,
+  "expected_frameworks": ["kreuzberg", "docling"],
+  "metadata": {},
+  "ground_truth": {
+    "text_file": "relative/path/to/gt.txt",
+    "markdown_file": "relative/path/to/gt.md",
+    "source": "manual|vision|pdf_text_layer|pandoc|python-docx|..."
+  }
+}
+```
+
+### Ground Truth Coverage
+
+| Format | Fixtures | With Markdown GT |
+| ------ | -------- | ---------------- |
+| PDF    | 159      | 158              |
+| HTML   | 36       | 36               |
+| DOCX   | 26       | 26               |
+| ODT    | 19       | 19               |
+| RTF    | 17       | 17               |
+| XLSX   | 12       | 11               |
+| CSV    | 11       | 11               |
+| EPUB   | 8        | 8                |
+| PPTX   | 8        | 8                |
+| Org    | 6        | 6                |
+| DOC    | 5        | 5                |
+| OPML   | 4        | 4                |
+| RST    | 3        | 3                |
+| XLS    | 3        | 3                |
+| IPynb  | 1        | 1                |
+| JATS   | 1        | 1                |
+| LaTeX  | 1        | 1                |
+
+**Total:** 320 fixtures across 17 formats, 318 of them with markdown ground truth.
+
+## Frameworks
+
+### Kreuzberg Bindings (13)
+
+Each binding is benchmarked in both single-file (sequential, fair latency) and batch (concurrent, throughput) modes:
+
+Rust, Python, Node.js, Ruby, Go, Java, C#, PHP, Elixir, R, WASM, C, Rust+PaddleOCR
+
+### Reference Frameworks (12)
+
+External document extraction tools benchmarked in single-file mode:
+
+Docling, MarkItDown, Pandoc, Unstructured, Tika, PyMuPDF4LLM, PDFPlumber, MinerU, PyPDF, PDFMiner, PDFtoText, Playa-PDF
+
+## Extraction Pipelines
+
+The `compare` and `pipeline-benchmark` commands support these extraction paths:
+
+| Pipeline           | Description                                    |
+| ------------------ | ---------------------------------------------- |
+| `baseline`         | Native PDF text extraction (no OCR, no layout) |
+| `layout`           | Native PDF with layout detection               |
+| `tesseract`        | Tesseract OCR with force_ocr                   |
+| `tesseract+layout` | Tesseract OCR with layout detection            |
+| `paddle`           | PaddleOCR mobile tier with force_ocr           |
+| `paddle+layout`    | PaddleOCR mobile tier with layout detection    |
+| `paddle-server`    | PaddleOCR server tier                          |
+| `docling`          | Vendored Docling reference extraction          |
+| `paddleocr-python` | Vendored PaddleOCR Python extraction           |
+| `rapidocr`         | Vendored RapidOCR extraction                   |
+
+## CLI Reference
+
+### `run` -- CI benchmark execution
+
+Runs benchmarks using framework adapters with configurable iterations, warmup, and sharding.
+
+```bash
+benchmark-harness run \
+  -f fixtures/ \
+  -F kreuzberg-rust,kreuzberg-python \
+  -m batch \
+  -o results/ \
+  -i 3 -w 1
+```
+
+| Flag                   | Description                                    | Default       |
+| ---------------------- | ---------------------------------------------- | ------------- |
+| `-f, --fixtures`       | Fixture directory or file                      | required      |
+| `-F, --frameworks`     | Comma-separated framework names                | all available |
+| `-o, --output`         | Output directory                               | `results`     |
+| `-m, --mode`           | `single-file` or `batch`                       | `batch`       |
+| `-i, --iterations`     | Benchmark iterations                           | `3`           |
+| `-w, --warmup`         | Warmup iterations (discarded)                  | `1`           |
+| `-c, --max-concurrent` | Max concurrent extractions                     | CPU count     |
+| `-t, --timeout`        | Timeout in seconds                             | `1800`        |
+| `--ocr`                | Enable OCR                                     | `false`       |
+| `--measure-quality`    | Enable quality assessment                      | `false`       |
+| `--shard`              | Run fixture subset (`INDEX/TOTAL`, e.g. `1/3`) | none          |
+
+### `consolidate` -- Merge multi-job results
+
+Combines benchmark results from parallel CI jobs into a single aggregated report with percentiles.
+
+```bash
+benchmark-harness consolidate \
+  --inputs dir1,dir2,dir3 \
+  --output consolidated/
+```
+
+### `compare` -- Local pipeline comparison
+
+Compares extraction pipelines on the document corpus with quality scoring and optional guardrails.
+
+```bash
+benchmark-harness compare \
+  -f fixtures/ \
+  --pipelines baseline,layout,paddle \
+  --dump-outputs \
+  --guardrails
+```
+
+| Flag             | Description                                           |
+| ---------------- | ----------------------------------------------------- |
+| `--pipelines`    | Comma-separated pipeline names                        |
+| `--dump-outputs` | Write extraction outputs to `/tmp/kreuzberg_compare/` |
+| `--guardrails`   | Fail on quality regressions (non-zero exit)           |
+| `--filter`       | Only run documents matching this substring            |
+
+### `pipeline-benchmark` -- 6-path extraction matrix
+
+Runs the selected pipelines (the 6 default paths unless `--paths` is given) across the corpus and produces a ranked triage table.
+
+```bash
+benchmark-harness pipeline-benchmark \
+  -f fixtures/ \
+  --group tables \
+  --sort-by sf1 \
+  --bottom-n 10 \
+  --triage-blocks
+```
+
+| Flag              | Description                                                                                  | Default             |
+| ----------------- | -------------------------------------------------------------------------------------------- | ------------------- |
+| `--paths`         | Comma-separated pipeline names                                                               | all 6 default paths |
+| `--doc`           | Filter by document name substrings                                                           | none                |
+| `--group`         | Named benchmark group (`tables`, `structure`, `multicolumn`, `text-quality`, `ocr-fallback`) | none                |
+| `--sort-by`       | Sort metric: `sf1`, `tf1`, `time`                                                            | `sf1`               |
+| `--bottom-n`      | Show only the N worst-performing documents                                                   | none                |
+| `--triage-blocks` | Print per-block-type F1 breakdown                                                            | `false`             |
+| `--dump-outputs`  | Write outputs to `/tmp/kreuzberg_pipeline/`                                                  | `false`             |
+| `--json-output`   | Write JSON results to file                                                                   | none                |
+| `--profile-dir`   | Generate per-pipeline flamegraph SVGs                                                        | none                |
+
+### `validate-gt` -- Ground truth validation
+
+Checks ground truth file integrity and optionally fixes HTML artifacts in markdown files.
+
+```bash
+benchmark-harness validate-gt -f fixtures/ --fix
+```
+
+### `survey` -- Corpus extraction statistics
+
+Produces corpus-wide extraction statistics grouped by file type.
+
+```bash
+benchmark-harness survey -f fixtures/ --types pdf,docx
+```
+
+### `model-benchmark` -- Layout model A/B comparison
+
+Compares two layout model presets across the fixture corpus.
+
+```bash
+benchmark-harness model-benchmark -f fixtures/ --model-a fast --model-b accurate
+```
+
+### `embed-benchmark` -- Embedding throughput
+
+Benchmarks embedding throughput across all presets.
+
+```bash
+benchmark-harness embed-benchmark
+```
+
+### `list-fixtures` -- List loaded fixtures
+
+```bash
+benchmark-harness list-fixtures -f fixtures/
+```
+
+### `validate` -- Validate fixture JSON
+
+```bash
+benchmark-harness validate -f fixtures/
+```
+
+### `measure-framework-sizes` -- Installation footprints
+
+Measures disk usage of all framework installations.
+
+```bash
+benchmark-harness measure-framework-sizes --output sizes.json
+```
+
+## CI Integration
+
+The benchmark suite runs via `.github/workflows/benchmarks.yaml` and is triggered manually through `workflow_dispatch`.
+
+### Execution DAG
+
+```text
+setup
+  Build harness + FFI library + validate ground truth
+    |
+    v
+bench-{language} x {single-file, batch}     (13 Kreuzberg binding jobs)
+    |
+    v
+kreuzberg-gate                                (wait for all Kreuzberg benchmarks)
+    |
+    v
+bench-{external}                              (12 reference framework jobs, some sharded)
+    |
+    v
+aggregate-and-release                         (consolidate all results -> GitHub Release)
+```
+
+### Platform
+
+- Primary: `ubuntu-24.04-arm`
+- Exception: WASM uses `ubuntu-24.04` (x86) due to V8 ARM compatibility issues
+
+### Timeouts and Artifacts
+
+- Per-job timeout: 6 hours (the per-document timeout is configured separately)
+- Build artifacts retained: 7 days
+- Result artifacts retained: 30 days
+- Final output: aggregated JSON published as a GitHub Release
+
+## Vendored Baselines
+
+Pre-generated extraction outputs from reference tools are stored in `vendored/` for offline comparison:
+
+| Directory                    | Source                                             |
+| ---------------------------- | -------------------------------------------------- |
+| `vendored/docling/`          | Docling extraction outputs                         |
+| `vendored/paddleocr-python/` | PaddleOCR Python outputs with timing (`.ms` files) |
+| `vendored/rapidocr/`         | RapidOCR extraction outputs                        |
+
+Regenerate with:
+
+```bash
+python tools/benchmark-harness/scripts/generate_vendored_baselines.py
+```
+
+## Development
+
+```bash
+# Build
+cargo build -p benchmark-harness
+
+# Run tests
+cargo test -p benchmark-harness
+
+# Lint
+cargo clippy -p benchmark-harness -- -D warnings
+
+# Local pipeline comparison
+cargo run -p benchmark-harness -- compare \
+  -f tools/benchmark-harness/fixtures/ \
+  --pipelines baseline,layout \
+  --dump-outputs
+
+# Validate ground truth
+cargo run -p benchmark-harness -- validate-gt \
+  -f tools/benchmark-harness/fixtures/
+
+# Full pipeline benchmark with triage
+cargo run -p benchmark-harness -- pipeline-benchmark \
+  -f tools/benchmark-harness/fixtures/ \
+  --sort-by sf1 --bottom-n 20 --triage-blocks
+
+# Corpus survey
+cargo run -p benchmark-harness -- survey \
+  -f tools/benchmark-harness/fixtures/ --types pdf
+```
+
+### Optional Features
+
+| Feature            | Description                               |
+| ------------------ | ----------------------------------------- |
+| `profiling`        | Enables flamegraph generation via `pprof` |
+| `memory-profiling` | Enables jemalloc-based memory profiling   |
+
+Build with features:
+
+```bash
+cargo build -p benchmark-harness --features profiling,memory-profiling
+```
+
+### Tracing
+
+The harness uses `tracing` with `RUST_LOG` env-filter support. For quality scoring diagnostics:
+
+```bash
+RUST_LOG=benchmark_harness::markdown_quality=debug cargo run -p benchmark-harness -- compare ...
+```
--- a/tools/benchmark-harness/SCHEMA.md
+++ b/tools/benchmark-harness/SCHEMA.md
@@ -0,0 +1,266 @@
+# Aggregation Schema v2.4.0
+
+This document describes the structure of `aggregated.json` produced by `benchmark-harness consolidate`.
+
+## Top-level Shape
+
+```json
+{
+  "schema_version": "2.4.0",
+  "by_framework_mode": {
+    "<aggregate_key>": {
+      /* FrameworkModeAggregation */
+    }
+  },
+  "disk_sizes": {
+    "<framework>": {
+      /* DiskSizeInfo */
+    }
+  },
+  "comparison": {
+    /* ComparisonData */
+  },
+  "per_fixture_results": [
+    /* PerFixtureRow[] */
+  ],
+  "metadata": {
+    /* ConsolidationMetadata */
+  }
+}
+```
+
+## Output Format Discriminator
+
+The `output_format` field determines which metrics and rankings apply to a framework:
+
+- **`markdown`**: Supports all metrics including SF1 (structural F1), layout percentiles, and all ranking tables
+- **`plaintext`**: Text-only extraction; SF1 and layout percentiles are `null`; plaintext frameworks never appear in SF1 rankings
+
+## by_framework_mode
+
+Key format differs by framework family:
+
+- **kreuzberg** (`kreuzberg-*`): `{framework_name}:{mode}` — the output format is already encoded
+  in the framework name (e.g. `kreuzberg-markdown-baseline`), so repeating it in the key is
+  redundant.
+- **competitors** (all other frameworks): `{framework}:{output_format}:{mode}` — format is not
+  encoded in the name, so the key carries it explicitly.
+
+Examples:
+
+- `kreuzberg-markdown-baseline:single`
+- `kreuzberg-plaintext-paddle-ocr:batch`
+- `pdfplumber:plaintext:single`
+- `docling:markdown:single`
+
+Each entry contains:
+
+```json
+{
+  "framework": "string", // Framework name without mode suffix
+  "output_format": "markdown|plaintext", // Output format used
+  "mode": "single|batch|...", // Execution mode
+  "cold_start": {
+    /* DurationPercentiles */
+  }, // Optional, if cold start data available
+  "by_file_type": {
+    "pdf": {
+      "file_type": "pdf",
+      "no_ocr": {
+        /* PerformancePercentiles */
+      },
+      "with_ocr": {
+        /* PerformancePercentiles */
+      }
+    }
+  }
+}
+```
+
+## PerformancePercentiles
+
+Contains p50, p95, p99 for all metrics:
+
+```json
+{
+  "successful_sample_count": 42,
+  "total_sample_count": 50,
+  "framework_errors": 0,
+  "harness_errors": 5,
+  "timeouts": 3,
+  "empty_content": 0,
+  "error_details": {
+    "error message": 2
+  },
+  "duration": { "p50": 100.5, "p95": 150.2, "p99": 199.9 },
+  "throughput": { "p50": 5.2, "p95": 4.8, "p99": 3.1 },
+  "memory": { "p50": 150.0, "p95": 200.0, "p99": 250.0 },
+  "cpu": { "p50": 50.0, "p95": 75.0, "p99": 90.0 }, // Optional
+  "extraction_duration": { "p50": 80.0, "p95": 120.0, "p99": 160.0 }, // Optional
+  "quality": {
+    /* QualityPercentiles */
+  }, // Optional, if quality data available
+  "success_rate_percent": 84.0
+}
+```
+
+## QualityPercentiles
+
+Includes p50, p95, p99 for all F1 metrics. Layout percentiles are `null` for plaintext-only frameworks:
+
+```json
+{
+  "f1_text_p50": 0.92,
+  "f1_text_p95": 0.88,
+  "f1_text_p99": 0.75,
+  "f1_numeric_p50": 0.85,
+  "f1_numeric_p95": 0.8,
+  "f1_numeric_p99": 0.7,
+  "f1_layout_p50": 0.78, // null for plaintext output format
+  "f1_layout_p95": 0.72, // null for plaintext output format
+  "f1_layout_p99": 0.65, // null for plaintext output format
+  "quality_score_p50": 0.85,
+  "quality_score_p95": 0.8,
+  "quality_score_p99": 0.7
+}
+```
+
+## PerFixtureRow
+
+One row per unique combination of (framework, output_format, execution_mode, fixture_id, ocr):
+
+```json
+{
+  "framework": "kreuzberg-markdown-baseline",
+  "output_format": "markdown",
+  "execution_mode": "single",
+  "ocr": false,
+  "fixture_id": "sample_doc_1",
+  "file_type": "pdf",
+  "duration_ms": 125.4,
+  "peak_memory_mb": 180.5,
+  "f1_text": 0.92,
+  "f1_layout": 0.78, // null for plaintext output format
+  "f1_numeric": 0.85,
+  "quality_score": 0.85,
+  "correct": true,
+  "success": true,
+  "error_kind": null // "FrameworkError", "HarnessError", "Timeout", etc. if !success
+}
+```
+
+## ComparisonData
+
+Contains all cross-framework rankings. The PDF TF1 and SF1 quality rankings are split by output format:
+
+```json
+{
+  "performance_ranking": [
+    /* RankedFramework[] */
+  ],
+  "throughput_ranking": [
+    /* RankedFramework[] */
+  ],
+  "memory_ranking": [
+    /* RankedFramework[] */
+  ],
+  "cpu_ranking": [
+    /* RankedFramework[] */
+  ],
+  "quality_ranking": [
+    /* RankedFramework[] */
+  ],
+  "pdf_quality_ranking": [
+    /* RankedFramework[] */
+  ],
+  "pdf_tf1_ranking_markdown": [
+    /* RankedFramework[] — markdown-only */
+  ],
+  "pdf_tf1_ranking_plaintext": [
+    /* RankedFramework[] — plaintext-only */
+  ],
+  "pdf_sf1_ranking_markdown": [
+    /* RankedFramework[] — markdown-only, never plaintext */
+  ],
+  "deltas_vs_baseline": {
+    "<aggregate_key>": {
+      /* DeltaMetrics */
+    }
+  }
+}
+```
+
+### RankedFramework
+
+```json
+{
+  "framework_mode": "kreuzberg-markdown-baseline:single",
+  "rank": 1,
+  "value": 95.5, // The metric value (duration, throughput, etc.)
+  "relative": 1.0 // Ratio relative to best (1.0 = best)
+}
+```
+
+## Migration from v2.3.0 to v2.4.0
+
+### Breaking Changes
+
+1. **Schema version**: Bumped to `"2.4.0"`
+2. **Kreuzberg aggregate key format**: Changed from `framework:output_format:mode` to
+   `framework_name:mode` for all `kreuzberg-*` frameworks. Competitor key format
+   (`framework:output_format:mode`) is unchanged.
+
+### Kreuzberg Consolidation
+
+Language-binding frameworks (`kreuzberg-py`, `kreuzberg-node`, `kreuzberg-rb`, `kreuzberg-go`,
+`kreuzberg-java`, `kreuzberg-csharp`, `kreuzberg-elixir`, `kreuzberg-php`, `kreuzberg-rust`, etc.)
+have been removed. They are replaced by three native pipelines run directly via the kreuzberg CLI:
+
+| Pipeline  | Markdown name                   | Plaintext name                   |
+| --------- | ------------------------------- | -------------------------------- |
+| Baseline  | `kreuzberg-markdown-baseline`   | `kreuzberg-plaintext-baseline`   |
+| Layout    | `kreuzberg-markdown-layout`     | `kreuzberg-plaintext-layout`     |
+| PaddleOCR | `kreuzberg-markdown-paddle-ocr` | `kreuzberg-plaintext-paddle-ocr` |
+
+Batch variants append `-batch` to the framework name (e.g. `kreuzberg-markdown-baseline-batch`),
+which the harness normalises to aggregate key `kreuzberg-markdown-baseline:batch`.
+
+### Key Format Rationale
+
+The format component is implicit in the kreuzberg framework name itself. Duplicating it in the
+aggregate key (`kreuzberg-markdown-baseline:markdown:single`) would be redundant and confusing.
+Competitor names carry no format information, so they continue to need it in the key
+(`docling:markdown:single`).
+
+## Migration from v2.2.0 to v2.3.0
+
+### Breaking Changes
+
+1. **Schema version**: Bumped to `"2.3.0"`
+2. **Framework key format**: Changed from `framework:mode` to `framework:output_format:mode`
+3. **QualityPercentiles**: Added p95 and p99 percentiles for all F1 metrics; `f1_layout_*` fields are now optional (null for plaintext)
+4. **FrameworkModeAggregation**: Added `output_format` field
+5. **ComparisonData**: Replaced `pdf_tf1_ranking` with `pdf_tf1_ranking_markdown` and `pdf_tf1_ranking_plaintext`; `pdf_sf1_ranking` renamed to `pdf_sf1_ranking_markdown` (now markdown-only)
+
+### New Fields
+
+- `per_fixture_results`: Array of individual fixture results preserving per-file measurements
+- `PerFixtureRow`: New struct capturing individual extraction outcomes
+
+### Plaintext-only Behavior
+
+- Plaintext frameworks NEVER appear in `pdf_sf1_ranking_markdown`
+- Plaintext frameworks NEVER appear in `pdf_tf1_ranking_markdown` (they get their own `pdf_tf1_ranking_plaintext`)
+- SF1 and layout percentiles are `null` for plaintext output format
+- All performance rankings (speed, memory, throughput) include both formats without discrimination
+
+## ConsolidationMetadata
+
+```json
+{
+  "total_results": 500,
+  "framework_count": 5,
+  "file_type_count": 8,
+  "timestamp": "2025-05-09T10:15:30Z"
+}
+```
--- a/tools/benchmark-harness/baselines/initial_baseline.json
+++ b/tools/benchmark-harness/baselines/initial_baseline.json
--- a/tools/benchmark-harness/build.rs
+++ b/tools/benchmark-harness/build.rs
@@ -0,0 +1,15 @@
+use std::env;
+
+fn main() { // Build script entry point: emits linker rpath arguments based on the target triple.
+    let target = env::var("TARGET").unwrap(); // Target triple provided by Cargo to build scripts; panics if unset.
+
+    if target.contains("darwin") { // Apple targets: @loader_path refers to the directory of the binary being loaded.
+        println!("cargo:rustc-link-arg=-Wl,-rpath,@loader_path");
+        println!("cargo:rustc-link-arg=-Wl,-rpath,@loader_path/."); // NOTE(review): looks equivalent to the entry above — confirm it is needed.
+    } else if target.contains("linux") { // Any triple containing "linux": $ORIGIN refers to the directory of the binary itself.
+        println!("cargo:rustc-link-arg=-Wl,-rpath,$ORIGIN");
+        println!("cargo:rustc-link-arg=-Wl,-rpath,$ORIGIN/."); // NOTE(review): looks equivalent to the entry above — confirm it is needed.
+    } // Other targets get no rpath arguments.
+
+    println!("cargo:rerun-if-changed=build.rs"); // Re-run this script only when build.rs itself changes.
+}
--- a/tools/benchmark-harness/fixtures/7z_archive.json
+++ b/tools/benchmark-harness/fixtures/7z_archive.json
@@ -0,0 +1,14 @@
+{
+  "document": "../../../test_documents/archives/documents.7z",
+  "file_type": "7z",
+  "file_size": 216,
+  "expected_frameworks": ["kreuzberg", "tika"],
+  "metadata": {
+    "description": "7-Zip archive with text documents",
+    "category": "archive"
+  },
+  "ground_truth": {
+    "text_file": "../../../test_documents/ground_truth/7z/documents.txt",
+    "source": "manual"
+  }
+}
--- a/tools/benchmark-harness/fixtures/asciidoc_tables.json
+++ b/tools/benchmark-harness/fixtures/asciidoc_tables.json
@@ -0,0 +1,15 @@
+{
+  "document": "../../../test_documents/markdown/tables.asciidoc",
+  "file_type": "asciidoc",
+  "file_size": 1537,
+  "expected_frameworks": ["kreuzberg", "docling"],
+  "metadata": {
+    "description": "AsciiDoc document with multiple table examples",
+    "category": "markup",
+    "size_class": "small"
+  },
+  "ground_truth": {
+    "text_file": "../../../test_documents/ground_truth/asciidoc/asciidoc_tables.txt",
+    "source": "vision"
+  }
+}
--- a/tools/benchmark-harness/fixtures/bib_comprehensive.json
+++ b/tools/benchmark-harness/fixtures/bib_comprehensive.json
@@ -0,0 +1,14 @@
+{
+  "document": "../../../test_documents/bibtex/comprehensive.bib",
+  "file_type": "bib",
+  "file_size": 3568,
+  "expected_frameworks": ["kreuzberg", "pandoc", "tika"],
+  "metadata": {
+    "description": "BibTeX bibliography file with multiple entries",
+    "category": "academic"
+  },
+  "ground_truth": {
+    "text_file": "../../../test_documents/ground_truth/bib/bib_comprehensive.txt",
+    "source": "vision"
+  }
+}
--- a/tools/benchmark-harness/fixtures/commonmark_sample.json
+++ b/tools/benchmark-harness/fixtures/commonmark_sample.json
@@ -0,0 +1,14 @@
+{
+  "document": "../../../test_documents/markdown/sample.commonmark",
+  "file_type": "commonmark",
+  "file_size": 3036,
+  "expected_frameworks": ["kreuzberg"],
+  "metadata": {
+    "description": "CommonMark document with standard markdown elements including headers, lists, code blocks, links, emphasis, blockquotes, tables, and mixed formatting",
+    "category": "text"
+  },
+  "ground_truth": {
+    "text_file": "../../../test_documents/ground_truth/commonmark/commonmark_sample.txt",
+    "source": "vision"
+  }
+}
--- a/tools/benchmark-harness/fixtures/csv/csv-comma-in-cell.json
+++ b/tools/benchmark-harness/fixtures/csv/csv-comma-in-cell.json
@@ -0,0 +1,16 @@
+{
+  "document": "../../../../test_documents/vendored/docling/csv/csv-comma-in-cell.csv",
+  "file_type": "csv",
+  "file_size": 46,
+  "expected_frameworks": ["kreuzberg", "docling", "markitdown", "pandoc", "tika", "unstructured"],
+  "metadata": {
+    "description": "Document from docling test suite",
+    "source": "docling",
+    "size_category": "small"
+  },
+  "ground_truth": {
+    "text_file": "../../../../test_documents/ground_truth/csv/csv-comma-in-cell.txt",
+    "source": "pandoc",
+    "markdown_file": "../../../../test_documents/ground_truth/csv/csv-comma-in-cell.md"
+  }
+}
--- a/tools/benchmark-harness/fixtures/csv/csv-comma.json
+++ b/tools/benchmark-harness/fixtures/csv/csv-comma.json
@@ -0,0 +1,16 @@
+{
+  "document": "../../../../test_documents/vendored/docling/csv/csv-comma.csv",
+  "file_type": "csv",
+  "file_size": 1005,
+  "expected_frameworks": ["kreuzberg", "docling", "markitdown", "pandoc", "tika", "unstructured"],
+  "metadata": {
+    "description": "Document from docling test suite",
+    "source": "docling",
+    "size_category": "small"
+  },
+  "ground_truth": {
+    "text_file": "../../../../test_documents/ground_truth/csv/csv-comma.txt",
+    "source": "pandoc",
+    "markdown_file": "../../../../test_documents/ground_truth/csv/csv-comma.md"
+  }
+}
--- a/tools/benchmark-harness/fixtures/csv/csv-inconsistent-header.json
+++ b/tools/benchmark-harness/fixtures/csv/csv-inconsistent-header.json
@@ -0,0 +1,16 @@
+{
+  "document": "../../../../test_documents/vendored/docling/csv/csv-inconsistent-header.csv",
+  "file_type": "csv",
+  "file_size": 42,
+  "expected_frameworks": ["kreuzberg", "docling", "markitdown", "pandoc", "tika", "unstructured"],
+  "metadata": {
+    "description": "Document from docling test suite",
+    "source": "docling",
+    "size_category": "small"
+  },
+  "ground_truth": {
+    "text_file": "../../../../test_documents/ground_truth/csv/csv-inconsistent-header.txt",
+    "source": "pandoc",
+    "markdown_file": "../../../../test_documents/ground_truth/csv/csv-inconsistent-header.md"
+  }
+}
--- a/tools/benchmark-harness/fixtures/csv/csv-pipe.json
+++ b/tools/benchmark-harness/fixtures/csv/csv-pipe.json
@@ -0,0 +1,16 @@
+{
+  "document": "../../../../test_documents/vendored/docling/csv/csv-pipe.csv",
+  "file_type": "csv",
+  "file_size": 997,
+  "expected_frameworks": ["kreuzberg", "docling", "markitdown", "pandoc", "tika", "unstructured"],
+  "metadata": {
+    "description": "Document from docling test suite",
+    "source": "docling",
+    "size_category": "small"
+  },
+  "ground_truth": {
+    "text_file": "../../../../test_documents/ground_truth/csv/csv-pipe.txt",
+    "source": "pandoc",
+    "markdown_file": "../../../../test_documents/ground_truth/csv/csv-pipe.md"
+  }
+}
--- a/tools/benchmark-harness/fixtures/csv/csv-semicolon.json
+++ b/tools/benchmark-harness/fixtures/csv/csv-semicolon.json
@@ -0,0 +1,16 @@
+{
+  "document": "../../../../test_documents/vendored/docling/csv/csv-semicolon.csv",
+  "file_type": "csv",
+  "file_size": 997,
+  "expected_frameworks": ["kreuzberg", "docling", "markitdown", "pandoc", "tika", "unstructured"],
+  "metadata": {
+    "description": "Document from docling test suite",
+    "source": "docling",
+    "size_category": "small"
+  },
+  "ground_truth": {
+    "text_file": "../../../../test_documents/ground_truth/csv/csv-semicolon.txt",
+    "source": "pandoc",
+    "markdown_file": "../../../../test_documents/ground_truth/csv/csv-semicolon.md"
+  }
+}
--- a/tools/benchmark-harness/fixtures/csv/csv-tab.json
+++ b/tools/benchmark-harness/fixtures/csv/csv-tab.json
@@ -0,0 +1,16 @@
+{
+  "document": "../../../../test_documents/vendored/docling/csv/csv-tab.csv",
+  "file_type": "csv",
+  "file_size": 997,
+  "expected_frameworks": ["kreuzberg", "docling", "markitdown", "pandoc", "tika", "unstructured"],
+  "metadata": {
+    "description": "Document from docling test suite",
+    "source": "docling",
+    "size_category": "small"
+  },
+  "ground_truth": {
+    "text_file": "../../../../test_documents/ground_truth/csv/csv-tab.txt",
+    "source": "pandoc",
+    "markdown_file": "../../../../test_documents/ground_truth/csv/csv-tab.md"
+  }
+}
--- a/tools/benchmark-harness/fixtures/csv/csv-too-few-columns.json
+++ b/tools/benchmark-harness/fixtures/csv/csv-too-few-columns.json
@@ -0,0 +1,16 @@
+{
+  "document": "../../../../test_documents/vendored/docling/csv/csv-too-few-columns.csv",
+  "file_type": "csv",
+  "file_size": 44,
+  "expected_frameworks": ["kreuzberg", "docling", "markitdown", "pandoc", "tika", "unstructured"],
+  "metadata": {
+    "description": "Document from docling test suite",
+    "source": "docling",
+    "size_category": "small"
+  },
+  "ground_truth": {
+    "text_file": "../../../../test_documents/ground_truth/csv/csv-too-few-columns.txt",
+    "source": "pandoc",
+    "markdown_file": "../../../../test_documents/ground_truth/csv/csv-too-few-columns.md"
+  }
+}
--- a/tools/benchmark-harness/fixtures/csv/csv-too-many-columns.json
+++ b/tools/benchmark-harness/fixtures/csv/csv-too-many-columns.json
@@ -0,0 +1,16 @@
+{
+  "document": "../../../../test_documents/vendored/docling/csv/csv-too-many-columns.csv",
+  "file_type": "csv",
+  "file_size": 46,
+  "expected_frameworks": ["kreuzberg", "docling", "markitdown", "pandoc", "tika", "unstructured"],
+  "metadata": {
+    "description": "Document from docling test suite",
+    "source": "docling",
+    "size_category": "small"
+  },
+  "ground_truth": {
+    "text_file": "../../../../test_documents/ground_truth/csv/csv-too-many-columns.txt",
+    "source": "pandoc",
+    "markdown_file": "../../../../test_documents/ground_truth/csv/csv-too-many-columns.md"
+  }
+}
--- a/tools/benchmark-harness/fixtures/csv/data_table.json
+++ b/tools/benchmark-harness/fixtures/csv/data_table.json
@@ -0,0 +1,16 @@
+{
+  "document": "../../../../test_documents/csv/data_table.csv",
+  "file_type": "csv",
+  "file_size": 476,
+  "expected_frameworks": ["kreuzberg"],
+  "metadata": {
+    "description": "csv test: data_table",
+    "source": "pandoc-generated",
+    "size_category": "small"
+  },
+  "ground_truth": {
+    "text_file": "../../../../test_documents/ground_truth/csv/data_table.txt",
+    "markdown_file": "../../../../test_documents/ground_truth/csv/data_table.md",
+    "source": "pandoc"
+  }
+}
--- a/tools/benchmark-harness/fixtures/csv/stanley_cups.json
+++ b/tools/benchmark-harness/fixtures/csv/stanley_cups.json
@@ -0,0 +1,16 @@
+{
+  "document": "../../../../test_documents/csv/stanley_cups.csv",
+  "file_type": "csv",
+  "file_size": 91,
+  "expected_frameworks": ["kreuzberg"],
+  "metadata": {
+    "description": "csv test: stanley_cups",
+    "source": "pandoc-generated",
+    "size_category": "small"
+  },
+  "ground_truth": {
+    "text_file": "../../../../test_documents/ground_truth/csv/stanley_cups.txt",
+    "markdown_file": "../../../../test_documents/ground_truth/csv/stanley_cups.md",
+    "source": "pandoc"
+  }
+}
--- a/tools/benchmark-harness/fixtures/csv/test_mskanji.json
+++ b/tools/benchmark-harness/fixtures/csv/test_mskanji.json
@@ -0,0 +1,16 @@
+{
+  "document": "../../../../test_documents/vendored/markitdown/csv/test_mskanji.csv",
+  "file_type": "csv",
+  "file_size": 70,
+  "expected_frameworks": ["kreuzberg", "docling", "markitdown", "pandoc", "tika", "unstructured"],
+  "metadata": {
+    "description": "Document from markitdown test suite",
+    "source": "markitdown",
+    "size_category": "small"
+  },
+  "ground_truth": {
+    "text_file": "../../../../test_documents/ground_truth/csv/test_mskanji.txt",
+    "source": "pandoc",
+    "markdown_file": "../../../../test_documents/ground_truth/csv/test_mskanji.md"
+  }
+}
--- a/tools/benchmark-harness/fixtures/dbf_stations.json
+++ b/tools/benchmark-harness/fixtures/dbf_stations.json
@@ -0,0 +1,14 @@
+{
+  "document": "../../../test_documents/dbf/stations.dbf",
+  "file_type": "dbf",
+  "file_size": 87623,
+  "expected_frameworks": ["kreuzberg"],
+  "metadata": {
+    "description": "dBASE file with station records",
+    "category": "tables"
+  },
+  "ground_truth": {
+    "text_file": "../../../test_documents/ground_truth/dbf/dbf_stations.txt",
+    "source": "manual"
+  }
+}
--- a/tools/benchmark-harness/fixtures/dbk_chapter.json
+++ b/tools/benchmark-harness/fixtures/dbk_chapter.json
@@ -0,0 +1,16 @@
+{
+  "document": "../../../test_documents/docbook/docbook-chapter.dbk",
+  "file_type": "dbk",
+  "file_size": 1088,
+  "expected_frameworks": ["kreuzberg", "pandoc"],
+  "metadata": {
+    "description": "DocBook XML chapter with recursive sections (DBK extension)",
+    "category": "markup",
+    "size_class": "small"
+  },
+  "ground_truth": {
+    "text_file": "../../../test_documents/ground_truth/docbook/docbook-chapter.txt",
+    "markdown_file": "../../../test_documents/ground_truth/docbook/docbook-chapter.md",
+    "source": "pandoc"
+  }
+}
--- a/tools/benchmark-harness/fixtures/djot_tables.json
+++ b/tools/benchmark-harness/fixtures/djot_tables.json
@@ -0,0 +1,14 @@
+{
+  "document": "../../../test_documents/markdown/tables.djot",
+  "file_type": "djot",
+  "file_size": 2102,
+  "expected_frameworks": ["kreuzberg"],
+  "metadata": {
+    "description": "Djot markup with tables",
+    "category": "markup"
+  },
+  "ground_truth": {
+    "text_file": "../../../test_documents/ground_truth/djot/djot_tables.txt",
+    "source": "vision"
+  }
+}
--- a/tools/benchmark-harness/fixtures/doc/duplicate-paragraphs.json
+++ b/tools/benchmark-harness/fixtures/doc/duplicate-paragraphs.json
@@ -0,0 +1,16 @@
+{
+  "document": "../../../../test_documents/vendored/unstructured/doc/duplicate-paragraphs.doc",
+  "file_type": "doc",
+  "file_size": 18432,
+  "expected_frameworks": ["kreuzberg", "tika", "unstructured"],
+  "metadata": {
+    "description": "Document from unstructured test suite",
+    "source": "unstructured",
+    "size_category": "small"
+  },
+  "ground_truth": {
+    "text_file": "../../../../test_documents/ground_truth/doc/duplicate-paragraphs.txt",
+    "source": "pandoc",
+    "markdown_file": "../../../../test_documents/ground_truth/doc/duplicate-paragraphs.md"
+  }
+}
--- a/tools/benchmark-harness/fixtures/doc/fake-doc-emphasized-text.json
+++ b/tools/benchmark-harness/fixtures/doc/fake-doc-emphasized-text.json
@@ -0,0 +1,16 @@
+{
+  "document": "../../../../test_documents/vendored/unstructured/doc/fake-doc-emphasized-text.doc",
+  "file_type": "doc",
+  "file_size": 27648,
+  "expected_frameworks": ["kreuzberg", "tika", "unstructured"],
+  "metadata": {
+    "description": "Document from unstructured test suite",
+    "source": "unstructured",
+    "size_category": "small"
+  },
+  "ground_truth": {
+    "text_file": "../../../../test_documents/ground_truth/doc/fake-doc-emphasized-text.txt",
+    "source": "pandoc",
+    "markdown_file": "../../../../test_documents/ground_truth/doc/fake-doc-emphasized-text.md"
+  }
+}
--- a/tools/benchmark-harness/fixtures/doc/fake.json
+++ b/tools/benchmark-harness/fixtures/doc/fake.json
@@ -0,0 +1,16 @@
+{
+  "document": "../../../../test_documents/vendored/unstructured/doc/fake.doc",
+  "file_type": "doc",
+  "file_size": 18432,
+  "expected_frameworks": ["kreuzberg", "tika", "unstructured"],
+  "metadata": {
+    "description": "Document from unstructured test suite",
+    "source": "unstructured",
+    "size_category": "small"
+  },
+  "ground_truth": {
+    "text_file": "../../../../test_documents/ground_truth/doc/fake.txt",
+    "source": "pandoc",
+    "markdown_file": "../../../../test_documents/ground_truth/doc/fake.md"
+  }
+}
--- a/tools/benchmark-harness/fixtures/doc/simple.json
+++ b/tools/benchmark-harness/fixtures/doc/simple.json
@@ -0,0 +1,16 @@
+{
+  "document": "../../../../test_documents/vendored/unstructured/doc/simple.doc",
+  "file_type": "doc",
+  "file_size": 15872,
+  "expected_frameworks": ["kreuzberg", "tika", "unstructured"],
+  "metadata": {
+    "description": "Document from unstructured test suite",
+    "source": "unstructured",
+    "size_category": "small"
+  },
+  "ground_truth": {
+    "text_file": "../../../../test_documents/ground_truth/doc/simple.txt",
+    "source": "pandoc",
+    "markdown_file": "../../../../test_documents/ground_truth/doc/simple.md"
+  }
+}
--- a/tools/benchmark-harness/fixtures/doc/unit_test_lists.json
+++ b/tools/benchmark-harness/fixtures/doc/unit_test_lists.json
@@ -0,0 +1,16 @@
+{
+  "document": "../../../../test_documents/doc/unit_test_lists.doc",
+  "file_type": "doc",
+  "file_size": 16384,
+  "expected_frameworks": ["kreuzberg"],
+  "metadata": {
+    "description": "doc test: unit_test_lists",
+    "source": "pandoc-generated",
+    "size_category": "small"
+  },
+  "ground_truth": {
+    "text_file": "../../../../test_documents/ground_truth/doc/unit_test_lists.txt",
+    "markdown_file": "../../../../test_documents/ground_truth/doc/unit_test_lists.md",
+    "source": "pandoc"
+  }
+}
--- a/tools/benchmark-harness/fixtures/docbook_chapter.json
+++ b/tools/benchmark-harness/fixtures/docbook_chapter.json
@@ -0,0 +1,15 @@
+{
+  "document": "../../../test_documents/docbook/docbook-chapter.docbook",
+  "file_type": "docbook",
+  "file_size": 1088,
+  "expected_frameworks": ["kreuzberg"],
+  "metadata": {
+    "description": "DocBook chapter with structured content",
+    "category": "documentation"
+  },
+  "ground_truth": {
+    "text_file": "../../../test_documents/ground_truth/docbook/docbook-chapter.txt",
+    "markdown_file": "../../../test_documents/ground_truth/docbook/docbook-chapter.md",
+    "source": "pandoc"
+  }
+}
--- a/tools/benchmark-harness/fixtures/docbook_reader.json
+++ b/tools/benchmark-harness/fixtures/docbook_reader.json
@@ -0,0 +1,14 @@
+{
+  "document": "../../../test_documents/docbook/docbook-reader.docbook",
+  "file_type": "docbook",
+  "file_size": 37139,
+  "expected_frameworks": ["kreuzberg"],
+  "metadata": {
+    "description": "DocBook reader with larger content",
+    "category": "documentation"
+  },
+  "ground_truth": {
+    "text_file": "../../../test_documents/ground_truth/docbook/docbook_reader.txt",
+    "source": "vision"
+  }
+}
--- a/tools/benchmark-harness/fixtures/docbook_tables4.json
+++ b/tools/benchmark-harness/fixtures/docbook_tables4.json
@@ -0,0 +1,15 @@
+{
+  "document": "../../../test_documents/docbook/tables.docbook4",
+  "file_type": "docbook",
+  "file_size": 7502,
+  "expected_frameworks": ["kreuzberg"],
+  "metadata": {
+    "description": "DocBook 4 table examples with simple, multiline, and headerless tables",
+    "category": "documentation"
+  },
+  "ground_truth": {
+    "text_file": "../../../test_documents/ground_truth/docbook/tables.txt",
+    "markdown_file": "../../../test_documents/ground_truth/docbook/tables.md",
+    "source": "pandoc"
+  }
+}
--- a/tools/benchmark-harness/fixtures/docbook_tables5.json
+++ b/tools/benchmark-harness/fixtures/docbook_tables5.json
@@ -0,0 +1,15 @@
+{
+  "document": "../../../test_documents/docbook/tables.docbook5",
+  "file_type": "docbook",
+  "file_size": 7502,
+  "expected_frameworks": ["kreuzberg"],
+  "metadata": {
+    "description": "DocBook 5 table examples with simple, multiline, and headerless tables",
+    "category": "documentation"
+  },
+  "ground_truth": {
+    "text_file": "../../../test_documents/ground_truth/docbook/tables.txt",
+    "markdown_file": "../../../test_documents/ground_truth/docbook/tables.md",
+    "source": "pandoc"
+  }
+}
--- a/tools/benchmark-harness/fixtures/docbook_xref.json
+++ b/tools/benchmark-harness/fixtures/docbook_xref.json
@@ -0,0 +1,15 @@
+{
+  "document": "../../../test_documents/docbook/docbook-xref.docbook",
+  "file_type": "docbook",
+  "file_size": 3129,
+  "expected_frameworks": ["kreuzberg"],
+  "metadata": {
+    "description": "DocBook cross-reference examples with XRef, links, figures, and tables",
+    "category": "documentation"
+  },
+  "ground_truth": {
+    "text_file": "../../../test_documents/ground_truth/docbook/docbook-xref.txt",
+    "markdown_file": "../../../test_documents/ground_truth/docbook/docbook-xref.md",
+    "source": "pandoc"
+  }
+}
--- a/tools/benchmark-harness/fixtures/docx/docx_grouped_images.json
+++ b/tools/benchmark-harness/fixtures/docx/docx_grouped_images.json
@@ -0,0 +1,16 @@
+{
+  "document": "../../../../test_documents/vendored/docling/docx/docx_grouped_images.docx",
+  "file_type": "docx",
+  "file_size": 207463,
+  "expected_frameworks": ["kreuzberg", "docling", "markitdown", "pandoc", "tika", "unstructured"],
+  "metadata": {
+    "description": "Document from docling test suite",
+    "source": "docling",
+    "size_category": "small"
+  },
+  "ground_truth": {
+    "text_file": "../../../../test_documents/ground_truth/docx/docx_grouped_images.txt",
+    "source": "pandoc",
+    "markdown_file": "../../../../test_documents/ground_truth/docx/docx_grouped_images.md"
+  }
+}
--- a/tools/benchmark-harness/fixtures/docx/docx_rich_cells.json
+++ b/tools/benchmark-harness/fixtures/docx/docx_rich_cells.json
@@ -0,0 +1,16 @@
+{
+  "document": "../../../../test_documents/vendored/docling/docx/docx_rich_cells.docx",
+  "file_type": "docx",
+  "file_size": 24320,
+  "expected_frameworks": ["kreuzberg", "docling", "markitdown", "pandoc", "tika", "unstructured"],
+  "metadata": {
+    "description": "Document from docling test suite",
+    "source": "docling",
+    "size_category": "small"
+  },
+  "ground_truth": {
+    "text_file": "../../../../test_documents/ground_truth/docx/docx_rich_cells.txt",
+    "source": "pandoc",
+    "markdown_file": "../../../../test_documents/ground_truth/docx/docx_rich_cells.md"
+  }
+}
--- a/tools/benchmark-harness/fixtures/docx/docx_tables.json
+++ b/tools/benchmark-harness/fixtures/docx/docx_tables.json
@@ -0,0 +1,16 @@
+{
+  "document": "../../../../test_documents/docx/docx_tables.docx",
+  "file_type": "docx",
+  "file_size": 12725,
+  "expected_frameworks": ["kreuzberg"],
+  "metadata": {
+    "description": "docx test: docx_tables",
+    "source": "pandoc-generated",
+    "size_category": "small"
+  },
+  "ground_truth": {
+    "text_file": "../../../../test_documents/ground_truth/docx/docx_tables.txt",
+    "markdown_file": "../../../../test_documents/ground_truth/docx/docx_tables.md",
+    "source": "pandoc"
+  }
+}
--- a/tools/benchmark-harness/fixtures/docx/equations.json
+++ b/tools/benchmark-harness/fixtures/docx/equations.json
@@ -0,0 +1,16 @@
+{
+  "document": "../../../../test_documents/vendored/docling/docx/equations.docx",
+  "file_type": "docx",
+  "file_size": 15814,
+  "expected_frameworks": ["kreuzberg"],
+  "metadata": {
+    "description": "DOCX test document: equations",
+    "source": "pandoc-generated",
+    "size_category": "small"
+  },
+  "ground_truth": {
+    "markdown_file": "../../../../test_documents/ground_truth/docx/equations.md",
+    "source": "pandoc",
+    "text_file": "../../../../test_documents/ground_truth/docx/equations.txt"
+  }
+}
--- a/tools/benchmark-harness/fixtures/docx/extraction_test.json
+++ b/tools/benchmark-harness/fixtures/docx/extraction_test.json
@@ -0,0 +1,16 @@
+{
+  "document": "../../../../test_documents/docx/extraction_test.docx",
+  "file_type": "docx",
+  "file_size": 11296,
+  "expected_frameworks": ["kreuzberg"],
+  "metadata": {
+    "description": "docx test: extraction_test",
+    "source": "pandoc-generated",
+    "size_category": "small"
+  },
+  "ground_truth": {
+    "text_file": "../../../../test_documents/ground_truth/docx/extraction_test.txt",
+    "markdown_file": "../../../../test_documents/ground_truth/docx/extraction_test.md",
+    "source": "pandoc"
+  }
+}
--- a/tools/benchmark-harness/fixtures/docx/fake.json
+++ b/tools/benchmark-harness/fixtures/docx/fake.json
@@ -0,0 +1,16 @@
+{
+  "document": "../../../../test_documents/docx/fake.docx",
+  "file_type": "docx",
+  "file_size": 36602,
+  "expected_frameworks": ["kreuzberg"],
+  "metadata": {
+    "description": "docx test: fake",
+    "source": "pandoc-generated",
+    "size_category": "small"
+  },
+  "ground_truth": {
+    "text_file": "../../../../test_documents/ground_truth/docx/fake.txt",
+    "markdown_file": "../../../../test_documents/ground_truth/docx/fake.md",
+    "source": "pandoc"
+  }
+}
--- a/tools/benchmark-harness/fixtures/docx/issue_359_list_whitespace.json
+++ b/tools/benchmark-harness/fixtures/docx/issue_359_list_whitespace.json
@@ -0,0 +1,16 @@
+{
+  "document": "../../../../test_documents/docx/issue_359_list_whitespace.docx",
+  "file_type": "docx",
+  "file_size": 9170,
+  "expected_frameworks": ["kreuzberg"],
+  "metadata": {
+    "description": "docx test: issue_359_list_whitespace",
+    "source": "pandoc-generated",
+    "size_category": "small"
+  },
+  "ground_truth": {
+    "text_file": "../../../../test_documents/ground_truth/docx/issue_359_list_whitespace.txt",
+    "markdown_file": "../../../../test_documents/ground_truth/docx/issue_359_list_whitespace.md",
+    "source": "pandoc"
+  }
+}
--- a/tools/benchmark-harness/fixtures/docx/list_after_num_headers.json
+++ b/tools/benchmark-harness/fixtures/docx/list_after_num_headers.json
@@ -0,0 +1,16 @@
+{
+  "document": "../../../../test_documents/vendored/docling/docx/list_after_num_headers.docx",
+  "file_type": "docx",
+  "file_size": 15698,
+  "expected_frameworks": ["kreuzberg", "docling", "markitdown", "pandoc", "tika", "unstructured"],
+  "metadata": {
+    "description": "Document from docling test suite",
+    "source": "docling",
+    "size_category": "small"
+  },
+  "ground_truth": {
+    "text_file": "../../../../test_documents/ground_truth/docx/list_after_num_headers.txt",
+    "source": "pandoc",
+    "markdown_file": "../../../../test_documents/ground_truth/docx/list_after_num_headers.md"
+  }
+}
--- a/tools/benchmark-harness/fixtures/docx/lorem_ipsum.json
+++ b/tools/benchmark-harness/fixtures/docx/lorem_ipsum.json
@@ -0,0 +1,16 @@
+{
+  "document": "../../../../test_documents/vendored/docling/docx/lorem_ipsum.docx",
+  "file_type": "docx",
+  "file_size": 14817,
+  "expected_frameworks": ["kreuzberg", "docling", "markitdown", "pandoc", "tika", "unstructured"],
+  "metadata": {
+    "description": "Document from docling test suite",
+    "source": "docling",
+    "size_category": "small"
+  },
+  "ground_truth": {
+    "text_file": "../../../../test_documents/ground_truth/docx/lorem_ipsum.txt",
+    "source": "pandoc",
+    "markdown_file": "../../../../test_documents/ground_truth/docx/lorem_ipsum.md"
+  }
+}
--- a/tools/benchmark-harness/fixtures/docx/rlink.json
+++ b/tools/benchmark-harness/fixtures/docx/rlink.json
@@ -0,0 +1,16 @@
+{
+  "document": "../../../../test_documents/vendored/markitdown/docx/rlink.docx",
+  "file_type": "docx",
+  "file_size": 13708,
+  "expected_frameworks": ["kreuzberg", "docling", "markitdown", "pandoc", "tika", "unstructured"],
+  "metadata": {
+    "description": "Document from markitdown test suite",
+    "source": "markitdown",
+    "size_category": "small"
+  },
+  "ground_truth": {
+    "text_file": "../../../../test_documents/ground_truth/docx/rlink.txt",
+    "source": "pandoc",
+    "markdown_file": "../../../../test_documents/ground_truth/docx/rlink.md"
+  }
+}
--- a/tools/benchmark-harness/fixtures/docx/sample_document.json
+++ b/tools/benchmark-harness/fixtures/docx/sample_document.json
@@ -0,0 +1,16 @@
+{
+  "document": "../../../../test_documents/docx/sample_document.docx",
+  "file_type": "docx",
+  "file_size": 103966,
+  "expected_frameworks": ["kreuzberg"],
+  "metadata": {
+    "description": "docx test: sample_document",
+    "source": "pandoc-generated",
+    "size_category": "small"
+  },
+  "ground_truth": {
+    "text_file": "../../../../test_documents/ground_truth/docx/sample_document.txt",
+    "markdown_file": "../../../../test_documents/ground_truth/docx/sample_document.md",
+    "source": "pandoc"
+  }
+}
--- a/tools/benchmark-harness/fixtures/docx/table_with_equations.json
+++ b/tools/benchmark-harness/fixtures/docx/table_with_equations.json
@@ -0,0 +1,16 @@
+{
+  "document": "../../../../test_documents/vendored/docling/docx/table_with_equations.docx",
+  "file_type": "docx",
+  "file_size": 14228,
+  "expected_frameworks": ["kreuzberg", "docling", "markitdown", "pandoc", "tika", "unstructured"],
+  "metadata": {
+    "description": "Document from docling test suite",
+    "source": "docling",
+    "size_category": "small"
+  },
+  "ground_truth": {
+    "text_file": "../../../../test_documents/ground_truth/docx/table_with_equations.txt",
+    "source": "pandoc",
+    "markdown_file": "../../../../test_documents/ground_truth/docx/table_with_equations.md"
+  }
+}
--- a/tools/benchmark-harness/fixtures/docx/tablecell.json
+++ b/tools/benchmark-harness/fixtures/docx/tablecell.json
@@ -0,0 +1,16 @@
+{
+  "document": "../../../../test_documents/vendored/docling/docx/tablecell.docx",
+  "file_type": "docx",
+  "file_size": 15180,
+  "expected_frameworks": ["kreuzberg", "docling", "markitdown", "pandoc", "tika", "unstructured"],
+  "metadata": {
+    "description": "Document from docling test suite",
+    "source": "docling",
+    "size_category": "small"
+  },
+  "ground_truth": {
+    "text_file": "../../../../test_documents/ground_truth/docx/tablecell.txt",
+    "source": "pandoc",
+    "markdown_file": "../../../../test_documents/ground_truth/docx/tablecell.md"
+  }
+}
--- a/tools/benchmark-harness/fixtures/docx/test.json
+++ b/tools/benchmark-harness/fixtures/docx/test.json
@@ -0,0 +1,16 @@
+{
+  "document": "../../../../test_documents/vendored/markitdown/docx/test.docx",
+  "file_type": "docx",
+  "file_size": 135824,
+  "expected_frameworks": ["kreuzberg", "docling", "markitdown", "pandoc", "tika", "unstructured"],
+  "metadata": {
+    "description": "Document from markitdown test suite",
+    "source": "markitdown",
+    "size_category": "small"
+  },
+  "ground_truth": {
+    "text_file": "../../../../test_documents/ground_truth/docx/test.txt",
+    "source": "pandoc",
+    "markdown_file": "../../../../test_documents/ground_truth/docx/test.md"
+  }
+}
--- a/tools/benchmark-harness/fixtures/docx/test_emf_docx.json
+++ b/tools/benchmark-harness/fixtures/docx/test_emf_docx.json
@@ -0,0 +1,16 @@
+{
+  "document": "../../../../test_documents/vendored/docling/docx/test_emf_docx.docx",
+  "file_type": "docx",
+  "file_size": 426097,
+  "expected_frameworks": ["kreuzberg", "docling", "markitdown", "pandoc", "tika", "unstructured"],
+  "metadata": {
+    "description": "Document from docling test suite",
+    "source": "docling",
+    "size_category": "small"
+  },
+  "ground_truth": {
+    "text_file": "../../../../test_documents/ground_truth/docx/test_emf_docx.txt",
+    "source": "pandoc",
+    "markdown_file": "../../../../test_documents/ground_truth/docx/test_emf_docx.md"
+  }
+}
--- a/tools/benchmark-harness/fixtures/docx/test_with_comment.json
+++ b/tools/benchmark-harness/fixtures/docx/test_with_comment.json
@@ -0,0 +1,16 @@
+{
+  "document": "../../../../test_documents/vendored/markitdown/docx/test_with_comment.docx",
+  "file_type": "docx",
+  "file_size": 12971,
+  "expected_frameworks": ["kreuzberg", "docling", "markitdown", "pandoc", "tika", "unstructured"],
+  "metadata": {
+    "description": "Document from markitdown test suite",
+    "source": "markitdown",
+    "size_category": "small"
+  },
+  "ground_truth": {
+    "text_file": "../../../../test_documents/ground_truth/docx/test_with_comment.txt",
+    "source": "pandoc",
+    "markdown_file": "../../../../test_documents/ground_truth/docx/test_with_comment.md"
+  }
+}
--- a/tools/benchmark-harness/fixtures/docx/textbox.json
+++ b/tools/benchmark-harness/fixtures/docx/textbox.json
@@ -0,0 +1,16 @@
+{
+  "document": "../../../../test_documents/vendored/docling/docx/textbox.docx",
+  "file_type": "docx",
+  "file_size": 49206,
+  "expected_frameworks": ["kreuzberg", "docling", "markitdown", "pandoc", "tika", "unstructured"],
+  "metadata": {
+    "description": "Document from docling test suite",
+    "source": "docling",
+    "size_category": "small"
+  },
+  "ground_truth": {
+    "text_file": "../../../../test_documents/ground_truth/docx/textbox.txt",
+    "source": "pandoc",
+    "markdown_file": "../../../../test_documents/ground_truth/docx/textbox.md"
+  }
+}
--- a/tools/benchmark-harness/fixtures/docx/unit_test_formatting.json
+++ b/tools/benchmark-harness/fixtures/docx/unit_test_formatting.json
@@ -0,0 +1,16 @@
+{
+  "document": "../../../../test_documents/vendored/docling/docx/unit_test_formatting.docx",
+  "file_type": "docx",
+  "file_size": 29099,
+  "expected_frameworks": ["kreuzberg", "docling", "markitdown", "pandoc", "tika", "unstructured"],
+  "metadata": {
+    "description": "Document from docling test suite",
+    "source": "docling",
+    "size_category": "small"
+  },
+  "ground_truth": {
+    "text_file": "../../../../test_documents/ground_truth/docx/unit_test_formatting.txt",
+    "source": "pandoc",
+    "markdown_file": "../../../../test_documents/ground_truth/docx/unit_test_formatting.md"
+  }
+}
--- a/tools/benchmark-harness/fixtures/docx/unit_test_headers.json
+++ b/tools/benchmark-harness/fixtures/docx/unit_test_headers.json
@@ -0,0 +1,16 @@
+{
+  "document": "../../../../test_documents/vendored/docling/docx/unit_test_headers.docx",
+  "file_type": "docx",
+  "file_size": 13903,
+  "expected_frameworks": ["kreuzberg", "docling", "markitdown", "pandoc", "tika", "unstructured"],
+  "metadata": {
+    "description": "Document from docling test suite",
+    "source": "docling",
+    "size_category": "small"
+  },
+  "ground_truth": {
+    "text_file": "../../../../test_documents/ground_truth/docx/unit_test_headers.txt",
+    "source": "pandoc",
+    "markdown_file": "../../../../test_documents/ground_truth/docx/unit_test_headers.md"
+  }
+}
--- a/tools/benchmark-harness/fixtures/docx/unit_test_headers_numbered.json
+++ b/tools/benchmark-harness/fixtures/docx/unit_test_headers_numbered.json
@@ -0,0 +1,16 @@
+{
+  "document": "../../../../test_documents/vendored/docling/docx/unit_test_headers_numbered.docx",
+  "file_type": "docx",
+  "file_size": 16880,
+  "expected_frameworks": ["kreuzberg", "docling", "markitdown", "pandoc", "tika", "unstructured"],
+  "metadata": {
+    "description": "Document from docling test suite",
+    "source": "docling",
+    "size_category": "small"
+  },
+  "ground_truth": {
+    "text_file": "../../../../test_documents/ground_truth/docx/unit_test_headers_numbered.txt",
+    "source": "pandoc",
+    "markdown_file": "../../../../test_documents/ground_truth/docx/unit_test_headers_numbered.md"
+  }
+}
--- a/tools/benchmark-harness/fixtures/docx/unit_test_lists.json
+++ b/tools/benchmark-harness/fixtures/docx/unit_test_lists.json
@@ -0,0 +1,16 @@
+{
+  "document": "../../../../test_documents/vendored/docling/docx/unit_test_lists.docx",
+  "file_type": "docx",
+  "file_size": 15769,
+  "expected_frameworks": ["kreuzberg", "docling", "markitdown", "pandoc", "tika", "unstructured"],
+  "metadata": {
+    "description": "Document from docling test suite",
+    "source": "docling",
+    "size_category": "small"
+  },
+  "ground_truth": {
+    "text_file": "../../../../test_documents/ground_truth/docx/unit_test_lists.txt",
+    "source": "pandoc",
+    "markdown_file": "../../../../test_documents/ground_truth/docx/unit_test_lists.md"
+  }
+}
--- a/tools/benchmark-harness/fixtures/docx/word_comments.json
+++ b/tools/benchmark-harness/fixtures/docx/word_comments.json
@@ -0,0 +1,16 @@
+{
+  "document": "../../../../test_documents/vendored/docling/docx/word_comments.docx",
+  "file_type": "docx",
+  "file_size": 37399,
+  "expected_frameworks": ["kreuzberg", "docling", "markitdown", "pandoc", "tika", "unstructured"],
+  "metadata": {
+    "description": "Document from docling test suite",
+    "source": "docling",
+    "size_category": "small"
+  },
+  "ground_truth": {
+    "text_file": "../../../../test_documents/ground_truth/docx/word_comments.txt",
+    "source": "pandoc",
+    "markdown_file": "../../../../test_documents/ground_truth/docx/word_comments.md"
+  }
+}
--- a/tools/benchmark-harness/fixtures/docx/word_image_anchors.json
+++ b/tools/benchmark-harness/fixtures/docx/word_image_anchors.json
@@ -0,0 +1,16 @@
+{
+  "document": "../../../../test_documents/vendored/docling/docx/word_image_anchors.docx",
+  "file_type": "docx",
+  "file_size": 18560,
+  "expected_frameworks": ["kreuzberg", "docling", "markitdown", "pandoc", "tika", "unstructured"],
+  "metadata": {
+    "description": "Document from docling test suite",
+    "source": "docling",
+    "size_category": "small"
+  },
+  "ground_truth": {
+    "text_file": "../../../../test_documents/ground_truth/docx/word_image_anchors.txt",
+    "source": "pandoc",
+    "markdown_file": "../../../../test_documents/ground_truth/docx/word_image_anchors.md"
+  }
+}
--- a/tools/benchmark-harness/fixtures/docx/word_sample.json
+++ b/tools/benchmark-harness/fixtures/docx/word_sample.json
@@ -0,0 +1,16 @@
+{
+  "document": "../../../../test_documents/vendored/docling/docx/word_sample.docx",
+  "file_type": "docx",
+  "file_size": 103966,
+  "expected_frameworks": ["kreuzberg", "docling", "markitdown", "pandoc", "tika", "unstructured"],
+  "metadata": {
+    "description": "Document from docling test suite",
+    "source": "docling",
+    "size_category": "small"
+  },
+  "ground_truth": {
+    "text_file": "../../../../test_documents/ground_truth/docx/word_sample.txt",
+    "source": "pandoc",
+    "markdown_file": "../../../../test_documents/ground_truth/docx/word_sample.md"
+  }
+}
--- a/tools/benchmark-harness/fixtures/docx/word_tables.json
+++ b/tools/benchmark-harness/fixtures/docx/word_tables.json
@@ -0,0 +1,16 @@
+{
+  "document": "../../../../test_documents/vendored/docling/docx/word_tables.docx",
+  "file_type": "docx",
+  "file_size": 16404,
+  "expected_frameworks": ["kreuzberg", "docling", "markitdown", "pandoc", "tika", "unstructured"],
+  "metadata": {
+    "description": "Document from docling test suite",
+    "source": "docling",
+    "size_category": "small"
+  },
+  "ground_truth": {
+    "text_file": "../../../../test_documents/ground_truth/docx/word_tables.txt",
+    "source": "pandoc",
+    "markdown_file": "../../../../test_documents/ground_truth/docx/word_tables.md"
+  }
+}
--- a/tools/benchmark-harness/fixtures/docx_equations.json
+++ b/tools/benchmark-harness/fixtures/docx_equations.json
@@ -0,0 +1,15 @@
+{
+  "document": "../../../test_documents/docx/equations.docx",
+  "file_type": "docx",
+  "file_size": 15017,
+  "expected_frameworks": ["kreuzberg", "docling", "markitdown", "pandoc", "tika", "unstructured"],
+  "metadata": {
+    "description": "DOCX with mathematical equations - 15KB document with complex formatting",
+    "category": "docx-equations",
+    "size_class": "small"
+  },
+  "ground_truth": {
+    "text_file": "../../../test_documents/ground_truth/docx/docx_equations.txt",
+    "source": "vision"
+  }
+}
--- a/tools/benchmark-harness/fixtures/docx_images.json
+++ b/tools/benchmark-harness/fixtures/docx_images.json
@@ -0,0 +1,15 @@
+{
+  "document": "../../../test_documents/docx/word_image_anchors.docx",
+  "file_type": "docx",
+  "file_size": 18560,
+  "expected_frameworks": ["kreuzberg", "docling", "markitdown", "pandoc", "tika", "unstructured"],
+  "metadata": {
+    "description": "DOCX with embedded images and anchors - 18KB document",
+    "category": "docx-images",
+    "size_class": "small"
+  },
+  "ground_truth": {
+    "text_file": "../../../test_documents/ground_truth/docx/docx_images.txt",
+    "source": "vision"
+  }
+}
--- a/tools/benchmark-harness/fixtures/docx_large_formatted.json
+++ b/tools/benchmark-harness/fixtures/docx_large_formatted.json
@@ -0,0 +1,15 @@
+{
+  "document": "../../../test_documents/docx/test_emf_docx.docx",
+  "file_type": "docx",
+  "file_size": 426097,
+  "expected_frameworks": ["kreuzberg", "docling", "markitdown", "pandoc", "tika", "unstructured"],
+  "metadata": {
+    "description": "Large formatted DOCX - 416KB document with EMF graphics",
+    "category": "docx-complex",
+    "size_class": "medium"
+  },
+  "ground_truth": {
+    "text_file": "../../../test_documents/ground_truth/docx/docx_large_formatted.txt",
+    "source": "vision"
+  }
+}
--- a/tools/benchmark-harness/fixtures/docx_simple.json
+++ b/tools/benchmark-harness/fixtures/docx_simple.json
@@ -0,0 +1,14 @@
+{
+  "document": "../../../test_documents/docx/lorem_ipsum.docx",
+  "file_type": "docx",
+  "file_size": 14817,
+  "expected_frameworks": ["kreuzberg", "docling", "markitdown", "pandoc", "tika", "unstructured"],
+  "metadata": {
+    "description": "Simple DOCX - Lorem ipsum text",
+    "category": "text"
+  },
+  "ground_truth": {
+    "text_file": "../../../test_documents/ground_truth/docx/docx_simple.txt",
+    "source": "vision"
+  }
+}
--- a/tools/benchmark-harness/fixtures/eml/email-equals-attachment-filename.json
+++ b/tools/benchmark-harness/fixtures/eml/email-equals-attachment-filename.json
@@ -0,0 +1,15 @@
+{
+  "document": "../../../../test_documents/vendored/unstructured/eml/email-equals-attachment-filename.eml",
+  "file_type": "eml",
+  "file_size": 3297,
+  "expected_frameworks": ["kreuzberg", "tika", "unstructured"],
+  "metadata": {
+    "description": "Document from unstructured test suite",
+    "source": "unstructured",
+    "size_category": "small"
+  },
+  "ground_truth": {
+    "text_file": "../../../../test_documents/ground_truth/eml/email-equals-attachment-filename.txt",
+    "source": "python_email"
+  }
+}
--- a/tools/benchmark-harness/fixtures/eml/email-inline-content-disposition.json
+++ b/tools/benchmark-harness/fixtures/eml/email-inline-content-disposition.json
@@ -0,0 +1,15 @@
+{
+  "document": "../../../../test_documents/vendored/unstructured/eml/email-inline-content-disposition.eml",
+  "file_type": "eml",
+  "file_size": 657,
+  "expected_frameworks": ["kreuzberg", "tika", "unstructured"],
+  "metadata": {
+    "description": "Document from unstructured test suite",
+    "source": "unstructured",
+    "size_category": "small"
+  },
+  "ground_truth": {
+    "text_file": "../../../../test_documents/ground_truth/eml/email-inline-content-disposition.txt",
+    "source": "python_email"
+  }
+}
--- a/tools/benchmark-harness/fixtures/eml/email-no-html-content-1.json
+++ b/tools/benchmark-harness/fixtures/eml/email-no-html-content-1.json
@@ -0,0 +1,15 @@
+{
+  "document": "../../../../test_documents/vendored/unstructured/eml/email-no-html-content-1.eml",
+  "file_type": "eml",
+  "file_size": 7721,
+  "expected_frameworks": ["kreuzberg", "tika", "unstructured"],
+  "metadata": {
+    "description": "Document from unstructured test suite",
+    "source": "unstructured",
+    "size_category": "small"
+  },
+  "ground_truth": {
+    "text_file": "../../../../test_documents/ground_truth/eml/email-no-html-content-1.txt",
+    "source": "python_email"
+  }
+}
--- a/tools/benchmark-harness/fixtures/eml/email-no-utf8-2008-07-16.062410.json
+++ b/tools/benchmark-harness/fixtures/eml/email-no-utf8-2008-07-16.062410.json
@@ -0,0 +1,15 @@
+{
+  "document": "../../../../test_documents/vendored/unstructured/eml/email-no-utf8-2008-07-16.062410.eml",
+  "file_type": "eml",
+  "file_size": 31978,
+  "expected_frameworks": ["kreuzberg", "tika", "unstructured"],
+  "metadata": {
+    "description": "Document from unstructured test suite",
+    "source": "unstructured",
+    "size_category": "small"
+  },
+  "ground_truth": {
+    "text_file": "../../../../test_documents/ground_truth/eml/email-no-utf8-2008-07-16.062410.txt",
+    "source": "python_email"
+  }
+}
--- a/tools/benchmark-harness/fixtures/eml/email-no-utf8-2014-03-17.111517.json
+++ b/tools/benchmark-harness/fixtures/eml/email-no-utf8-2014-03-17.111517.json
@@ -0,0 +1,15 @@
+{
+  "document": "../../../../test_documents/vendored/unstructured/eml/email-no-utf8-2014-03-17.111517.eml",
+  "file_type": "eml",
+  "file_size": 14954,
+  "expected_frameworks": ["kreuzberg", "tika", "unstructured"],
+  "metadata": {
+    "description": "Document from unstructured test suite",
+    "source": "unstructured",
+    "size_category": "small"
+  },
+  "ground_truth": {
+    "text_file": "../../../../test_documents/ground_truth/eml/email-no-utf8-2014-03-17.111517.txt",
+    "source": "python_email"
+  }
+}
--- a/tools/benchmark-harness/fixtures/eml/email-replace-mime-encodings-error-1.json
+++ b/tools/benchmark-harness/fixtures/eml/email-replace-mime-encodings-error-1.json
@@ -0,0 +1,15 @@
+{
+  "document": "../../../../test_documents/vendored/unstructured/eml/email-replace-mime-encodings-error-1.eml",
+  "file_type": "eml",
+  "file_size": 16085,
+  "expected_frameworks": ["kreuzberg", "tika", "unstructured"],
+  "metadata": {
+    "description": "Document from unstructured test suite",
+    "source": "unstructured",
+    "size_category": "small"
+  },
+  "ground_truth": {
+    "text_file": "../../../../test_documents/ground_truth/eml/email-replace-mime-encodings-error-1.txt",
+    "source": "python_email"
+  }
+}
--- a/tools/benchmark-harness/fixtures/eml/email-replace-mime-encodings-error-2.json
+++ b/tools/benchmark-harness/fixtures/eml/email-replace-mime-encodings-error-2.json
@@ -0,0 +1,15 @@
+{
+  "document": "../../../../test_documents/vendored/unstructured/eml/email-replace-mime-encodings-error-2.eml",
+  "file_type": "eml",
+  "file_size": 26271,
+  "expected_frameworks": ["kreuzberg", "tika", "unstructured"],
+  "metadata": {
+    "description": "Document from unstructured test suite",
+    "source": "unstructured",
+    "size_category": "small"
+  },
+  "ground_truth": {
+    "text_file": "../../../../test_documents/ground_truth/eml/email-replace-mime-encodings-error-2.txt",
+    "source": "python_email"
+  }
+}
--- a/tools/benchmark-harness/fixtures/eml/email-replace-mime-encodings-error-3.json
+++ b/tools/benchmark-harness/fixtures/eml/email-replace-mime-encodings-error-3.json
@@ -0,0 +1,15 @@
+{
+  "document": "../../../../test_documents/vendored/unstructured/eml/email-replace-mime-encodings-error-3.eml",
+  "file_type": "eml",
+  "file_size": 56028,
+  "expected_frameworks": ["kreuzberg", "tika", "unstructured"],
+  "metadata": {
+    "description": "Document from unstructured test suite",
+    "source": "unstructured",
+    "size_category": "small"
+  },
+  "ground_truth": {
+    "text_file": "../../../../test_documents/ground_truth/eml/email-replace-mime-encodings-error-3.txt",
+    "source": "python_email"
+  }
+}
--- a/tools/benchmark-harness/fixtures/eml/email-replace-mime-encodings-error-4.json
+++ b/tools/benchmark-harness/fixtures/eml/email-replace-mime-encodings-error-4.json
@@ -0,0 +1,15 @@
+{
+  "document": "../../../../test_documents/vendored/unstructured/eml/email-replace-mime-encodings-error-4.eml",
+  "file_type": "eml",
+  "file_size": 34433,
+  "expected_frameworks": ["kreuzberg", "tika", "unstructured"],
+  "metadata": {
+    "description": "Document from unstructured test suite",
+    "source": "unstructured",
+    "size_category": "small"
+  },
+  "ground_truth": {
+    "text_file": "../../../../test_documents/ground_truth/eml/email-replace-mime-encodings-error-4.txt",
+    "source": "python_email"
+  }
+}
--- a/tools/benchmark-harness/fixtures/eml/email-replace-mime-encodings-error-5.json
+++ b/tools/benchmark-harness/fixtures/eml/email-replace-mime-encodings-error-5.json
@@ -0,0 +1,15 @@
+{
+  "document": "../../../../test_documents/vendored/unstructured/eml/email-replace-mime-encodings-error-5.eml",
+  "file_type": "eml",
+  "file_size": 14567,
+  "expected_frameworks": ["kreuzberg", "tika", "unstructured"],
+  "metadata": {
+    "description": "Document from unstructured test suite",
+    "source": "unstructured",
+    "size_category": "small"
+  },
+  "ground_truth": {
+    "text_file": "../../../../test_documents/ground_truth/eml/email-replace-mime-encodings-error-5.txt",
+    "source": "python_email"
+  }
+}
--- a/tools/benchmark-harness/fixtures/eml/email-with-image.json
+++ b/tools/benchmark-harness/fixtures/eml/email-with-image.json
@@ -0,0 +1,15 @@
+{
+  "document": "../../../../test_documents/vendored/unstructured/eml/email-with-image.eml",
+  "file_type": "eml",
+  "file_size": 296696,
+  "expected_frameworks": ["kreuzberg", "tika", "unstructured"],
+  "metadata": {
+    "description": "Document from unstructured test suite",
+    "source": "unstructured",
+    "size_category": "small"
+  },
+  "ground_truth": {
+    "text_file": "../../../../test_documents/ground_truth/eml/email-with-image.txt",
+    "source": "python_email"
+  }
+}
--- a/tools/benchmark-harness/fixtures/eml/fake-email-attachment.json
+++ b/tools/benchmark-harness/fixtures/eml/fake-email-attachment.json
@@ -0,0 +1,15 @@
+{
+  "document": "../../../../test_documents/vendored/unstructured/eml/fake-email-attachment.eml",
+  "file_type": "eml",
+  "file_size": 1704,
+  "expected_frameworks": ["kreuzberg", "tika", "unstructured"],
+  "metadata": {
+    "description": "Document from unstructured test suite",
+    "source": "unstructured",
+    "size_category": "small"
+  },
+  "ground_truth": {
+    "text_file": "../../../../test_documents/ground_truth/eml/fake-email-attachment.txt",
+    "source": "python_email"
+  }
+}
--- a/tools/benchmark-harness/fixtures/eml/fake-email-b64.json
+++ b/tools/benchmark-harness/fixtures/eml/fake-email-b64.json
@@ -0,0 +1,15 @@
+{
+  "document": "../../../../test_documents/vendored/unstructured/eml/fake-email-b64.eml",
+  "file_type": "eml",
+  "file_size": 979,
+  "expected_frameworks": ["kreuzberg", "tika", "unstructured"],
+  "metadata": {
+    "description": "Document from unstructured test suite",
+    "source": "unstructured",
+    "size_category": "small"
+  },
+  "ground_truth": {
+    "text_file": "../../../../test_documents/ground_truth/eml/fake-email-b64.txt",
+    "source": "python_email"
+  }
+}
--- a/tools/benchmark-harness/fixtures/eml/fake-email-header.json
+++ b/tools/benchmark-harness/fixtures/eml/fake-email-header.json
@@ -0,0 +1,15 @@
+{
+  "document": "../../../../test_documents/vendored/unstructured/eml/fake-email-header.eml",
+  "file_type": "eml",
+  "file_size": 1207,
+  "expected_frameworks": ["kreuzberg", "tika", "unstructured"],
+  "metadata": {
+    "description": "Document from unstructured test suite",
+    "source": "unstructured",
+    "size_category": "small"
+  },
+  "ground_truth": {
+    "text_file": "../../../../test_documents/ground_truth/eml/fake-email-header.txt",
+    "source": "python_email"
+  }
+}
--- a/tools/benchmark-harness/fixtures/eml/fake-email-image-embedded.json
+++ b/tools/benchmark-harness/fixtures/eml/fake-email-image-embedded.json
@@ -0,0 +1,15 @@
+{
+  "document": "../../../../test_documents/vendored/unstructured/eml/fake-email-image-embedded.eml",
+  "file_type": "eml",
+  "file_size": 297126,
+  "expected_frameworks": ["kreuzberg", "tika", "unstructured"],
+  "metadata": {
+    "description": "Document from unstructured test suite",
+    "source": "unstructured",
+    "size_category": "small"
+  },
+  "ground_truth": {
+    "text_file": "../../../../test_documents/ground_truth/eml/fake-email-image-embedded.txt",
+    "source": "python_email"
+  }
+}
--- a/tools/benchmark-harness/fixtures/eml/fake-email-malformed-encoding.json
+++ b/tools/benchmark-harness/fixtures/eml/fake-email-malformed-encoding.json
@@ -0,0 +1,15 @@
+{
+  "document": "../../../../test_documents/vendored/unstructured/eml/fake-email-malformed-encoding.eml",
+  "file_type": "eml",
+  "file_size": 898,
+  "expected_frameworks": ["kreuzberg", "tika", "unstructured"],
+  "metadata": {
+    "description": "Document from unstructured test suite",
+    "source": "unstructured",
+    "size_category": "small"
+  },
+  "ground_truth": {
+    "text_file": "../../../../test_documents/ground_truth/eml/fake-email-malformed-encoding.txt",
+    "source": "python_email"
+  }
+}
--- a/tools/benchmark-harness/fixtures/eml/fake-email-utf-16-be.json
+++ b/tools/benchmark-harness/fixtures/eml/fake-email-utf-16-be.json
@@ -0,0 +1,15 @@
+{
+  "document": "../../../../test_documents/vendored/unstructured/eml/fake-email-utf-16-be.eml",
+  "file_type": "eml",
+  "file_size": 1614,
+  "expected_frameworks": ["kreuzberg", "tika", "unstructured"],
+  "metadata": {
+    "description": "Document from unstructured test suite",
+    "source": "unstructured",
+    "size_category": "small"
+  },
+  "ground_truth": {
+    "text_file": "../../../../test_documents/ground_truth/eml/fake-email-utf-16-be.txt",
+    "source": "python_email"
+  }
+}
--- a/tools/benchmark-harness/fixtures/eml/fake-email-utf-16-le.json
+++ b/tools/benchmark-harness/fixtures/eml/fake-email-utf-16-le.json
@@ -0,0 +1,15 @@
+{
+  "document": "../../../../test_documents/vendored/unstructured/eml/fake-email-utf-16-le.eml",
+  "file_type": "eml",
+  "file_size": 1614,
+  "expected_frameworks": ["kreuzberg", "tika", "unstructured"],
+  "metadata": {
+    "description": "Document from unstructured test suite",
+    "source": "unstructured",
+    "size_category": "small"
+  },
+  "ground_truth": {
+    "text_file": "../../../../test_documents/ground_truth/eml/fake-email-utf-16-le.txt",
+    "source": "python_email"
+  }
+}
--- a/tools/benchmark-harness/fixtures/eml/fake-email-utf-16.json
+++ b/tools/benchmark-harness/fixtures/eml/fake-email-utf-16.json
@@ -0,0 +1,15 @@
+{
+  "document": "../../../../test_documents/vendored/unstructured/eml/fake-email-utf-16.eml",
+  "file_type": "eml",
+  "file_size": 1616,
+  "expected_frameworks": ["kreuzberg", "tika", "unstructured"],
+  "metadata": {
+    "description": "Document from unstructured test suite",
+    "source": "unstructured",
+    "size_category": "small"
+  },
+  "ground_truth": {
+    "text_file": "../../../../test_documents/ground_truth/eml/fake-email-utf-16.txt",
+    "source": "python_email"
+  }
+}
--- a/tools/benchmark-harness/fixtures/eml/fake-email.json
+++ b/tools/benchmark-harness/fixtures/eml/fake-email.json
@@ -0,0 +1,15 @@
+{
+  "document": "../../../../test_documents/vendored/unstructured/eml/fake-email.eml",
+  "file_type": "eml",
+  "file_size": 807,
+  "expected_frameworks": ["kreuzberg", "tika", "unstructured"],
+  "metadata": {
+    "description": "Document from unstructured test suite",
+    "source": "unstructured",
+    "size_category": "small"
+  },
+  "ground_truth": {
+    "text_file": "../../../../test_documents/ground_truth/eml/fake-email.txt",
+    "source": "python_email"
+  }
+}
--- a/tools/benchmark-harness/fixtures/eml/fake-encrypted.json
+++ b/tools/benchmark-harness/fixtures/eml/fake-encrypted.json
@@ -0,0 +1,15 @@
+{
+  "document": "../../../../test_documents/vendored/unstructured/eml/fake-encrypted.eml",
+  "file_type": "eml",
+  "file_size": 669,
+  "expected_frameworks": ["kreuzberg", "tika", "unstructured"],
+  "metadata": {
+    "description": "Document from unstructured test suite",
+    "source": "unstructured",
+    "size_category": "small"
+  },
+  "ground_truth": {
+    "text_file": "../../../../test_documents/ground_truth/eml/fake-encrypted.txt",
+    "source": "python_email"
+  }
+}
--- a/tools/benchmark-harness/fixtures/eml/family-day.json
+++ b/tools/benchmark-harness/fixtures/eml/family-day.json
@@ -0,0 +1,15 @@
+{
+  "document": "../../../../test_documents/vendored/unstructured/eml/family-day.eml",
+  "file_type": "eml",
+  "file_size": 1291,
+  "expected_frameworks": ["kreuzberg", "tika", "unstructured"],
+  "metadata": {
+    "description": "Document from unstructured test suite",
+    "source": "unstructured",
+    "size_category": "small"
+  },
+  "ground_truth": {
+    "text_file": "../../../../test_documents/ground_truth/eml/family-day.txt",
+    "source": "python_email"
+  }
+}
--- a/tools/benchmark-harness/fixtures/eml/mime-attach-mp3.json
+++ b/tools/benchmark-harness/fixtures/eml/mime-attach-mp3.json
@@ -0,0 +1,15 @@
+{
+  "document": "../../../../test_documents/vendored/unstructured/eml/mime-attach-mp3.eml",
+  "file_type": "eml",
+  "file_size": 70911,
+  "expected_frameworks": ["kreuzberg", "tika", "unstructured"],
+  "metadata": {
+    "description": "Document from unstructured test suite",
+    "source": "unstructured",
+    "size_category": "small"
+  },
+  "ground_truth": {
+    "text_file": "../../../../test_documents/ground_truth/eml/mime-attach-mp3.txt",
+    "source": "python_email"
+  }
+}
--- a/tools/benchmark-harness/fixtures/eml/mime-different-plain-html.json
+++ b/tools/benchmark-harness/fixtures/eml/mime-different-plain-html.json
@@ -0,0 +1,15 @@
+{
+  "document": "../../../../test_documents/vendored/unstructured/eml/mime-different-plain-html.eml",
+  "file_type": "eml",
+  "file_size": 1397,
+  "expected_frameworks": ["kreuzberg", "tika", "unstructured"],
+  "metadata": {
+    "description": "Document from unstructured test suite",
+    "source": "unstructured",
+    "size_category": "small"
+  },
+  "ground_truth": {
+    "text_file": "../../../../test_documents/ground_truth/eml/mime-different-plain-html.txt",
+    "source": "python_email"
+  }
+}
--- a/tools/benchmark-harness/fixtures/eml/mime-html-only.json
+++ b/tools/benchmark-harness/fixtures/eml/mime-html-only.json
@@ -0,0 +1,15 @@
+{
+  "document": "../../../../test_documents/vendored/unstructured/eml/mime-html-only.eml",
+  "file_type": "eml",
+  "file_size": 640,
+  "expected_frameworks": ["kreuzberg", "tika", "unstructured"],
+  "metadata": {
+    "description": "Document from unstructured test suite",
+    "source": "unstructured",
+    "size_category": "small"
+  },
+  "ground_truth": {
+    "text_file": "../../../../test_documents/ground_truth/eml/mime-html-only.txt",
+    "source": "python_email"
+  }
+}
--- a/tools/benchmark-harness/fixtures/eml/mime-multi-to-cc-bcc.json
+++ b/tools/benchmark-harness/fixtures/eml/mime-multi-to-cc-bcc.json
@@ -0,0 +1,15 @@
+{
+  "document": "../../../../test_documents/vendored/unstructured/eml/mime-multi-to-cc-bcc.eml",
+  "file_type": "eml",
+  "file_size": 350,
+  "expected_frameworks": ["kreuzberg", "tika", "unstructured"],
+  "metadata": {
+    "description": "Document from unstructured test suite",
+    "source": "unstructured",
+    "size_category": "small"
+  },
+  "ground_truth": {
+    "text_file": "../../../../test_documents/ground_truth/eml/mime-multi-to-cc-bcc.txt",
+    "source": "python_email"
+  }
+}
--- a/tools/benchmark-harness/fixtures/eml/mime-multipart-digest.json
+++ b/tools/benchmark-harness/fixtures/eml/mime-multipart-digest.json
@@ -0,0 +1,15 @@
+{
+  "document": "../../../../test_documents/vendored/unstructured/eml/mime-multipart-digest.eml",
+  "file_type": "eml",
+  "file_size": 721,
+  "expected_frameworks": ["kreuzberg", "tika", "unstructured"],
+  "metadata": {
+    "description": "Document from unstructured test suite",
+    "source": "unstructured",
+    "size_category": "small"
+  },
+  "ground_truth": {
+    "text_file": "../../../../test_documents/ground_truth/eml/mime-multipart-digest.txt",
+    "source": "python_email"
+  }
+}
--- a/tools/benchmark-harness/fixtures/eml/mime-no-body.json
+++ b/tools/benchmark-harness/fixtures/eml/mime-no-body.json
@@ -0,0 +1,15 @@
+{
+  "document": "../../../../test_documents/vendored/unstructured/eml/mime-no-body.eml",
+  "file_type": "eml",
+  "file_size": 985,
+  "expected_frameworks": ["kreuzberg", "tika", "unstructured"],
+  "metadata": {
+    "description": "Document from unstructured test suite",
+    "source": "unstructured",
+    "size_category": "small"
+  },
+  "ground_truth": {
+    "text_file": "../../../../test_documents/ground_truth/eml/mime-no-body.txt",
+    "source": "python_email"
+  }
+}
--- a/tools/benchmark-harness/fixtures/eml/mime-no-subject.json
+++ b/tools/benchmark-harness/fixtures/eml/mime-no-subject.json
@@ -0,0 +1,15 @@
+{
+  "document": "../../../../test_documents/vendored/unstructured/eml/mime-no-subject.eml",
+  "file_type": "eml",
+  "file_size": 162,
+  "expected_frameworks": ["kreuzberg", "tika", "unstructured"],
+  "metadata": {
+    "description": "Document from unstructured test suite",
+    "source": "unstructured",
+    "size_category": "small"
+  },
+  "ground_truth": {
+    "text_file": "../../../../test_documents/ground_truth/eml/mime-no-subject.txt",
+    "source": "python_email"
+  }
+}
--- a/tools/benchmark-harness/fixtures/eml/mime-no-to.json
+++ b/tools/benchmark-harness/fixtures/eml/mime-no-to.json
@@ -0,0 +1,15 @@
+{
+  "document": "../../../../test_documents/vendored/unstructured/eml/mime-no-to.eml",
+  "file_type": "eml",
+  "file_size": 264,
+  "expected_frameworks": ["kreuzberg", "tika", "unstructured"],
+  "metadata": {
+    "description": "Document from unstructured test suite",
+    "source": "unstructured",
+    "size_category": "small"
+  },
+  "ground_truth": {
+    "text_file": "../../../../test_documents/ground_truth/eml/mime-no-to.txt",
+    "source": "python_email"
+  }
+}
--- a/tools/benchmark-harness/fixtures/eml/mime-simple.json
+++ b/tools/benchmark-harness/fixtures/eml/mime-simple.json
@@ -0,0 +1,15 @@
+{
+  "document": "../../../../test_documents/vendored/unstructured/eml/mime-simple.eml",
+  "file_type": "eml",
+  "file_size": 452,
+  "expected_frameworks": ["kreuzberg", "tika", "unstructured"],
+  "metadata": {
+    "description": "Document from unstructured test suite",
+    "source": "unstructured",
+    "size_category": "small"
+  },
+  "ground_truth": {
+    "text_file": "../../../../test_documents/ground_truth/eml/mime-simple.txt",
+    "source": "python_email"
+  }
+}
--- a/tools/benchmark-harness/fixtures/eml/mime-word-encoded-subject.json
+++ b/tools/benchmark-harness/fixtures/eml/mime-word-encoded-subject.json
@@ -0,0 +1,15 @@
+{
+  "document": "../../../../test_documents/vendored/unstructured/eml/mime-word-encoded-subject.eml",
+  "file_type": "eml",
+  "file_size": 261,
+  "expected_frameworks": ["kreuzberg", "tika", "unstructured"],
+  "metadata": {
+    "description": "Document from unstructured test suite",
+    "source": "unstructured",
+    "size_category": "small"
+  },
+  "ground_truth": {
+    "text_file": "../../../../test_documents/ground_truth/eml/mime-word-encoded-subject.txt",
+    "source": "python_email"
+  }
+}
--- a/tools/benchmark-harness/fixtures/eml/rfc822-no-date.json
+++ b/tools/benchmark-harness/fixtures/eml/rfc822-no-date.json
@@ -0,0 +1,15 @@
+{
+  "document": "../../../../test_documents/vendored/unstructured/eml/rfc822-no-date.eml",
+  "file_type": "eml",
+  "file_size": 232,
+  "expected_frameworks": ["kreuzberg", "tika", "unstructured"],
+  "metadata": {
+    "description": "Document from unstructured test suite",
+    "source": "unstructured",
+    "size_category": "small"
+  },
+  "ground_truth": {
+    "text_file": "../../../../test_documents/ground_truth/eml/rfc822-no-date.txt",
+    "source": "python_email"
+  }
+}
--- a/tools/benchmark-harness/fixtures/eml/simple-rfc-822.json
+++ b/tools/benchmark-harness/fixtures/eml/simple-rfc-822.json
@@ -0,0 +1,15 @@
+{
+  "document": "../../../../test_documents/vendored/unstructured/eml/simple-rfc-822.eml",
+  "file_type": "eml",
+  "file_size": 679,
+  "expected_frameworks": ["kreuzberg", "tika", "unstructured"],
+  "metadata": {
+    "description": "Document from unstructured test suite",
+    "source": "unstructured",
+    "size_category": "small"
+  },
+  "ground_truth": {
+    "text_file": "../../../../test_documents/ground_truth/eml/simple-rfc-822.txt",
+    "source": "python_email"
+  }
+}
--- a/tools/benchmark-harness/fixtures/eml/test-invalid-date.json
+++ b/tools/benchmark-harness/fixtures/eml/test-invalid-date.json
@@ -0,0 +1,15 @@
+{
+  "document": "../../../../test_documents/vendored/unstructured/eml/test-invalid-date.eml",
+  "file_type": "eml",
+  "file_size": 161,
+  "expected_frameworks": ["kreuzberg", "tika", "unstructured"],
+  "metadata": {
+    "description": "Document from unstructured test suite",
+    "source": "unstructured",
+    "size_category": "small"
+  },
+  "ground_truth": {
+    "text_file": "../../../../test_documents/ground_truth/eml/test-invalid-date.txt",
+    "source": "python_email"
+  }
+}
--- a/tools/benchmark-harness/fixtures/eml/test-iso-8601-date.json
+++ b/tools/benchmark-harness/fixtures/eml/test-iso-8601-date.json
@@ -0,0 +1,15 @@
+{
+  "document": "../../../../test_documents/vendored/unstructured/eml/test-iso-8601-date.eml",
+  "file_type": "eml",
+  "file_size": 135,
+  "expected_frameworks": ["kreuzberg", "tika", "unstructured"],
+  "metadata": {
+    "description": "Document from unstructured test suite",
+    "source": "unstructured",
+    "size_category": "small"
+  },
+  "ground_truth": {
+    "text_file": "../../../../test_documents/ground_truth/eml/test-iso-8601-date.txt",
+    "source": "python_email"
+  }
+}
--- a/tools/benchmark-harness/fixtures/eml/test-rfc2822-date.json
+++ b/tools/benchmark-harness/fixtures/eml/test-rfc2822-date.json
@@ -0,0 +1,15 @@
+{
+  "document": "../../../../test_documents/vendored/unstructured/eml/test-rfc2822-date.eml",
+  "file_type": "eml",
+  "file_size": 151,
+  "expected_frameworks": ["kreuzberg", "tika", "unstructured"],
+  "metadata": {
+    "description": "Document from unstructured test suite",
+    "source": "unstructured",
+    "size_category": "small"
+  },
+  "ground_truth": {
+    "text_file": "../../../../test_documents/ground_truth/eml/test-rfc2822-date.txt",
+    "source": "python_email"
+  }
+}
--- a/tools/benchmark-harness/fixtures/eml_attachments.json
+++ b/tools/benchmark-harness/fixtures/eml_attachments.json
@@ -0,0 +1,14 @@
+{
+  "document": "../../../test_documents/email/mailgun_pdf_attachment.eml",
+  "file_type": "eml",
+  "file_size": 1514,
+  "expected_frameworks": ["kreuzberg", "tika", "unstructured"],
+  "metadata": {
+    "description": "Email with PDF attachment from Mailgun",
+    "category": "attachments"
+  },
+  "ground_truth": {
+    "text_file": "../../../test_documents/ground_truth/eml/eml_attachments.txt",
+    "source": "python_email"
+  }
+}
--- a/Show More
+++ b/Show More