Nomad changes

2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions
--- a/fixtures/async/async_extract_bytes.json
+++ b/fixtures/async/async_extract_bytes.json
@@ -0,0 +1,30 @@
+{
+  "id": "async_extract_bytes",
+  "category": "async",
+  "description": "Async extract_bytes call on PDF document",
+  "tags": ["async", "api", "extract_bytes"],
+  "call": "extract_bytes",
+  "skip": {
+    "languages": ["wasm"],
+    "reason": "WASM uses synchronous-only API; async extraction is not available on the wasm target"
+  },
+  "input": {
+    "data": "pdf/fake_memo.pdf",
+    "mime_type": "application/pdf"
+  },
+  "assertions": [
+    {
+      "type": "not_error"
+    },
+    {
+      "type": "equals",
+      "field": "mime_type",
+      "value": "application/pdf"
+    },
+    {
+      "type": "min_length",
+      "field": "content",
+      "value": 50
+    }
+  ]
+}
--- a/fixtures/async/async_extract_bytes_empty_mime.json
+++ b/fixtures/async/async_extract_bytes_empty_mime.json
@@ -0,0 +1,9 @@
+{
+  "id": "async_extract_bytes_empty_mime",
+  "category": "async",
+  "description": "extract_bytes empty MIME async",
+  "tags": ["async", "error"],
+  "call": "extract_bytes",
+  "input": { "data": "text/plain.txt", "mime_type": "", "config": {} },
+  "assertions": [{ "type": "error" }]
+}
--- a/fixtures/async/async_extract_bytes_invalid_mime.json
+++ b/fixtures/async/async_extract_bytes_invalid_mime.json
@@ -0,0 +1,9 @@
+{
+  "id": "async_extract_bytes_invalid_mime",
+  "category": "async",
+  "description": "extract_bytes unsupported MIME async",
+  "tags": ["async", "error"],
+  "call": "extract_bytes",
+  "input": { "data": "text/plain.txt", "mime_type": "application/x-nonexistent", "config": {} },
+  "assertions": [{ "type": "error" }]
+}
--- a/fixtures/batch/batch_bytes_invalid_mime.json
+++ b/fixtures/batch/batch_bytes_invalid_mime.json
@@ -0,0 +1,11 @@
+{
+  "id": "batch_bytes_invalid_mime",
+  "category": "batch",
+  "description": "batch_extract_bytes_sync invalid MIME",
+  "tags": ["batch", "error"],
+  "call": "batch_extract_bytes_sync",
+  "input": {
+    "items": [{ "content": [72, 101, 108, 108, 111], "mime_type": "application/x-nonexistent" }]
+  },
+  "assertions": [{ "type": "not_error" }]
+}
--- a/fixtures/batch/batch_extract_bytes_happy.json
+++ b/fixtures/batch/batch_extract_bytes_happy.json
@@ -0,0 +1,30 @@
+{
+  "id": "batch_extract_bytes_happy",
+  "category": "batch",
+  "description": "batch_extract_bytes: happy path with mixed inputs",
+  "call": "batch_extract_bytes",
+  "input": {
+    "items": [
+      {
+        "content": [72, 101, 108, 108, 111, 44, 32, 119, 111, 114, 108, 100, 33],
+        "mime_type": "text/plain"
+      },
+      {
+        "content": [
+          60, 104, 116, 109, 108, 62, 60, 98, 111, 100, 121, 62, 84, 101, 115, 116, 60, 47, 98, 111,
+          100, 121, 62, 60, 47, 104, 116, 109, 108, 62
+        ],
+        "mime_type": "text/html"
+      }
+    ]
+  },
+  "assertions": [
+    {
+      "type": "not_error"
+    },
+    {
+      "type": "count_min",
+      "value": 1
+    }
+  ]
+}
--- a/fixtures/batch/batch_extract_bytes_mixed_format.json
+++ b/fixtures/batch/batch_extract_bytes_mixed_format.json
@@ -0,0 +1,19 @@
+{
+  "id": "batch_extract_bytes_mixed_format",
+  "category": "batch",
+  "description": "batch_extract_bytes: handles unsupported MIME gracefully",
+  "call": "batch_extract_bytes",
+  "input": {
+    "items": [
+      {
+        "content": [80, 68, 70, 32, 112, 108, 97, 99, 101, 104, 111, 108, 100, 101, 114],
+        "mime_type": "application/x-unknown"
+      }
+    ]
+  },
+  "assertions": [
+    {
+      "type": "not_error"
+    }
+  ]
+}
--- a/fixtures/batch/batch_extract_bytes_size_cap.json
+++ b/fixtures/batch/batch_extract_bytes_size_cap.json
@@ -0,0 +1,51 @@
+{
+  "id": "batch_extract_bytes_size_cap",
+  "category": "batch",
+  "description": "batch_extract_bytes: archive size cap triggers error",
+  "call": "batch_extract_bytes",
+  "skip": {
+    "languages": [
+      "rust",
+      "node",
+      "python",
+      "php",
+      "wasm",
+      "go",
+      "r",
+      "ruby",
+      "csharp",
+      "elixir",
+      "kotlin",
+      "kotlin_android",
+      "swift",
+      "zig",
+      "java",
+      "dart"
+    ],
+    "reason": "SecurityLimits.max_content_size is only enforced by archive/Excel extractors; test requires actual archive format to trigger error, which is not easily testable via byte fixtures"
+  },
+  "input": {
+    "items": [
+      {
+        "content": [
+          97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97,
+          97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97,
+          97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97,
+          97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97,
+          97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97
+        ],
+        "mime_type": "text/plain"
+      }
+    ],
+    "config": {
+      "security_limits": {
+        "max_content_size": 1
+      }
+    }
+  },
+  "assertions": [
+    {
+      "type": "error"
+    }
+  ]
+}
--- a/fixtures/batch/batch_extract_bytes_sync_empty_list.json
+++ b/fixtures/batch/batch_extract_bytes_sync_empty_list.json
@@ -0,0 +1,18 @@
+{
+  "id": "batch_extract_bytes_sync_empty_list",
+  "category": "batch",
+  "description": "batch_extract_bytes_sync: empty batch",
+  "call": "batch_extract_bytes_sync",
+  "input": {
+    "items": []
+  },
+  "assertions": [
+    {
+      "type": "not_error"
+    },
+    {
+      "type": "count_equals",
+      "value": 0
+    }
+  ]
+}
--- a/fixtures/batch/batch_extract_bytes_sync_invalid_mime.json
+++ b/fixtures/batch/batch_extract_bytes_sync_invalid_mime.json
@@ -0,0 +1,19 @@
+{
+  "id": "batch_extract_bytes_sync_invalid_mime",
+  "category": "batch",
+  "description": "batch_extract_bytes_sync: unsupported MIME",
+  "call": "batch_extract_bytes_sync",
+  "input": {
+    "items": [
+      {
+        "content": [100, 97, 116, 97],
+        "mime_type": "application/x-unknown"
+      }
+    ]
+  },
+  "assertions": [
+    {
+      "type": "not_error"
+    }
+  ]
+}
--- a/fixtures/batch/batch_file_async_basic.json
+++ b/fixtures/batch/batch_file_async_basic.json
@@ -0,0 +1,33 @@
+{
+  "id": "batch_file_async_basic",
+  "category": "batch",
+  "description": "Extract text from multiple files asynchronously",
+  "tags": [
+    "batch",
+    "async",
+    "concurrent",
+    "multiple_files"
+  ],
+  "call": "batch_extract_files",
+  "input": {
+    "paths": [
+      {
+        "path": "pdf/fake_memo.pdf"
+      },
+      {
+        "path": "text/fake_text.txt"
+      }
+    ]
+  },
+  "assertions": [
+    {
+      "type": "not_error"
+    }
+  ],
+  "skip": {
+    "languages": [
+      "wasm"
+    ],
+    "reason": "WASM cannot access filesystem; use extractBytes with file content instead"
+  }
+}
--- a/fixtures/batch/batch_file_async_not_found.json
+++ b/fixtures/batch/batch_file_async_not_found.json
@@ -0,0 +1,28 @@
+{
+  "id": "batch_file_async_not_found",
+  "category": "batch",
+  "description": "batch_extract_files: async extraction of a nonexistent path",
+  "tags": [
+    "batch",
+    "async"
+  ],
+  "call": "batch_extract_files",
+  "input": {
+    "paths": [
+      {
+        "path": "/nonexistent/a.pdf"
+      }
+    ]
+  },
+  "assertions": [
+    {
+      "type": "not_error"
+    }
+  ],
+  "skip": {
+    "languages": [
+      "wasm"
+    ],
+    "reason": "WASM cannot access filesystem; use extractBytes with file content instead"
+  }
+}
--- a/fixtures/batch/batch_file_not_found.json
+++ b/fixtures/batch/batch_file_not_found.json
@@ -0,0 +1,31 @@
+{
+  "id": "batch_file_not_found",
+  "category": "batch",
+  "description": "batch_extract_files_sync: all paths nonexistent",
+  "tags": [
+    "batch",
+    "error"
+  ],
+  "call": "batch_extract_files_sync",
+  "input": {
+    "paths": [
+      {
+        "path": "/nonexistent/a.pdf"
+      },
+      {
+        "path": "/nonexistent/b.txt"
+      }
+    ]
+  },
+  "assertions": [
+    {
+      "type": "not_error"
+    }
+  ],
+  "skip": {
+    "languages": [
+      "wasm"
+    ],
+    "reason": "WASM cannot access filesystem; use extractBytes with file content instead"
+  }
+}
--- a/fixtures/batch/batch_file_partial.json
+++ b/fixtures/batch/batch_file_partial.json
@@ -0,0 +1,30 @@
+{
+  "id": "batch_file_partial",
+  "category": "batch",
+  "description": "batch_extract_files_sync: mix of existing and nonexistent paths",
+  "tags": [
+    "batch"
+  ],
+  "call": "batch_extract_files_sync",
+  "input": {
+    "paths": [
+      {
+        "path": "text/plain.txt"
+      },
+      {
+        "path": "/nonexistent/missing.pdf"
+      }
+    ]
+  },
+  "assertions": [
+    {
+      "type": "not_error"
+    }
+  ],
+  "skip": {
+    "languages": [
+      "wasm"
+    ],
+    "reason": "WASM cannot access filesystem; use extractBytes with file content instead"
+  }
+}
--- a/fixtures/batch/batch_file_sync_basic.json
+++ b/fixtures/batch/batch_file_sync_basic.json
@@ -0,0 +1,32 @@
+{
+  "id": "batch_file_sync_basic",
+  "category": "batch",
+  "description": "Extract text from multiple files synchronously",
+  "tags": [
+    "batch",
+    "sync",
+    "multiple_files"
+  ],
+  "call": "batch_extract_files_sync",
+  "input": {
+    "paths": [
+      {
+        "path": "pdf/fake_memo.pdf"
+      },
+      {
+        "path": "text/fake_text.txt"
+      }
+    ]
+  },
+  "assertions": [
+    {
+      "type": "not_error"
+    }
+  ],
+  "skip": {
+    "languages": [
+      "wasm"
+    ],
+    "reason": "WASM cannot access filesystem; use extractBytes with file content instead"
+  }
+}
--- a/fixtures/code/code_shebang_detection.json
+++ b/fixtures/code/code_shebang_detection.json
@@ -0,0 +1,41 @@
+{
+  "id": "code_shebang_detection",
+  "category": "code",
+  "description": "Test language detection from shebang line via file input",
+  "tags": [
+    "code",
+    "shebang",
+    "tree-sitter"
+  ],
+  "call": "extract_file_sync",
+  "input": {
+    "path": "code/script.sh",
+    "mime_type": "text/x-source-code"
+  },
+  "assertions": [
+    {
+      "type": "equals",
+      "field": "mime_type",
+      "value": "text/x-source-code"
+    },
+    {
+      "type": "min_length",
+      "field": "content",
+      "value": 10
+    },
+    {
+      "type": "contains_all",
+      "field": "content",
+      "values": [
+        "build",
+        "clean"
+      ]
+    }
+  ],
+  "skip": {
+    "languages": [
+      "wasm"
+    ],
+    "reason": "WASM cannot access filesystem; use extractBytes with file content instead"
+  }
+}
--- a/fixtures/contract/api_batch_bytes_async.json
+++ b/fixtures/contract/api_batch_bytes_async.json
@@ -0,0 +1,30 @@
+{
+  "id": "api_batch_bytes_async",
+  "description": "Tests async batch bytes extraction API (batch_extract_bytes)",
+  "tags": ["contract", "api", "batch"],
+  "call": "extract_file",
+  "skip": {
+    "languages": ["wasm"],
+    "reason": "WASM uses synchronous-only API; async extraction is not available on the wasm target"
+  },
+  "input": {
+    "path": "pdf/fake_memo.pdf"
+  },
+  "assertions": [
+    {
+      "type": "equals",
+      "field": "mime_type",
+      "value": "application/pdf"
+    },
+    {
+      "type": "min_length",
+      "field": "content",
+      "value": 10
+    },
+    {
+      "type": "contains_any",
+      "field": "content",
+      "values": ["May 5, 2023", "Mallori"]
+    }
+  ]
+}
--- a/fixtures/contract/api_batch_bytes_with_configs_async.json
+++ b/fixtures/contract/api_batch_bytes_with_configs_async.json
@@ -0,0 +1,33 @@
+{
+  "id": "api_batch_bytes_with_configs_async",
+  "description": "Tests async batch bytes extraction with per-file configs (batch_extract_bytes with file_configs parameter)",
+  "tags": ["contract", "api", "batch", "file_config"],
+  "call": "extract_file",
+  "input": {
+    "path": "pdf/fake_memo.pdf",
+    "config": {
+      "output_format": "markdown"
+    }
+  },
+  "assertions": [
+    {
+      "type": "equals",
+      "field": "mime_type",
+      "value": "application/pdf"
+    },
+    {
+      "type": "min_length",
+      "field": "content",
+      "value": 10
+    },
+    {
+      "type": "equals",
+      "field": "metadata.output_format",
+      "value": "markdown"
+    }
+  ],
+  "skip": {
+    "languages": ["wasm"],
+    "reason": "WASM uses synchronous-only API; async extraction is not available on the wasm target"
+  }
+}
--- a/fixtures/contract/api_batch_file_async.json
+++ b/fixtures/contract/api_batch_file_async.json
@@ -0,0 +1,30 @@
+{
+  "id": "api_batch_file_async",
+  "description": "Tests async batch file extraction API (batch_extract_files)",
+  "tags": ["contract", "api", "batch"],
+  "call": "extract_file",
+  "input": {
+    "path": "pdf/fake_memo.pdf"
+  },
+  "assertions": [
+    {
+      "type": "equals",
+      "field": "mime_type",
+      "value": "application/pdf"
+    },
+    {
+      "type": "min_length",
+      "field": "content",
+      "value": 10
+    },
+    {
+      "type": "contains_any",
+      "field": "content",
+      "values": ["May 5, 2023", "Mallori"]
+    }
+  ],
+  "skip": {
+    "languages": ["wasm"],
+    "reason": "WASM uses synchronous-only API; async extraction is not available on the wasm target"
+  }
+}
--- a/fixtures/contract/api_batch_file_with_configs_async.json
+++ b/fixtures/contract/api_batch_file_with_configs_async.json
@@ -0,0 +1,33 @@
+{
+  "id": "api_batch_file_with_configs_async",
+  "description": "Tests async batch file extraction with per-file configs (batch_extract_files with file_configs parameter)",
+  "tags": ["contract", "api", "batch", "file_config"],
+  "call": "extract_file",
+  "input": {
+    "path": "pdf/fake_memo.pdf",
+    "config": {
+      "output_format": "markdown"
+    }
+  },
+  "assertions": [
+    {
+      "type": "equals",
+      "field": "mime_type",
+      "value": "application/pdf"
+    },
+    {
+      "type": "min_length",
+      "field": "content",
+      "value": 10
+    },
+    {
+      "type": "equals",
+      "field": "metadata.output_format",
+      "value": "markdown"
+    }
+  ],
+  "skip": {
+    "languages": ["wasm"],
+    "reason": "WASM uses synchronous-only API; async extraction is not available on the wasm target"
+  }
+}
--- a/fixtures/contract/api_extract_bytes_async.json
+++ b/fixtures/contract/api_extract_bytes_async.json
@@ -0,0 +1,30 @@
+{
+  "id": "api_extract_bytes_async",
+  "description": "Tests async bytes extraction API (extract_bytes)",
+  "tags": ["contract", "api"],
+  "call": "extract_file",
+  "input": {
+    "path": "pdf/fake_memo.pdf"
+  },
+  "assertions": [
+    {
+      "type": "equals",
+      "field": "mime_type",
+      "value": "application/pdf"
+    },
+    {
+      "type": "min_length",
+      "field": "content",
+      "value": 10
+    },
+    {
+      "type": "contains_any",
+      "field": "content",
+      "values": ["May 5, 2023", "Mallori"]
+    }
+  ],
+  "skip": {
+    "languages": ["wasm"],
+    "reason": "WASM uses synchronous-only API; async extraction is not available on the wasm target"
+  }
+}
--- a/fixtures/contract/api_extract_file_async.json
+++ b/fixtures/contract/api_extract_file_async.json
@@ -0,0 +1,29 @@
+{
+  "id": "api_extract_file_async",
+  "description": "Tests async file extraction API (extract_file)",
+  "tags": ["contract", "api"],
+  "input": {
+    "path": "pdf/fake_memo.pdf"
+  },
+  "assertions": [
+    {
+      "type": "equals",
+      "field": "mime_type",
+      "value": "application/pdf"
+    },
+    {
+      "type": "min_length",
+      "field": "content",
+      "value": 10
+    },
+    {
+      "type": "contains_any",
+      "field": "content",
+      "values": ["May 5, 2023", "Mallori"]
+    }
+  ],
+  "skip": {
+    "languages": ["wasm"],
+    "reason": "WASM uses synchronous-only API; async extraction is not available on the wasm target"
+  }
+}
--- a/fixtures/contract/config_chunking_prepend_heading_context.json
+++ b/fixtures/contract/config_chunking_prepend_heading_context.json
@@ -0,0 +1,52 @@
+{
+  "id": "config_chunking_prepend_heading_context",
+  "description": "Tests markdown chunker prepends heading hierarchy to chunk content",
+  "tags": [
+    "contract",
+    "config",
+    "chunking",
+    "heading-context"
+  ],
+  "call": "extract_file_sync",
+  "input": {
+    "path": "markdown/extraction_test.md",
+    "config": {
+      "chunking": {
+        "chunker_type": "markdown",
+        "max_chars": 300,
+        "max_overlap": 50,
+        "prepend_heading_context": true
+      }
+    }
+  },
+  "assertions": [
+    {
+      "type": "min_length",
+      "field": "content",
+      "value": 10
+    },
+    {
+      "type": "count_min",
+      "field": "chunks",
+      "value": 2
+    },
+    {
+      "type": "is_true",
+      "field": "chunks_have_content"
+    },
+    {
+      "type": "is_true",
+      "field": "chunks_have_heading_context"
+    },
+    {
+      "type": "is_true",
+      "field": "first_chunk_starts_with_heading"
+    }
+  ],
+  "skip": {
+    "languages": [
+      "wasm"
+    ],
+    "reason": "WASM cannot access filesystem; use extractBytes with file content instead"
+  }
+}
--- a/fixtures/contract/config_document_structure_with_headings.json
+++ b/fixtures/contract/config_document_structure_with_headings.json
@@ -0,0 +1,37 @@
+{
+  "id": "config_document_structure_with_headings",
+  "description": "Tests document structure with DOCX heading-driven nesting",
+  "tags": [
+    "contract",
+    "document_structure"
+  ],
+  "call": "extract_file_sync",
+  "input": {
+    "path": "docx/fake.docx",
+    "config": {
+      "include_document_structure": true
+    }
+  },
+  "assertions": [
+    {
+      "type": "equals",
+      "field": "mime_type",
+      "value": "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
+    },
+    {
+      "type": "not_empty",
+      "field": "document"
+    },
+    {
+      "type": "count_min",
+      "field": "document.nodes",
+      "value": 1
+    }
+  ],
+  "skip": {
+    "languages": [
+      "wasm"
+    ],
+    "reason": "WASM cannot access filesystem; use extractBytes with file content instead"
+  }
+}
--- a/fixtures/contract/config_element_types.json
+++ b/fixtures/contract/config_element_types.json
@@ -0,0 +1,36 @@
+{
+  "id": "config_element_types",
+  "description": "Tests element-based result format with element type assertions on DOCX",
+  "tags": [
+    "contract",
+    "config",
+    "result_format"
+  ],
+  "call": "extract_file_sync",
+  "input": {
+    "path": "docx/unit_test_headers.docx",
+    "config": {
+      "result_format": "element_based"
+    }
+  },
+  "assertions": [
+    {
+      "type": "contains_any",
+      "field": "mime_type",
+      "values": [
+        "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
+      ]
+    },
+    {
+      "type": "count_min",
+      "field": "elements",
+      "value": 1
+    }
+  ],
+  "skip": {
+    "languages": [
+      "wasm"
+    ],
+    "reason": "WASM cannot access filesystem; use extractBytes with file content instead"
+  }
+}
--- a/fixtures/contract/config_embedding_plugin.json
+++ b/fixtures/contract/config_embedding_plugin.json
@@ -0,0 +1,32 @@
+{
+  "id": "config_embedding_plugin",
+  "description": "Tests EmbeddingModelType::Plugin variant deserialization in ChunkingConfig — config accepts the plugin variant shape; actual dispatch requires a host-language backend registered via register_embedding_backend at runtime",
+  "tags": ["contract", "config", "embeddings", "plugin"],
+  "call": "extract_file_sync",
+  "input": {
+    "path": "pdf/fake_memo.pdf",
+    "config": {
+      "chunking": {
+        "max_chars": 500,
+        "max_overlap": 50,
+        "embedding": {
+          "model": {
+            "type": "plugin",
+            "name": "test-plugin-backend"
+          },
+          "normalize": true,
+          "max_embed_duration_secs": 30
+        }
+      }
+    }
+  },
+  "assertions": [
+    {
+      "type": "not_error"
+    }
+  ],
+  "skip": {
+    "languages": ["python", "rust", "node", "go", "ruby", "elixir", "wasm", "java", "csharp", "php", "r", "dart", "kotlin_android", "swift", "zig"],
+    "reason": "EmbeddingModelType::Plugin requires a host-language backend registered via register_embedding_backend before dispatch; the e2e harness cannot register one. This fixture validates config round-trip (the {\"type\":\"plugin\",\"name\":...} shape is accepted by every binding's EmbeddingConfig)."
+  }
+}
--- a/fixtures/contract/config_extraction_timeout.json
+++ b/fixtures/contract/config_extraction_timeout.json
@@ -0,0 +1,34 @@
+{
+  "id": "config_extraction_timeout",
+  "description": "Tests that extraction_timeout_secs config field is accepted and does not affect fast extractions",
+  "tags": [
+    "contract",
+    "config",
+    "timeout"
+  ],
+  "call": "extract_file_sync",
+  "input": {
+    "path": "pdf/fake_memo.pdf",
+    "config": {
+      "extraction_timeout_secs": 300
+    }
+  },
+  "assertions": [
+    {
+      "type": "equals",
+      "field": "mime_type",
+      "value": "application/pdf"
+    },
+    {
+      "type": "min_length",
+      "field": "content",
+      "value": 10
+    }
+  ],
+  "skip": {
+    "languages": [
+      "wasm"
+    ],
+    "reason": "WASM cannot access filesystem; use extractBytes with file content instead"
+  }
+}
--- a/fixtures/contract/config_keywords.json
+++ b/fixtures/contract/config_keywords.json
@@ -0,0 +1,46 @@
+{
+  "id": "config_keywords",
+  "description": "Tests keyword extraction via YAKE algorithm",
+  "tags": [
+    "contract",
+    "config",
+    "keywords"
+  ],
+  "call": "extract_file_sync",
+  "input": {
+    "path": "pdf/fake_memo.pdf",
+    "config": {
+      "keywords": {
+        "algorithm": "yake",
+        "max_keywords": 10
+      }
+    }
+  },
+  "assertions": [
+    {
+      "type": "equals",
+      "field": "mime_type",
+      "value": "application/pdf"
+    },
+    {
+      "type": "min_length",
+      "field": "content",
+      "value": 10
+    },
+    {
+      "type": "not_empty",
+      "field": "keywords"
+    },
+    {
+      "type": "count_min",
+      "field": "keywords",
+      "value": 1
+    }
+  ],
+  "skip": {
+    "languages": [
+      "wasm"
+    ],
+    "reason": "WASM cannot access filesystem; use extractBytes with file content instead"
+  }
+}
--- a/fixtures/contract/config_llm_structured_extraction.json
+++ b/fixtures/contract/config_llm_structured_extraction.json
@@ -0,0 +1,52 @@
+{
+  "id": "config_llm_structured_extraction",
+  "description": "Tests structured extraction via liter-llm with JSON schema",
+  "tags": ["contract", "config", "liter-llm", "structured-extraction"],
+  "call": "extract_file_sync",
+  "input": {
+    "path": "pdf/fake_memo.pdf",
+    "config": {
+      "structured_extraction": {
+        "schema": {
+          "type": "object",
+          "properties": {
+            "title": {
+              "type": "string"
+            },
+            "date": {
+              "type": "string"
+            },
+            "summary": {
+              "type": "string"
+            }
+          },
+          "required": ["title"]
+        },
+        "schema_name": "memo_data",
+        "llm": {
+          "model": "openai/gpt-4o"
+        }
+      }
+    }
+  },
+  "assertions": [
+    {
+      "type": "equals",
+      "field": "mime_type",
+      "value": "application/pdf"
+    },
+    {
+      "type": "min_length",
+      "field": "content",
+      "value": 10
+    },
+    {
+      "type": "not_empty",
+      "field": "structured_output"
+    }
+  ],
+  "skip": {
+    "languages": ["python", "rust", "node", "go", "ruby", "elixir", "wasm", "java", "csharp", "php", "r", "dart", "kotlin_android", "swift", "zig"],
+    "reason": "Requires liter-llm feature and KREUZBERG_LLM_API_KEY env var; runtime-only skip"
+  }
+}
--- a/fixtures/contract/config_pages.json
+++ b/fixtures/contract/config_pages.json
@@ -0,0 +1,43 @@
+{
+  "id": "config_pages",
+  "description": "Tests page extraction and page marker configuration",
+  "tags": [
+    "contract",
+    "config"
+  ],
+  "call": "extract_file_sync",
+  "input": {
+    "path": "pdf/fake_memo.pdf",
+    "config": {
+      "pages": {
+        "extract_pages": true,
+        "insert_page_markers": true
+      }
+    }
+  },
+  "assertions": [
+    {
+      "type": "equals",
+      "field": "mime_type",
+      "value": "application/pdf"
+    },
+    {
+      "type": "min_length",
+      "field": "content",
+      "value": 10
+    },
+    {
+      "type": "contains_any",
+      "field": "content",
+      "values": [
+        "PAGE"
+      ]
+    }
+  ],
+  "skip": {
+    "languages": [
+      "wasm"
+    ],
+    "reason": "WASM cannot access filesystem; use extractBytes with file content instead"
+  }
+}
--- a/fixtures/contract/config_quality_enabled.json
+++ b/fixtures/contract/config_quality_enabled.json
@@ -0,0 +1,48 @@
+{
+  "id": "config_quality_enabled",
+  "description": "Tests quality scoring produces a score value in [0.0, 1.0]",
+  "tags": [
+    "contract",
+    "config",
+    "quality"
+  ],
+  "call": "extract_file_sync",
+  "input": {
+    "path": "pdf/fake_memo.pdf",
+    "config": {
+      "enable_quality_processing": true
+    }
+  },
+  "assertions": [
+    {
+      "type": "equals",
+      "field": "mime_type",
+      "value": "application/pdf"
+    },
+    {
+      "type": "min_length",
+      "field": "content",
+      "value": 10
+    },
+    {
+      "type": "not_empty",
+      "field": "quality_score"
+    },
+    {
+      "type": "greater_than_or_equal",
+      "field": "quality_score",
+      "value": 0.0
+    },
+    {
+      "type": "less_than_or_equal",
+      "field": "quality_score",
+      "value": 1.0
+    }
+  ],
+  "skip": {
+    "languages": [
+      "wasm"
+    ],
+    "reason": "WASM cannot access filesystem; use extractBytes with file content instead"
+  }
+}
--- a/fixtures/contract/config_security_limits.json
+++ b/fixtures/contract/config_security_limits.json
@@ -0,0 +1,41 @@
+{
+  "id": "config_security_limits",
+  "description": "Tests archive extraction with custom security limits",
+  "tags": [
+    "contract",
+    "config",
+    "security"
+  ],
+  "call": "extract_file_sync",
+  "input": {
+    "path": "archives/documents.zip",
+    "config": {
+      "security_limits": {
+        "max_archive_size": 104857600,
+        "max_compression_ratio": 50,
+        "max_files_in_archive": 100
+      }
+    }
+  },
+  "assertions": [
+    {
+      "type": "contains_any",
+      "field": "mime_type",
+      "values": [
+        "application/zip",
+        "application/x-zip-compressed"
+      ]
+    },
+    {
+      "type": "min_length",
+      "field": "content",
+      "value": 10
+    }
+  ],
+  "skip": {
+    "languages": [
+      "wasm"
+    ],
+    "reason": "WASM cannot access filesystem; use extractBytes with file content instead"
+  }
+}
--- a/fixtures/contract/config_tree_sitter.json
+++ b/fixtures/contract/config_tree_sitter.json
@@ -0,0 +1,51 @@
+{
+  "id": "config_tree_sitter",
+  "description": "Tests tree-sitter configuration round-trip",
+  "tags": [
+    "contract",
+    "config",
+    "tree-sitter"
+  ],
+  "call": "extract_file_sync",
+  "input": {
+    "path": "code/hello.py",
+    "config": {
+      "tree_sitter": {
+        "languages": [
+          "python",
+          "rust"
+        ],
+        "groups": [
+          "web"
+        ],
+        "process": {
+          "structure": true,
+          "imports": true,
+          "exports": true,
+          "comments": false,
+          "docstrings": false,
+          "symbols": false,
+          "diagnostics": false
+        }
+      }
+    }
+  },
+  "assertions": [
+    {
+      "type": "equals",
+      "field": "mime_type",
+      "value": "text/x-source-code"
+    },
+    {
+      "type": "min_length",
+      "field": "content",
+      "value": 5
+    }
+  ],
+  "skip": {
+    "languages": [
+      "wasm"
+    ],
+    "reason": "WASM cannot access filesystem; use extractBytes with file content instead"
+  }
+}
--- a/fixtures/contract/output_format_bytes_markdown.json
+++ b/fixtures/contract/output_format_bytes_markdown.json
@@ -0,0 +1,30 @@
+{
+  "id": "output_format_bytes_markdown",
+  "description": "Tests markdown output format via bytes extraction API",
+  "tags": ["contract", "output_format", "bytes"],
+  "call": "extract_bytes_sync",
+  "input": {
+    "data": "pdf/fake_memo.pdf",
+    "mime_type": "application/pdf",
+    "config": {
+      "output_format": "markdown"
+    }
+  },
+  "assertions": [
+    {
+      "type": "equals",
+      "field": "mime_type",
+      "value": "application/pdf"
+    },
+    {
+      "type": "min_length",
+      "field": "content",
+      "value": 10
+    },
+    {
+      "type": "equals",
+      "field": "metadata.output_format",
+      "value": "markdown"
+    }
+  ]
+}
--- a/fixtures/contract/output_format_markdown.json
+++ b/fixtures/contract/output_format_markdown.json
@@ -0,0 +1,38 @@
+{
+  "id": "output_format_markdown",
+  "description": "Tests Markdown output format",
+  "tags": [
+    "contract",
+    "output_format"
+  ],
+  "call": "extract_file_sync",
+  "input": {
+    "path": "pdf/fake_memo.pdf",
+    "config": {
+      "output_format": "markdown"
+    }
+  },
+  "assertions": [
+    {
+      "type": "equals",
+      "field": "mime_type",
+      "value": "application/pdf"
+    },
+    {
+      "type": "min_length",
+      "field": "content",
+      "value": 10
+    },
+    {
+      "type": "equals",
+      "field": "metadata.output_format",
+      "value": "markdown"
+    }
+  ],
+  "skip": {
+    "languages": [
+      "wasm"
+    ],
+    "reason": "WASM cannot access filesystem; use extractBytes with file content instead"
+  }
+}
--- a/fixtures/detection/detect_mime_bytes_html.json
+++ b/fixtures/detection/detect_mime_bytes_html.json
@@ -0,0 +1,11 @@
+{
+  "id": "detect_mime_bytes_html",
+  "category": "detection",
+  "description": "Detect HTML MIME from bytes",
+  "tags": ["mime_detection", "bytes"],
+  "call": "detect_mime_type_from_bytes",
+  "input": {
+    "data": "html/html.html"
+  },
+  "assertions": [{ "type": "not_error" }]
+}
--- a/fixtures/detection/detect_mime_bytes_pdf.json
+++ b/fixtures/detection/detect_mime_bytes_pdf.json
@@ -0,0 +1,15 @@
+{
+  "id": "detect_mime_bytes_pdf",
+  "category": "detection",
+  "description": "Detect PDF MIME type from bytes",
+  "tags": ["mime_detection", "bytes", "pdf"],
+  "call": "detect_mime_type_from_bytes",
+  "input": {
+    "data": "pdf/fake_memo.pdf"
+  },
+  "assertions": [
+    {
+      "type": "not_error"
+    }
+  ]
+}
--- a/fixtures/detection/detect_mime_bytes_png.json
+++ b/fixtures/detection/detect_mime_bytes_png.json
@@ -0,0 +1,15 @@
+{
+  "id": "detect_mime_bytes_png",
+  "category": "detection",
+  "description": "Detect PNG MIME type from bytes",
+  "tags": ["mime_detection", "bytes", "png"],
+  "call": "detect_mime_type_from_bytes",
+  "input": {
+    "data": "images/test_hello_world.png"
+  },
+  "assertions": [
+    {
+      "type": "not_error"
+    }
+  ]
+}
--- a/fixtures/detection/get_extensions_unknown_mime.json
+++ b/fixtures/detection/get_extensions_unknown_mime.json
@@ -0,0 +1,11 @@
+{
+  "id": "get_extensions_unknown_mime",
+  "category": "detection",
+  "description": "get_extensions unknown MIME",
+  "tags": ["mime_detection", "error"],
+  "call": "get_extensions_for_mime",
+  "input": {
+    "mime_type": "application/x-totally-unknown"
+  },
+  "assertions": [{ "type": "error" }]
+}
--- a/fixtures/embed_async_pending/embed_texts_async_empty_input.json
+++ b/fixtures/embed_async_pending/embed_texts_async_empty_input.json
@@ -0,0 +1,19 @@
+{
+  "id": "embed_texts_async_empty_input",
+  "category": "embed_async_pending",
+  "description": "embed_texts_async: empty text list",
+  "call": "embed_texts_async",
+  "input": {
+    "texts": []
+  },
+  "assertions": [
+    {
+      "type": "not_error"
+    },
+    {
+      "type": "count_equals",
+      "field": "embeddings",
+      "value": 0
+    }
+  ]
+}
--- a/fixtures/embed_async_pending/embed_texts_async_happy.json
+++ b/fixtures/embed_async_pending/embed_texts_async_happy.json
@@ -0,0 +1,22 @@
+{
+  "id": "embed_texts_async_happy",
+  "category": "embed_async_pending",
+  "description": "embed_texts_async: basic async embedding",
+  "call": "embed_texts_async",
+  "input": {
+    "texts": [
+      "First",
+      "Second"
+    ]
+  },
+  "assertions": [
+    {
+      "type": "not_error"
+    },
+    {
+      "type": "count_min",
+      "field": "embeddings",
+      "value": 2
+    }
+  ]
+}
--- a/fixtures/embed_async_pending/embed_texts_async_preset_switch.json
+++ b/fixtures/embed_async_pending/embed_texts_async_preset_switch.json
@@ -0,0 +1,22 @@
+{
+  "id": "embed_texts_async_preset_switch",
+  "category": "embed_async_pending",
+  "description": "embed_texts_async: preset override",
+  "call": "embed_texts_async",
+  "input": {
+    "texts": [
+      "Text"
+    ],
+    "config": {
+      "model": {
+        "type": "preset",
+        "name": "balanced"
+      }
+    }
+  },
+  "assertions": [
+    {
+      "type": "not_error"
+    }
+  ]
+}
--- a/fixtures/embed_extra/embed_texts_batch.json
+++ b/fixtures/embed_extra/embed_texts_batch.json
@@ -0,0 +1,21 @@
+{
+  "id": "embed_texts_batch",
+  "category": "embed_extra",
+  "description": "Batch embed texts",
+  "tags": ["embedding", "batch"],
+  "call": "embed_texts",
+  "input": {
+    "texts": ["Hello", "World"],
+    "config": {
+      "model": {
+        "type": "preset",
+        "name": "balanced"
+      }
+    }
+  },
+  "assertions": [{ "type": "not_error" }],
+  "skip": {
+    "languages": ["wasm"],
+    "reason": "embeddings feature depends on ONNX Runtime which is not available on the WASM target"
+  }
+}
--- a/fixtures/embeddings/embed_texts_different_preset.json
+++ b/fixtures/embeddings/embed_texts_different_preset.json
@@ -0,0 +1,28 @@
+{
+  "id": "embed_texts_different_preset",
+  "category": "embeddings",
+  "description": "embed_texts: multilingual preset",
+  "call": "embed_texts",
+  "input": {
+    "texts": ["Hello world", "Test"],
+    "config": {
+      "model": {
+        "type": "preset",
+        "name": "multilingual"
+      }
+    }
+  },
+  "assertions": [
+    {
+      "type": "not_error"
+    },
+    {
+      "type": "count_min",
+      "field": "embeddings",
+      "value": 2
+    }
+  ],
+  "skip": {
+    "languages": ["wasm"]
+  }
+}
--- a/fixtures/embeddings/get_embedding_preset_known.json
+++ b/fixtures/embeddings/get_embedding_preset_known.json
@@ -0,0 +1,17 @@
+{
+  "id": "get_embedding_preset_known",
+  "category": "embeddings",
+  "description": "get_embedding_preset: known preset",
+  "call": "get_embedding_preset",
+  "input": {
+    "preset_name": "balanced"
+  },
+  "assertions": [
+    {
+      "type": "not_error"
+    }
+  ],
+  "skip": {
+    "languages": ["wasm"]
+  }
+}
--- a/fixtures/embeddings/get_embedding_preset_nominal.json
+++ b/fixtures/embeddings/get_embedding_preset_nominal.json
@@ -0,0 +1,17 @@
+{
+  "id": "get_embedding_preset_nominal",
+  "category": "embeddings",
+  "description": "get_embedding_preset: nominal case",
+  "call": "get_embedding_preset",
+  "input": {
+    "preset_name": "balanced"
+  },
+  "assertions": [
+    {
+      "type": "not_error"
+    }
+  ],
+  "skip": {
+    "languages": ["wasm"]
+  }
+}
--- a/fixtures/embeddings/get_embedding_preset_unknown.json
+++ b/fixtures/embeddings/get_embedding_preset_unknown.json
@@ -0,0 +1,17 @@
+{
+  "id": "get_embedding_preset_unknown",
+  "category": "embeddings",
+  "description": "get_embedding_preset: unknown preset returns an empty result",
+  "call": "get_embedding_preset",
+  "input": {
+    "preset_name": "nonexistent-xyz"
+  },
+  "assertions": [
+    {
+      "type": "is_empty"
+    }
+  ],
+  "skip": {
+    "languages": ["wasm"]
+  }
+}
--- a/fixtures/embeddings/list_embedding_presets_sanity.json
+++ b/fixtures/embeddings/list_embedding_presets_sanity.json
@@ -0,0 +1,15 @@
+{
+  "id": "list_embedding_presets_sanity",
+  "category": "embeddings",
+  "description": "list_embedding_presets: returns at least one",
+  "call": "list_embedding_presets",
+  "input": {},
+  "assertions": [
+    {
+      "type": "not_empty"
+    }
+  ],
+  "skip": {
+    "languages": ["wasm"]
+  }
+}
--- a/fixtures/error/error_empty_bytes.json
+++ b/fixtures/error/error_empty_bytes.json
@@ -0,0 +1,17 @@
+{
+  "id": "error_empty_bytes",
+  "category": "error",
+  "description": "Graceful handling of empty bytes (should not error)",
+  "tags": ["error", "input", "edge-case"],
+  "call": "extract_bytes_sync",
+  "input": {
+    "data": "text/empty.txt",
+    "mime_type": "text/plain",
+    "config": {}
+  },
+  "assertions": [
+    {
+      "type": "not_error"
+    }
+  ]
+}
--- a/fixtures/error/error_empty_mime.json
+++ b/fixtures/error/error_empty_mime.json
@@ -0,0 +1,17 @@
+{
+  "id": "error_empty_mime",
+  "category": "error",
+  "description": "Error when extracting with empty MIME type",
+  "tags": ["error", "input", "mime"],
+  "call": "extract_bytes_sync",
+  "input": {
+    "data": "text/plain.txt",
+    "mime_type": "",
+    "config": {}
+  },
+  "assertions": [
+    {
+      "type": "error"
+    }
+  ]
+}
--- a/fixtures/error/error_extract_bytes_conflicting_ocr.json
+++ b/fixtures/error/error_extract_bytes_conflicting_ocr.json
@@ -0,0 +1,13 @@
+{
+  "id": "error_extract_bytes_conflicting_ocr",
+  "category": "error",
+  "description": "extract_bytes force+disable OCR",
+  "tags": ["error", "validation"],
+  "call": "extract_bytes_sync",
+  "input": {
+    "data": "text/fake_text.txt",
+    "mime_type": "text/plain",
+    "config": { "force_ocr": true, "disable_ocr": true }
+  },
+  "assertions": [{ "type": "error" }]
+}
--- a/fixtures/error/error_invalid_mime_format.json
+++ b/fixtures/error/error_invalid_mime_format.json
@@ -0,0 +1,17 @@
+{
+  "id": "error_invalid_mime_format",
+  "category": "error",
+  "description": "Error when extracting with invalid MIME type format",
+  "tags": ["error", "input", "mime"],
+  "call": "extract_bytes_sync",
+  "input": {
+    "data": "text/plain.txt",
+    "mime_type": "not-a-mime",
+    "config": {}
+  },
+  "assertions": [
+    {
+      "type": "error"
+    }
+  ]
+}
--- a/fixtures/error/error_unsupported_mime.json
+++ b/fixtures/error/error_unsupported_mime.json
@@ -0,0 +1,17 @@
+{
+  "id": "error_unsupported_mime",
+  "category": "error",
+  "description": "Error when extracting with unsupported MIME type",
+  "tags": ["error", "input", "mime"],
+  "call": "extract_bytes_sync",
+  "input": {
+    "data": "text/plain.txt",
+    "mime_type": "application/x-nonexistent",
+    "config": {}
+  },
+  "assertions": [
+    {
+      "type": "error"
+    }
+  ]
+}
--- a/fixtures/format_specific/format_docx_standalone.json
+++ b/fixtures/format_specific/format_docx_standalone.json
@@ -0,0 +1,21 @@
+{
+  "id": "format_docx_standalone",
+  "category": "format_specific",
+  "description": "Standalone DOCX extraction using extract_bytes_sync",
+  "tags": ["format_specific", "docx", "text_extraction"],
+  "call": "extract_bytes_sync",
+  "input": {
+    "data": "docx/fake.docx",
+    "mime_type": "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
+  },
+  "assertions": [
+    {
+      "type": "not_error"
+    },
+    {
+      "type": "min_length",
+      "field": "content",
+      "value": 20
+    }
+  ]
+}
--- a/fixtures/format_specific/format_hwpx_standalone.json
+++ b/fixtures/format_specific/format_hwpx_standalone.json
@@ -0,0 +1,26 @@
+{
+  "id": "format_hwpx_standalone",
+  "category": "format_specific",
+  "description": "Standalone HWPX extraction using extract_bytes_sync",
+  "tags": ["format_specific", "hwpx", "text_extraction"],
+  "call": "extract_bytes_sync",
+  "input": {
+    "data": "hwpx/simple.hwpx",
+    "mime_type": "application/haansofthwpx"
+  },
+  "assertions": [
+    {
+      "type": "not_error"
+    },
+    {
+      "type": "min_length",
+      "field": "content",
+      "value": 20
+    },
+    {
+      "type": "contains",
+      "field": "content",
+      "value": "Hello from HWPX"
+    }
+  ]
+}
--- a/fixtures/format_specific/format_pdf_text.json
+++ b/fixtures/format_specific/format_pdf_text.json
@@ -0,0 +1,26 @@
+{
+  "id": "format_pdf_text",
+  "category": "format_specific",
+  "description": "Standalone PDF text extraction using extract_bytes_sync",
+  "tags": ["format_specific", "pdf", "text_extraction"],
+  "call": "extract_bytes_sync",
+  "input": {
+    "data": "pdf/fake_memo.pdf",
+    "mime_type": "application/pdf"
+  },
+  "assertions": [
+    {
+      "type": "not_error"
+    },
+    {
+      "type": "min_length",
+      "field": "content",
+      "value": 50
+    },
+    {
+      "type": "contains_any",
+      "field": "content",
+      "values": ["Mallori", "May"]
+    }
+  ]
+}
--- a/fixtures/format_specific/format_pptx.json
+++ b/fixtures/format_specific/format_pptx.json
@@ -0,0 +1,26 @@
+{
+  "id": "format_pptx",
+  "category": "format_specific",
+  "description": "PPTX presentation extraction using extract_file_sync",
+  "tags": [
+    "format_specific",
+    "pptx",
+    "text_extraction"
+  ],
+  "call": "extract_file_sync",
+  "input": {
+    "path": "pptx/simple.pptx",
+    "mime_type": "application/vnd.openxmlformats-officedocument.presentationml.presentation"
+  },
+  "assertions": [
+    {
+      "type": "not_error"
+    }
+  ],
+  "skip": {
+    "languages": [
+      "wasm"
+    ],
+    "reason": "WASM cannot access filesystem; use extractBytes with file content instead"
+  }
+}
--- a/fixtures/format_specific/format_xlsx.json
+++ b/fixtures/format_specific/format_xlsx.json
@@ -0,0 +1,26 @@
+{
+  "id": "format_xlsx",
+  "category": "format_specific",
+  "description": "XLSX spreadsheet extraction using extract_file_sync",
+  "tags": [
+    "format_specific",
+    "xlsx",
+    "text_extraction"
+  ],
+  "call": "extract_file_sync",
+  "input": {
+    "path": "xlsx/stanley_cups.xlsx",
+    "mime_type": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
+  },
+  "assertions": [
+    {
+      "type": "not_error"
+    }
+  ],
+  "skip": {
+    "languages": [
+      "wasm"
+    ],
+    "reason": "WASM cannot access filesystem; use extractBytes with file content instead"
+  }
+}
--- a/fixtures/images/test_hello_world.png
+++ b/fixtures/images/test_hello_world.png
--- a/fixtures/pdf/render_pdf_page_first.json
+++ b/fixtures/pdf/render_pdf_page_first.json
@@ -0,0 +1,21 @@
+{
+  "id": "render_pdf_page_first",
+  "category": "pdf",
+  "description": "render_pdf_page_to_png: first page",
+  "call": "render_pdf_page_to_png",
+  "input": {
+    "pdf_bytes": "pdf/fake_memo.pdf",
+    "page_index": 0,
+    "dpi": null,
+    "password": null
+  },
+  "assertions": [
+    {
+      "type": "not_error"
+    },
+    {
+      "type": "min_length",
+      "value": 100
+    }
+  ]
+}
--- a/fixtures/pdf/render_pdf_page_missing_file.json
+++ b/fixtures/pdf/render_pdf_page_missing_file.json
@@ -0,0 +1,36 @@
+{
+  "id": "render_pdf_page_missing_file",
+  "category": "pdf",
+  "description": "render_pdf_page_to_png: missing file",
+  "call": "render_pdf_page_to_png",
+  "skip": {
+    "languages": [
+      "python",
+      "node",
+      "ruby",
+      "php",
+      "ffi",
+      "go",
+      "java",
+      "csharp",
+      "elixir",
+      "wasm",
+      "r",
+      "dart",
+      "kotlin_android",
+      "swift",
+      "zig",
+      "rust"
+    ],
+    "reason": "render_pdf_page_to_png takes pre-loaded pdf_bytes; the harness materializes file contents at generation time, so a runtime missing-file error path is not expressible via this fixture shape"
+  },
+  "input": {
+    "pdf_path": "nonexistent/file.pdf",
+    "page_index": 0
+  },
+  "assertions": [
+    {
+      "type": "error"
+    }
+  ]
+}
--- a/fixtures/pdf/render_pdf_page_out_of_range.json
+++ b/fixtures/pdf/render_pdf_page_out_of_range.json
@@ -0,0 +1,17 @@
+{
+  "id": "render_pdf_page_out_of_range",
+  "category": "pdf",
+  "description": "render_pdf_page_to_png: page out of range",
+  "call": "render_pdf_page_to_png",
+  "input": {
+    "pdf_bytes": "pdf/fake_memo.pdf",
+    "page_index": 999,
+    "dpi": null,
+    "password": null
+  },
+  "assertions": [
+    {
+      "type": "error"
+    }
+  ]
+}
--- a/fixtures/plugin_api/README.md
+++ b/fixtures/plugin_api/README.md
@@ -0,0 +1,279 @@
+# Plugin API Test Fixtures
+
+This directory contains fixtures for generating E2E tests for plugin/config/utility APIs across all language bindings.
+
+## Purpose
+
+Unlike document extraction fixtures (in the parent `fixtures/` directory), these fixtures test:
+
+- Plugin management APIs (validators, post-processors, OCR backends, document extractors)
+- Configuration loading APIs (`from_file`, `discover`)
+- MIME utility APIs (`detect_mime_type`, `get_extensions_for_mime`, etc.)
+
+## Schema
+
+See `schema.json` for the complete JSON schema definition.
+
+## Fixture Structure
+
+Each fixture is a JSON file defining:
+
+- **id**: Unique identifier (e.g., `validators_list`)
+- **api_category**: Category of API (`validator_management`, `configuration`, `mime_utilities`, etc.)
+- **api_function**: Function name being tested (snake_case format)
+- **test_spec**: Test specification including:
+  - **pattern**: Test pattern type (see patterns below)
+  - **setup**: Optional setup steps (temp files, directories, etc.)
+  - **function_call**: Function to call with arguments
+  - **assertions**: Expected behavior and values
+  - **teardown**: Optional cleanup steps
+
+## Test Patterns
+
+### 1. `simple_list`
+
+Lists items from a registry. No setup required.
+
+**Example**: `validators_list.json`
+
+```json
+{
+  "pattern": "simple_list",
+  "function_call": { "name": "list_validators", "args": [] },
+  "assertions": { "return_type": "list", "list_item_type": "string" }
+}
+```
+
+### 2. `clear_registry`
+
+Clears a registry and verifies it's empty.
+
+**Example**: `validators_clear.json`
+
+```json
+{
+  "pattern": "clear_registry",
+  "function_call": { "name": "clear_validators", "args": [] },
+  "assertions": { "return_type": "void", "verify_cleanup": true }
+}
+```
+
+### 3. `graceful_unregister`
+
+Attempts to unregister a nonexistent item without error.
+
+**Example**: `ocr_backends_unregister.json`
+
+```json
+{
+  "pattern": "graceful_unregister",
+  "function_call": { "name": "unregister_ocr_backend", "args": ["nonexistent-backend-xyz"] },
+  "assertions": { "does_not_throw": true }
+}
+```
+
+### 4. `config_from_file`
+
+Creates a temp TOML file, loads config, verifies properties.
+
+**Example**: `config_from_file.json`
+
+```json
+{
+  "pattern": "config_from_file",
+  "setup": {
+    "create_temp_file": true,
+    "temp_file_name": "test_config.toml",
+    "temp_file_content": "[chunking]\nmax_chars = 100\n"
+  },
+  "function_call": {
+    "name": "from_file",
+    "is_method": true,
+    "class_name": "ExtractionConfig",
+    "args": ["${temp_file_path}"]
+  },
+  "assertions": {
+    "object_properties": [{ "path": "chunking.max_chars", "value": 100 }]
+  }
+}
+```
+
+### 5. `config_discover`
+
+Creates config in parent dir, changes to subdirectory, discovers config.
+
+**Example**: `config_discover.json`
+
+- Creates `kreuzberg.toml` in temp dir
+- Creates subdirectory and changes to it
+- Calls `ExtractionConfig.discover()`
+- Verifies config was found from parent
+
+### 6. `mime_from_bytes`
+
+Detects MIME type from byte content.
+
+**Example**: `mime_detect_bytes.json`
+
+```json
+{
+  "pattern": "mime_from_bytes",
+  "setup": { "test_data": "%PDF-1.4\n" },
+  "function_call": { "name": "detect_mime_type", "args": ["${test_data_bytes}"] },
+  "assertions": { "string_contains": "pdf" }
+}
+```
+
+### 7. `mime_from_path`
+
+Creates temp file, detects MIME from path.
+
+**Example**: `mime_detect_path.json`
+
+### 8. `mime_extension_lookup`
+
+Queries extensions for a MIME type.
+
+**Example**: `mime_get_extensions.json`
+
+## Variable Substitution
+
+Fixtures can use variables in `args`:
+
+- `${temp_file_path}` - Path to created temp file
+- `${temp_dir_path}` - Path to created temp directory
+- `${test_data_bytes}` - Byte data from `setup.test_data`
+
+## Language-Specific Handling
+
+The generator translates fixtures to language-specific code:
+
+### Function Names
+
+- Fixture: `list_validators` (snake_case)
+- Python: `list_validators()`
+- TypeScript: `listValidators()`
+- Ruby: `list_validators`
+- Java: `listValidators()`
+- Go: `ListValidators()`
+
+### Class Methods
+
+- Fixture: `ExtractionConfig.from_file`
+- Python: `ExtractionConfig.from_file()`
+- TypeScript: `ExtractionConfig.fromFile()`
+- Ruby: `Config::Extraction.from_file`
+- Java: `ExtractionConfig.fromFile()`
+- Go: `ConfigFromFile()`
+
+### Temp File Handling
+
+- Python: `tmp_path` fixture (pytest)
+- TypeScript: `fs.mkdtempSync()` + `fs.rmSync()`
+- Ruby: `Dir.mktmpdir { }` block
+- Java: `@TempDir` annotation
+- Go: `t.TempDir()`
+
+### Assertions
+
+- Python: `assert` statements
+- TypeScript: `expect().toBe()` (Vitest)
+- Ruby: `expect().to` (RSpec)
+- Java: `assertEquals()` (JUnit)
+- Go: `if err != nil` checks
+
+## Special Cases
+
+### Go Lazy Initialization
+
+Document extractors in Go are lazily initialized. The fixture `extractors_list.json` includes:
+
+```json
+{
+  "setup": {
+    "lazy_init_required": {
+      "languages": ["go"],
+      "init_action": "extract_file_sync",
+      "init_data": {
+        "create_temp_file": true,
+        "temp_file_name": "test.pdf",
+        "temp_file_content": "%PDF-1.4\n%EOF\n"
+      }
+    }
+  }
+}
+```
+
+The generator will produce Go-specific setup code to extract a PDF before listing extractors.
+
+## Fixture Inventory
+
+### Validator Management (2 fixtures)
+
+- `validators_list.json` - List all validators
+- `validators_clear.json` - Clear validators
+
+### Post-Processor Management (2 fixtures)
+
+- `post_processors_list.json` - List all post-processors
+- `post_processors_clear.json` - Clear post-processors
+
+### OCR Backend Management (3 fixtures)
+
+- `ocr_backends_list.json` - List all OCR backends
+- `ocr_backends_unregister.json` - Unregister nonexistent backend
+- `ocr_backends_clear.json` - Clear OCR backends
+
+### Document Extractor Management (3 fixtures)
+
+- `extractors_list.json` - List all extractors (with Go lazy init)
+- `extractors_unregister.json` - Unregister nonexistent extractor
+- `document_extractors_clear.json` - Clear extractors
+
+### Configuration APIs (2 fixtures)
+
+- `config_from_file.json` - Load config from TOML file
+- `config_discover.json` - Discover config from directory tree
+
+### MIME Utilities (3 fixtures)
+
+- `mime_detect_bytes.json` - Detect MIME from bytes
+- `mime_detect_path.json` - Detect MIME from file path
+- `mime_get_extensions.json` - Get extensions for MIME type
+
+**Total**: 15 fixtures → 75 generated tests (15 per language × 5 languages)
+
+## Regenerating Tests
+
+After modifying fixtures, regenerate tests:
+
+```bash
+# Regenerate for all languages
+cargo run -p kreuzberg-e2e-generator -- generate --lang python
+cargo run -p kreuzberg-e2e-generator -- generate --lang typescript
+cargo run -p kreuzberg-e2e-generator -- generate --lang ruby
+cargo run -p kreuzberg-e2e-generator -- generate --lang java
+cargo run -p kreuzberg-e2e-generator -- generate --lang go
+```
+
+Or use the task runner:
+
+```bash
+task e2e:generate
+```
+
+## Adding New Fixtures
+
+1. Create JSON file following `schema.json`
+2. Choose appropriate test pattern
+3. Define setup/teardown if needed
+4. Specify assertions
+5. Regenerate tests
+6. Verify tests compile and pass
+
+## Notes
+
+- **DO NOT** write E2E tests by hand
+- **ALL** E2E tests must be generated from fixtures
+- This is non-negotiable architecture
+- Hand-written tests will be rejected by CI
--- a/fixtures/plugin_api/document_extractors_clear.json
+++ b/fixtures/plugin_api/document_extractors_clear.json
@@ -0,0 +1,17 @@
+{
+  "id": "document_extractors_clear",
+  "category": "document_extractor_management",
+  "description": "Clear all document extractors and verify list is empty",
+  "tags": [
+    "document_extractor",
+    "plugin_management",
+    "clear",
+    "trait-bridge"
+  ],
+  "call": "clear_document_extractors",
+  "assertions": [
+    {
+      "type": "not_error"
+    }
+  ]
+}
--- a/fixtures/plugin_api/embedding_backends_clear.json
+++ b/fixtures/plugin_api/embedding_backends_clear.json
@@ -0,0 +1,17 @@
+{
+  "id": "embedding_backends_clear",
+  "category": "embedding_backend_management",
+  "description": "Clear all embedding backends and verify list is empty",
+  "tags": [
+    "embedding",
+    "plugin_management",
+    "clear",
+    "trait-bridge"
+  ],
+  "call": "clear_embedding_backends",
+  "assertions": [
+    {
+      "type": "not_error"
+    }
+  ]
+}
--- a/fixtures/plugin_api/embedding_backends_list.json
+++ b/fixtures/plugin_api/embedding_backends_list.json
@@ -0,0 +1,12 @@
+{
+  "id": "embedding_backends_list",
+  "category": "embedding_backend_management",
+  "description": "List all registered embedding backends",
+  "tags": ["embedding", "plugin_management", "list"],
+  "call": "list_embedding_backends",
+  "assertions": [
+    {
+      "type": "not_error"
+    }
+  ]
+}
--- a/fixtures/plugin_api/extractors_list.json
+++ b/fixtures/plugin_api/extractors_list.json
@@ -0,0 +1,25 @@
+{
+  "id": "extractors_list",
+  "category": "document_extractor_management",
+  "description": "List all registered document extractors",
+  "tags": ["extractors", "plugin_management", "list"],
+  "call": "list_document_extractors",
+  "input": {
+    "setup": {
+      "lazy_init_required": {
+        "languages": ["go"],
+        "init_action": "extract_file_sync",
+        "init_data": {
+          "create_temp_file": true,
+          "temp_file_name": "test.pdf",
+          "temp_file_content": "%PDF-1.4\n%EOF\n"
+        }
+      }
+    }
+  },
+  "assertions": [
+    {
+      "type": "not_error"
+    }
+  ]
+}
--- a/fixtures/plugin_api/mime_detect_bytes.json
+++ b/fixtures/plugin_api/mime_detect_bytes.json
@@ -0,0 +1,17 @@
+{
+  "id": "mime_detect_bytes",
+  "category": "mime_utilities",
+  "description": "Detect MIME type from file bytes",
+  "tags": ["mime", "detection", "bytes"],
+  "call": "detect_mime_type_from_bytes",
+  "input": {
+    "data": "pdf/fake_memo.pdf"
+  },
+  "assertions": [
+    {
+      "type": "contains",
+      "field": "result",
+      "value": "pdf"
+    }
+  ]
+}
--- a/fixtures/plugin_api/mime_detect_image.json
+++ b/fixtures/plugin_api/mime_detect_image.json
@@ -0,0 +1,17 @@
+{
+  "id": "mime_detect_image",
+  "category": "mime_utilities",
+  "description": "Detect MIME type from PNG image bytes",
+  "tags": ["mime", "detection", "image", "bytes"],
+  "call": "detect_mime_type_from_bytes",
+  "input": {
+    "data": "images/test_hello_world.png"
+  },
+  "assertions": [
+    {
+      "type": "contains",
+      "field": "result",
+      "value": "png"
+    }
+  ]
+}
--- a/fixtures/plugin_api/mime_get_extensions.json
+++ b/fixtures/plugin_api/mime_get_extensions.json
@@ -0,0 +1,17 @@
+{
+  "id": "mime_get_extensions",
+  "category": "mime_utilities",
+  "description": "Get file extensions for a MIME type",
+  "tags": ["mime", "extensions", "lookup"],
+  "call": "get_extensions_for_mime",
+  "input": {
+    "mime_type": "application/pdf"
+  },
+  "assertions": [
+    {
+      "type": "contains",
+      "field": "result",
+      "value": "pdf"
+    }
+  ]
+}
--- a/fixtures/plugin_api/ocr_backends_clear.json
+++ b/fixtures/plugin_api/ocr_backends_clear.json
@@ -0,0 +1,17 @@
+{
+  "id": "ocr_backends_clear",
+  "category": "ocr_backend_management",
+  "description": "Clear all OCR backends and verify list is empty",
+  "tags": [
+    "ocr",
+    "plugin_management",
+    "clear",
+    "trait-bridge"
+  ],
+  "call": "clear_ocr_backends",
+  "assertions": [
+    {
+      "type": "not_error"
+    }
+  ]
+}
--- a/fixtures/plugin_api/ocr_backends_list.json
+++ b/fixtures/plugin_api/ocr_backends_list.json
@@ -0,0 +1,12 @@
+{
+  "id": "ocr_backends_list",
+  "category": "ocr_backend_management",
+  "description": "List all registered OCR backends",
+  "tags": ["ocr", "plugin_management", "list"],
+  "call": "list_ocr_backends",
+  "assertions": [
+    {
+      "type": "not_error"
+    }
+  ]
+}
--- a/fixtures/plugin_api/ocr_backends_unregister.json
+++ b/fixtures/plugin_api/ocr_backends_unregister.json
@@ -0,0 +1,20 @@
+{
+  "id": "ocr_backends_unregister",
+  "category": "ocr_backend_management",
+  "description": "Unregister nonexistent OCR backend gracefully",
+  "tags": [
+    "ocr",
+    "plugin_management",
+    "unregister",
+    "trait-bridge"
+  ],
+  "call": "unregister_ocr_backend",
+  "input": {
+    "name": "nonexistent-backend-xyz"
+  },
+  "assertions": [
+    {
+      "type": "not_error"
+    }
+  ]
+}
--- a/fixtures/plugin_api/post_processors_clear.json
+++ b/fixtures/plugin_api/post_processors_clear.json
@@ -0,0 +1,17 @@
+{
+  "id": "post_processors_clear",
+  "category": "post_processor_management",
+  "description": "Clear all post-processors and verify list is empty",
+  "tags": [
+    "post_processors",
+    "plugin_management",
+    "clear",
+    "trait-bridge"
+  ],
+  "call": "clear_post_processors",
+  "assertions": [
+    {
+      "type": "not_error"
+    }
+  ]
+}
--- a/fixtures/plugin_api/post_processors_list.json
+++ b/fixtures/plugin_api/post_processors_list.json
@@ -0,0 +1,12 @@
+{
+  "id": "post_processors_list",
+  "category": "post_processor_management",
+  "description": "List all registered post-processors",
+  "tags": ["post_processors", "plugin_management", "list"],
+  "call": "list_post_processors",
+  "assertions": [
+    {
+      "type": "not_error"
+    }
+  ]
+}
--- a/fixtures/plugin_api/register_document_extractor_trait_bridge.json
+++ b/fixtures/plugin_api/register_document_extractor_trait_bridge.json
@@ -0,0 +1,28 @@
+{
+  "id": "register_document_extractor_trait_bridge",
+  "category": "plugin_api",
+  "description": "register_document_extractor: trait bridge",
+  "tags": [
+    "trait-bridge"
+  ],
+  "call": "register_document_extractor",
+  "input": {
+    "extractor": {
+      "type": "test",
+      "name": "test-extractor"
+    }
+  },
+  "args": [
+    {
+      "name": "extractor",
+      "field": "extractor",
+      "arg_type": "test_backend",
+      "trait": "DocumentExtractor"
+    }
+  ],
+  "assertions": [
+    {
+      "type": "not_error"
+    }
+  ]
+}
--- a/fixtures/plugin_api/register_embedding_backend_trait_bridge.json
+++ b/fixtures/plugin_api/register_embedding_backend_trait_bridge.json
@@ -0,0 +1,29 @@
+{
+  "id": "register_embedding_backend_trait_bridge",
+  "category": "plugin_api",
+  "description": "register_embedding_backend: trait bridge",
+  "tags": [
+    "trait-bridge"
+  ],
+  "call": "register_embedding_backend",
+  "input": {
+    "backend": {
+      "type": "test",
+      "name": "test-embedding-backend",
+      "dimensions": 768
+    }
+  },
+  "args": [
+    {
+      "name": "backend",
+      "field": "backend",
+      "arg_type": "test_backend",
+      "trait": "EmbeddingBackend"
+    }
+  ],
+  "assertions": [
+    {
+      "type": "not_error"
+    }
+  ]
+}
--- a/fixtures/plugin_api/register_ocr_backend_trait_bridge.json
+++ b/fixtures/plugin_api/register_ocr_backend_trait_bridge.json
@@ -0,0 +1,28 @@
+{
+  "id": "register_ocr_backend_trait_bridge",
+  "category": "plugin_api",
+  "description": "register_ocr_backend: trait bridge",
+  "tags": [
+    "trait-bridge"
+  ],
+  "call": "register_ocr_backend",
+  "input": {
+    "backend": {
+      "type": "test",
+      "name": "test-backend"
+    }
+  },
+  "args": [
+    {
+      "name": "backend",
+      "field": "backend",
+      "arg_type": "test_backend",
+      "trait": "OcrBackend"
+    }
+  ],
+  "assertions": [
+    {
+      "type": "not_error"
+    }
+  ]
+}
--- a/fixtures/plugin_api/register_post_processor_trait_bridge.json
+++ b/fixtures/plugin_api/register_post_processor_trait_bridge.json
@@ -0,0 +1,28 @@
+{
+  "id": "register_post_processor_trait_bridge",
+  "category": "plugin_api",
+  "description": "register_post_processor: trait bridge",
+  "tags": [
+    "trait-bridge"
+  ],
+  "call": "register_post_processor",
+  "input": {
+    "processor": {
+      "type": "test",
+      "name": "test-processor"
+    }
+  },
+  "args": [
+    {
+      "name": "processor",
+      "field": "processor",
+      "arg_type": "test_backend",
+      "trait": "PostProcessor"
+    }
+  ],
+  "assertions": [
+    {
+      "type": "not_error"
+    }
+  ]
+}
--- a/fixtures/plugin_api/register_renderer_trait_bridge.json
+++ b/fixtures/plugin_api/register_renderer_trait_bridge.json
@@ -0,0 +1,28 @@
+{
+  "id": "register_renderer_trait_bridge",
+  "category": "plugin_api",
+  "description": "register_renderer: trait bridge",
+  "tags": [
+    "trait-bridge"
+  ],
+  "call": "register_renderer",
+  "input": {
+    "renderer": {
+      "type": "test",
+      "name": "test-renderer"
+    }
+  },
+  "args": [
+    {
+      "name": "renderer",
+      "field": "renderer",
+      "arg_type": "test_backend",
+      "trait": "Renderer"
+    }
+  ],
+  "assertions": [
+    {
+      "type": "not_error"
+    }
+  ]
+}
--- a/fixtures/plugin_api/register_validator_trait_bridge.json
+++ b/fixtures/plugin_api/register_validator_trait_bridge.json
@@ -0,0 +1,28 @@
+{
+  "id": "register_validator_trait_bridge",
+  "category": "plugin_api",
+  "description": "register_validator: trait bridge",
+  "tags": [
+    "trait-bridge"
+  ],
+  "call": "register_validator",
+  "input": {
+    "validator": {
+      "type": "test",
+      "name": "test-validator"
+    }
+  },
+  "args": [
+    {
+      "name": "validator",
+      "field": "validator",
+      "arg_type": "test_backend",
+      "trait": "Validator"
+    }
+  ],
+  "assertions": [
+    {
+      "type": "not_error"
+    }
+  ]
+}
--- a/fixtures/plugin_api/renderers_clear.json
+++ b/fixtures/plugin_api/renderers_clear.json
@@ -0,0 +1,17 @@
+{
+  "id": "renderers_clear",
+  "category": "renderer_management",
+  "description": "Clear all renderers and verify list is empty",
+  "tags": [
+    "renderer",
+    "plugin_management",
+    "clear",
+    "trait-bridge"
+  ],
+  "call": "clear_renderers",
+  "assertions": [
+    {
+      "type": "not_error"
+    }
+  ]
+}
--- a/fixtures/plugin_api/renderers_list.json
+++ b/fixtures/plugin_api/renderers_list.json
@@ -0,0 +1,12 @@
+{
+  "id": "renderers_list",
+  "category": "renderer_management",
+  "description": "List all registered renderers",
+  "tags": ["renderer", "plugin_management", "list"],
+  "call": "list_renderers",
+  "assertions": [
+    {
+      "type": "not_error"
+    }
+  ]
+}
--- a/fixtures/plugin_api/unregister_document_extractor_after_register.json
+++ b/fixtures/plugin_api/unregister_document_extractor_after_register.json
@@ -0,0 +1,18 @@
+{
+  "id": "unregister_document_extractor_after_register",
+  "category": "plugin_api",
+  "description": "unregister_document_extractor",
+  "call": "unregister_document_extractor",
+  "input": {
+    "name": "test-extractor"
+  },
+  "assertions": [
+    {
+      "type": "not_error"
+    }
+  ],
+  "tags": [
+    "plugin-lifecycle",
+    "trait-bridge"
+  ]
+}
--- a/fixtures/plugin_api/unregister_embedding_backend_after_register.json
+++ b/fixtures/plugin_api/unregister_embedding_backend_after_register.json
@@ -0,0 +1,18 @@
+{
+  "id": "unregister_embedding_backend_after_register",
+  "category": "plugin_api",
+  "description": "unregister_embedding_backend: unregister by name after registration",
+  "call": "unregister_embedding_backend",
+  "input": {
+    "name": "test-embedding-backend"
+  },
+  "assertions": [
+    {
+      "type": "not_error"
+    }
+  ],
+  "tags": [
+    "plugin-lifecycle",
+    "trait-bridge"
+  ]
+}
--- a/fixtures/plugin_api/unregister_post_processor_after_register.json
+++ b/fixtures/plugin_api/unregister_post_processor_after_register.json
@@ -0,0 +1,18 @@
+{
+  "id": "unregister_post_processor_after_register",
+  "category": "plugin_api",
+  "description": "unregister_post_processor: unregister by name after registration",
+  "call": "unregister_post_processor",
+  "input": {
+    "name": "test-processor"
+  },
+  "assertions": [
+    {
+      "type": "not_error"
+    }
+  ],
+  "tags": [
+    "plugin-lifecycle",
+    "trait-bridge"
+  ]
+}
--- a/fixtures/plugin_api/unregister_renderer_after_register.json
+++ b/fixtures/plugin_api/unregister_renderer_after_register.json
@@ -0,0 +1,18 @@
+{
+  "id": "unregister_renderer_after_register",
+  "category": "plugin_api",
+  "description": "unregister_renderer: unregister by name after registration",
+  "call": "unregister_renderer",
+  "input": {
+    "name": "test-renderer"
+  },
+  "assertions": [
+    {
+      "type": "not_error"
+    }
+  ],
+  "tags": [
+    "plugin-lifecycle",
+    "trait-bridge"
+  ]
+}
--- a/fixtures/plugin_api/unregister_validator_after_register.json
+++ b/fixtures/plugin_api/unregister_validator_after_register.json
@@ -0,0 +1,18 @@
+{
+  "id": "unregister_validator_after_register",
+  "category": "plugin_api",
+  "description": "unregister_validator: unregister by name after registration",
+  "call": "unregister_validator",
+  "input": {
+    "name": "test-validator"
+  },
+  "assertions": [
+    {
+      "type": "not_error"
+    }
+  ],
+  "tags": [
+    "plugin-lifecycle",
+    "trait-bridge"
+  ]
+}
--- a/fixtures/plugin_api/validators_clear.json
+++ b/fixtures/plugin_api/validators_clear.json
@@ -0,0 +1,17 @@
+{
+  "id": "validators_clear",
+  "category": "validator_management",
+  "description": "Clear all validators and verify the call succeeds",
+  "tags": [
+    "validators",
+    "plugin_management",
+    "clear",
+    "trait-bridge"
+  ],
+  "call": "clear_validators",
+  "assertions": [
+    {
+      "type": "not_error"
+    }
+  ]
+}
--- a/fixtures/plugin_api/validators_list.json
+++ b/fixtures/plugin_api/validators_list.json
@@ -0,0 +1,12 @@
+{
+  "id": "validators_list",
+  "category": "validator_management",
+  "description": "List all registered validators",
+  "tags": ["validators", "plugin_management", "list"],
+  "call": "list_validators",
+  "assertions": [
+    {
+      "type": "not_error"
+    }
+  ]
+}
--- a/fixtures/registry/extensions_docx.json
+++ b/fixtures/registry/extensions_docx.json
@@ -0,0 +1,15 @@
+{
+  "id": "extensions_docx",
+  "category": "registry_operations",
+  "description": "Get file extensions for DOCX MIME type",
+  "tags": ["registry", "extensions", "docx"],
+  "call": "get_extensions_for_mime",
+  "input": {
+    "mime_type": "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
+  },
+  "assertions": [
+    {
+      "type": "not_error"
+    }
+  ]
+}
--- a/fixtures/registry/extensions_html.json
+++ b/fixtures/registry/extensions_html.json
@@ -0,0 +1,15 @@
+{
+  "id": "extensions_html",
+  "category": "registry_operations",
+  "description": "Get file extensions for HTML MIME type",
+  "tags": ["registry", "extensions", "html"],
+  "call": "get_extensions_for_mime",
+  "input": {
+    "mime_type": "text/html"
+  },
+  "assertions": [
+    {
+      "type": "not_error"
+    }
+  ]
+}
--- a/fixtures/registry/extensions_pdf.json
+++ b/fixtures/registry/extensions_pdf.json
@@ -0,0 +1,15 @@
+{
+  "id": "extensions_pdf",
+  "category": "registry_operations",
+  "description": "Get file extensions for PDF MIME type",
+  "tags": ["registry", "extensions", "pdf"],
+  "call": "get_extensions_for_mime",
+  "input": {
+    "mime_type": "application/pdf"
+  },
+  "assertions": [
+    {
+      "type": "not_error"
+    }
+  ]
+}
--- a/fixtures/registry/list_document_extractors.json
+++ b/fixtures/registry/list_document_extractors.json
@@ -0,0 +1,9 @@
+{
+  "id": "list_document_extractors",
+  "category": "registry",
+  "description": "List document extractors",
+  "tags": ["registry"],
+  "call": "list_document_extractors",
+  "input": {},
+  "assertions": [{ "type": "not_error" }]
+}
--- a/fixtures/registry/list_embedding_backends.json
+++ b/fixtures/registry/list_embedding_backends.json
@@ -0,0 +1,9 @@
+{
+  "id": "list_embedding_backends",
+  "category": "registry",
+  "description": "List embedding backends",
+  "tags": ["registry"],
+  "call": "list_embedding_backends",
+  "input": {},
+  "assertions": [{ "type": "not_error" }]
+}
--- a/fixtures/registry/list_ocr_backends.json
+++ b/fixtures/registry/list_ocr_backends.json
@@ -0,0 +1,9 @@
+{
+  "id": "list_ocr_backends",
+  "category": "registry",
+  "description": "List OCR backends",
+  "tags": ["registry"],
+  "call": "list_ocr_backends",
+  "input": {},
+  "assertions": [{ "type": "not_error" }]
+}
--- a/fixtures/registry/list_post_processors.json
+++ b/fixtures/registry/list_post_processors.json
@@ -0,0 +1,9 @@
+{
+  "id": "list_post_processors",
+  "category": "registry",
+  "description": "List post-processors",
+  "tags": ["registry"],
+  "call": "list_post_processors",
+  "input": {},
+  "assertions": [{ "type": "not_error" }]
+}
--- a/fixtures/registry/list_renderers.json
+++ b/fixtures/registry/list_renderers.json
@@ -0,0 +1,9 @@
+{
+  "id": "list_renderers",
+  "category": "registry",
+  "description": "List renderers",
+  "tags": ["registry"],
+  "call": "list_renderers",
+  "input": {},
+  "assertions": [{ "type": "not_error" }]
+}
--- a/fixtures/registry/list_validators.json
+++ b/fixtures/registry/list_validators.json
@@ -0,0 +1,9 @@
+{
+  "id": "list_validators",
+  "category": "registry",
+  "description": "List validators",
+  "tags": ["registry"],
+  "call": "list_validators",
+  "input": {},
+  "assertions": [{ "type": "not_error" }]
+}
--- a/fixtures/smoke/docx_basic.json
+++ b/fixtures/smoke/docx_basic.json
@@ -0,0 +1,32 @@
+{
+  "id": "smoke_docx_basic",
+  "category": "smoke",
+  "description": "Smoke test: DOCX with formatted text",
+  "tags": ["smoke", "office", "docx"],
+  "input": {
+    "path": "docx/fake.docx",
+    "mime_type": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
+    "config": {}
+  },
+  "assertions": [
+    {
+      "type": "equals",
+      "field": "mime_type",
+      "value": "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
+    },
+    {
+      "type": "min_length",
+      "field": "content",
+      "value": 20
+    },
+    {
+      "type": "contains_any",
+      "field": "content",
+      "values": ["Lorem", "ipsum", "document", "text"]
+    }
+  ],
+  "skip": {
+    "languages": ["wasm"],
+    "reason": "WASM cannot access filesystem; use extractBytes with file content instead"
+  }
+}
--- a/fixtures/smoke/html_basic.json
+++ b/fixtures/smoke/html_basic.json
@@ -0,0 +1,32 @@
+{
+  "id": "smoke_html_basic",
+  "category": "smoke",
+  "description": "Smoke test: HTML table extraction",
+  "tags": ["smoke", "html"],
+  "input": {
+    "path": "html/simple_table.html",
+    "mime_type": "text/html",
+    "config": {}
+  },
+  "assertions": [
+    {
+      "type": "equals",
+      "field": "mime_type",
+      "value": "text/html"
+    },
+    {
+      "type": "min_length",
+      "field": "content",
+      "value": 10
+    },
+    {
+      "type": "contains_any",
+      "field": "content",
+      "values": ["Sample Data Table", "Laptop", "Electronics", "Product"]
+    }
+  ],
+  "skip": {
+    "languages": ["wasm"],
+    "reason": "WASM cannot access filesystem; use extractBytes with file content instead"
+  }
+}
--- a/Show More
+++ b/Show More