Nomad changes

2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions
--- a/packages/dart/.editorconfig
+++ b/packages/dart/.editorconfig
@@ -0,0 +1,8 @@
+[*]
+charset = utf-8
+end_of_line = lf
+insert_final_newline = true
+
+[*.dart]
+indent_style = space
+indent_size = 2
--- a/packages/dart/.gitignore
+++ b/packages/dart/.gitignore
@@ -0,0 +1,3 @@
+.dart_tool/
+build/
+pubspec.lock
--- a/packages/dart/BUILDING.md
+++ b/packages/dart/BUILDING.md
@@ -0,0 +1,37 @@
+# Building kreuzberg Dart bindings
+
+## Prerequisites
+
+Install the flutter_rust_bridge codegen tool (one-time setup):
+
+```sh
+cargo install flutter_rust_bridge_codegen
+```
+
+## Build steps
+
+1. Build the Rust binding crate:
+
+   ```sh
+   cargo build -p kreuzberg-dart
+   ```
+
+2. Run the FRB codegen to generate Dart bridge files:
+
+   ```sh
+   flutter_rust_bridge_codegen generate
+   ```
+
+   Alternatively, use alef, which runs this step automatically via the configured
+   post-build hook:
+
+   ```sh
+   alef build --lang=dart
+   ```
+
+3. Fetch Dart dependencies and run the test suite:
+
+   ```sh
+   dart pub get
+   dart test
+   ```
--- a/packages/dart/CHANGELOG.md
+++ b/packages/dart/CHANGELOG.md
@@ -0,0 +1,7 @@
+# Changelog
+
+All notable changes to this package will be documented in this file.
+
+## 5.0.0-rc.3
+
+- Initial release.
--- a/packages/dart/LICENSE
+++ b/packages/dart/LICENSE
@@ -0,0 +1,93 @@
+Elastic License 2.0 (ELv2)
+
+Copyright 2025-2026 Kreuzberg, Inc.
+
+Acceptance
+
+By using the software, you agree to all of the terms and conditions below.
+
+Copyright License
+
+The licensor grants you a non-exclusive, royalty-free, worldwide,
+non-sublicensable, non-transferable license to use, copy, distribute, make
+available, and prepare derivative works of the software, in each case subject to
+the limitations and conditions below.
+
+Limitations
+
+You may not provide the software to third parties as a hosted or managed
+service, where the service provides users with access to any substantial set of
+the features or functionality of the software.
+
+You may not move, change, disable, or circumvent the license key functionality
+in the software, and you may not remove or obscure any functionality in the
+software that is protected by the license key.
+
+You may not alter, remove, or obscure any licensing, copyright, or other notices
+of the licensor in the software. Any use of the licensor's trademarks is subject
+to applicable law.
+
+Patents
+
+The licensor grants you a license, under any patent claims the licensor can
+license, or becomes able to license, to make, have made, use, sell, offer for
+sale, import and have imported the software, in each case subject to the
+limitations and conditions in this license. This license does not cover any
+patent claims that you cause to be infringed by modifications or additions to the
+software. If you or your company make any written claim that the software
+infringes or contributes to infringement of any patent, your patent license for
+the software granted under these terms ends immediately. If your company makes
+such a claim, your patent license ends immediately for work on behalf of your
+company.
+
+Notices
+
+You must ensure that anyone who gets a copy of any part of the software from you
+also gets a copy of these terms.
+
+If you modify the software, you must include in any modified copies of the
+software prominent notices stating that you have modified the software.
+
+No Other Rights
+
+These terms do not imply any licenses other than those expressly granted in
+these terms.
+
+Termination
+
+If you use the software in violation of these terms, such use is not licensed,
+and your licenses will automatically terminate. If the licensor provides you with
+a notice of your violation, and you cease all violation of this license no later
+than 30 days after you receive that notice, your licenses will be reinstated
+retroactively. However, if you violate these terms after such reinstatement, any
+additional violation of these terms will cause your licenses to terminate
+automatically and permanently.
+
+No Liability
+
+As far as the law allows, the software comes as is, without any warranty or
+condition, and the licensor will not be liable to you for any damages arising out
+of these terms or the use or nature of the software, under any kind of legal
+claim.
+
+Definitions
+
+The licensor is the entity offering these terms, and the software is the
+software the licensor makes available under these terms, including any portion
+of it.
+
+you refers to the individual or entity agreeing to these terms.
+
+your company is any legal entity, sole proprietorship, or other kind of
+organization that you work for, plus all organizations that have control over,
+are under the control of, or are under common control with that organization.
+control means ownership of substantially all the assets of an entity, or the
+power to direct its management and policies by vote, contract, or otherwise.
+Control can be direct or indirect.
+
+your licenses are all the licenses granted to you for the software under these
+terms.
+
+use means anything you do with the software requiring one of your licenses.
+
+trademark means trademarks, service marks, and similar rights.
--- a/packages/dart/README.md
+++ b/packages/dart/README.md
@@ -0,0 +1,443 @@
+# Dart / Flutter
+
+<div align="center" style="display: flex; flex-wrap: wrap; gap: 8px; justify-content: center; margin: 20px 0;">
+  <a href="https://github.com/kreuzberg-dev/alef">
+    <img src="https://img.shields.io/badge/Bindings-alef%20%D7%90-007ec6" alt="Bindings">
+  </a>
+  <!-- Language Bindings -->
+  <a href="https://crates.io/crates/kreuzberg">
+    <img src="https://img.shields.io/crates/v/kreuzberg?label=Rust&color=007ec6" alt="Rust">
+  </a>
+  <a href="https://pypi.org/project/kreuzberg/">
+    <img src="https://img.shields.io/pypi/v/kreuzberg?label=Python&color=007ec6" alt="Python">
+  </a>
+  <a href="https://www.npmjs.com/package/@kreuzberg/node">
+    <img src="https://img.shields.io/npm/v/@kreuzberg/node?label=Node.js&color=007ec6" alt="Node.js">
+  </a>
+  <a href="https://www.npmjs.com/package/@kreuzberg/wasm">
+    <img src="https://img.shields.io/npm/v/@kreuzberg/wasm?label=WASM&color=007ec6" alt="WASM">
+  </a>
+  <a href="https://central.sonatype.com/artifact/dev.kreuzberg/kreuzberg">
+    <img src="https://img.shields.io/maven-central/v/dev.kreuzberg/kreuzberg?label=Java&color=007ec6" alt="Java">
+  </a>
+  <a href="https://github.com/kreuzberg-dev/kreuzberg/tree/main/packages/go/v5">
+    <img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v5*" alt="Go">
+  </a>
+  <a href="https://www.nuget.org/packages/Kreuzberg/">
+    <img src="https://img.shields.io/nuget/v/Kreuzberg?label=C%23&color=007ec6" alt="C#">
+  </a>
+  <a href="https://packagist.org/packages/kreuzberg/kreuzberg">
+    <img src="https://img.shields.io/packagist/v/kreuzberg/kreuzberg?label=PHP&color=007ec6" alt="PHP">
+  </a>
+  <a href="https://rubygems.org/gems/kreuzberg">
+    <img src="https://img.shields.io/gem/v/kreuzberg?label=Ruby&color=007ec6" alt="Ruby">
+  </a>
+  <a href="https://hex.pm/packages/kreuzberg">
+    <img src="https://img.shields.io/hexpm/v/kreuzberg?label=Elixir&color=007ec6" alt="Elixir">
+  </a>
+  <a href="https://kreuzberg-dev.r-universe.dev/kreuzberg">
+    <img src="https://img.shields.io/badge/R-kreuzberg-007ec6" alt="R">
+  </a>
+  <a href="https://pub.dev/packages/kreuzberg">
+    <img src="https://img.shields.io/pub/v/kreuzberg?label=Dart&color=007ec6" alt="Dart">
+  </a>
+  <a href="https://central.sonatype.com/artifact/dev.kreuzberg/kreuzberg-android">
+    <img src="https://img.shields.io/maven-central/v/dev.kreuzberg/kreuzberg-android?label=Kotlin&color=007ec6" alt="Kotlin">
+  </a>
+  <a href="https://github.com/kreuzberg-dev/kreuzberg/tree/main/packages/swift">
+    <img src="https://img.shields.io/badge/Swift-SPM-007ec6" alt="Swift">
+  </a>
+  <a href="https://github.com/kreuzberg-dev/kreuzberg/tree/main/packages/zig">
+    <img src="https://img.shields.io/badge/Zig-package-007ec6" alt="Zig">
+  </a>
+  <a href="https://github.com/kreuzberg-dev/kreuzberg/releases">
+    <img src="https://img.shields.io/badge/C-FFI-007ec6" alt="C FFI">
+  </a>
+  <a href="https://github.com/kreuzberg-dev/kreuzberg/pkgs/container/kreuzberg">
+    <img src="https://img.shields.io/badge/Docker-ghcr.io-007ec6?logo=docker&logoColor=white" alt="Docker">
+  </a>
+  <a href="https://github.com/kreuzberg-dev/kreuzberg/pkgs/container/charts%2Fkreuzberg">
+    <img src="https://img.shields.io/badge/Helm-ghcr.io-007ec6?logo=helm&logoColor=white" alt="Helm">
+  </a>
+
+  <!-- Project Info -->
+  <a href="https://github.com/kreuzberg-dev/kreuzberg/blob/main/LICENSE">
+    <img src="https://img.shields.io/badge/License-Elastic--2.0-007ec6" alt="License">
+  </a>
+  <a href="https://docs.kreuzberg.dev">
+    <img src="https://img.shields.io/badge/Docs-kreuzberg-007ec6" alt="Documentation">
+  </a>
+  <a href="https://huggingface.co/Kreuzberg">
+    <img src="https://img.shields.io/badge/Hugging%20Face-Kreuzberg-007ec6" alt="Hugging Face">
+  </a>
+</div>
+
+<div align="center" style="margin: 24px 0 0;">
+  <a href="https://kreuzberg.dev">
+    <img alt="Kreuzberg" src="https://github.com/user-attachments/assets/419fc06c-8313-4324-b159-4b4d3cfce5c0" />
+  </a>
+</div>
+
+<div align="center" style="display: flex; flex-wrap: wrap; gap: 12px; justify-content: center; margin: 28px 0 24px;">
+  <a href="https://discord.gg/xt9WY3GnKR">
+    <img height="22" src="https://img.shields.io/badge/Discord-Chat-007ec6?logo=discord&logoColor=white" alt="Join Discord">
+  </a>
+  <a href="https://docs.kreuzberg.dev/demo.html">
+    <img height="22" src="https://img.shields.io/badge/Live%20Demo-Open-007ec6?logo=webassembly&logoColor=white" alt="Live Demo">
+  </a>
+</div>
+
+Extract text, tables, images, and metadata from 90+ file formats — including PDF, Office documents, and images — and from source code in 300+ programming languages. The Dart bindings use flutter_rust_bridge and work in both Flutter apps and pure-Dart server contexts, with isolate-safe Future/Stream APIs.
+
+## What This Package Provides
+
+- **Document intelligence core** — extract text, tables, images, metadata, entities, keywords, and code intelligence from one API.
+- **Format coverage** — PDF, Office, images, HTML/XML, email, archives, notebooks, citations, scientific formats, and plain text.
+- **OCR choices** — Tesseract, PaddleOCR, EasyOCR where supported, VLM OCR through liter-llm, and plugin hooks for custom backends.
+- **Same engine as every binding** — Rust, Python, Node.js, Go, Java, PHP, Ruby, .NET, Elixir, R, WASM, Kotlin Android, Swift, Dart, Zig, and C FFI share the same Rust implementation.
+- **Dart package** — Future/Stream API through flutter_rust_bridge.
+
+## Installation
+
+### Package Installation
+
+Install via pub:
+
+```bash
+dart pub add kreuzberg
+```
+
+For Flutter projects:
+
+```bash
+flutter pub add kreuzberg
+```
+
+### System Requirements
+- **Dart SDK 3.0+** for pure-Dart consumers
+- Flutter projects supported on macOS, iOS, Android, Linux, and Windows; Flutter Web is not supported
+- Native runtime delivered via `flutter_rust_bridge` with bundled binaries for the supported platforms
+- Optional: [Tesseract OCR](https://github.com/tesseract-ocr/tesseract) for OCR functionality
+
+## Quick Start
+
+### Basic Extraction
+
+Extract text, metadata, and structure from any supported document format:
+
+```dart title="Dart"
+import 'package:kreuzberg/kreuzberg.dart';
+
+Future<void> main() async {
+  // Sync semantics — flutter_rust_bridge surfaces every call as a Future,
+  // so even the *Sync entrypoints must be awaited from Dart.
+  final result = await KreuzbergBridge.extractFileSync('document.pdf', null);
+
+  print(result.content);
+  print('MIME type: ${result.mimeType}');
+  print('Tables: ${result.tables.length}');
+}
+```
+
+### Common Use Cases
+
+#### Extract with Custom Configuration
+
+Most use cases benefit from configuration to control extraction behavior:
+
+**With OCR (for scanned documents):**
+
+```dart title="Dart"
+import 'package:kreuzberg/kreuzberg.dart';
+
+Future<void> main() async {
+  final config = ExtractionConfig(
+    useCache: true,
+    enableQualityProcessing: true,
+    forceOcr: false,
+    disableOcr: false,
+    ocr: const OcrConfig(
+      enabled: true,
+      backend: 'tesseract',
+      language: 'eng',
+      autoRotate: false,
+    ),
+    resultFormat: ResultFormat.unified,
+    outputFormat: OutputFormat.plain(),
+    includeDocumentStructure: false,
+    maxArchiveDepth: 3,
+    useLayoutForMarkdown: false,
+  );
+
+  final result = await KreuzbergBridge.extractFile('scanned.pdf', null, config);
+  print(result.content);
+}
+```
+
+#### Table Extraction
+
+See [Configuration Guide](https://docs.kreuzberg.dev/guides/configuration/) for table extraction options.
+
+#### Processing Multiple Files
+
+```dart title="Dart"
+import 'package:kreuzberg/kreuzberg.dart';
+
+Future<void> main() async {
+  final items = <BatchFileItem>[
+    const BatchFileItem(path: 'doc1.pdf'),
+    BatchFileItem(
+      path: 'scan.pdf',
+      config: FileExtractionConfig(forceOcr: true),
+    ),
+  ];
+
+  // Sync semantics — flutter_rust_bridge still returns a Future from Dart.
+  final results = await KreuzbergBridge.batchExtractFilesSync(items);
+
+  print('Processed ${results.length} files');
+  for (final result in results) {
+    print('${result.mimeType}: ${result.content.length} chars');
+  }
+}
+```
+
+#### Async Processing
+
+For non-blocking document processing:
+
+```dart title="Dart"
+import 'package:kreuzberg/kreuzberg.dart';
+
+Future<void> main() async {
+  final result = await KreuzbergBridge.extractFile('document.pdf', null);
+
+  print(result.content);
+  print('MIME type: ${result.mimeType}');
+  print('Tables: ${result.tables.length}');
+}
+```
+
+### Next Steps
+
+- **[Installation Guide](https://docs.kreuzberg.dev/getting-started/installation/)** - Platform-specific setup
+- **[API Documentation](https://docs.kreuzberg.dev/reference/api-python/)** - Complete API reference
+- **[Examples & Guides](https://docs.kreuzberg.dev/)** - Full code examples and usage guides
+- **[Configuration Guide](https://docs.kreuzberg.dev/guides/configuration/)** - Advanced configuration options
+
+## Features
+
+### Supported File Formats (90+)
+
+90+ file formats across 8 major categories with intelligent format detection and comprehensive metadata extraction.
+
+#### Office Documents
+
+| Category | Formats | Capabilities |
+|----------|---------|--------------|
+| **Word Processing** | `.docx`, `.docm`, `.dotx`, `.dotm`, `.dot`, `.odt` | Full text, tables, images, metadata, styles |
+| **Spreadsheets** | `.xlsx`, `.xlsm`, `.xlsb`, `.xls`, `.xla`, `.xlam`, `.xltm`, `.xltx`, `.xlt`, `.ods` | Sheet data, formulas, cell metadata, charts |
+| **Presentations** | `.pptx`, `.pptm`, `.ppsx`, `.potx`, `.potm`, `.pot`, `.ppt` | Slides, speaker notes, images, metadata |
+| **PDF** | `.pdf` | Text, tables, images, metadata, OCR support |
+| **eBooks** | `.epub`, `.fb2` | Chapters, metadata, embedded resources |
+| **Database** | `.dbf` | Table data extraction, field type support |
+| **Hangul** | `.hwp`, `.hwpx` | Korean document format, text extraction |
+
+#### Images (OCR-Enabled)
+
+| Category | Formats | Features |
+|----------|---------|----------|
+| **Raster** | `.png`, `.jpg`, `.jpeg`, `.gif`, `.webp`, `.bmp`, `.tiff`, `.tif` | OCR, table detection, EXIF metadata, dimensions, color space |
+| **Advanced** | `.jp2`, `.jpx`, `.jpm`, `.mj2`, `.jbig2`, `.jb2`, `.pnm`, `.pbm`, `.pgm`, `.ppm` | OCR via hayro-jpeg2000 (pure Rust decoder), JBIG2 support, table detection, format-specific metadata |
+| **Vector** | `.svg` | DOM parsing, embedded text, graphics metadata |
+
+#### Web & Data
+
+| Category | Formats | Features |
+|----------|---------|----------|
+| **Markup** | `.html`, `.htm`, `.xhtml`, `.xml`, `.svg` | DOM parsing, metadata (Open Graph, Twitter Card), link extraction |
+| **Structured Data** | `.json`, `.yaml`, `.yml`, `.toml`, `.csv`, `.tsv` | Schema detection, nested structures, validation |
+| **Text & Markdown** | `.txt`, `.md`, `.markdown`, `.djot`, `.rst`, `.org`, `.rtf` | CommonMark, GFM, Djot, reStructuredText, Org Mode |
+
+#### Email & Archives
+
+| Category | Formats | Features |
+|----------|---------|----------|
+| **Email** | `.eml`, `.msg` | Headers, body (HTML/plain), attachments, threading |
+| **Archives** | `.zip`, `.tar`, `.tgz`, `.gz`, `.7z` | File listing, nested archives, metadata |
+
+#### Academic & Scientific
+
+| Category | Formats | Features |
+|----------|---------|----------|
+| **Citations** | `.bib`, `.biblatex`, `.ris`, `.nbib`, `.enw`, `.csl` | Structured parsing: RIS (structured), PubMed/MEDLINE, EndNote XML (structured), BibTeX, CSL JSON |
+| **Scientific** | `.tex`, `.latex`, `.typst`, `.jats`, `.ipynb`, `.docbook` | LaTeX, Jupyter notebooks, PubMed JATS |
+| **Documentation** | `.opml`, `.pod`, `.mdoc`, `.troff` | Technical documentation formats |
+
+#### Code Intelligence (300+ Languages)
+
+| Feature | Description |
+|---------|-------------|
+| **Structure Extraction** | Functions, classes, methods, structs, interfaces, enums |
+| **Import/Export Analysis** | Module dependencies, re-exports, wildcard imports |
+| **Symbol Extraction** | Variables, constants, type aliases, properties |
+| **Docstring Parsing** | Google, NumPy, Sphinx, JSDoc, RustDoc, and 10+ formats |
+| **Diagnostics** | Parse errors with line/column positions |
+| **Syntax-Aware Chunking** | Split code by semantic boundaries, not arbitrary byte offsets |
+
+Powered by [tree-sitter-language-pack](https://github.com/kreuzberg-dev/tree-sitter-language-pack) — [documentation](https://docs.tree-sitter-language-pack.kreuzberg.dev).
+
+**[Complete Format Reference](https://docs.kreuzberg.dev/reference/formats/)**
+
+### Key Capabilities
+
+- **Text Extraction** - Extract all text content with position and formatting information
+- **Metadata Extraction** - Retrieve document properties, creation date, author, etc.
+- **Table Extraction** - Parse tables with structure and cell content preservation
+- **Image Extraction** - Extract embedded images and render page previews
+- **OCR Support** - Integrate multiple OCR backends for scanned documents
+- **Async/Await** - Non-blocking document processing with concurrent operations
+- **Plugin System** - Extensible post-processing for custom text transformation
+- **Embeddings** - Generate vector embeddings using ONNX Runtime models
+- **Batch Processing** - Efficiently process multiple documents in parallel
+- **Memory Efficient** - Stream large files without loading entirely into memory
+- **Language Detection** - Detect and support multiple languages in documents
+- **Code Intelligence** - Extract structure, imports, exports, symbols, and docstrings from [300+ programming languages](https://docs.tree-sitter-language-pack.kreuzberg.dev) via tree-sitter
+- **Configuration** - Fine-grained control over extraction behavior
+
+### Performance Characteristics
+
+| Format | Speed | Memory | Notes |
+|--------|-------|--------|-------|
+| **PDF (text)** | 10-100 MB/s | ~50MB per doc | Fastest extraction |
+| **Office docs** | 20-200 MB/s | ~100MB per doc | DOCX, XLSX, PPTX |
+| **Images (OCR)** | 1-5 MB/s | Variable | Depends on OCR backend |
+| **Archives** | 5-50 MB/s | ~200MB per doc | ZIP, TAR, etc. |
+| **Web formats** | 50-200 MB/s | Streaming | HTML, XML, JSON |
+
+## OCR Support
+
+Kreuzberg supports multiple OCR backends for extracting text from scanned documents and images:
+
+- **Tesseract**
+
+- **PaddleOCR**
+
+### OCR Configuration Example
+
+```dart title="Dart"
+import 'package:kreuzberg/kreuzberg.dart';
+
+Future<void> main() async {
+  final config = ExtractionConfig(
+    useCache: true,
+    enableQualityProcessing: true,
+    forceOcr: false,
+    disableOcr: false,
+    ocr: const OcrConfig(
+      enabled: true,
+      backend: 'tesseract',
+      language: 'eng',
+      autoRotate: false,
+    ),
+    resultFormat: ResultFormat.unified,
+    outputFormat: OutputFormat.plain(),
+    includeDocumentStructure: false,
+    maxArchiveDepth: 3,
+    useLayoutForMarkdown: false,
+  );
+
+  final result = await KreuzbergBridge.extractFile('scanned.pdf', null, config);
+  print(result.content);
+}
+```
+
+## Async Support
+
+This binding provides full async/await support for non-blocking document processing:
+
+```dart title="Dart"
+import 'package:kreuzberg/kreuzberg.dart';
+
+Future<void> main() async {
+  final result = await KreuzbergBridge.extractFile('document.pdf', null);
+
+  print(result.content);
+  print('MIME type: ${result.mimeType}');
+  print('Tables: ${result.tables.length}');
+}
+```
+
+## Plugin System
+
+Kreuzberg supports extensible post-processing plugins for custom text transformation and filtering.
+
+For detailed plugin documentation, visit [Plugin System Guide](https://docs.kreuzberg.dev/guides/plugins/).
+
+## Embeddings Support
+
+Generate vector embeddings for extracted text using the built-in ONNX Runtime support. Requires ONNX Runtime installation.
+
+**[Embeddings Guide](https://docs.kreuzberg.dev/features/#embeddings)**
+
+## Batch Processing
+
+Process multiple documents efficiently:
+
+```dart title="Dart"
+import 'package:kreuzberg/kreuzberg.dart';
+
+Future<void> main() async {
+  final items = <BatchFileItem>[
+    const BatchFileItem(path: 'doc1.pdf'),
+    BatchFileItem(
+      path: 'scan.pdf',
+      config: FileExtractionConfig(forceOcr: true),
+    ),
+  ];
+
+  // Sync semantics — flutter_rust_bridge still returns a Future from Dart.
+  final results = await KreuzbergBridge.batchExtractFilesSync(items);
+
+  print('Processed ${results.length} files');
+  for (final result in results) {
+    print('${result.mimeType}: ${result.content.length} chars');
+  }
+}
+```
+
+## Configuration
+
+For advanced configuration options including language detection, table extraction, OCR settings, and more:
+
+**[Configuration Guide](https://docs.kreuzberg.dev/guides/configuration/)**
+
+## Documentation
+
+- **[Official Documentation](https://docs.kreuzberg.dev/)**
+- **[API Reference](https://docs.kreuzberg.dev/reference/api-python/)**
+- **[Examples & Guides](https://docs.kreuzberg.dev/)**
+
+## Contributing
+
+Contributions are welcome! See [Contributing Guide](https://github.com/kreuzberg-dev/kreuzberg/blob/main/CONTRIBUTING.md).
+
+## Part of Kreuzberg.dev
+
+- [Kreuzberg Cloud](https://github.com/kreuzberg-dev/kreuzberg-cloud) — managed extraction API with SDKs, dashboards, and observability.
+- [kreuzcrawl](https://github.com/kreuzberg-dev/kreuzcrawl) — web crawling and scraping with HTML→Markdown and headless-Chrome fallback.
+- [html-to-markdown](https://github.com/kreuzberg-dev/html-to-markdown) — fast, lossless HTML→Markdown engine.
+- [liter-llm](https://github.com/kreuzberg-dev/liter-llm) — universal LLM API client with native bindings for 14 languages and 143 providers.
+- [tree-sitter-language-pack](https://github.com/kreuzberg-dev/tree-sitter-language-pack) — tree-sitter grammars and code-intelligence primitives.
+- [alef](https://github.com/kreuzberg-dev/alef) — the polyglot binding generator that produces this README and all per-language bindings.
+- [Discord](https://discord.gg/xt9WY3GnKR) — community, roadmap, announcements.
+
+## License
+
+Elastic-2.0 License — see [LICENSE](../../LICENSE) for details.
+
+## Support
+
+- **Discord Community**: [Join our Discord](https://discord.gg/xt9WY3GnKR)
+- **GitHub Issues**: [Report bugs](https://github.com/kreuzberg-dev/kreuzberg/issues)
+- **Discussions**: [Ask questions](https://github.com/kreuzberg-dev/kreuzberg/discussions)
--- a/packages/dart/analysis_options.yaml
+++ b/packages/dart/analysis_options.yaml
@@ -0,0 +1,29 @@
+include: package:lints/recommended.yaml
+
+analyzer:
+  exclude:
+    - lib/src/frb/**
+    - lib/src/kreuzberg_bridge_generated/**
+    - example/**
+    - lib/src/traits.dart
+
+linter:
+  rules:
+    - avoid_empty_else
+    - avoid_print
+    - avoid_relative_lib_imports
+    - avoid_returning_this
+    - avoid_slow_async_io
+    - cancel_subscriptions
+    - close_sinks
+    - comment_references
+    - control_flow_in_finally
+    - empty_statements
+    - hash_and_equals
+    - literal_only_boolean_expressions
+    - no_adjacent_strings_in_list
+    - no_duplicate_case_values
+    - prefer_void_to_null
+    - throw_in_finally
+    - unnecessary_statements
+    - unrelated_type_equality_checks
--- a/packages/dart/android/CMakeLists.txt
+++ b/packages/dart/android/CMakeLists.txt
@@ -0,0 +1,7 @@
+cmake_minimum_required(VERSION 3.10)
+project(kreuzberg_dart VERSION 1.0)
+
+add_library(kreuzberg_dart SHARED IMPORTED)
+set_target_properties(kreuzberg_dart PROPERTIES
+  IMPORTED_LOCATION "${CMAKE_CURRENT_SOURCE_DIR}/src/main/jniLibs/${ANDROID_ABI}/libkreuzberg_dart.so"
+)
--- a/packages/dart/android/app/src/main/java/io/flutter/plugins/GeneratedPluginRegistrant.java
+++ b/packages/dart/android/app/src/main/java/io/flutter/plugins/GeneratedPluginRegistrant.java
@@ -0,0 +1,19 @@
+package io.flutter.plugins;
+
+import androidx.annotation.Keep;
+import androidx.annotation.NonNull;
+import io.flutter.Log;
+
+import io.flutter.embedding.engine.FlutterEngine;
+
+/**
+ * Generated file. Do not edit.
+ * This file is generated by the Flutter tool based on the
+ * plugins that support the Android platform.
+ */
+@Keep
+public final class GeneratedPluginRegistrant {
+  private static final String TAG = "GeneratedPluginRegistrant";
+  public static void registerWith(@NonNull FlutterEngine flutterEngine) {
+  }
+}
--- a/packages/dart/android/local.properties
+++ b/packages/dart/android/local.properties
@@ -0,0 +1,2 @@
+flutter.sdk=/opt/homebrew/share/flutter
+sdk.dir=/opt/homebrew/share/android-commandlinetools
--- a/packages/dart/android/src/main/AndroidManifest.xml
+++ b/packages/dart/android/src/main/AndroidManifest.xml
@@ -0,0 +1,3 @@
+<manifest xmlns:android="http://schemas.android.com/apk/res/android"
+    package="dev.kreuzberg">
+</manifest>
--- a/packages/dart/android/src/main/jniLibs/arm64-v8a/.gitkeep
+++ b/packages/dart/android/src/main/jniLibs/arm64-v8a/.gitkeep
--- a/packages/dart/android/src/main/jniLibs/x86_64/.gitkeep
+++ b/packages/dart/android/src/main/jniLibs/x86_64/.gitkeep
--- a/packages/dart/blobs/linux-aarch64/.gitkeep
+++ b/packages/dart/blobs/linux-aarch64/.gitkeep
--- a/packages/dart/blobs/linux-x86_64/.gitkeep
+++ b/packages/dart/blobs/linux-x86_64/.gitkeep
--- a/packages/dart/blobs/macos-arm64/.gitkeep
+++ b/packages/dart/blobs/macos-arm64/.gitkeep
--- a/packages/dart/blobs/windows-x86_64/.gitkeep
+++ b/packages/dart/blobs/windows-x86_64/.gitkeep
--- a/packages/dart/example/kreuzberg_example.dart
+++ b/packages/dart/example/kreuzberg_example.dart
@@ -0,0 +1,6 @@
+import 'package:kreuzberg/kreuzberg.dart' as kreuzberg;
+
+void main() {
+  print('Example: kreuzberg loaded successfully');
+  // Add your API calls here (through the `kreuzberg.` prefix) after code generation.
+}
--- a/packages/dart/ios/Flutter/Generated.xcconfig
+++ b/packages/dart/ios/Flutter/Generated.xcconfig
@@ -0,0 +1,14 @@
+// This is a generated file; do not edit or check into version control.
+FLUTTER_ROOT=/opt/homebrew/share/flutter
+FLUTTER_APPLICATION_PATH=/Users/naamanhirschfeld/workspace/kreuzberg-dev/kreuzberg/packages/dart
+COCOAPODS_PARALLEL_CODE_SIGN=true
+FLUTTER_TARGET=lib/main.dart
+FLUTTER_BUILD_DIR=build
+FLUTTER_BUILD_NAME=5.0.0.1
+FLUTTER_BUILD_NUMBER=5.0.0.1
+EXCLUDED_ARCHS[sdk=iphonesimulator*]=i386
+EXCLUDED_ARCHS[sdk=iphoneos*]=armv7
+DART_OBFUSCATION=false
+TRACK_WIDGET_CREATION=true
+TREE_SHAKE_ICONS=false
+PACKAGE_CONFIG=.dart_tool/package_config.json
--- a/packages/dart/ios/Flutter/ephemeral/flutter_lldb_helper.py
+++ b/packages/dart/ios/Flutter/ephemeral/flutter_lldb_helper.py
@@ -0,0 +1,32 @@
+#
+# Generated file, do not edit.
+#
+
+import lldb
+
+def handle_new_rx_page(frame: lldb.SBFrame, bp_loc, extra_args, intern_dict):
+    """Intercept NOTIFY_DEBUGGER_ABOUT_RX_PAGES and touch the pages."""
+    base = frame.register["x0"].GetValueAsAddress()
+    page_len = frame.register["x1"].GetValueAsUnsigned()
+
+    # Note: NOTIFY_DEBUGGER_ABOUT_RX_PAGES will check the contents of the
+    # first page to see whether we handled it correctly. This makes diagnosing
+    # misconfiguration (e.g. a missing breakpoint) easier.
+    data = bytearray(page_len)
+    data[0:8] = b'IHELPED!'
+
+    error = lldb.SBError()
+    frame.GetThread().GetProcess().WriteMemory(base, data, error)
+    if not error.Success():
+        print(f'Failed to write into {base}[+{page_len}]', error)
+        return
+
+def __lldb_init_module(debugger: lldb.SBDebugger, _):
+    target = debugger.GetDummyTarget()
+    # Caveat: must use BreakpointCreateByRegex here and not
+    # BreakpointCreateByName. For some reason the callback function does not
+    # get carried over from the dummy target for the latter.
+    bp = target.BreakpointCreateByRegex("^NOTIFY_DEBUGGER_ABOUT_RX_PAGES$")
+    bp.SetScriptCallbackFunction('{}.handle_new_rx_page'.format(__name__))
+    bp.SetAutoContinue(True)
+    print("-- LLDB integration loaded --")
--- a/packages/dart/ios/Flutter/ephemeral/flutter_lldbinit
+++ b/packages/dart/ios/Flutter/ephemeral/flutter_lldbinit
@@ -0,0 +1,5 @@
+#
+# Generated file, do not edit.
+#
+
+command script import --relative-to-command-file flutter_lldb_helper.py
--- a/packages/dart/ios/Flutter/flutter_export_environment.sh
+++ b/packages/dart/ios/Flutter/flutter_export_environment.sh
@@ -0,0 +1,13 @@
+#!/bin/sh
+# This is a generated file; do not edit or check into version control.
+export "FLUTTER_ROOT=/opt/homebrew/share/flutter"
+export "FLUTTER_APPLICATION_PATH=/Users/naamanhirschfeld/workspace/kreuzberg-dev/kreuzberg/packages/dart"
+export "COCOAPODS_PARALLEL_CODE_SIGN=true"
+export "FLUTTER_TARGET=lib/main.dart"
+export "FLUTTER_BUILD_DIR=build"
+export "FLUTTER_BUILD_NAME=5.0.0.1"
+export "FLUTTER_BUILD_NUMBER=5.0.0.1"
+export "DART_OBFUSCATION=false"
+export "TRACK_WIDGET_CREATION=true"
+export "TREE_SHAKE_ICONS=false"
+export "PACKAGE_CONFIG=.dart_tool/package_config.json"
--- a/packages/dart/ios/Frameworks/.gitkeep
+++ b/packages/dart/ios/Frameworks/.gitkeep
--- a/packages/dart/ios/Runner/GeneratedPluginRegistrant.h
+++ b/packages/dart/ios/Runner/GeneratedPluginRegistrant.h
@@ -0,0 +1,19 @@
+//
+//  Generated file. Do not edit.
+//
+
+// clang-format off
+
+#ifndef GeneratedPluginRegistrant_h
+#define GeneratedPluginRegistrant_h
+
+#import <Flutter/Flutter.h>
+
+NS_ASSUME_NONNULL_BEGIN
+
+@interface GeneratedPluginRegistrant : NSObject
+ (void)registerWithRegistry:(NSObject<FlutterPluginRegistry>*)registry;
+@end
+
+NS_ASSUME_NONNULL_END
+#endif /* GeneratedPluginRegistrant_h */
--- a/packages/dart/ios/Runner/GeneratedPluginRegistrant.m
+++ b/packages/dart/ios/Runner/GeneratedPluginRegistrant.m
@@ -0,0 +1,14 @@
+//
+//  Generated file. Do not edit.
+//
+
+// clang-format off
+
+#import "GeneratedPluginRegistrant.h"
+
+@implementation GeneratedPluginRegistrant
+
+ (void)registerWithRegistry:(NSObject<FlutterPluginRegistry>*)registry {
+}
+
+@end
--- a/packages/dart/ios/kreuzberg.podspec
+++ b/packages/dart/ios/kreuzberg.podspec
@@ -0,0 +1,20 @@
+#
+# To learn more about a Podspec see http://guides.cocoapods.org/syntax/podspec.html.
+# Run `pod lib lint kreuzberg.podspec` to validate before publishing.
+#
+Pod::Spec.new do |s|
+  s.name             = 'kreuzberg'
+  s.version          = '5.0.0-rc.1'
+  s.summary          = 'Rust document intelligence library — Flutter FFI plugin for iOS.'
+  s.description      = <<-DESC
+Flutter FFI plugin wrapping kreuzberg — document text extraction for iOS.
+                       DESC
+  s.homepage         = 'https://kreuzberg.dev'
+  s.license          = { :type => 'Elastic-2.0', :file => '../LICENSE' }
+  s.author           = { 'kreuzberg-dev' => 'hello@kreuzberg.dev' }
+  s.source           = { :path => '.' }
+  s.source_files     = 'Classes/**/*'
+  s.ios.deployment_target = '16.0'
+  s.vendored_frameworks = 'Frameworks/libkreuzberg_dart.xcframework'
+  s.pod_target_xcconfig = { 'DEFINES_MODULE' => 'YES' }
+end
--- a/packages/dart/lib/kreuzberg.dart
+++ b/packages/dart/lib/kreuzberg.dart
@@ -0,0 +1,3 @@
+// Generated by alef. Do not edit by hand.
+
+export 'src/kreuzberg.dart';
--- a/packages/dart/lib/src/LICENSE
+++ b/packages/dart/lib/src/LICENSE
@@ -0,0 +1,93 @@
+Elastic License 2.0 (ELv2)
+
+Copyright 2025-2026 Kreuzberg, Inc.
+
+Acceptance
+
+By using the software, you agree to all of the terms and conditions below.
+
+Copyright License
+
+The licensor grants you a non-exclusive, royalty-free, worldwide,
+non-sublicensable, non-transferable license to use, copy, distribute, make
+available, and prepare derivative works of the software, in each case subject to
+the limitations and conditions below.
+
+Limitations
+
+You may not provide the software to third parties as a hosted or managed
+service, where the service provides users with access to any substantial set of
+the features or functionality of the software.
+
+You may not move, change, disable, or circumvent the license key functionality
+in the software, and you may not remove or obscure any functionality in the
+software that is protected by the license key.
+
+You may not alter, remove, or obscure any licensing, copyright, or other notices
+of the licensor in the software. Any use of the licensor's trademarks is subject
+to applicable law.
+
+Patents
+
+The licensor grants you a license, under any patent claims the licensor can
+license, or becomes able to license, to make, have made, use, sell, offer for
+sale, import and have imported the software, in each case subject to the
+limitations and conditions in this license. This license does not cover any
+patent claims that you cause to be infringed by modifications or additions to the
+software. If you or your company make any written claim that the software
+infringes or contributes to infringement of any patent, your patent license for
+the software granted under these terms ends immediately. If your company makes
+such a claim, your patent license ends immediately for work on behalf of your
+company.
+
+Notices
+
+You must ensure that anyone who gets a copy of any part of the software from you
+also gets a copy of these terms.
+
+If you modify the software, you must include in any modified copies of the
+software prominent notices stating that you have modified the software.
+
+No Other Rights
+
+These terms do not imply any licenses other than those expressly granted in
+these terms.
+
+Termination
+
+If you use the software in violation of these terms, such use is not licensed,
+and your licenses will automatically terminate. If the licensor provides you with
+a notice of your violation, and you cease all violation of this license no later
+than 30 days after you receive that notice, your licenses will be reinstated
+retroactively. However, if you violate these terms after such reinstatement, any
+additional violation of these terms will cause your licenses to terminate
+automatically and permanently.
+
+No Liability
+
+As far as the law allows, the software comes as is, without any warranty or
+condition, and the licensor will not be liable to you for any damages arising out
+of these terms or the use or nature of the software, under any kind of legal
+claim.
+
+Definitions
+
+The licensor is the entity offering these terms, and the software is the
+software the licensor makes available under these terms, including any portion
+of it.
+
+you refers to the individual or entity agreeing to these terms.
+
+your company is any legal entity, sole proprietorship, or other kind of
+organization that you work for, plus all organizations that have control over,
+are under the control of, or are under common control with that organization.
+control means ownership of substantially all the assets of an entity, or the
+power to direct its management and policies by vote, contract, or otherwise.
+Control can be direct or indirect.
+
+your licenses are all the licenses granted to you for the software under these
+terms.
+
+use means anything you do with the software requiring one of your licenses.
+
+trademark means trademarks, service marks, and similar rights.
--- a/packages/dart/lib/src/kreuzberg.dart
+++ b/packages/dart/lib/src/kreuzberg.dart
@@ -0,0 +1,642 @@
+// Generated by alef. Do not edit by hand.
+
+import 'dart:typed_data';
+
+export 'kreuzberg_bridge_generated/lib.dart';
+export 'traits.dart';
+import 'kreuzberg_bridge_generated/lib.dart' as rust_bridge;
+// ignore: duplicate_import
+import 'kreuzberg_bridge_generated/lib.dart';
+
+class KreuzbergBridge {
+  /// Extract content from a byte array.
+  ///
+  /// This is the main entry point for in-memory extraction. It performs the following steps:
+  /// 1. Validate MIME type
+  /// 2. Handle legacy format conversion if needed
+  /// 3. Select appropriate extractor from registry
+  /// 4. Extract content
+  /// 5. Run post-processing pipeline
+  ///
+  /// # Arguments
+  ///
+  /// * `content` - The byte array to extract
+  /// * `mime_type` - MIME type of the content
+  /// * `config` - Extraction configuration
+  ///
+  /// # Returns
+  ///
+  /// An `ExtractionResult` containing the extracted content and metadata.
+  ///
+  /// # Errors
+  ///
+  /// Returns `KreuzbergError::Validation` if MIME type is invalid.
+  /// Returns `KreuzbergError::UnsupportedFormat` if MIME type is not supported.
+  ///
+  /// # Example
+  ///
+  /// ```rust,no_run
+  /// use kreuzberg::core::extractor::extract_bytes;
+  /// use kreuzberg::core::config::ExtractionConfig;
+  ///
+  /// let config = ExtractionConfig::default();
+  /// let bytes = b"Hello, world!";
+  /// let result = extract_bytes(bytes, "text/plain", &config).await?;
+  /// println!("Content: {}", result.content);
+  /// ```
+  /// throws anyhow::Error on failure
+  static Future<ExtractionResult> extractBytes(Uint8List content, String mimeType, [ExtractionConfig? config]) async {
+    // When the caller omits `config`, fall back to the default configuration spelled out below.
+    // NOTE(review): maxEmbeddedFileBytes and maxArchiveDepth are 0 here — confirm this matches the Rust-side defaults.
+    final ExtractionConfig effectiveConfig = config ??
+        ExtractionConfig(
+          useCache: true,
+          enableQualityProcessing: true,
+          ocr: null,
+          forceOcr: false,
+          forceOcrPages: null,
+          disableOcr: false,
+          chunking: null,
+          contentFilter: null,
+          images: null,
+          pdfOptions: null,
+          tokenReduction: null,
+          languageDetection: null,
+          pages: null,
+          keywords: null,
+          postprocessor: null,
+          htmlOptions: null,
+          htmlOutput: null,
+          extractionTimeoutSecs: null,
+          maxConcurrentExtractions: null,
+          resultFormat: ResultFormat.unified,
+          securityLimits: null,
+          maxEmbeddedFileBytes: 0,
+          outputFormat: OutputFormat.plain(),
+          layout: null,
+          useLayoutForMarkdown: false,
+          includeDocumentStructure: false,
+          acceleration: null,
+          cacheNamespace: null,
+          cacheTtlSecs: null,
+          email: null,
+          concurrency: null,
+          maxArchiveDepth: 0,
+          treeSitter: null,
+          structuredExtraction: null,
+          cancelToken: null,
+        );
+    // Delegate to the generated bridge function of the same name and return its result unchanged.
+    final ExtractionResult result = await rust_bridge.extractBytes(
+      content: content,
+      mimeType: mimeType,
+      config: effectiveConfig,
+    );
+    return result;
+  }
+
+  /// Extract content from a file.
+  ///
+  /// This is the main entry point for file-based extraction. It performs the following steps:
+  /// 1. Check cache for existing result (if caching enabled)
+  /// 2. Detect or validate MIME type
+  /// 3. Select appropriate extractor from registry
+  /// 4. Extract content
+  /// 5. Run post-processing pipeline
+  /// 6. Store result in cache (if caching enabled)
+  ///
+  /// # Arguments
+  ///
+  /// * `path` - Path to the file to extract
+  /// * `mime_type` - Optional MIME type override. If None, will be auto-detected
+  /// * `config` - Extraction configuration
+  ///
+  /// # Returns
+  ///
+  /// An `ExtractionResult` containing the extracted content and metadata.
+  ///
+  /// # Errors
+  ///
+  /// Returns `KreuzbergError::Io` if the file doesn't exist (NotFound) or for other file I/O errors.
+  /// Returns `KreuzbergError::UnsupportedFormat` if MIME type is not supported.
+  ///
+  /// # Example
+  ///
+  /// ```rust,no_run
+  /// use kreuzberg::core::extractor::extract_file;
+  /// use kreuzberg::core::config::ExtractionConfig;
+  ///
+  /// let config = ExtractionConfig::default();
+  /// let result = extract_file("document.pdf", None, &config).await?;
+  /// println!("Content: {}", result.content);
+  /// ```
+  /// throws anyhow::Error on failure
+  static Future<ExtractionResult> extractFile(String path, String? mimeType, [ExtractionConfig? config]) async {
+    // When the caller omits `config`, fall back to the default configuration spelled out below.
+    // NOTE(review): maxEmbeddedFileBytes and maxArchiveDepth are 0 here — confirm this matches the Rust-side defaults.
+    final ExtractionConfig effectiveConfig = config ??
+        ExtractionConfig(
+          useCache: true,
+          enableQualityProcessing: true,
+          ocr: null,
+          forceOcr: false,
+          forceOcrPages: null,
+          disableOcr: false,
+          chunking: null,
+          contentFilter: null,
+          images: null,
+          pdfOptions: null,
+          tokenReduction: null,
+          languageDetection: null,
+          pages: null,
+          keywords: null,
+          postprocessor: null,
+          htmlOptions: null,
+          htmlOutput: null,
+          extractionTimeoutSecs: null,
+          maxConcurrentExtractions: null,
+          resultFormat: ResultFormat.unified,
+          securityLimits: null,
+          maxEmbeddedFileBytes: 0,
+          outputFormat: OutputFormat.plain(),
+          layout: null,
+          useLayoutForMarkdown: false,
+          includeDocumentStructure: false,
+          acceleration: null,
+          cacheNamespace: null,
+          cacheTtlSecs: null,
+          email: null,
+          concurrency: null,
+          maxArchiveDepth: 0,
+          treeSitter: null,
+          structuredExtraction: null,
+          cancelToken: null,
+        );
+    // `mimeType` is forwarded as given, including null when the caller has no override.
+    final ExtractionResult result = await rust_bridge.extractFile(
+      path: path,
+      mimeType: mimeType,
+      config: effectiveConfig,
+    );
+    return result;
+  }
+
+  /// Synchronous wrapper for `extract_file`.
+  ///
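+  /// This is a convenience function that, on the Rust side, blocks the current thread until
+  /// extraction completes. The Dart binding still returns a `Future`, so the call must be awaited.
+  /// For async code, use `extract_file` (`extractFile` in Dart) directly.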
+  ///
+  /// Uses the global Tokio runtime for 100x+ performance improvement over creating
+  /// a new runtime per call. Always uses the global runtime to avoid nested runtime issues.
+  ///
+  /// This function is only available with the `tokio-runtime` feature. For WASM targets,
+  /// use a truly synchronous extraction approach instead.
+  ///
+  /// # Example
+  ///
+  /// ```rust,no_run
+  /// use kreuzberg::core::extractor::extract_file_sync;
+  /// use kreuzberg::core::config::ExtractionConfig;
+  ///
+  /// let config = ExtractionConfig::default();
+  /// let result = extract_file_sync("document.pdf", None, &config)?;
+  /// println!("Content: {}", result.content);
+  /// ```
+  /// throws anyhow::Error on failure
+  static Future<ExtractionResult> extractFileSync(String path, String? mimeType, [ExtractionConfig? config]) async {
+    // When the caller omits `config`, fall back to the default configuration spelled out below.
+    // NOTE(review): maxEmbeddedFileBytes and maxArchiveDepth are 0 here — confirm this matches the Rust-side defaults.
+    final ExtractionConfig effectiveConfig = config ??
+        ExtractionConfig(
+          useCache: true,
+          enableQualityProcessing: true,
+          ocr: null,
+          forceOcr: false,
+          forceOcrPages: null,
+          disableOcr: false,
+          chunking: null,
+          contentFilter: null,
+          images: null,
+          pdfOptions: null,
+          tokenReduction: null,
+          languageDetection: null,
+          pages: null,
+          keywords: null,
+          postprocessor: null,
+          htmlOptions: null,
+          htmlOutput: null,
+          extractionTimeoutSecs: null,
+          maxConcurrentExtractions: null,
+          resultFormat: ResultFormat.unified,
+          securityLimits: null,
+          maxEmbeddedFileBytes: 0,
+          outputFormat: OutputFormat.plain(),
+          layout: null,
+          useLayoutForMarkdown: false,
+          includeDocumentStructure: false,
+          acceleration: null,
+          cacheNamespace: null,
+          cacheTtlSecs: null,
+          email: null,
+          concurrency: null,
+          maxArchiveDepth: 0,
+          treeSitter: null,
+          structuredExtraction: null,
+          cancelToken: null,
+        );
+    // Despite the name, this wrapper returns a Future; it awaits the bridge call and returns its result.
+    final ExtractionResult result = await rust_bridge.extractFileSync(
+      path: path,
+      mimeType: mimeType,
+      config: effectiveConfig,
+    );
+    return result;
+  }
+
+  /// Synchronous wrapper for `extract_bytes`.
+  ///
+  /// Uses the global Tokio runtime for 100x+ performance improvement over creating
+  /// a new runtime per call.
+  ///
+  /// With the `tokio-runtime` feature, this blocks the current thread using the global
+  /// Tokio runtime. Without it (WASM), this calls a truly synchronous implementation.
+  ///
+  /// # Example
+  ///
+  /// ```rust,no_run
+  /// use kreuzberg::core::extractor::extract_bytes_sync;
+  /// use kreuzberg::core::config::ExtractionConfig;
+  ///
+  /// let config = ExtractionConfig::default();
+  /// let bytes = b"Hello, world!";
+  /// let result = extract_bytes_sync(bytes, "text/plain", &config)?;
+  /// println!("Content: {}", result.content);
+  /// ```
+  /// throws anyhow::Error on failure
+  static Future<ExtractionResult> extractBytesSync(Uint8List content, String mimeType, [ExtractionConfig? config]) async {
+    // When the caller omits `config`, fall back to the default configuration spelled out below.
+    // NOTE(review): maxEmbeddedFileBytes and maxArchiveDepth are 0 here — confirm this matches the Rust-side defaults.
+    final ExtractionConfig effectiveConfig = config ??
+        ExtractionConfig(
+          useCache: true,
+          enableQualityProcessing: true,
+          ocr: null,
+          forceOcr: false,
+          forceOcrPages: null,
+          disableOcr: false,
+          chunking: null,
+          contentFilter: null,
+          images: null,
+          pdfOptions: null,
+          tokenReduction: null,
+          languageDetection: null,
+          pages: null,
+          keywords: null,
+          postprocessor: null,
+          htmlOptions: null,
+          htmlOutput: null,
+          // Leave the timeout unset, as every other default config in this class does.
+          // This wrapper previously passed a literal 0, which could be read as a
+          // zero-second timeout rather than "no timeout".
+          extractionTimeoutSecs: null,
+          maxConcurrentExtractions: null,
+          resultFormat: ResultFormat.unified,
+          securityLimits: null,
+          maxEmbeddedFileBytes: 0,
+          outputFormat: OutputFormat.plain(),
+          layout: null,
+          useLayoutForMarkdown: false,
+          includeDocumentStructure: false,
+          acceleration: null,
+          cacheNamespace: null,
+          cacheTtlSecs: null,
+          email: null,
+          concurrency: null,
+          maxArchiveDepth: 0,
+          treeSitter: null,
+          structuredExtraction: null,
+          cancelToken: null,
+        );
+    // Despite the name, this wrapper returns a Future; it awaits the bridge call and returns its result.
+    final ExtractionResult result = await rust_bridge.extractBytesSync(
+      content: content,
+      mimeType: mimeType,
+      config: effectiveConfig,
+    );
+    return result;
+  }
+
+  /// Synchronous wrapper for `batch_extract_files`.
+  ///
+  /// Uses the global Tokio runtime for optimal performance.
+  /// Only available with `tokio-runtime` (WASM has no filesystem).
+  ///
+  /// # Example
+  ///
+  /// ```rust,no_run
+  /// use kreuzberg::core::extractor::batch_extract_files_sync;
+  /// use kreuzberg::core::config::{ExtractionConfig, BatchFileItem, FileExtractionConfig};
+  ///
+  /// let config = ExtractionConfig::default();
+  /// let items = vec![
+  ///     BatchFileItem {
+  ///         path: "doc1.pdf".into(),
+  ///         config: Some(FileExtractionConfig { force_ocr: Some(true), ..Default::default() }),
+  ///     },
+  ///     BatchFileItem { path: "doc2.pdf".into(), config: None },
+  /// ];
+  /// let results = batch_extract_files_sync(items, &config)?;
+  /// ```
+  /// throws anyhow::Error on failure
+  static Future<List<ExtractionResult>> batchExtractFilesSync(List<BatchFileItem> items, [ExtractionConfig? config]) async {
+    // When the caller omits `config`, fall back to the default configuration spelled out below.
+    // NOTE(review): maxEmbeddedFileBytes and maxArchiveDepth are 0 here — confirm this matches the Rust-side defaults.
+    final ExtractionConfig batchConfig = config ??
+        ExtractionConfig(
+          useCache: true,
+          enableQualityProcessing: true,
+          ocr: null,
+          forceOcr: false,
+          forceOcrPages: null,
+          disableOcr: false,
+          chunking: null,
+          contentFilter: null,
+          images: null,
+          pdfOptions: null,
+          tokenReduction: null,
+          languageDetection: null,
+          pages: null,
+          keywords: null,
+          postprocessor: null,
+          htmlOptions: null,
+          htmlOutput: null,
+          extractionTimeoutSecs: null,
+          maxConcurrentExtractions: null,
+          resultFormat: ResultFormat.unified,
+          securityLimits: null,
+          maxEmbeddedFileBytes: 0,
+          outputFormat: OutputFormat.plain(),
+          layout: null,
+          useLayoutForMarkdown: false,
+          includeDocumentStructure: false,
+          acceleration: null,
+          cacheNamespace: null,
+          cacheTtlSecs: null,
+          email: null,
+          concurrency: null,
+          maxArchiveDepth: 0,
+          treeSitter: null,
+          structuredExtraction: null,
+          cancelToken: null,
+        );
+    // Delegate the whole batch to the generated bridge function of the same name.
+    final List<ExtractionResult> results = await rust_bridge.batchExtractFilesSync(
+      items: items,
+      config: batchConfig,
+    );
+    return results;
+  }
+
+  /// Synchronous wrapper for `batch_extract_bytes`.
+  ///
+  /// Uses the global Tokio runtime for optimal performance.
+  /// With the `tokio-runtime` feature, this blocks the current thread using the global
+  /// Tokio runtime. Without it (WASM), this calls a truly synchronous implementation
+  /// that iterates through items and calls `extract_bytes_sync()`.
+  ///
+  /// # Example
+  ///
+  /// ```rust,no_run
+  /// use kreuzberg::core::extractor::batch_extract_bytes_sync;
+  /// use kreuzberg::core::config::{ExtractionConfig, BatchBytesItem, FileExtractionConfig};
+  ///
+  /// let config = ExtractionConfig::default();
+  /// let items = vec![
+  ///     BatchBytesItem { content: b"content".to_vec(), mime_type: "text/plain".to_string(), config: None },
+  ///     BatchBytesItem {
+  ///         content: b"other".to_vec(),
+  ///         mime_type: "text/plain".to_string(),
+  ///         config: Some(FileExtractionConfig { force_ocr: Some(true), ..Default::default() }),
+  ///     },
+  /// ];
+  /// let results = batch_extract_bytes_sync(items, &config)?;
+  /// ```
+  /// throws anyhow::Error on failure
+  static Future<List<ExtractionResult>> batchExtractBytesSync(List<BatchBytesItem> items, [ExtractionConfig? config]) async {
+    // When the caller omits `config`, fall back to the default configuration spelled out below.
+    // NOTE(review): maxEmbeddedFileBytes and maxArchiveDepth are 0 here — confirm this matches the Rust-side defaults.
+    final ExtractionConfig batchConfig = config ??
+        ExtractionConfig(
+          useCache: true,
+          enableQualityProcessing: true,
+          ocr: null,
+          forceOcr: false,
+          forceOcrPages: null,
+          disableOcr: false,
+          chunking: null,
+          contentFilter: null,
+          images: null,
+          pdfOptions: null,
+          tokenReduction: null,
+          languageDetection: null,
+          pages: null,
+          keywords: null,
+          postprocessor: null,
+          htmlOptions: null,
+          htmlOutput: null,
+          extractionTimeoutSecs: null,
+          maxConcurrentExtractions: null,
+          resultFormat: ResultFormat.unified,
+          securityLimits: null,
+          maxEmbeddedFileBytes: 0,
+          outputFormat: OutputFormat.plain(),
+          layout: null,
+          useLayoutForMarkdown: false,
+          includeDocumentStructure: false,
+          acceleration: null,
+          cacheNamespace: null,
+          cacheTtlSecs: null,
+          email: null,
+          concurrency: null,
+          maxArchiveDepth: 0,
+          treeSitter: null,
+          structuredExtraction: null,
+          cancelToken: null,
+        );
+    // Delegate the whole batch to the generated bridge function of the same name.
+    final List<ExtractionResult> results = await rust_bridge.batchExtractBytesSync(
+      items: items,
+      config: batchConfig,
+    );
+    return results;
+  }
+
+  /// Extract content from multiple files concurrently.
+  ///
+  /// This function processes multiple files in parallel, automatically managing
+  /// concurrency to prevent resource exhaustion. The concurrency limit can be
+  /// configured via `ExtractionConfig::max_concurrent_extractions` or defaults
+  /// to `(num_cpus * 1.5).ceil()`.
+  ///
+  /// Each file can optionally specify a [`FileExtractionConfig`] that overrides specific
+  /// fields from the batch-level `config`. Pass `None` for a file to use the batch defaults.
+  /// Batch-level settings like `max_concurrent_extractions` and `use_cache` are always
+  /// taken from the batch-level `config`.
+  ///
+  /// # Arguments
+  ///
+  /// * `items` - Vector of `BatchFileItem` structs, each containing a path and optional
+  ///   per-file configuration overrides.
+  /// * `config` - Batch-level extraction configuration (provides defaults and batch settings)
+  ///
+  /// # Returns
+  ///
+  /// A vector of `ExtractionResult` in the same order as the input items.
+  ///
+  /// # Errors
+  ///
+  /// Individual file errors are captured in the result metadata. System errors
+  /// (IO, RuntimeError equivalents) will bubble up and fail the entire batch.
+  ///
+  /// # Examples
+  ///
+  /// Simple usage with no per-file overrides:
+  ///
+  /// ```rust,no_run
+  /// use kreuzberg::core::extractor::batch_extract_files;
+  /// use kreuzberg::core::config::{ExtractionConfig, BatchFileItem};
+  /// use std::path::PathBuf;
+  ///
+  /// let config = ExtractionConfig::default();
+  /// let items = vec![
+  ///     BatchFileItem { path: "doc1.pdf".into(), config: None },
+  ///     BatchFileItem { path: "doc2.pdf".into(), config: None },
+  /// ];
+  /// let results = batch_extract_files(items, &config).await?;
+  /// println!("Processed {} files", results.len());
+  /// ```
+  ///
+  /// Per-file configuration overrides:
+  ///
+  /// ```rust,no_run
+  /// use kreuzberg::core::extractor::batch_extract_files;
+  /// use kreuzberg::core::config::{ExtractionConfig, BatchFileItem, FileExtractionConfig};
+  /// use std::path::PathBuf;
+  ///
+  /// let config = ExtractionConfig::default();
+  /// let items = vec![
+  ///     BatchFileItem {
+  ///         path: "scan.pdf".into(),
+  ///         config: Some(FileExtractionConfig { force_ocr: Some(true), ..Default::default() }),
+  ///     },
+  ///     BatchFileItem { path: "notes.txt".into(), config: None },
+  /// ];
+  /// let results = batch_extract_files(items, &config).await?;
+  /// ```
+  /// throws anyhow::Error on failure
+  static Future<List<ExtractionResult>> batchExtractFiles(List<BatchFileItem> items, [ExtractionConfig? config]) async {
+    // When the caller omits `config`, fall back to the default configuration spelled out below.
+    // NOTE(review): maxEmbeddedFileBytes and maxArchiveDepth are 0 here — confirm this matches the Rust-side defaults.
+    final ExtractionConfig batchConfig = config ??
+        ExtractionConfig(
+          useCache: true,
+          enableQualityProcessing: true,
+          ocr: null,
+          forceOcr: false,
+          forceOcrPages: null,
+          disableOcr: false,
+          chunking: null,
+          contentFilter: null,
+          images: null,
+          pdfOptions: null,
+          tokenReduction: null,
+          languageDetection: null,
+          pages: null,
+          keywords: null,
+          postprocessor: null,
+          htmlOptions: null,
+          htmlOutput: null,
+          extractionTimeoutSecs: null,
+          maxConcurrentExtractions: null,
+          resultFormat: ResultFormat.unified,
+          securityLimits: null,
+          maxEmbeddedFileBytes: 0,
+          outputFormat: OutputFormat.plain(),
+          layout: null,
+          useLayoutForMarkdown: false,
+          includeDocumentStructure: false,
+          acceleration: null,
+          cacheNamespace: null,
+          cacheTtlSecs: null,
+          email: null,
+          concurrency: null,
+          maxArchiveDepth: 0,
+          treeSitter: null,
+          structuredExtraction: null,
+          cancelToken: null,
+        );
+    // Delegate the whole batch to the generated bridge function of the same name.
+    final List<ExtractionResult> results = await rust_bridge.batchExtractFiles(
+      items: items,
+      config: batchConfig,
+    );
+    return results;
+  }
+
+  /// Extract content from multiple byte arrays concurrently.
+  ///
+  /// This function processes multiple byte arrays in parallel, automatically managing
+  /// concurrency to prevent resource exhaustion. The concurrency limit can be
+  /// configured via `ExtractionConfig::max_concurrent_extractions` or defaults
+  /// to `(num_cpus * 1.5).ceil()`.
+  ///
+  /// Each item can optionally specify a [`FileExtractionConfig`] that overrides specific
+  /// fields from the batch-level `config`. Pass `None` as the config to use
+  /// the batch-level defaults for that item.
+  ///
+  /// # Arguments
+  ///
+  /// * `items` - Vector of `BatchBytesItem` structs, each containing content bytes,
+  ///   MIME type, and optional per-item configuration overrides.
+  /// * `config` - Batch-level extraction configuration
+  ///
+  /// # Returns
+  ///
+  /// A vector of `ExtractionResult` in the same order as the input items.
+  ///
+  /// # Examples
+  ///
+  /// Simple usage with no per-item overrides:
+  ///
+  /// ```rust,no_run
+  /// use kreuzberg::core::extractor::batch_extract_bytes;
+  /// use kreuzberg::core::config::{ExtractionConfig, BatchBytesItem};
+  ///
+  /// let config = ExtractionConfig::default();
+  /// let items = vec![
+  ///     BatchBytesItem { content: b"content 1".to_vec(), mime_type: "text/plain".to_string(), config: None },
+  ///     BatchBytesItem { content: b"content 2".to_vec(), mime_type: "text/plain".to_string(), config: None },
+  /// ];
+  /// let results = batch_extract_bytes(items, &config).await?;
+  /// println!("Processed {} items", results.len());
+  /// ```
+  ///
+  /// Per-item configuration overrides:
+  ///
+  /// ```rust,no_run
+  /// use kreuzberg::core::extractor::batch_extract_bytes;
+  /// use kreuzberg::core::config::{ExtractionConfig, BatchBytesItem, FileExtractionConfig};
+  ///
+  /// let config = ExtractionConfig::default();
+  /// let items = vec![
+  ///     BatchBytesItem { content: b"content".to_vec(), mime_type: "text/plain".to_string(), config: None },
+  ///     BatchBytesItem {
+  ///         content: b"<html>test</html>".to_vec(),
+  ///         mime_type: "text/html".to_string(),
+  ///         config: Some(FileExtractionConfig { force_ocr: Some(true), ..Default::default() }),
+  ///     },
+  /// ];
+  /// let results = batch_extract_bytes(items, &config).await?;
+  /// ```
+  /// throws anyhow::Error on failure
+  static Future<List<ExtractionResult>> batchExtractBytes(List<BatchBytesItem> items, [ExtractionConfig? config]) async {
+    // When the caller omits `config`, fall back to the default configuration spelled out below.
+    // NOTE(review): maxEmbeddedFileBytes and maxArchiveDepth are 0 here — confirm this matches the Rust-side defaults.
+    final ExtractionConfig batchConfig = config ??
+        ExtractionConfig(
+          useCache: true,
+          enableQualityProcessing: true,
+          ocr: null,
+          forceOcr: false,
+          forceOcrPages: null,
+          disableOcr: false,
+          chunking: null,
+          contentFilter: null,
+          images: null,
+          pdfOptions: null,
+          tokenReduction: null,
+          languageDetection: null,
+          pages: null,
+          keywords: null,
+          postprocessor: null,
+          htmlOptions: null,
+          htmlOutput: null,
+          extractionTimeoutSecs: null,
+          maxConcurrentExtractions: null,
+          resultFormat: ResultFormat.unified,
+          securityLimits: null,
+          maxEmbeddedFileBytes: 0,
+          outputFormat: OutputFormat.plain(),
+          layout: null,
+          useLayoutForMarkdown: false,
+          includeDocumentStructure: false,
+          acceleration: null,
+          cacheNamespace: null,
+          cacheTtlSecs: null,
+          email: null,
+          concurrency: null,
+          maxArchiveDepth: 0,
+          treeSitter: null,
+          structuredExtraction: null,
+          cancelToken: null,
+        );
+    // Delegate the whole batch to the generated bridge function of the same name.
+    final List<ExtractionResult> results = await rust_bridge.batchExtractBytes(
+      items: items,
+      config: batchConfig,
+    );
+    return results;
+  }
+
+  /// Detect MIME type from raw file bytes.
+  ///
+  /// Uses magic byte signatures to detect file type from content.
+  /// Falls back to `infer` crate for comprehensive detection.
+  ///
+  /// For ZIP-based files, inspects contents to distinguish Office Open XML
+  /// formats (DOCX, XLSX, PPTX) from plain ZIP archives.
+  ///
+  /// # Arguments
+  ///
+  /// * `content` - Raw file bytes
+  ///
+  /// # Returns
+  ///
+  /// The detected MIME type string.
+  ///
+  /// # Errors
+  ///
+  /// Returns `KreuzbergError::UnsupportedFormat` if MIME type cannot be determined.
+  /// throws anyhow::Error on failure
+  static Future<String> detectMimeTypeFromBytes(Uint8List content) async {
+    // Delegate to the generated bridge function of the same name.
+    final String detectedMime = await rust_bridge.detectMimeTypeFromBytes(content: content);
+    return detectedMime;
+  }
+
+  /// Get file extensions for a given MIME type.
+  ///
+  /// Returns all known file extensions that map to the specified MIME type.
+  ///
+  /// # Arguments
+  ///
+  /// * `mime_type` - The MIME type to look up
+  ///
+  /// # Returns
+  ///
+  /// A vector of file extensions (without leading dot) for the MIME type.
+  ///
+  /// # Example
+  ///
+  /// ```
+  /// use kreuzberg::core::mime::get_extensions_for_mime;
+  ///
+  /// let extensions = get_extensions_for_mime("application/pdf").unwrap();
+  /// assert_eq!(extensions, vec!["pdf"]);
+  ///
+  /// let doc_extensions = get_extensions_for_mime("application/vnd.openxmlformats-officedocument.wordprocessingml.document").unwrap();
+  /// assert!(doc_extensions.contains(&"docx".to_string()));
+  /// ```
+  /// throws anyhow::Error on failure
+  static Future<List<String>> getExtensionsForMime(String mimeType) async {
+    // Delegate to the generated bridge function of the same name.
+    final List<String> extensions = await rust_bridge.getExtensionsForMime(mimeType: mimeType);
+    return extensions;
+  }
+
+  /// List the names of all registered embedding backends.
+  ///
+  /// Used by `kreuzberg-cli`, the api/mcp endpoints, and generated language
+  /// bindings.
+  /// throws anyhow::Error on failure
+  static Future<List<String>> listEmbeddingBackends() async {
+    // Delegate to the generated bridge function of the same name.
+    final List<String> backendNames = await rust_bridge.listEmbeddingBackends();
+    return backendNames;
+  }
+
+  /// List names of all registered document extractors.
+  /// throws anyhow::Error on failure
+  static Future<List<String>> listDocumentExtractors() async {
+    // Delegate to the generated bridge function of the same name.
+    final List<String> extractorNames = await rust_bridge.listDocumentExtractors();
+    return extractorNames;
+  }
+
+  /// List all registered OCR backends.
+  ///
+  /// Returns the names of all OCR backends currently registered in the global registry.
+  ///
+  /// # Returns
+  ///
+  /// A vector of OCR backend names.
+  ///
+  /// # Example
+  ///
+  /// ```rust
+  /// use kreuzberg::plugins::list_ocr_backends;
+  ///
+  /// let backends = list_ocr_backends()?;
+  /// for name in backends {
+  ///     println!("Registered OCR backend: {}", name);
+  /// }
+  /// ```
+  /// throws anyhow::Error on failure
+  static Future<List<String>> listOcrBackends() async {
+    // Delegate to the generated bridge function of the same name.
+    final List<String> backendNames = await rust_bridge.listOcrBackends();
+    return backendNames;
+  }
+
+  /// List all registered post-processor names.
+  ///
+  /// Returns a vector of all post-processor names currently registered in the
+  /// global registry.
+  ///
+  /// # Returns
+  ///
+  /// - `Ok(Vec<String>)` - Vector of post-processor names
+  /// - `Err(...)` if the registry lock is poisoned
+  ///
+  /// # Example
+  ///
+  /// ```rust
+  /// use kreuzberg::plugins::list_post_processors;
+  ///
+  /// let processors = list_post_processors()?;
+  /// for name in processors {
+  ///     println!("Registered post-processor: {}", name);
+  /// }
+  /// ```
+  /// throws anyhow::Error on failure
+  static Future<List<String>> listPostProcessors() async {
+    // Delegate to the generated bridge function of the same name.
+    final List<String> processorNames = await rust_bridge.listPostProcessors();
+    return processorNames;
+  }
+
+  /// List names of all registered renderers.
+  ///
+  /// # Errors
+  ///
+  /// Returns an error if the registry lock is poisoned.
+  /// throws anyhow::Error on failure
+  static Future<List<String>> listRenderers() async {
+    // Delegate to the generated bridge function of the same name.
+    final List<String> rendererNames = await rust_bridge.listRenderers();
+    return rendererNames;
+  }
+
+  /// List names of all registered validators.
+  /// throws anyhow::Error on failure
+  static Future<List<String>> listValidators() async {
+    // Delegate to the generated bridge function of the same name.
+    final List<String> validatorNames = await rust_bridge.listValidators();
+    return validatorNames;
+  }
+
+  /// Compare two extraction results and return a structured diff.
+  ///
+  /// The comparison is purely structural — no I/O, no side effects. All fields
+  /// of [`ExtractionDiff`] are populated according to the provided [`DiffOptions`].
+  ///
+  /// # Arguments
+  ///
+  /// * `a` — the "before" extraction result
+  /// * `b` — the "after" extraction result
+  /// * `opts` — controls which sections are compared and optional truncation
+  ///
+  /// # Example
+  ///
+  /// ```rust,no_run
+  /// use kreuzberg::{ExtractionResult, diff::{compare, DiffOptions}};
+  ///
+  /// let mut a = ExtractionResult::default();
+  /// let mut b = ExtractionResult::default();
+  /// a.content = "Hello world".to_string();
+  /// b.content = "Hello Rust".to_string();
+  ///
+  /// let diff = compare(&a, &b, &DiffOptions::default());
+  /// assert_eq!(diff.content_diff.len(), 1);
+  /// ```
+  static Future<ExtractionDiff> compare(ExtractionResult a, ExtractionResult b, DiffOptions opts) async {
+    // Forward both results and the options to the generated bridge, keeping the a/b order.
+    final ExtractionDiff diff = await rust_bridge.compare(
+      a: a,
+      b: b,
+      opts: opts,
+    );
+    return diff;
+  }
+
+  /// Generate embeddings asynchronously for a list of text strings.
+  ///
+  /// This is the async counterpart to [`embed_texts`]. It offloads the blocking
+  /// ONNX inference work to a dedicated blocking thread pool via Tokio's
+  /// `spawn_blocking`, keeping the async executor free.
+  ///
+  /// Returns one embedding vector per input text in the same order.
+  ///
+  /// # Arguments
+  ///
+  /// * `texts` - Vec of strings to embed (owned, sent to blocking thread)
+  /// * `config` - Embedding configuration specifying model, batch size, and normalization
+  ///
+  /// # Errors
+  ///
+  /// - `KreuzbergError::MissingDependency` if ONNX Runtime is not installed
+  /// - `KreuzbergError::Embedding` if the preset name is unknown, model download fails,
+  ///   or the blocking inference task panics
+  ///
+  /// # Example
+  ///
+  /// ```rust,ignore
+  /// use kreuzberg::{embed_texts_async, EmbeddingConfig};
+  ///
+  /// let embeddings = embed_texts_async(
+  ///     vec!["Hello!".to_string()],
+  ///     &EmbeddingConfig::default(),
+  /// ).await?;
+  /// ```
+  /// throws anyhow::Error on failure
+  static Future<List<Float64List>> embedTextsAsync(List<String> texts, EmbeddingConfig config) async {
+    // Delegate to the generated bridge function of the same name.
+    final List<Float64List> embeddings = await rust_bridge.embedTextsAsync(
+      texts: texts,
+      config: config,
+    );
+    return embeddings;
+  }
+
+  /// Render a single PDF page to PNG bytes.
+  ///
+  /// Returns raw PNG-encoded bytes for the specified page at the given DPI.
+  /// Uses pdf_oxide with tiny-skia for pure-Rust rendering.
+  ///
+  /// # Arguments
+  ///
+  /// * `pdf_bytes` - Raw PDF file bytes
+  /// * `page_index` - Zero-based page index
+  /// * `dpi` - Resolution in dots per inch (default: 150)
+  /// * `password` - Optional password for encrypted PDFs
+  ///
+  /// # Errors
+  ///
+  /// Returns `KreuzbergError::Parsing` if the PDF cannot be opened, authenticated,
+  /// or rendered, or if `page_index` is out of range.
+  /// throws anyhow::Error on failure
+  static Future<Uint8List> renderPdfPageToPng(Uint8List pdfBytes, int pageIndex, {int? dpi, String? password}) async {
+    // `dpi` and `password` are forwarded as given, including null when the caller omits them.
+    final Uint8List pngBytes = await rust_bridge.renderPdfPageToPng(
+      pdfBytes: pdfBytes,
+      pageIndex: pageIndex,
+      dpi: dpi,
+      password: password,
+    );
+    return pngBytes;
+  }
+
+  /// Detect the MIME type of a file at the given path.
+  ///
+  /// Uses the file extension and optionally the file content to determine the MIME type.
+  /// Set `check_exists` to `true` to verify the file exists before detection.
+  /// throws anyhow::Error on failure
+  static Future<String> detectMimeType(String path, bool checkExists) async {
+    // Delegate to the generated bridge function of the same name.
+    final String detectedMime = await rust_bridge.detectMimeType(
+      path: path,
+      checkExists: checkExists,
+    );
+    return detectedMime;
+  }
+
+  /// Embed a list of texts using the configured embedding model.
+  ///
+  /// Returns a 2D vector where each inner vector is the embedding for the corresponding text.
+  /// throws anyhow::Error on failure
+  static Future<List<Float64List>> embedTexts(List<String> texts, EmbeddingConfig config) async {
+    // Delegate to the generated bridge function of the same name.
+    final List<Float64List> embeddings = await rust_bridge.embedTexts(
+      texts: texts,
+      config: config,
+    );
+    return embeddings;
+  }
+
+  /// Looks up the embedding preset registered under [name].
+  ///
+  /// Completes with `null` when no preset with that name exists. The Rust side
+  /// hands back an owned clone so the value is safe to pass across FFI boundaries.
+  static Future<EmbeddingPreset?> getEmbeddingPreset(String name) async =>
+      // Relay the lookup to the generated bridge and await its answer.
+      await rust_bridge.getEmbeddingPreset(name: name);
+
+  /// Returns the names of all available embedding presets.
+  ///
+  /// The Rust side returns owned `String`s so the values are safe to pass across FFI boundaries.
+  static Future<List<String>> listEmbeddingPresets() async =>
+      // No arguments: the bridge call is awaited and its list returned as-is.
+      await rust_bridge.listEmbeddingPresets();
+
+  /// Registers [impl], a Dart implementation of [OcrBackend], with the plugin registry.
+  static Future<void> registerOcrBackend(OcrBackendDartImpl impl) async {
+    await rust_bridge.registerOcrBackend(impl: impl);
+  }
+  /// Unregisters the previously-registered [OcrBackend] plugin identified by [name].
+  static Future<void> unregisterOcrBackend(String name) async {
+    await rust_bridge.unregisterOcrBackend(name: name);
+  }
+  /// Removes every registered [OcrBackend] plugin from the registry.
+  static Future<void> clearOcrBackends() async {
+    await rust_bridge.clearOcrBackends();
+  }
+  /// Registers [impl], a Dart implementation of [PostProcessor], with the plugin registry.
+  static Future<void> registerPostProcessor(PostProcessorDartImpl impl) async {
+    await rust_bridge.registerPostProcessor(impl: impl);
+  }
+  /// Unregisters the previously-registered [PostProcessor] plugin identified by [name].
+  static Future<void> unregisterPostProcessor(String name) async {
+    await rust_bridge.unregisterPostProcessor(name: name);
+  }
+  /// Removes every registered [PostProcessor] plugin from the registry.
+  static Future<void> clearPostProcessors() async {
+    await rust_bridge.clearPostProcessors();
+  }
+  /// Registers [impl], a Dart implementation of [Validator], with the plugin registry.
+  static Future<void> registerValidator(ValidatorDartImpl impl) async {
+    await rust_bridge.registerValidator(impl: impl);
+  }
+  /// Unregisters the previously-registered [Validator] plugin identified by [name].
+  static Future<void> unregisterValidator(String name) async {
+    await rust_bridge.unregisterValidator(name: name);
+  }
+  /// Removes every registered [Validator] plugin from the registry.
+  static Future<void> clearValidators() async {
+    await rust_bridge.clearValidators();
+  }
+  /// Registers [impl], a Dart implementation of [EmbeddingBackend], with the plugin registry.
+  static Future<void> registerEmbeddingBackend(EmbeddingBackendDartImpl impl) async {
+    await rust_bridge.registerEmbeddingBackend(impl: impl);
+  }
+  /// Unregisters the previously-registered [EmbeddingBackend] plugin identified by [name].
+  static Future<void> unregisterEmbeddingBackend(String name) async {
+    await rust_bridge.unregisterEmbeddingBackend(name: name);
+  }
+  /// Removes every registered [EmbeddingBackend] plugin from the registry.
+  static Future<void> clearEmbeddingBackends() async {
+    await rust_bridge.clearEmbeddingBackends();
+  }
+  /// Registers [impl], a Dart implementation of [DocumentExtractor], with the plugin registry.
+  static Future<void> registerDocumentExtractor(DocumentExtractorDartImpl impl) async {
+    await rust_bridge.registerDocumentExtractor(impl: impl);
+  }
+  /// Unregisters the previously-registered [DocumentExtractor] plugin identified by [name].
+  static Future<void> unregisterDocumentExtractor(String name) async {
+    await rust_bridge.unregisterDocumentExtractor(name: name);
+  }
+  /// Removes every registered [DocumentExtractor] plugin from the registry.
+  static Future<void> clearDocumentExtractors() async {
+    await rust_bridge.clearDocumentExtractors();
+  }
+  /// Registers [impl], a Dart implementation of [Renderer], with the plugin registry.
+  static Future<void> registerRenderer(RendererDartImpl impl) async {
+    await rust_bridge.registerRenderer(impl: impl);
+  }
+  /// Unregisters the previously-registered [Renderer] plugin identified by [name].
+  static Future<void> unregisterRenderer(String name) async {
+    await rust_bridge.unregisterRenderer(name: name);
+  }
+  /// Removes every registered [Renderer] plugin from the registry.
+  static Future<void> clearRenderers() async {
+    await rust_bridge.clearRenderers();
+  }
+}
--- a/packages/dart/lib/src/kreuzberg_bridge_generated/frb_generated.dart
+++ b/packages/dart/lib/src/kreuzberg_bridge_generated/frb_generated.dart
--- a/packages/dart/lib/src/kreuzberg_bridge_generated/frb_generated.io.dart
+++ b/packages/dart/lib/src/kreuzberg_bridge_generated/frb_generated.io.dart
--- a/packages/dart/lib/src/kreuzberg_bridge_generated/frb_generated.web.dart
+++ b/packages/dart/lib/src/kreuzberg_bridge_generated/frb_generated.web.dart
--- a/packages/dart/lib/src/kreuzberg_bridge_generated/lib.dart
+++ b/packages/dart/lib/src/kreuzberg_bridge_generated/lib.dart
--- a/packages/dart/lib/src/kreuzberg_bridge_generated/lib.freezed.dart
+++ b/packages/dart/lib/src/kreuzberg_bridge_generated/lib.freezed.dart
--- a/packages/dart/lib/src/traits.dart
+++ b/packages/dart/lib/src/traits.dart
@@ -0,0 +1,646 @@
+// Generated by alef. Do not edit by hand.
+
+import 'kreuzberg_bridge_generated/lib.dart';
+import 'dart:typed_data';
+
+/// Dart counterpart of the `OcrBackend` Rust trait.
+///
+/// Every member is abstract: implement all of them, then register via:
+/// ```dart
+/// class MyOcrBackend implements OcrBackend {
+///   @override
+///   Future<ExtractionResult> processImage(...) async { ... }
+///   @override
+///   Future<ExtractionResult> processImageFile(...) async { ... }
+///   @override
+///   Future<bool> supportsLanguage(...) async { ... }
+///   @override
+///   Future<OcrBackendType> backendType(...) async { ... }
+///   @override
+///   Future<List<String>> supportedLanguages(...) async { ... }
+///   @override
+///   Future<bool> supportsTableDetection(...) async { ... }
+///   @override
+///   Future<bool> supportsDocumentProcessing(...) async { ... }
+///   @override
+///   Future<ExtractionResult> processDocument(...) async { ... }
+/// }
+///
+/// final impl = createOcrBackendDartImpl(
+///   processImage: (...) => myInstance.processImage(...),
+///   processImageFile: (...) => myInstance.processImageFile(...),
+///   supportsLanguage: (...) => myInstance.supportsLanguage(...),
+///   backendType: (...) => myInstance.backendType(...),
+///   supportedLanguages: (...) => myInstance.supportedLanguages(...),
+///   supportsTableDetection: (...) => myInstance.supportsTableDetection(...),
+///   supportsDocumentProcessing: (...) => myInstance.supportsDocumentProcessing(...),
+///   processDocument: (...) => myInstance.processDocument(...),
+/// );
+/// ```
+///
+abstract class OcrBackend {
+  /// Process an image and extract text via OCR.
+  ///
+  /// # Arguments
+  ///
+  /// * [imageBytes] - Raw image data (JPEG, PNG, TIFF, etc.)
+  /// * [config] - OCR configuration (language, PSM mode, etc.)
+  ///
+  /// # Returns
+  ///
+  /// An [ExtractionResult] containing the extracted text and metadata.
+  ///
+  /// # Errors
+  ///
+  /// - `KreuzbergError::Ocr` - OCR processing failed
+  /// - `KreuzbergError::Validation` - Invalid image format or configuration
+  /// - `KreuzbergError::Io` - I/O errors (these always bubble up)
+  ///
+  /// # Reading `backend_options`
+  ///
+  /// Backends that support runtime tuning can read `config.backend_options` and
+  /// deserialize only the keys they care about. Unknown keys are silently ignored,
+  /// so multiple backends can coexist in a pipeline without key conflicts.
+  ///
+  /// ```rust
+  /// async fn process_image(&self, image_bytes: &[u8], config: &OcrConfig) -> Result<ExtractionResult> {
+  ///     // Read backend-specific options; unknown keys are silently ignored.
+  ///     let fast_mode = config.backend_options
+  ///         .as_ref()
+  ///         .and_then(|v| v.get("mode"))
+  ///         .and_then(|v| v.as_str())
+  ///         .map(|s| s == "fast")
+  ///         .unwrap_or(false);
+  ///
+  ///     if image_bytes.is_empty() {
+  ///         return Err(kreuzberg::KreuzbergError::Validation {
+  ///             message: "Empty image data".to_string(),
+  ///             source: None,
+  ///         });
+  ///     }
+  ///
+  ///     let text = if fast_mode {
+  ///         "Fast OCR result".to_string()
+  ///     } else {
+  ///         format!("Extracted text in language: {}", config.language)
+  ///     };
+  ///
+  ///     Ok(ExtractionResult {
+  ///         content: text,
+  ///         mime_type: Cow::Borrowed("text/plain"),
+  ///         ..Default::default()
+  ///     })
+  /// }
+  /// ```
+  /// Fallible: failure is surfaced as `anyhow::Error` by the Rust bindings.
+  Future<ExtractionResult> processImage(Uint8List imageBytes, OcrConfig config);
+  /// Process a file and extract text via OCR.
+  ///
+  /// The Rust trait's default reads the file and calls `process_image`; this
+  /// Dart declaration is abstract, so every implementation must supply it.
+  ///
+  /// # Arguments
+  ///
+  /// * [path] - Path to the image file
+  /// * [config] - OCR configuration
+  ///
+  /// # Errors
+  ///
+  /// Same as [processImage], plus file I/O errors.
+  /// Fallible: failure is surfaced as `anyhow::Error` by the Rust bindings.
+  Future<ExtractionResult> processImageFile(String path, OcrConfig config);
+  /// Check if this backend supports a given language code.
+  ///
+  /// # Arguments
+  ///
+  /// * [lang] - ISO 639-2/3 language code (e.g., "eng", "deu", "fra")
+  ///
+  /// # Returns
+  ///
+  /// `true` if the language is supported, `false` otherwise.
+  ///
+  /// # Example
+  ///
+  /// ```rust
+  /// fn supports_language(&self, lang: &str) -> bool {
+  ///     self.languages.contains(&lang.to_string())
+  /// }
+  /// ```
+  Future<bool> supportsLanguage(String lang);
+  /// Get the backend type identifier.
+  ///
+  /// # Returns
+  ///
+  /// The backend type enum value.
+  ///
+  /// # Example
+  ///
+  /// ```rust
+  /// fn backend_type(&self) -> OcrBackendType {
+  ///     OcrBackendType::Tesseract
+  /// }
+  /// ```
+  Future<OcrBackendType> backendType();
+  /// Get a list of all supported languages.
+  ///
+  /// The Rust trait defaults to an empty list; this Dart method is abstract and must be implemented.
+  Future<List<String>> supportedLanguages();
+  /// Check if the backend supports table detection.
+  ///
+  /// The Rust trait defaults to `false`; this Dart method is abstract and must be implemented.
+  Future<bool> supportsTableDetection();
+  /// Check if the backend supports direct document-level processing (e.g. for PDFs).
+  ///
+  /// The Rust trait defaults to `false`; this Dart method is abstract and must be implemented.
+  Future<bool> supportsDocumentProcessing();
+  /// Process a document file directly via OCR.
+  ///
+  /// Only called if [supportsDocumentProcessing] returns `true`.
+  ///
+  /// # Arguments
+  ///
+  /// * [path] - Path to the document file (e.g. .pdf)
+  /// * [config] - OCR configuration
+  /// Fallible: failure is surfaced as `anyhow::Error` by the Rust bindings.
+  Future<ExtractionResult> processDocument(String path, OcrConfig config);
+}
+
+/// Dart counterpart of the `PostProcessor` Rust trait.
+///
+/// Every member is abstract: implement all of them, then register via:
+/// ```dart
+/// class MyPostProcessor implements PostProcessor {
+///   @override
+///   Future<void> process(...) async { ... }
+///   @override
+///   Future<ProcessingStage> processingStage(...) async { ... }
+///   @override
+///   Future<bool> shouldProcess(...) async { ... }
+///   @override
+///   Future<int> estimatedDurationMs(...) async { ... }
+///   @override
+///   Future<int> priority(...) async { ... }
+/// }
+///
+/// final impl = createPostProcessorDartImpl(
+///   process: (...) => myInstance.process(...),
+///   processingStage: (...) => myInstance.processingStage(...),
+///   shouldProcess: (...) => myInstance.shouldProcess(...),
+///   estimatedDurationMs: (...) => myInstance.estimatedDurationMs(...),
+///   priority: (...) => myInstance.priority(...),
+/// );
+/// ```
+///
+abstract class PostProcessor {
+  /// Process an extraction result.
+  ///
+  /// Transform or enrich the extraction result. Can modify:
+  /// - `content` - The extracted text
+  /// - `metadata` - Add or update metadata fields
+  /// - `tables` - Modify or enhance table data
+  ///
+  /// # Arguments
+  ///
+  /// * [result] - The extraction result to process (a mutable reference in the Rust trait)
+  /// * [config] - Extraction configuration
+  ///
+  /// # Returns
+  ///
+  /// Nothing. In Rust terms: `Ok(())` if processing succeeded, `Err(...)` for fatal failures.
+  ///
+  /// # Errors
+  ///
+  /// Return errors for fatal processing failures. Non-fatal errors should be
+  /// captured in metadata directly on the result.
+  ///
+  /// # Performance
+  ///
+  /// The Rust trait takes a mutable reference rather than ownership, avoiding
+  /// unnecessary cloning, and processors modify the result in place there.
+  /// NOTE(review): confirm whether changes made to [result] in Dart reach Rust.
+  ///
+  /// # Example - Language Detection
+  ///
+  /// ```rust
+  /// async fn process(&self, result: &mut ExtractionResult, config: &ExtractionConfig)
+  ///     -> Result<()> {
+  ///     // Detect language (simplified - use real detection library in practice)
+  ///     let language = "en"; // Placeholder detection
+  ///
+  ///     // Add to metadata
+  ///     result.metadata.additional.insert("detected_language".to_string().into(), serde_json::json!(language));
+  ///
+  ///     Ok(())
+  /// }
+  /// ```
+  ///
+  /// # Example - Text Cleaning
+  ///
+  /// ```rust
+  /// async fn process(&self, result: &mut ExtractionResult, config: &ExtractionConfig)
+  ///     -> Result<()> {
+  ///     // Remove excessive whitespace
+  ///     result.content = result
+  ///         .content
+  ///         .split_whitespace()
+  ///         .collect::<Vec<_>>()
+  ///         .join(" ");
+  ///
+  ///     Ok(())
+  /// }
+  /// ```
+  /// Fallible: failure is surfaced as `anyhow::Error` by the Rust bindings.
+  Future<void> process(ExtractionResult result, ExtractionConfig config);
+  /// Get the processing stage for this post-processor.
+  ///
+  /// Determines when this processor runs in the pipeline.
+  ///
+  /// # Returns
+  ///
+  /// The `ProcessingStage` (Early, Middle, or Late).
+  ///
+  /// # Example
+  ///
+  /// ```rust
+  /// fn processing_stage(&self) -> ProcessingStage {
+  ///     ProcessingStage::Early  // Run before other processors
+  /// }
+  /// ```
+  Future<ProcessingStage> processingStage();
+  /// Check if this processor should run for a given result.
+  ///
+  /// Allows conditional processing based on MIME type, metadata, or content.
+  /// The Rust trait defaults to `true` (always run); this Dart method is abstract.
+  ///
+  /// # Arguments
+  ///
+  /// * [result] - The extraction result to check
+  /// * [config] - Extraction configuration
+  ///
+  /// # Returns
+  ///
+  /// `true` if the processor should run, `false` to skip.
+  ///
+  /// # Example
+  ///
+  /// ```rust
+  /// /// Only process PDF documents
+  /// fn should_process(&self, result: &ExtractionResult, config: &ExtractionConfig) -> bool {
+  ///     result.mime_type == "application/pdf"
+  /// }
+  /// ```
+  Future<bool> shouldProcess(ExtractionResult result, ExtractionConfig config);
+  /// Estimate processing time in milliseconds.
+  ///
+  /// Used for logging and debugging. The Rust default is 0 (unknown); this Dart method is abstract.
+  ///
+  /// # Arguments
+  ///
+  /// * [result] - The extraction result to estimate for
+  ///
+  /// # Returns
+  ///
+  /// Estimated processing time in milliseconds.
+  Future<int> estimatedDurationMs(ExtractionResult result);
+  /// Execution priority within the processing stage.
+  ///
+  /// Higher values run first within the same `ProcessingStage`. Rust default: 50.
+  /// Use 0-49 for fallback processors, 50 for normal processors, and 51-255
+  /// for high-priority processors that should run early in their stage.
+  Future<int> priority();
+}
+
+/// Dart counterpart of the `Validator` Rust trait.
+///
+/// Every member is abstract: implement all of them, then register via:
+/// ```dart
+/// class MyValidator implements Validator {
+///   @override
+///   Future<void> validate(...) async { ... }
+///   @override
+///   Future<bool> shouldValidate(...) async { ... }
+///   @override
+///   Future<int> priority(...) async { ... }
+/// }
+///
+/// final impl = createValidatorDartImpl(
+///   validate: (...) => myInstance.validate(...),
+///   shouldValidate: (...) => myInstance.shouldValidate(...),
+///   priority: (...) => myInstance.priority(...),
+/// );
+/// ```
+///
+abstract class Validator {
+  /// Validate an extraction result.
+  ///
+  /// Check the extraction result; in Rust terms, return `Ok(())` if valid, or
+  /// an error if validation fails.
+  ///
+  /// # Arguments
+  ///
+  /// * [result] - The extraction result to validate
+  /// * [config] - Extraction configuration
+  ///
+  /// # Returns
+  ///
+  /// - `Ok(())` if validation passes
+  /// - `Err(...)` if validation fails (extraction will fail)
+  ///
+  /// # Errors
+  ///
+  /// - `KreuzbergError::Validation` - Validation failed
+  /// - Any other error type appropriate for the failure
+  ///
+  /// # Example - Content Length Validation
+  ///
+  /// ```rust
+  /// async fn validate(&self, result: &ExtractionResult, config: &ExtractionConfig)
+  ///     -> Result<()> {
+  ///     let length = result.content.len();
+  ///
+  ///     if length < self.min {
+  ///         return Err(KreuzbergError::validation(format!(
+  ///             "Content too short: {} < {} characters",
+  ///             length, self.min
+  ///         )));
+  ///     }
+  ///
+  ///     if length > self.max {
+  ///         return Err(KreuzbergError::validation(format!(
+  ///             "Content too long: {} > {} characters",
+  ///             length, self.max
+  ///         )));
+  ///     }
+  ///
+  ///     Ok(())
+  /// }
+  /// ```
+  ///
+  /// # Example - Quality Score Validation
+  ///
+  /// ```rust
+  /// async fn validate(&self, result: &ExtractionResult, config: &ExtractionConfig)
+  ///     -> Result<()> {
+  ///     // Check if quality_score exists in metadata
+  ///     let score = result.metadata
+  ///         .additional
+  ///         .get("quality_score")
+  ///         .and_then(|v| v.as_f64())
+  ///         .unwrap_or(0.0);
+  ///
+  ///     if score < self.min_score {
+  ///         return Err(KreuzbergError::validation(format!(
+  ///             "Quality score too low: {} < {}",
+  ///             score, self.min_score
+  ///         )));
+  ///     }
+  ///
+  ///     Ok(())
+  /// }
+  /// ```
+  ///
+  /// # Example - Security Validation
+  ///
+  /// ```rust
+  /// async fn validate(&self, result: &ExtractionResult, config: &ExtractionConfig)
+  ///     -> Result<()> {
+  ///     // Check for blocked patterns
+  ///     for pattern in &self.blocked_patterns {
+  ///         if result.content.contains(pattern) {
+  ///             return Err(KreuzbergError::validation(format!(
+  ///                 "Content contains blocked pattern: {}",
+  ///                 pattern
+  ///             )));
+  ///         }
+  ///     }
+  ///
+  ///     Ok(())
+  /// }
+  /// ```
+  /// Fallible: failure is surfaced as `anyhow::Error` by the Rust bindings.
+  Future<void> validate(ExtractionResult result, ExtractionConfig config);
+  /// Check if this validator should run for a given result.
+  ///
+  /// Allows conditional validation based on MIME type, metadata, or content.
+  /// The Rust trait defaults to `true` (always run); this Dart method is abstract.
+  ///
+  /// # Arguments
+  ///
+  /// * [result] - The extraction result to check
+  /// * [config] - Extraction configuration
+  ///
+  /// # Returns
+  ///
+  /// `true` if the validator should run, `false` to skip.
+  ///
+  /// # Example
+  ///
+  /// ```rust
+  /// /// Only validate PDF documents
+  /// fn should_validate(&self, result: &ExtractionResult, config: &ExtractionConfig) -> bool {
+  ///     result.mime_type == "application/pdf"
+  /// }
+  /// ```
+  Future<bool> shouldValidate(ExtractionResult result, ExtractionConfig config);
+  /// Get the validation priority.
+  ///
+  /// Higher priority validators run first. Useful for ordering validation checks
+  /// (e.g., run cheap validations before expensive ones).
+  ///
+  /// The Rust trait's default priority is 50; this Dart method is abstract.
+  ///
+  /// # Returns
+  ///
+  /// Priority value (higher = runs earlier).
+  ///
+  /// # Example
+  ///
+  /// ```rust
+  /// /// Run this validator first (it's fast)
+  /// fn priority(&self) -> i32 {
+  ///     100
+  /// }
+  /// ```
+  Future<int> priority();
+}
+
+/// Dart counterpart of the `EmbeddingBackend` Rust trait.
+///
+/// Every member is abstract: implement all of them, then register via:
+/// ```dart
+/// class MyEmbeddingBackend implements EmbeddingBackend {
+///   @override
+///   Future<int> dimensions(...) async { ... }
+///   @override
+///   Future<List<Float64List>> embed(...) async { ... }
+/// }
+///
+/// final impl = createEmbeddingBackendDartImpl(
+///   dimensions: (...) => myInstance.dimensions(...),
+///   embed: (...) => myInstance.embed(...),
+/// );
+/// ```
+///
+abstract class EmbeddingBackend {
+  /// Embedding vector dimension. Must be `> 0` and must match the length of
+  /// every vector returned by [embed].
+  Future<int> dimensions();
+  /// Embed a batch of [texts], returning one vector per input in order.
+  ///
+  /// # Errors
+  ///
+  /// Implementations should return `Plugin` for
+  /// backend-specific failures. The dispatcher layers its own validation
+  /// (length, per-vector dimension) on top.
+  /// Fallible: failure is surfaced as `anyhow::Error` by the Rust bindings.
+  Future<List<Float64List>> embed(List<String> texts);
+}
+
+/// Dart counterpart of the `DocumentExtractor` Rust trait.
+///
+/// Every member is abstract: implement all of them, then register via:
+/// ```dart
+/// class MyDocumentExtractor implements DocumentExtractor {
+///   @override
+///   Future<InternalDocumentBridge> extractBytes(...) async { ... }
+///   @override
+///   Future<InternalDocumentBridge> extractFile(...) async { ... }
+///   @override
+///   Future<List<String>> supportedMimeTypes(...) async { ... }
+///   @override
+///   Future<int> priority(...) async { ... }
+///   @override
+///   Future<bool> canHandle(...) async { ... }
+/// }
+///
+/// final impl = createDocumentExtractorDartImpl(
+///   extractBytes: (...) => myInstance.extractBytes(...),
+///   extractFile: (...) => myInstance.extractFile(...),
+///   supportedMimeTypes: (...) => myInstance.supportedMimeTypes(...),
+///   priority: (...) => myInstance.priority(...),
+///   canHandle: (...) => myInstance.canHandle(...),
+/// );
+/// ```
+///
+abstract class DocumentExtractor {
+  /// Extract content from a byte array.
+  ///
+  /// This is the core extraction method that processes in-memory document data.
+  ///
+  /// # Arguments
+  ///
+  /// * [content] - Raw document bytes
+  /// * [mimeType] - MIME type of the document (already validated)
+  /// * [config] - Extraction configuration
+  ///
+  /// # Returns
+  ///
+  /// An [InternalDocumentBridge] containing the extracted elements, metadata, and tables.
+  /// The pipeline will convert this into the public `ExtractionResult`.
+  ///
+  /// # Errors
+  ///
+  /// - `KreuzbergError::Parsing` - Document parsing failed
+  /// - `KreuzbergError::Validation` - Invalid document structure
+  /// - `KreuzbergError::Io` - I/O errors (these always bubble up)
+  /// - `KreuzbergError::MissingDependency` - Required dependency not available
+  /// Fallible: failure is surfaced as `anyhow::Error` by the Rust bindings.
+  Future<InternalDocumentBridge> extractBytes(Uint8List content, String mimeType, ExtractionConfig config);
+  /// Extract content from a file.
+  ///
+  /// The Rust trait's default reads the file and calls `extract_bytes`; this
+  /// Dart declaration is abstract, so every implementation must supply it.
+  ///
+  /// # Arguments
+  ///
+  /// * [path] - Path to the document file
+  /// * [mimeType] - MIME type of the document (already validated)
+  /// * [config] - Extraction configuration
+  ///
+  /// # Returns
+  ///
+  /// An [InternalDocumentBridge] containing the extracted elements, metadata, and tables.
+  ///
+  /// # Errors
+  ///
+  /// Same as [extractBytes], plus file I/O errors.
+  /// Fallible: failure is surfaced as `anyhow::Error` by the Rust bindings.
+  Future<InternalDocumentBridge> extractFile(String path, String mimeType, ExtractionConfig config);
+  /// Get the list of MIME types supported by this extractor.
+  ///
+  /// Can include exact MIME types and prefix patterns:
+  /// - Exact: `"application/pdf"`, `"text/plain"`
+  /// - Prefix: `"image/*"` (matches any image type)
+  ///
+  /// # Returns
+  ///
+  /// A list of MIME type strings.
+  Future<List<String>> supportedMimeTypes();
+  /// Get the priority of this extractor.
+  ///
+  /// Higher priority extractors are preferred when multiple extractors
+  /// support the same MIME type.
+  ///
+  /// # Priority Guidelines
+  ///
+  /// - **0-25**: Fallback/low-quality extractors
+  /// - **26-49**: Alternative extractors
+  /// - **50**: Default priority (built-in extractors)
+  /// - **51-75**: Premium/enhanced extractors
+  /// - **76-100**: Specialized/high-priority extractors
+  ///
+  /// # Returns
+  ///
+  /// Priority value (Rust default: 50; this Dart method is abstract)
+  Future<int> priority();
+  /// Check if this extractor can handle a specific file.
+  ///
+  /// Allows for more sophisticated detection beyond MIME types.
+  /// The Rust trait defaults to `true` (rely on MIME type matching); this Dart method is abstract.
+  ///
+  /// # Arguments
+  ///
+  /// * [path] - Path to the file to check
+  /// * [mimeType] - Detected MIME type
+  ///
+  /// # Returns
+  ///
+  /// `true` if the extractor can handle this file, `false` otherwise.
+  Future<bool> canHandle(String path, String mimeType);
+}
+
+/// Dart counterpart of the `Renderer` Rust trait.
+///
+/// Implement the single abstract method, then register via:
+/// ```dart
+/// class MyRenderer implements Renderer {
+///   @override
+///   Future<String> render(...) async { ... }
+/// }
+///
+/// final impl = createRendererDartImpl(
+///   render: (...) => myInstance.render(...),
+/// );
+/// ```
+///
+abstract class Renderer {
+  /// Render an [InternalDocumentBridge] to the output format.
+  ///
+  /// # Arguments
+  ///
+  /// * [doc] - The internal document to render
+  ///
+  /// # Returns
+  ///
+  /// The rendered output as a string.
+  ///
+  /// # Errors
+  ///
+  /// Returns an error if rendering fails.
+  /// Fallible: failure is surfaced as `anyhow::Error` by the Rust bindings.
+  Future<String> render(InternalDocumentBridge doc);
+}
+
+
+/// Member-less stub for a synchronous extractor trait; used by e2e test plugin_api stubs.
+abstract class SyncExtractor {}
--- a/packages/dart/pubspec.yaml
+++ b/packages/dart/pubspec.yaml
@@ -0,0 +1,20 @@
+name: kreuzberg
+description: High-performance document intelligence library
+version: 5.0.0-rc.3
+repository: https://github.com/kreuzberg-dev/kreuzberg
+homepage: https://kreuzberg.dev
+environment:
+  sdk: '>=3.11.0 <4.0.0'
+dependencies:
+  # FRB runtime is pure-Dart; works in both Flutter and server-Dart contexts.
+  flutter_rust_bridge: '2.12.0'
+  # FRB codegen-2.x emits `@freezed` sealed classes annotated with these.
+  freezed_annotation: '^3.1.0'
+  json_annotation: '^4.11.0'
+dev_dependencies:
+  test: '^1.25.0'
+  lints: '^6.1.0'
+  # Required by flutter_rust_bridge_codegen 2.x for sealed classes.
+  freezed: '^3.2.5'
+  build_runner: '^2.15.0'
+  json_serializable: '^6.13.2'
--- a/packages/dart/rust/Cargo.toml
+++ b/packages/dart/rust/Cargo.toml
@@ -0,0 +1,31 @@
+[package]
+name = "kreuzberg-dart"
+version = "5.0.0-rc.3"
+edition = "2024"
+license = "Elastic-2.0"
+
+[package.metadata.cargo-machete]
+# Umbrella + sibling crates are pulled in so flutter_rust_bridge can resolve
+# every referenced type, but the generated Rust wrapper only `use`s a subset.
+ignored = ["kreuzberg"]
+
+[lib]
+crate-type = ["cdylib", "staticlib"]
+
+[dependencies]
+flutter_rust_bridge = "=2.12.0"
+serde = { version = "1", features = ["derive"] }
+serde_json = "1"
+tokio = { version = "1", features = ["rt-multi-thread", "sync"] }
+async-trait = "0.1"
+
+[target.'cfg(not(all(target_os = "android", target_arch = "x86_64")))'.dependencies]
+kreuzberg = { path = "../../../crates/kreuzberg", features = ["full"] }
+
+[target.'cfg(all(target_os = "android", target_arch = "x86_64"))'.dependencies]
+kreuzberg = { path = "../../../crates/kreuzberg", default-features = false, features = ["android-target"] }
+
+[lints.rust]
+# flutter_rust_bridge uses #[cfg(frb_expand)] internally during macro expansion.
+# Declare it as a known cfg so rustc does not emit unexpected_cfgs warnings.
+unexpected_cfgs = { level = "warn", check-cfg = ['cfg(frb_expand)'] }
--- a/packages/dart/rust/build.rs
+++ b/packages/dart/rust/build.rs
@@ -0,0 +1,172 @@
+use std::path::Path;
+
+/// Build-script entry point for the Dart binding crate.
+///
+/// Tries to run `flutter_rust_bridge_codegen generate`. When the tool succeeds,
+/// the generated `src/frb_generated.rs` is passed through rustfmt and the
+/// generated Dart entrypoint is patched via `patch_published_loader`. When the
+/// tool is not on PATH, a cargo warning is emitted and the build continues.
+/// rustfmt problems are reported as cargo warnings only.
+///
+/// # Panics
+///
+/// Panics if the codegen tool runs but exits unsuccessfully, or if spawning it
+/// fails for any reason other than the executable not being found.
+fn main() {
+    // Re-run whenever any Rust source changes, or when the FRB codegen
+    // configuration changes. Once any `rerun-if-changed` directive is emitted,
+    // cargo only watches the listed paths, so the config file must be listed
+    // explicitly or edits to it would never re-trigger codegen.
+    println!("cargo:rerun-if-changed=src");
+    println!("cargo:rerun-if-changed=flutter_rust_bridge.yaml");
+
+    // Optional FRB codegen: regenerate flutter_rust_bridge artifacts when the
+    // tool is on PATH. Missing tool is not fatal — committed generated sources
+    // are checked in, and CI environments without FRB still build cleanly.
+    match std::process::Command::new("flutter_rust_bridge_codegen")
+        .args(["generate", "--config-file", "flutter_rust_bridge.yaml"])
+        .status()
+    {
+        Ok(status) if status.success() => {
+            // FRB v2.12+ emits `use` lists in an order rustfmt 2024 edition rewrites
+            // (e.g. `{transform_result_dco, Lifetimeable, Lockable}` →
+            // `{Lifetimeable, Lockable, transform_result_dco}`). Run rustfmt against
+            // the generated file so committed output is fmt-clean and `cargo fmt --check`
+            // stays green in CI.
+            match std::process::Command::new("rustfmt")
+                .args(["--edition", "2024", "src/frb_generated.rs"])
+                .status()
+            {
+                Ok(s) if s.success() => {}
+                // Formatting is best-effort: a non-zero exit only warns.
+                Ok(s) => println!("cargo:warning=rustfmt on src/frb_generated.rs exited {s}"),
+                Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
+                    println!(
+                        "cargo:warning=rustfmt not on PATH — skipping post-FRB format. Install rustfmt via rustup to keep generated bridge sources fmt-clean."
+                    );
+                }
+                Err(err) => println!("cargo:warning=failed to spawn rustfmt: {err}"),
+            }
+
+            // Patch the generated Dart entrypoint so the published package resolves
+            // its native library from its own installed location.
+            patch_published_loader();
+        }
+        // The tool ran but reported failure: the generated sources may be
+        // incomplete, so abort the build instead of compiling stale output.
+        Ok(status) => panic!("flutter_rust_bridge_codegen generate failed (exit code: {status})"),
+        Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
+            // The codegen tool is a Rust binary; the install hint matches the
+            // command documented in this package's BUILDING.md.
+            println!(
+                "cargo:warning=flutter_rust_bridge_codegen not on PATH — skipping codegen. Install via `cargo install flutter_rust_bridge_codegen` to regenerate FRB artifacts at build time."
+            );
+        }
+        Err(err) => panic!("failed to spawn flutter_rust_bridge_codegen: {err}"),
+    }
+}
+
+/// Relative path of the FRB-generated Dart entrypoint that
+/// `patch_published_loader` reads, rewrites, and passes to `dart format`.
+const FRB_GENERATED_DART: &str = "../lib/src/kreuzberg_bridge_generated/frb_generated.dart";
+/// Name of the helper injected by `FRB_INIT_REPLACEMENT`. Finding it in the
+/// Dart file means the patch was already applied, so patching is skipped.
+const LOADER_MARKER: &str = "_alefResolveExternalLibrary";
+/// Exact text (doc comment plus opening of the `init` signature) that is
+/// searched for in the generated Dart file and swapped for
+/// `FRB_INIT_REPLACEMENT`. The match is literal, including indentation and
+/// newlines; if the text is absent the patch is skipped with a warning.
+const FRB_INIT_PROLOGUE: &str = "  /// Initialize flutter_rust_bridge\n  static Future<void> init({\n    RustLibApi? api,\n    BaseHandler? handler,\n    ExternalLibrary? externalLibrary,\n    bool forceSameCodegenVersion = true,\n  }) async {\n";
+/// Dart source that replaces `FRB_INIT_PROLOGUE` in the generated entrypoint.
+///
+/// It defines three static helpers (`_alefResolveExternalLibrary`,
+/// `_alefHostRid`, `_alefHostLibNames`) and then re-emits the original `init`
+/// doc comment and signature followed by one extra statement that fills in
+/// `externalLibrary` from the resolver when the caller passed none. The text
+/// ends right after that statement, so the remainder of the generated `init`
+/// body continues unchanged. The whole literal is written verbatim into the
+/// Dart file; the Dart code uses `File`/`Platform`, `Isolate` and `Abi`, which
+/// is why `patch_published_loader` also ensures the `dart:io`, `dart:isolate`
+/// and `dart:ffi` imports exist.
+const FRB_INIT_REPLACEMENT: &str = r#"  /// Resolve the prebuilt native library from this package's own installed
+  /// location so the load works from any working directory and under hardened
+  /// runtimes. Returns `null` to defer to flutter_rust_bridge's default loader.
+  ///
+  /// Published pub.dev packages stage natives under `lib/src/native/<rid>/`
+  /// (e.g. `macos-arm64`, `linux-x64`). For local FRB-dev builds the dylib is
+  /// emitted into `lib/src/kreuzberg_bridge_generated/`; that
+  /// path is searched as a fallback.
+  static Future<ExternalLibrary?> _alefResolveExternalLibrary() async {
+    try {
+      final packageRoot =
+          await Isolate.resolvePackageUri(Uri.parse('package:kreuzberg/kreuzberg.dart'));
+      if (packageRoot == null) return null;
+      final libNames = _alefHostLibNames();
+      final searchDirs = <Uri>[
+        if (_alefHostRid() != null) packageRoot.resolve('src/native/${_alefHostRid()}/'),
+        packageRoot.resolve('src/kreuzberg_bridge_generated/'),
+      ];
+      for (final dir in searchDirs) {
+        for (final name in libNames) {
+          final libPath = dir.resolve(name).toFilePath();
+          if (File(libPath).existsSync()) {
+            return ExternalLibrary.open(libPath);
+          }
+        }
+      }
+    } catch (_) {
+      // Fall through to the default loader on any resolution failure.
+    }
+    return null;
+  }
+
+  /// Map the host platform to the pub.dev native staging RID. Returns `null`
+  /// for unrecognized host triples so the FRB-dev fallback path runs instead.
+  static String? _alefHostRid() {
+    final abi = Abi.current();
+    if (abi == Abi.macosArm64) return 'macos-arm64';
+    if (abi == Abi.macosX64) return 'macos-x64';
+    if (abi == Abi.linuxArm64) return 'linux-arm64';
+    if (abi == Abi.linuxX64) return 'linux-x64';
+    if (abi == Abi.windowsArm64) return 'windows-arm64';
+    if (abi == Abi.windowsX64) return 'windows-x64';
+    return null;
+  }
+
+  static List<String> _alefHostLibNames() {
+    // The Dart-binding Rust crate is `{stem}-dart` (per the cargo manifest
+    // template), which produces a cdylib named `lib{stem}_dart.{ext}` on Unix
+    // and `{stem}_dart.dll` on Windows.
+    if (Platform.isMacOS) return const ['libkreuzberg_dart.dylib'];
+    if (Platform.isWindows) return const ['kreuzberg_dart.dll'];
+    return const ['libkreuzberg_dart.so'];
+  }
+
+  /// Initialize flutter_rust_bridge
+  static Future<void> init({
+    RustLibApi? api,
+    BaseHandler? handler,
+    ExternalLibrary? externalLibrary,
+    bool forceSameCodegenVersion = true,
+  }) async {
+    externalLibrary ??= await _alefResolveExternalLibrary();
+"#;
+
+/// Rewrite the generated Dart entrypoint (`FRB_GENERATED_DART`) so that `init`
+/// first tries the package-local native-library resolver.
+///
+/// Steps: read the file, swap the first `FRB_INIT_PROLOGUE` occurrence for
+/// `FRB_INIT_REPLACEMENT`, add any missing `dart:io` / `dart:isolate` /
+/// `dart:ffi` imports, write the file back, then run `dart format` on it.
+///
+/// Safe to call repeatedly: nothing is written when the file cannot be read,
+/// when `LOADER_MARKER` is already present, or when the prologue text is
+/// missing. Every failure is reported as a cargo warning, never a panic.
+fn patch_published_loader() {
+    let dart_file = Path::new(FRB_GENERATED_DART);
+    let original = match std::fs::read_to_string(dart_file) {
+        Ok(text) => text,
+        Err(_) => {
+            println!("cargo:warning=published-loader patch skipped: {FRB_GENERATED_DART} not found");
+            return;
+        }
+    };
+
+    // Already patched on an earlier run.
+    if original.contains(LOADER_MARKER) {
+        return;
+    }
+    // The generated `init` signature does not match the expected literal text.
+    if !original.contains(FRB_INIT_PROLOGUE) {
+        println!("cargo:warning=published-loader patch skipped: FRB init prologue not found");
+        return;
+    }
+
+    let mut rewritten = original.replacen(FRB_INIT_PROLOGUE, FRB_INIT_REPLACEMENT, 1);
+
+    // Ensure the helper's `File`/`Isolate`/`Abi` dependencies are imported.
+    // Each missing import goes directly in front of the first import line that
+    // follows a newline, or at the very top when no such line exists.
+    for import_stmt in ["import 'dart:io';", "import 'dart:isolate';", "import 'dart:ffi';"] {
+        if rewritten.contains(import_stmt) {
+            continue;
+        }
+        let insert_at = rewritten.find("\nimport ").map_or(0, |newline| newline + 1);
+        rewritten.insert_str(insert_at, &format!("{import_stmt}\n"));
+    }
+
+    if rewritten == original {
+        return;
+    }
+    if let Err(err) = std::fs::write(dart_file, &rewritten) {
+        println!("cargo:warning=failed to write published-loader patch: {err}");
+        return;
+    }
+
+    // Formatting is best-effort; the patched file is already on disk.
+    let format_outcome = std::process::Command::new("dart")
+        .args(["format", FRB_GENERATED_DART])
+        .status();
+    match format_outcome {
+        Ok(status) if status.success() => {}
+        Ok(status) => println!("cargo:warning=dart format on {FRB_GENERATED_DART} exited {status}"),
+        Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
+            println!(
+                "cargo:warning=dart not on PATH — skipping post-patch format. Install Dart SDK to keep generated FRB Dart sources fmt-clean."
+            );
+        }
+        Err(err) => println!("cargo:warning=failed to spawn dart format: {err}"),
+    }
+}
--- a/packages/dart/rust/flutter_rust_bridge.yaml
+++ b/packages/dart/rust/flutter_rust_bridge.yaml
@@ -0,0 +1,4 @@
+rust_root: .
+rust_input: crate
+dart_output: ../lib/src/kreuzberg_bridge_generated
+add_mod_to_lib: false
--- a/packages/dart/rust/src/frb_generated.rs
+++ b/packages/dart/rust/src/frb_generated.rs
--- a/packages/dart/rust/src/lib.rs
+++ b/packages/dart/rust/src/lib.rs
--- a/packages/dart/test/kreuzberg_test.dart
+++ b/packages/dart/test/kreuzberg_test.dart
@@ -0,0 +1,7 @@
+import 'package:test/test.dart';
+
+/// Placeholder suite: it only confirms that the `package:test` harness runs
+/// and does not exercise any kreuzberg API.
+void main() {
+  test('placeholder', () => expect(1 + 1, equals(2)));
+}