[workspace]
alef_version = "0.21.0"
languages = [
    "python",
    "node",
    "ruby",
    "php",
    "ffi",
    "go",
    "java",
    "csharp",
    "elixir",
    "wasm",
    "r",
    "dart",
    "kotlin_android",
    "swift",
    "zig",
]

# Tool selection
# Force regen: Java bytes marshalling + FormatMetadata JsonUnwrapped fix
[workspace.tools]
python_package_manager = "uv"
node_package_manager = "pnpm"

# DTO styles
[workspace.dto]
python = "dataclass"
python_output = "typed-dict"
node = "interface"
ruby = "struct"
php = "readonly-class"
elixir = "struct"
go = "struct"
java = "record"
csharp = "record"
r = "list"

# Generation control
[workspace.generate]
bindings = true
errors = true
configs = true
async_wrappers = true
type_conversions = true
package_metadata = true
public_api = true

[workspace.generate_overrides.wasm]
async_wrappers = false

[workspace.format]
enabled = false

# Disable the default format_generated pass for node (pnpm dlx oxfmt .) which runs
# from the repo root and chokes on vendored HTML fixtures and Helm chart YAML.
# Post-generation formatting is handled by [crates.lint.node] format command instead.
[workspace.format_overrides.node]
enabled = false

# Version synchronization
[workspace.sync]
extra_paths = [
    "packages/python/kreuzberg/__init__.py",
    "packages/ruby/lib/kreuzberg/version.rb",
    "packages/ruby/ext/kreuzberg_rb/native/Cargo.toml",
    "crates/kreuzberg-node/package.json",
    "crates/kreuzberg-node/npm/*/package.json",
    "packages/go/v5/go.mod",
    "Cargo.toml",
]

[[crates]]
name = "kreuzberg"
core_import = "kreuzberg"
error_type = "KreuzbergError"
error_constructor = "kreuzberg::KreuzbergError::Other({msg})"
version_from = "Cargo.toml"
workspace_root = "."
sources = [
    # Public API surface — only files with types/functions re-exported from lib.rs
    "crates/kreuzberg/src/lib.rs",
    "crates/kreuzberg/src/error.rs",
    # Public types (re-exported via `pub use types::*`)
    "crates/kreuzberg/src/types/extraction.rs",
    "crates/kreuzberg/src/types/metadata.rs",
    "crates/kreuzberg/src/types/page.rs",
    "crates/kreuzberg/src/types/tables.rs",
    "crates/kreuzberg/src/types/annotations.rs",
    "crates/kreuzberg/src/types/document_structure.rs",
    "crates/kreuzberg/src/types/ocr_elements.rs",
    "crates/kreuzberg/src/types/formats.rs",
    "crates/kreuzberg/src/types/uri.rs",
    "crates/kreuzberg/src/types/djot.rs",
    "crates/kreuzberg/src/types/internal.rs",
    # Config types (re-exported via `pub use core::config::*`)
    "crates/kreuzberg/src/core/config/mod.rs",
    "crates/kreuzberg/src/core/config/extraction/types.rs",
    "crates/kreuzberg/src/core/config/extraction/core.rs",
    "crates/kreuzberg/src/core/config/extraction/file_config.rs",
    "crates/kreuzberg/src/core/config/ocr.rs",
    "crates/kreuzberg/src/core/config/page.rs",
    "crates/kreuzberg/src/core/config/pdf.rs",
    "crates/kreuzberg/src/core/config/html_output.rs",
    "crates/kreuzberg/src/core/config/layout.rs",
    "crates/kreuzberg/src/core/config/acceleration.rs",
    "crates/kreuzberg/src/core/config/llm.rs",
    "crates/kreuzberg/src/core/config/content_filter.rs",
    "crates/kreuzberg/src/core/config/tree_sitter.rs",
    "crates/kreuzberg/src/core/config/email.rs",
    "crates/kreuzberg/src/core/config/formats.rs",
    # Public MIME/format detection functions
    "crates/kreuzberg/src/core/mime.rs",
    "crates/kreuzberg/src/core/formats.rs",
    # Plugin trait (for trait bridges)
    "crates/kreuzberg/src/plugins/traits.rs",
    "crates/kreuzberg/src/plugins/embedding.rs",
    "crates/kreuzberg/src/plugins/registry/mod.rs",
    # Public module APIs
    "crates/kreuzberg/src/rendering/mod.rs",
    "crates/kreuzberg/src/keywords/mod.rs",
    "crates/kreuzberg/src/chunking/mod.rs",
    "crates/kreuzberg/src/embeddings/mod.rs",
    "crates/kreuzberg/src/language_detection/mod.rs",
    "crates/kreuzberg/src/cache/core.rs",
    # Batch extraction functions (re-exported from lib.rs)
    "crates/kreuzberg/src/core/extractor/mod.rs",
    "crates/kreuzberg/src/core/extractor/sync.rs",
    # PDF rendering (render_pdf_page_to_png — re-exported from lib.rs)
    "crates/kreuzberg/src/pdf/render.rs",
    # PdfMetadata — referenced in types/metadata.rs as Pdf(PdfMetadata) variant
    "crates/kreuzberg/src/pdf/metadata.rs",
    # Office metadata types (used in metadata.rs)
    "crates/kreuzberg/src/extraction/office_metadata/mod.rs",
    "crates/kreuzberg/src/extraction/office_metadata/core_properties.rs",
    "crates/kreuzberg/src/extraction/office_metadata/app_properties.rs",
    "crates/kreuzberg/src/extraction/office_metadata/custom_properties.rs",
    "crates/kreuzberg/src/extraction/office_metadata/odt_properties.rs",
    # OCR/PaddleOCR types (re-exported from lib.rs)
    "crates/kreuzberg/src/ocr/types.rs",
    "crates/kreuzberg/src/paddle_ocr/mod.rs",
    # Plugin registry operations (list_document_extractors etc.)
    "crates/kreuzberg/src/plugins/extractor/mod.rs",
    # OCR backend registry operations (register/unregister/list/clear)
    "crates/kreuzberg/src/plugins/ocr.rs",
    # Renderer trait and registry operations
    "crates/kreuzberg/src/plugins/renderer.rs",
    # Validator registry operations
    "crates/kreuzberg/src/plugins/validator/mod.rs",
    # Post-processor trait and registry operations
    "crates/kreuzberg/src/plugins/processor/trait.rs",
    "crates/kreuzberg/src/plugins/processor/mod.rs",
]
features = [
    "full",
    "pdf",
    "ocr",
    "paddle-ocr",
    "paddle-ocr-types",
    "layout-detection",
    "layout-types",
    "embeddings",
    "embedding-presets",
    "chunking",
    "keywords-yake",
    "keywords-rake",
    "language-detection",
    "html",
    "tree-sitter",
    "office",
    "email",
    "archives",
    "stopwords",
    "auto-rotate",
    "auto-rotate-types",
    "tokio-runtime",
    "api",
    "mcp",
    "liter-llm",
    "quality",
]

[crates.python]
module_name = "_kreuzberg"
exclude_types = [
    # Arc-wrapped opaque types — codegen doesn't generate .lock() for methods
    "PooledString",
]
exclude_functions = ["calculate_quality_score"]

[crates.python.stubs]
output = "packages/python/kreuzberg/"

[crates.node]
package_name = "@kreuzberg/node"
exclude_types = [
    # Opaque Arc-wrapped types — codegen doesn't generate .lock() calls for methods
    "StreamReader",
    "PooledString",
]
exclude_functions = ["calculate_quality_score"]

[crates.node.extra_dependencies]
# Required by alef-emitted #[serde_with::serde_as] attribute on HashMap<_, Vec>
# fields (e.g. binary cache maps). Not added by alef-backend-napi automatically.
serde_with = "3"

[crates.ruby]
gem_name = "kreuzberg"
exclude_functions = ["calculate_quality_score"]

[crates.ruby.stubs]
output = "packages/ruby/sig/"

[crates.php]
extension_name = "kreuzberg"
# Each type name appears exactly once in this list, under the comment that
# gives the reason for excluding it.
exclude_types = [
    # Enum types that don't have From implementation for PHP
    "ChunkerType",
    "OutputFormat",
    "UriKind",
    "ChunkType",
    # PooledString has methods incompatible with ext-php-rs (buffer_mut, deref_mut, fmt(f))
    # that trigger E0716/E0596/E0507 in the #[php_impl] macro expansion.
    "PooledString",
    # Tagged data enums whose flat-PHP-class From impls call .into() on PathBuf,
    # usize, TableGrid, Vec<(String,String)>, [(u32,u32);4] fields that don't have the
    # required Into implementations. Tracked upstream in alef-backend-php.
    "ChunkSizing",
    "EmbeddingModelType",
    "NodeContent",
    "OcrBoundingGeometry",
]
exclude_functions = ["calculate_quality_score"]

[crates.elixir]
app_name = "kreuzberg"
exclude_functions = [
    # batch_extract_* take Vec<(PathBuf, Option)> or
    # Vec<(Vec, String, Option<...>)> which codegen maps to Vec —
    # type mismatch; no safe tuple-marshalling across NIF boundary
    "batch_extract_files_sync",
    "batch_extract_bytes_sync",
    "batch_extract_files",
    "batch_extract_bytes",
    "calculate_quality_score",
]
exclude_types = [
    # Trait types cannot be FFI-bound
    "Recyclable",
]

[crates.wasm]
package_name = "@kreuzberg/wasm"
exclude_types = [
    # ORT/runtime-only types not available in the wasm-target feature set
    "OcrFallbackDecision",
    "OcrProcessor",
    "OcrCacheStats",
    "TessdataManager",
    "PageLayoutResult",
    # MCP parameter types not compatible with wasm-bindgen (Option fields)
    "BatchExtractFilesParams",
]
exclude_functions = [
    # paddle-ocr / layout-detection / embeddings / auto-rotate
    "embed_texts",
    "embed_text",
    "detect_layout",
    "detect_orientation",
    "run_ocr_pipeline",
    "process_ocr_element",
    "layout_runner",
    "record_success_on_current_span",
    "record_error_on_current_span",
    # ocr module (requires full ocr, not ocr-wasm)
    "compute_hash",
    "assemble_ocr_markdown",
    "validate_tesseract_version",
    # paddle_ocr module
    "is_language_supported",
    "language_to_script_family",
    "map_language_code",
    # embeddings module
    "normalize",
    "list_presets",
    # image module (ort-dependent preprocessing)
    "calculate_optimal_dpi",
    "calculate_smart_dpi",
    # ort_discovery
    "ensure_ort_available",
    # pdf layout_runner
    "run_layout_for_page",
    "run_layout_for_pdf",
    # telemetry internals
    "sanitize_path",
    # layout detection
    "config_from_extraction",
    "detect_layout_for_images",
    # Uses excluded types WasmDetectionResult/WasmRecognizedTable
    "recognize_page_tables",
    # Functions using excluded types/modules
    "generate_embeddings_for_chunks",
    "apply_heuristics",
    "greedy_nms",
    # docx extraction functions (internal, not public API)
    "detect_page_breaks_from_docx",
    "detect_table_page_numbers",
    # ooxml embedded object extraction (internal, returns complex types)
    "extract_ooxml_embedded_objects",
    "extract_and_process_embedded_files",
    # markdown utility functions (internal)
    "cells_to_text",
    "cells_to_markdown",
    # Table processing stubs (internal, incomplete implementations)
    "calculate_quality_score",
    "reconstruct_table",
    "table_to_markdown",
    "build_cell_grid",
    "post_process_table",
    "is_well_formed_table",
    "parse_jotdown_attributes",
    "parse_jotdown_inline",
    # Vec params not supported by wasm-bindgen
    "merge_segments",
    # MCP server functions (not available in WASM)
    "start_mcp_server_http",
    "start_mcp_server_http_with_config",
    # Vec<&str> vs &[String] / texts_refs scope — codegen type mismatch
    "batch_reduce_tokens",
    "chunk_texts_batch",
]
features = ["wasm-target"]

[crates.wasm.extra_dependencies]
async-trait = "0.1"

[crates.ffi]
prefix = "kreuzberg"
header_name = "kreuzberg.h"
lib_name = "kreuzberg_ffi"
visitor_callbacks = false
# Plugin trait-bridge error construction. The generated FFI plugin shims
# (`plugin_impl_initialize`, `plugin_impl_shutdown`) need to construct a
# `KreuzbergError` from a runtime String. KreuzbergError::Plugin is a struct
# variant with two fields and cannot be built via `From`, so we
# provide the literal here. The placeholder local `msg` carries the message.
plugin_error_constructor = "kreuzberg::KreuzbergError::Plugin { message: msg, plugin_name: String::new() }"
exclude_functions = ["calculate_quality_score"]

# x86_64-linux-android (emulator) lacks a pyke ORT prebuilt. The C FFI cdylib is
# embedded into Android app builds via jniLibs/x86_64/, so the kreuzberg-ffi
# crate must drop ORT-dependent features on that target. All other targets
# (incl. arm64 Android phones) keep the full ORT-enabled feature set.
[[crates.ffi.target_dep_overrides]]
cfg = 'all(target_os = "android", target_arch = "x86_64")'
features = ["android-target"]

[crates.go]
module = "github.com/kreuzberg-dev/kreuzberg/v5"
package_name = "kreuzberg"
exclude_functions = ["calculate_quality_score"]
exclude_types = [
    # Internal DTO used only at the FFI boundary (JSON-marshalled). Generated
    # Go bindings don't surface this type to consumers, so trait-bridge stubs
    # must substitute it with json.RawMessage / []byte.
    "InternalDocument",
    # SyncExtractor is a Rust-only marker trait for selecting the sync
    # extraction path; not exposed via the Go bindings — substitute with
    # json.RawMessage in trait-bridge stubs.
    "SyncExtractor",
]

[crates.java]
package = "dev.kreuzberg"
ffi_style = "panama"
exclude_types = [
    # Trait-bridge opaque types serialized as JSON strings at FFI boundary
    "InternalDocument",
    "OcrBackendType",
    "ProcessingStage",
    "SyncExtractor",
]
exclude_functions = ["calculate_quality_score"]

[crates.dart]
pubspec_name = "kreuzberg"
lib_name = "kreuzberg"
style = "frb"
features = ["full"]
stub_methods = ["batch_extract_bytes", "batch_extract_bytes_sync"]
exclude_functions = ["calculate_quality_score"]

# x86_64-linux-android (emulator) and x86_64-apple-ios (deprecated intel iOS
# simulator) both lack a pyke ORT prebuilt and there is no path to one short
# of compiling ORT from source for those triples. All other targets (incl.
# arm64 Android phones and arm64 iOS device/sim via pyke prebuilts) keep
# the full ORT-enabled feature set.
[[crates.dart.target_dep_overrides]]
cfg = 'all(target_os = "android", target_arch = "x86_64")'
features = ["android-target"]
default_features = false

[crates.kotlin_android]
# Self-contained Android library project at packages/kotlin-android/. Server-
# side Kotlin/JVM consumers use the Java binding directly (Kotlin interops
# with Java records natively); the Android AAR ships jniLibs/arm64-v8a/ +
# jniLibs/x86_64/ so Gradle embeds the kreuzberg-ffi cdylib into app builds
# without manual setup. Emitted by alef-backend-kotlin-android (alef ≥ 0.16).
package = "dev.kreuzberg"
namespace = "dev.kreuzberg"
artifact_id = "kreuzberg-android"
group_id = "dev.kreuzberg"
abis = ["arm64-v8a", "x86_64"]
exclude_functions = [
    # embed_texts_async creates naming conflict with suspend wrapper of embed_texts.
    # Callers should use embedTextsAsync (suspend) from embed_texts instead.
    "embed_texts_async",
    "calculate_quality_score",
]

[crates.swift]
module_name = "Kreuzberg"
package_name = "Kreuzberg"
features = ["full"]
exclude_types = [
    # Internal DTO type used by plugin trait-bridges; marshalled as JSON strings at FFI boundary
    "InternalDocument",
]
exclude_fields = [
    "ExtractionResult.ocr_internal_document",
    "OcrExtractionResult.internal_document",
]
exclude_functions = ["calculate_quality_score"]

[crates.csharp]
namespace = "Kreuzberg"
# project_file directs `dotnet format` to the .csproj inside packages/csharp/Kreuzberg/
# rather than the parent directory (which has no .sln or .csproj at its root).
project_file = "packages/csharp/Kreuzberg/Kreuzberg.csproj"
exclude_functions = ["calculate_quality_score"]

[crates.r]
package_name = "kreuzberg"
exclude_functions = ["calculate_quality_score"]

[crates.zig]
module_name = "kreuzberg"
exclude_functions = ["calculate_quality_score"]

[crates.output]
python = "crates/kreuzberg-py/src/"
node = "crates/kreuzberg-node/src/"
ruby = "packages/ruby/ext/kreuzberg_rb/src/"
php = "crates/kreuzberg-php/src/"
ffi = "crates/kreuzberg-ffi/src/"
go = "packages/go/v5/"
elixir = "packages/elixir/native/kreuzberg_nif/src/"
wasm = "crates/kreuzberg-wasm/src/"
java = "packages/java/"
csharp = "packages/csharp/src/"
r = "packages/r/src/rust/src/"
kotlin_android = "packages/kotlin-android/"
swift = "packages/swift/Sources/Kreuzberg/"
dart = "packages/dart/lib/src/"
zig = "packages/zig/src/"

[crates.exclude]
types = [
    # Generic types — alef(skip) source annotation not picked up for generic structs
    # (alef-extract does not propagate cfg_attr through type-parameter bounds).
    "Pool",
    # Internal docx/hwp parser types — kept hidden from bindings (still
    # alef(skip)-annotated in source); serde derives added in Phase 1 but
    # binding surface is unchanged. Listed here to defend against future
    # codegen passes that might re-emit them as transitive container fields.
    "kreuzberg::extraction::docx::parser::Table",
    "kreuzberg::extraction::docx::parser::TableCell",
    "kreuzberg::extraction::docx::parser::TableRow",
    "kreuzberg::extraction::docx::parser::HeaderFooter",
    "kreuzberg::extraction::docx::parser::Note",
    "kreuzberg::extraction::hwp::model::Section",
]
methods = [
    # From trait impls — auto-emitted by derive/trait-bridge
    "ApiError.from",
    # fmt trait impls — auto-emitted by derive, not callable from bindings
    "BBox.fmt",
    "LayoutDetection.fmt",
    "PooledString.fmt",
    # Deref/DerefMut impls for internal pooled types — not part of public API
    "PooledString.deref",
    "PooledString.deref_mut",
    "PooledString.buffer_mut",
    # Internal methods that can't easily be source-annotated (trait impls or unusual patterns)
    "BBox.center",
    "TessdataManager.manifest",
    # Deprecated Rust-only aliases — excluded from bindings
    "LayoutDetection.class",
    "LayoutRegion.class",
]

[crates.lint.python]
precondition = "command -v ruff >/dev/null 2>&1"
before = "cd packages/python && uv sync --no-install-project --no-install-workspace"
typecheck = "cd packages/python && uv run --no-sync mypy ."

[crates.lint.node]
precondition = "command -v pnpm >/dev/null 2>&1"
format = "pnpm dlx oxfmt crates/kreuzberg-node"
check = "pnpm dlx oxlint crates/kreuzberg-node"

[crates.lint.ruby]
precondition = "command -v bundle >/dev/null 2>&1"
before = "cd packages/ruby && bundle install"
format = "cd packages/ruby && bundle exec rubocop -a ."

[crates.lint.php]
precondition = "command -v composer >/dev/null 2>&1"
before = "cd packages/php && composer install"
format = "cd packages/php && vendor/bin/php-cs-fixer fix ."
check = "cd packages/php && vendor/bin/phpstan --configuration=phpstan.neon --memory-limit=512M"

[crates.lint.go]
before = "cargo build --release -p kreuzberg-ffi"

[crates.lint.elixir]
precondition = "command -v mix >/dev/null 2>&1"
before = "cd packages/elixir && mix deps.get"
check = "cd packages/elixir && mix credo"

[crates.lint.java]
precondition = "command -v mvn >/dev/null 2>&1"
check = "mvn -f packages/java/pom.xml spotless:check -q"

[crates.lint.kotlin_android]
precondition = "command -v gradle >/dev/null 2>&1"
format = "cd packages/kotlin-android && gradle ktlintFormat --no-daemon"
check = "cd packages/kotlin-android && gradle ktlintCheck --no-daemon"

[crates.lint.swift]
precondition = "command -v swift >/dev/null 2>&1"
format = "swift format --in-place --recursive packages/swift/Sources"
check = "swift format lint --recursive packages/swift/Sources"

[crates.lint.dart]
precondition = "command -v dart >/dev/null 2>&1"
format = "cd packages/dart && dart format ."
check = "cd packages/dart && dart analyze"

[crates.lint.zig]
precondition = "command -v zig >/dev/null 2>&1"
format = "cd packages/zig && zig fmt src"
check = "cd packages/zig && zig fmt --check src"

[crates.update.r]
# Same silent probe form (stdout and stderr discarded) as the other preconditions.
precondition = "command -v Rscript >/dev/null 2>&1"
update = "cd packages/r && Rscript -e \"if (!requireNamespace('remotes', quietly = TRUE)) install.packages('remotes', repos = 'https://cloud.r-project.org'); remotes::update_packages(ask = FALSE)\""
upgrade = "cd packages/r && Rscript -e \"if (!requireNamespace('remotes', quietly = TRUE)) install.packages('remotes', repos = 'https://cloud.r-project.org'); remotes::update_packages(ask = FALSE)\""

[crates.update.kotlin_android]
# kotlin-android has no managed lockfile — dependency versions are pinned
# directly in the alef-generated build.gradle.kts. Updating means running
# alef regen with bumped versions in the upstream alef-backend-kotlin-android,
# not a gradle command. The `dependencyUpdates` task would require applying
# the ben-manes versions plugin which is not part of the generated template.
precondition = "command -v gradle >/dev/null 2>&1"
update = "echo 'kotlin-android: no-op (versions pinned by alef regen)'"
upgrade = "echo 'kotlin-android: no-op (versions pinned by alef regen)'"

[crates.update.swift]
precondition = "command -v swift >/dev/null 2>&1"
update = "cd packages/swift && swift package update"
upgrade = "cd packages/swift && swift package update"

[crates.update.dart]
precondition = "command -v dart >/dev/null 2>&1"
update = "cd packages/dart && dart pub upgrade"
upgrade = "cd packages/dart && dart pub upgrade --major-versions"

[crates.update.zig]
precondition = "command -v zig >/dev/null 2>&1"
update = "cd packages/zig && zig build --fetch"
upgrade = "cd packages/zig && zig build --fetch"

[crates.test.python]
precondition = "command -v uv >/dev/null 2>&1"
# `uv run --no-sync` in the e2e step never refreshes the e2e venv, and `uv sync`
# installs `kreuzberg` as a *copied* (non-editable) snapshot. A stale snapshot
# built before the wrapper modules were generated leaves the venv with only
# `_kreuzberg.abi3.so` + `py.typed`, breaking `from kreuzberg import ...`.
# Force a clean rebuild of that snapshot from current source before testing.
before = "cd packages/python && maturin develop --release && cd ../../e2e/python && uv sync --reinstall-package kreuzberg"
e2e = "cd e2e/python && uv run --no-sync pytest tests/ -q"

[crates.test.node]
precondition = "command -v pnpm >/dev/null 2>&1"
before = "cd crates/kreuzberg-node && napi build --output-dir . --release --platform"
e2e = "cd e2e/node && CI=true pnpm install && pnpm test"

[crates.test.go]
precondition = "command -v go >/dev/null 2>&1"
before = "cargo build --release -p kreuzberg-ffi && mkdir -p packages/go/v5/include && cp crates/kreuzberg-ffi/include/kreuzberg.h packages/go/v5/include/"
e2e = "cd e2e/go && go test ./... -count=1"

[crates.test.ruby]
precondition = "command -v bundle >/dev/null 2>&1"
before = "cd packages/ruby && bundle install && bundle exec rake compile && cd ../../e2e/ruby && bundle install"
e2e = "cd e2e/ruby && bundle exec rspec"

[crates.test.php]
precondition = "command -v composer >/dev/null 2>&1"
before = "cargo build --release -p kreuzberg-php && cd e2e/php && composer install && bash ../../scripts/setup-php-ext-ini.sh"
e2e = "cd e2e/php && PHP_INI_SCAN_DIR= php -c php.ini vendor/bin/phpunit"

[crates.test.java]
precondition = "command -v mvn >/dev/null 2>&1"
before = "cargo build --release -p kreuzberg-ffi && cd packages/java && mvn -T 1 package -DskipTests -q"
e2e = "cd e2e/java && mvn test -q"

[crates.test.csharp]
precondition = "command -v dotnet >/dev/null 2>&1"
before = "cargo build --release -p kreuzberg-ffi && bash scripts/stage_csharp_native_local.sh"
e2e = "dotnet test e2e/csharp"

[crates.test.elixir]
precondition = "command -v mix >/dev/null 2>&1"
before = "cd packages/elixir && KREUZBERG_BUILD=1 mix deps.get && KREUZBERG_BUILD=1 mix compile && cd ../../e2e/elixir && KREUZBERG_BUILD=1 mix deps.get"
e2e = "cd e2e/elixir && KREUZBERG_BUILD=1 mix test"

[crates.test.r]
precondition = "command -v Rscript >/dev/null 2>&1"
before = "rm -f packages/r/src/kreuzberg.so packages/r/src/entrypoint.o && cd packages/r/src/rust && cargo build --release"
command = "cd e2e/r && Rscript -e \"devtools::load_all('../../packages/r'); testthat::set_max_fails(Inf); testthat::test_dir('tests')\""
e2e = "cd e2e/r && Rscript -e \"devtools::load_all('../../packages/r'); testthat::set_max_fails(Inf); testthat::test_dir('tests')\""

[crates.test.wasm]
precondition = "command -v wasm-pack >/dev/null 2>&1"
# Build the nodejs target into pkg/nodejs/ so the e2e harness (Node-based vitest)
# can resolve the wasi/env imports — wasm-pack inlines them as require() shims
# under --target nodejs. The web target leaves them as ESM `import * as foo from "env"`
# statements that Node cannot resolve. The published bundler target (pkg/*) is built
# separately by the publish pipeline.
before = "wasm-pack build crates/kreuzberg-wasm --release --target nodejs --out-dir pkg/nodejs && cd e2e/wasm && pnpm install --ignore-workspace"
e2e = "cd e2e/wasm && pnpm test"

[crates.test.rust]
precondition = "command -v cargo >/dev/null 2>&1"
e2e = "cd e2e/rust && cargo test"

[crates.test.kotlin_android]
precondition = "command -v gradle >/dev/null 2>&1"
before = [
    "cargo build --release -p kreuzberg-ffi",
    # Alias the FFI library under the kreuzberg_jni name. `ln -sf` succeeds even
    # when the file it points at does not exist, so an `a || b` chain never
    # reaches the second extension; instead link each extension only when cargo
    # actually produced that library. Best-effort: a failed link is ignored.
    "for ext in dylib so; do if [ -f target/release/libkreuzberg_ffi.$ext ]; then ln -sf libkreuzberg_ffi.$ext target/release/libkreuzberg_jni.$ext 2>/dev/null || true; fi; done",
]
command = "cd packages/kotlin-android && gradle test --no-daemon"
e2e = "cd e2e/kotlin_android && gradle test --no-daemon"

[crates.test.swift]
precondition = "command -v swift >/dev/null 2>&1"
before = [
    "cargo build --release -p kreuzberg-swift",
    "bash scripts/setup-swift-bridge.sh",
]
command = "cd packages/swift && swift test"
e2e = "cd e2e/swift_e2e && swift test"

[crates.test.dart]
# packages/dart is a Flutter plugin (pubspec declares `flutter:` SDK constraint
# plus a `flutter: plugin:` ffiPlugin block), so `dart pub` is rejected by the
# resolver — use `flutter pub` / `flutter test` throughout.
precondition = "command -v flutter >/dev/null 2>&1"
# Single shell command; each line-ending backslash folds the line break and
# the next line's indentation away, so the layout below is for readability only.
before = """
{ command -v flutter_rust_bridge_codegen >/dev/null 2>&1 || cargo install flutter_rust_bridge_codegen --version 2.12.0; } && \
cd packages/dart && flutter pub get && cd rust && \
{ \
  INPUT_HASH=$(printf '%s' "$(cat src/lib.rs flutter_rust_bridge.yaml)" | shasum | cut -d' ' -f1); \
  HASH_FILE=.frb_codegen_hash; \
  if [ -f "$HASH_FILE" ] && [ "$(cat "$HASH_FILE" 2>/dev/null)" = "$INPUT_HASH" ]; then \
    echo "FRB codegen skipped (inputs unchanged)"; \
  else \
    flutter_rust_bridge_codegen generate --config-file flutter_rust_bridge.yaml && \
    echo "$INPUT_HASH" > "$HASH_FILE"; \
  fi; \
} && cd ../../.. && cargo build --release -p kreuzberg-dart
"""
command = "cd packages/dart && DYLD_FALLBACK_LIBRARY_PATH=$PWD/../../target/release LD_LIBRARY_PATH=$PWD/../../target/release FRB_DART_LOAD_EXTERNAL_LIBRARY_NATIVE_LIB_DIR=$PWD/../../target/release flutter pub get && DYLD_FALLBACK_LIBRARY_PATH=$PWD/../../target/release LD_LIBRARY_PATH=$PWD/../../target/release FRB_DART_LOAD_EXTERNAL_LIBRARY_NATIVE_LIB_DIR=$PWD/../../target/release flutter test"
e2e = "cd e2e/dart && DYLD_FALLBACK_LIBRARY_PATH=$PWD/../../target/release LD_LIBRARY_PATH=$PWD/../../target/release FRB_DART_LOAD_EXTERNAL_LIBRARY_NATIVE_LIB_DIR=$PWD/../../target/release flutter pub get && DYLD_FALLBACK_LIBRARY_PATH=$PWD/../../target/release LD_LIBRARY_PATH=$PWD/../../target/release FRB_DART_LOAD_EXTERNAL_LIBRARY_NATIVE_LIB_DIR=$PWD/../../target/release flutter test"

[crates.test.zig]
precondition = "command -v zig >/dev/null 2>&1"
before = "cargo build --release -p kreuzberg-ffi"
command = "cd packages/zig && zig build test"
e2e = "cd e2e/zig && zig build test"

[crates.publish]
core_crate = "crates/kreuzberg"

[crates.publish.languages.ruby]
vendor_mode = "registry"

[crates.publish.languages.elixir]
vendor_mode = "registry"
nif_versions = ["2.16", "2.17"]

[crates.publish.languages.r]
vendor_mode = "registry"

[crates.publish.languages.dart]
# dart/rust/Cargo.toml carries path deps on kreuzberg; rewrite to registry on publish.
# NOTE: resolve_binding_manifest does not yet resolve the Dart manifest path — the rewrite
# is a no-op until alef is updated to handle packages/dart/rust/Cargo.toml.
vendor_mode = "registry"

[crates.publish.languages.ffi]
pkg_config = true
cmake_config = true

[crates.e2e]
fixtures = "fixtures"
output = "e2e"
languages = [
    "python",
    "rust",
    "node",
    "go",
    "ruby",
    "elixir",
    "wasm",
    "java",
    "csharp",
    "php",
    "r",
    "dart",
    "kotlin_android",
    "swift",
    "zig",
    "homebrew",
]
fields_optional = [
    "metadata.format",
    "metadata.format.excel",
    "metadata.format.excel.sheet_count",
    "metadata.title",
    "metadata.subject",
    "metadata.authors",
    "metadata.keywords",
    "metadata.language",
    "metadata.created_at",
    "metadata.modified_at",
    "metadata.created_by",
    "metadata.modified_by",
    "metadata.output_format",
    "metadata.category",
    "metadata.tags",
    "metadata.document_version",
    "metadata.abstract_text",
    "chunks",
    "images",
    "pages",
    "elements",
    "djot_content",
    "ocr_elements",
    "document",
    "extraction_method",
    "detected_languages",
    "extracted_keywords",
    "quality_score",
    "annotations",
]
fields_array = [
    "tables",
    "chunks",
    "images",
    "pages",
    "elements",
    "ocr_elements",
    "detected_languages",
    "extracted_keywords",
    "annotations",
    "processing_warnings",
    "metadata.sheet_names",
    "metadata.format.excel.sheet_names",
    "document.node_types",
]
fields_method_calls = ["metadata.format.excel"]
result_fields = [
    "content",
    "mime_type",
    "djot_content",
    "result",
]

[crates.e2e.call]
function = "extract_file"
module = "kreuzberg"
result_var = "result"
async = true
returns_result = true
args = [
    { name = "path", field = "input.path", type = "file_path" },
    { name = "mime_type", field = "input.mime_type", type = "string", optional = true },
    { name = "config", field = "input.config", type = "json_object", optional = true },
]

[crates.e2e.call.overrides.rust]
crate_name = "kreuzberg"
function = "extract_file"
[crates.e2e.call.overrides.python]
module = "kreuzberg"
function = "extract_file"
options_type = "ExtractionConfig"

[crates.e2e.call.overrides.node]
module = "kreuzberg"
function = "extractFile"
options_type = "ExtractionConfig"
arg_order = ["path", "config", "mime_type"]
result_enum_fields = { outputFormat = "OutputFormat", "metadata.format" = "FormatMetadata" }

[crates.e2e.call.overrides.go]
module = "github.com/kreuzberg-dev/kreuzberg/v5"
function = "ExtractFile"
alias = "kreuzberg"
options_type = "ExtractionConfig"

[crates.e2e.call.overrides.java]
class = "dev.kreuzberg.Kreuzberg"
function = "extractFile"
options_type = "ExtractionConfig"
enum_fields = { outputFormat = "OutputFormat" }

[crates.e2e.call.overrides.csharp]
class = "KreuzbergLib"
function = "ExtractFile"
options_type = "ExtractionConfig"

# Config key -> C# type name, one mapping per line.
[crates.e2e.call.overrides.csharp.json_object_types]
chunking = "ChunkingConfig"
ocr = "OcrConfig"
images = "ImageExtractionConfig"
html_output = "HtmlOutputConfig"
language_detection = "LanguageDetectionConfig"
postprocessor = "PostProcessorConfig"
acceleration = "AccelerationConfig"
email = "EmailConfig"
pages = "PageConfig"
pdf_options = "PdfConfig"
layout = "LayoutDetectionConfig"
tree_sitter = "TreeSitterConfig"
structured_extraction = "StructuredExtractionConfig"
content_filter = "ContentFilterConfig"
token_reduction = "TokenReductionOptions"

[crates.e2e.call.overrides.csharp.nested_types]
model = "EmbeddingModelType"
embedding = "EmbeddingConfig"
keywords = "KeywordConfig"
pages = "PageConfig"
securityLimits = "SecurityLimits"
treeSitter = "TreeSitterConfig"

[crates.e2e.call.overrides.ruby]
options_type = "Kreuzberg::ExtractionConfig"

[crates.e2e.call.overrides.elixir]
# Elixir Rustler facades don't consistently use keyword opts (some use positional defaults),
# so struct-literal generation causes mismatches. Use JSON strings instead for compatibility.
# handle_struct_type = "ExtractionConfig"

[crates.e2e.call.overrides.wasm]
options_type = "WasmExtractionConfig"
bigint_fields = ["cacheTtlSecs", "extractionTimeoutSecs"]

[crates.e2e.call.overrides.swift]
unnamed_arg_indices = [0, 1, 2]

[crates.e2e.call.overrides.php]
class = 'Kreuzberg\Kreuzberg'
function = "extractFile"
options_type = "ExtractionConfig"

[crates.e2e.call.overrides.r]
function = "extract_file"
options_type = "ExtractionConfig"

[crates.e2e.call.overrides.zig]
function = "extract_file_sync"
async = false
result_is_json_struct = true

[crates.e2e.call.overrides.dart]
# Dart reads files to bytes before passing to Rust; use extractBytes (bytes+mime)
# instead of extractFile (path string). The file_path arg type generates readAsBytesSync().
function = "extractBytes"
arg_name_map = { path = "content" }

[crates.e2e.calls.extract_file]
function = "extract_file"
module = "kreuzberg"
async = true
returns_result = true
# kreuzberg's `chunks` is a plain `Vec` field on `ExtractionResult`,
# not a streaming channel. Without this opt-out, alef's streaming auto-detect
# triggers on any assertion targeting `chunks` / `chunks.length` and emits
# `range over stream` / `ChatCompletionChunk` codegen.
streaming = false
args = [
    { name = "path", field = "input.path", type = "file_path" },
    { name = "mime_type", field = "input.mime_type", type = "string", optional = true },
    { name = "config", field = "input.config", type = "json_object", optional = true },
]

[crates.e2e.calls.extract_file.overrides.dart]
# Dart can pass file paths through FRB as plain strings. The dart e2e generator
# routes source-code paths (e.g. `code/hello.py`) through extractFile so
# CodeExtractor's path-based extension detection runs; everything else is
# remapped to extractBytes automatically.
async = true [crates.e2e.calls.extract_file.overrides.swift] async = false unnamed_arg_indices = [0, 1, 2] [crates.e2e.calls.extract_file.overrides.zig] # Zig binding has no async runtime; redirect to the sync variant and parse JSON. function = "extract_file_sync" async = false result_is_json_struct = true [crates.e2e.calls.extract_file.overrides.php] # PHP facade exposes the async variant under the bare name (`extractFile`); # the codegen emits it automatically — no override needed. [crates.e2e.calls.extract_bytes] function = "extract_bytes" module = "kreuzberg" async = true returns_result = true streaming = false args = [ { name = "content", field = "input.data", type = "bytes" }, { name = "mime_type", field = "input.mime_type", type = "string" }, { name = "config", field = "input.config", type = "json_object", optional = true }, ] [crates.e2e.calls.extract_bytes.overrides.csharp] options_type = "ExtractionConfig" options_via = "from_json" json_object_types = { chunking = "ChunkingConfig", ocr = "OcrConfig", images = "ImageExtractionConfig", html_output = "HtmlOutputConfig", language_detection = "LanguageDetectionConfig", postprocessor = "PostProcessorConfig", acceleration = "AccelerationConfig", email = "EmailConfig", pages = "PageConfig", pdf_options = "PdfConfig", layout = "LayoutDetectionConfig", tree_sitter = "TreeSitterConfig", structured_extraction = "StructuredExtractionConfig", content_filter = "ContentFilterConfig", token_reduction = "TokenReductionOptions" } [crates.e2e.calls.extract_bytes.overrides.swift] async = false unnamed_arg_indices = [0, 1, 2] [crates.e2e.calls.extract_bytes.overrides.python] arg_name_map = { data = "content" } options_type = "ExtractionConfig" [crates.e2e.calls.extract_bytes.overrides.r] arg_name_map = { data = "content" } [crates.e2e.calls.extract_bytes.overrides.elixir] # Rustler facade for extract_bytes_async uses positional default style: # func(content, mime_type, config \\ nil), not keyword opts. 
# Don't emit keyword form for config arg. [crates.e2e.calls.extract_bytes.overrides.php] # PHP facade exposes the async variant under the bare name (`extractBytes`); # the codegen emits it automatically — no override needed. [crates.e2e.calls.extract_bytes.overrides.zig] function = "extract_bytes_sync" async = false result_is_json_struct = true [crates.e2e.calls.extract_file_sync] function = "extract_file_sync" module = "kreuzberg" async = false returns_result = true streaming = false skip_languages = ["wasm"] args = [ { name = "path", field = "input.path", type = "file_path" }, { name = "mime_type", field = "input.mime_type", type = "string", optional = true }, { name = "config", field = "input.config", type = "json_object", optional = true }, ] [crates.e2e.calls.extract_file_sync.overrides.node] options_type = "ExtractionConfig" arg_order = ["path", "config", "mime_type"] [crates.e2e.calls.extract_file_sync.overrides.csharp] options_type = "ExtractionConfig" options_via = "from_json" json_object_types = { chunking = "ChunkingConfig", ocr = "OcrConfig", images = "ImageExtractionConfig", html_output = "HtmlOutputConfig", language_detection = "LanguageDetectionConfig", postprocessor = "PostProcessorConfig", acceleration = "AccelerationConfig", email = "EmailConfig", pages = "PageConfig", pdf_options = "PdfConfig", layout = "LayoutDetectionConfig", tree_sitter = "TreeSitterConfig", structured_extraction = "StructuredExtractionConfig", content_filter = "ContentFilterConfig", token_reduction = "TokenReductionOptions" } [crates.e2e.calls.extract_file_sync.overrides.swift] unnamed_arg_indices = [0, 1, 2] [crates.e2e.calls.extract_file_sync.overrides.zig] function = "extract_file_sync" async = false result_is_json_struct = true [crates.e2e.calls.extract_file_sync.overrides.dart] # Dart can pass file paths through FRB as plain strings — extractFileSync(path, # mimeType?, config) is the supported facade. The dart e2e generator detects # `text/x-source-code` paths (e.g. 
`code/hello.py`) and keeps the path-based call # so CodeExtractor's extract_file (extension-based language detection) runs. # For everything else the generator remaps to extractBytesSync. async = false [crates.e2e.calls.extract_bytes_sync] function = "extract_bytes_sync" module = "kreuzberg" async = false returns_result = true streaming = false skip_languages = ["wasm"] args = [ { name = "content", field = "input.data", type = "bytes" }, { name = "mime_type", field = "input.mime_type", type = "string" }, { name = "config", field = "input.config", type = "json_object", optional = true }, ] [crates.e2e.calls.extract_bytes_sync.overrides.csharp] options_type = "ExtractionConfig" json_object_types = { chunking = "ChunkingConfig", ocr = "OcrConfig", images = "ImageExtractionConfig", html_output = "HtmlOutputConfig", language_detection = "LanguageDetectionConfig", postprocessor = "PostProcessorConfig", acceleration = "AccelerationConfig", email = "EmailConfig", pages = "PageConfig", pdf_options = "PdfConfig", layout = "LayoutDetectionConfig", tree_sitter = "TreeSitterConfig", structured_extraction = "StructuredExtractionConfig", content_filter = "ContentFilterConfig", token_reduction = "TokenReductionOptions" } [crates.e2e.calls.extract_bytes_sync.overrides.python] arg_name_map = { data = "content" } options_type = "ExtractionConfig" [crates.e2e.calls.extract_bytes_sync.overrides.r] arg_name_map = { data = "content" } [crates.e2e.calls.extract_bytes_sync.overrides.swift] unnamed_arg_indices = [0, 1, 2] [crates.e2e.calls.extract_bytes_sync.overrides.zig] function = "extract_bytes_sync" async = false result_is_json_struct = true [crates.e2e.calls.render_html.overrides.python] result_is_simple = true arg_name_map = { doc = "html" } [crates.e2e.calls.render_markdown.overrides.python] result_is_simple = true arg_name_map = { doc = "html" } [crates.e2e.calls.render_djot.overrides.python] result_is_simple = true arg_name_map = { doc = "html" } 
[crates.e2e.calls.render_json.overrides.python]
result_is_simple = true
arg_name_map = { doc = "html" }

[crates.e2e.calls.render_plain.overrides.python]
result_is_simple = true
arg_name_map = { doc = "html" }

[crates.e2e.calls.djot_to_html.overrides.python]
result_is_simple = true
arg_name_map = { djot = "djot_source" }

# --- chunk_text: options type per binding; rust/csharp pass one extra argument.

[crates.e2e.calls.chunk_text.overrides.wasm]
options_type = "WasmChunkingConfig"

[crates.e2e.calls.chunk_text.overrides.rust]
extra_args = ["None"]

[crates.e2e.calls.chunk_text.overrides.node]
options_type = "JsChunkingConfig"

[crates.e2e.calls.chunk_text.overrides.csharp]
options_type = "ChunkingConfig"
extra_args = ["null"]

[crates.e2e.calls.chunk_text.overrides.java]
options_type = "ChunkingConfig"

# --- chunk_semantic: same override set as chunk_text.

[crates.e2e.calls.chunk_semantic.overrides.wasm]
options_type = "WasmChunkingConfig"

[crates.e2e.calls.chunk_semantic.overrides.rust]
extra_args = ["None"]

[crates.e2e.calls.chunk_semantic.overrides.node]
options_type = "JsChunkingConfig"

[crates.e2e.calls.chunk_semantic.overrides.csharp]
options_type = "ChunkingConfig"
extra_args = ["null"]

[crates.e2e.calls.chunk_semantic.overrides.java]
options_type = "ChunkingConfig"

# --- chunk_texts_batch

[crates.e2e.calls.chunk_texts_batch.overrides.node]
options_type = "JsChunkingConfig"

[crates.e2e.calls.chunk_texts_batch.overrides.csharp]
options_type = "ChunkingConfig"

[crates.e2e.calls.chunk_texts_batch.overrides.java]
options_type = "ChunkingConfig"

# --- detect_languages

[crates.e2e.calls.detect_languages.overrides.go]
options_type = "LanguageDetectionConfig"

[crates.e2e.calls.detect_languages.overrides.node]
options_type = "JsLanguageDetectionConfig"

[crates.e2e.calls.detect_languages.overrides.csharp]
options_type = "LanguageDetectionConfig"

[crates.e2e.calls.detect_languages.overrides.java]
options_type = "LanguageDetectionConfig"

# --- detect_mime_type_from_bytes: synchronous, single bytes argument.

[crates.e2e.calls.detect_mime_type_from_bytes]
function = "detect_mime_type_from_bytes"
module = "kreuzberg"
async = false
returns_result = true
result_is_simple = true
args = [{ name = "content", field = "input.data", type = "bytes" }]

[crates.e2e.calls.detect_mime_type_from_bytes.overrides.swift]
unnamed_arg_indices = [0]

# --- Text-utility overrides

[crates.e2e.calls.validate_chunking_params.overrides.python]
arg_name_map = { max_characters = "max_chars", overlap = "max_overlap" }

[crates.e2e.calls.normalize_whitespace.overrides.python]
result_is_simple = true
arg_name_map = { text = "s" }

[crates.e2e.calls.dedup_text.overrides.go]
result_is_array = true
result_is_simple = true
returns_result = true

# --- get_extensions_for_mime: synchronous, single string argument.

[crates.e2e.calls.get_extensions_for_mime]
function = "get_extensions_for_mime"
module = "kreuzberg"
async = false
returns_result = true
result_is_array = true
result_is_simple = true
args = [{ name = "mime_type", field = "input.mime_type", type = "string" }]

# --- batch_extract_files (async; skipped for wasm)

[crates.e2e.calls.batch_extract_files]
function = "batch_extract_files"
module = "kreuzberg"
async = true
returns_result = true
result_is_vec = true
skip_languages = ["wasm"]
args = [
    { name = "paths", field = "input.paths", type = "json_object", owned = true, go_type = "BatchFileItem", element_type = "BatchFileItem" },
    { name = "config", field = "input.config", type = "json_object", optional = true },
]

[crates.e2e.calls.batch_extract_files.overrides.csharp]
result_is_vec = true
options_type = "ExtractionConfig"

[crates.e2e.calls.batch_extract_files.overrides.java]
result_is_vec = true
options_type = "ExtractionConfig"
enum_fields = { outputFormat = "OutputFormat" }

[crates.e2e.calls.batch_extract_files.overrides.python]
result_is_vec = true
arg_name_map = { paths = "items" }
options_type = "ExtractionConfig"

[crates.e2e.calls.batch_extract_files.overrides.r]
arg_name_map = { paths = "items" }

[crates.e2e.calls.batch_extract_files.overrides.php]
# PHP facade exposes the async variant under the bare name (`batchExtractFiles`).
[crates.e2e.calls.batch_extract_files.overrides.zig]
function = "batch_extract_files_sync"
async = false

# --- batch_extract_files_sync (blocking; skipped for wasm)

[crates.e2e.calls.batch_extract_files_sync]
function = "batch_extract_files_sync"
module = "kreuzberg"
async = false
returns_result = true
result_is_vec = true
skip_languages = ["wasm"]
args = [
    { name = "paths", field = "input.paths", type = "json_object", owned = true, go_type = "BatchFileItem", element_type = "BatchFileItem" },
    { name = "config", field = "input.config", type = "json_object", optional = true },
]

[crates.e2e.calls.batch_extract_files_sync.overrides.csharp]
result_is_vec = true
options_type = "ExtractionConfig"

[crates.e2e.calls.batch_extract_files_sync.overrides.java]
result_is_vec = true
options_type = "ExtractionConfig"
enum_fields = { outputFormat = "OutputFormat" }

[crates.e2e.calls.batch_extract_files_sync.overrides.python]
result_is_vec = true
arg_name_map = { paths = "items" }
options_type = "ExtractionConfig"

[crates.e2e.calls.batch_extract_files_sync.overrides.r]
arg_name_map = { paths = "items" }

# --- reduce_tokens: simple result; options type differs only for node.

[crates.e2e.calls.reduce_tokens.overrides.go]
result_is_simple = true
options_type = "TokenReductionConfig"

[crates.e2e.calls.reduce_tokens.overrides.node]
result_is_simple = true
options_type = "JsTokenReductionConfig"

[crates.e2e.calls.reduce_tokens.overrides.csharp]
result_is_simple = true
options_type = "TokenReductionConfig"

[crates.e2e.calls.reduce_tokens.overrides.python]
result_is_simple = true
options_type = "TokenReductionConfig"

[crates.e2e.calls.reduce_tokens.overrides.java]
result_is_simple = true
options_type = "TokenReductionConfig"

# --- serialize_to_toon / serialize_to_json

[crates.e2e.calls.serialize_to_toon.overrides.node]
result_is_simple = true
options_type = "ExtractionConfig"

[crates.e2e.calls.serialize_to_toon.overrides.csharp]
result_is_simple = true
options_type = "ExtractionConfig"

[crates.e2e.calls.serialize_to_json.overrides.node]
result_is_simple = true
options_type = "ExtractionConfig"

[crates.e2e.calls.serialize_to_json.overrides.csharp]
result_is_simple = true
options_type = "ExtractionConfig"

[crates.e2e.calls.detect_or_validate.overrides.go]
returns_result = true

# --- embed_texts (blocking; skipped for wasm)
# Keys follow the same order as the other call tables: identity first
# (function/module), then call shape, then result flags.

[crates.e2e.calls.embed_texts]
function = "embed_texts"
module = "kreuzberg"
async = false
returns_result = true
result_is_simple = true
result_is_array = true
skip_languages = ["wasm"]
options_type = "EmbeddingConfig"
args = [
    { name = "texts", field = "input.texts", type = "json_object", owned = true, element_type = "String" },
    { name = "config", field = "input.config", type = "json_object", optional = true },
]

[crates.e2e.calls.embed_texts.overrides.csharp]
result_is_simple = true
options_type = "EmbeddingConfig"
nested_types = { embedding_model = "EmbeddingModelType" }

[crates.e2e.calls.embed_texts.overrides.java]
result_is_simple = true
options_type = "EmbeddingConfig"

[crates.e2e.calls.embed_texts.overrides.go]
result_is_simple = true
result_is_array = true
options_type = "EmbeddingConfig"

[crates.e2e.calls.embed_texts.overrides.php]
result_is_simple = true
options_type = "EmbeddingConfig"

[crates.e2e.calls.embed_texts.overrides.dart]
result_is_simple = true
options_type = "EmbeddingConfig"

[crates.e2e.calls.embed_texts.overrides.swift]
options_type = "EmbeddingConfig"

# Zig returns `[]u8` (JSON) for the nested `Vec<Vec<_>>` result; assertions are
# rendered against the parsed JSON value rather than struct fields.
[crates.e2e.calls.embed_texts.overrides.zig]
result_is_json_struct = true

# ---------------------------------------------------------------------------
# batch_extract_bytes (async) and batch_extract_bytes_sync; both skip wasm
# ---------------------------------------------------------------------------

[crates.e2e.calls.batch_extract_bytes]
function = "batch_extract_bytes"
module = "kreuzberg"
async = true
returns_result = true
result_is_vec = true
skip_languages = ["wasm"]
args = [
    { name = "items", field = "input.items", type = "json_object", owned = true, go_type = "BatchBytesItem", element_type = "BatchBytesItem" },
    { name = "config", field = "input.config", type = "json_object", optional = true },
]

[crates.e2e.calls.batch_extract_bytes.overrides.csharp]
result_is_vec = true
options_type = "ExtractionConfig"

[crates.e2e.calls.batch_extract_bytes.overrides.java]
result_is_vec = true
options_type = "ExtractionConfig"
enum_fields = { outputFormat = "OutputFormat" }

[crates.e2e.calls.batch_extract_bytes.overrides.php]
# PHP facade exposes the async variant under the bare name (`batchExtractBytes`).

[crates.e2e.calls.batch_extract_bytes.overrides.zig]
function = "batch_extract_bytes_sync"
async = false

[crates.e2e.calls.batch_extract_bytes_sync]
function = "batch_extract_bytes_sync"
module = "kreuzberg"
async = false
returns_result = true
result_is_vec = true
skip_languages = ["wasm"]
args = [
    { name = "items", field = "input.items", type = "json_object", owned = true, go_type = "BatchBytesItem", element_type = "BatchBytesItem" },
    { name = "config", field = "input.config", type = "json_object", optional = true },
]

[crates.e2e.calls.batch_extract_bytes_sync.overrides.csharp]
result_is_vec = true
options_type = "ExtractionConfig"

[crates.e2e.calls.batch_extract_bytes_sync.overrides.java]
result_is_vec = true
options_type = "ExtractionConfig"
enum_fields = { outputFormat = "OutputFormat" }

# ---------------------------------------------------------------------------
# Registry listing calls: synchronous, no arguments, simple result.
# The Go override additionally sets returns_result = true.
# ---------------------------------------------------------------------------

[crates.e2e.calls.list_document_extractors]
function = "list_document_extractors"
module = "kreuzberg"
async = false
result_is_simple = true
args = []

[crates.e2e.calls.list_document_extractors.overrides.go]
result_is_simple = true
returns_result = true

[crates.e2e.calls.list_ocr_backends]
function = "list_ocr_backends"
module = "kreuzberg"
async = false
result_is_simple = true
args = []

[crates.e2e.calls.list_ocr_backends.overrides.go]
result_is_simple = true
returns_result = true

[crates.e2e.calls.list_post_processors]
function = "list_post_processors"
module = "kreuzberg"
async = false
result_is_simple = true
args = []

[crates.e2e.calls.list_post_processors.overrides.go]
result_is_simple = true
returns_result = true

[crates.e2e.calls.list_validators]
function = "list_validators"
module = "kreuzberg"
async = false
result_is_simple = true
args = []

[crates.e2e.calls.list_validators.overrides.go]
result_is_simple = true
returns_result = true

[crates.e2e.calls.list_embedding_backends]
function = "list_embedding_backends"
module = "kreuzberg"
async = false
result_is_simple = true
args = []

[crates.e2e.calls.list_embedding_backends.overrides.go]
result_is_simple = true
returns_result = true

[crates.e2e.calls.list_renderers]
function = "list_renderers"
module = "kreuzberg"
async = false
result_is_simple = true
args = []

[crates.e2e.calls.list_renderers.overrides.go]
result_is_simple = true
returns_result = true

# ---------------------------------------------------------------------------
# Trait-bridge calls. The base table leaves function/module empty; the
# per-language entries in `overrides` carry the real names. kotlin_android
# entries name a bridge `class` instead of a `module`.
# ---------------------------------------------------------------------------

[crates.e2e.calls.clear_ocr_backends]
# Trait-bridge function — excluded from gen_function surface; bridges emit it directly.
function = ""
module = ""
async = false
returns_result = false
returns_void = true
result_is_simple = true
args = []

[crates.e2e.calls.clear_ocr_backends.overrides]
python = { function = "clear_ocr_backends", module = "kreuzberg" }
rust = { function = "clear_ocr_backends", module = "kreuzberg" }
node = { function = "clearOcrBackends", module = "kreuzberg" }
go = { function = "ClearOCRBackends", module = "kreuzberg" }
ruby = { function = "clear_ocr_backends", module = "Kreuzberg" }
php = { function = "clearOcrBackends", module = "Kreuzberg" }
java = { function = "clearOcrBackends", module = "dev.kreuzberg.Kreuzberg" }
csharp = { function = "ClearOcrBackends", module = "Kreuzberg" }
swift = { function = "clearOcrBackends", module = "Kreuzberg" }
dart = { function = "clearOcrBackends", module = "kreuzberg" }
wasm = { function = "clearOcrBackends", module = "kreuzberg" }
r = { function = "clear_ocr_backends", module = "kreuzberg" }
elixir = { function = "clear_ocr_backends", module = "Kreuzberg" }
zig = { function = "clear_ocr_backends", module = "kreuzberg" }
kotlin_android = { function = "clearAll", class = "OcrBackendBridge" }

[crates.e2e.calls.clear_post_processors]
# Trait-bridge function — excluded from gen_function surface; bridges emit it directly.
function = ""
module = ""
async = false
returns_result = false
returns_void = true
result_is_simple = true
args = []

[crates.e2e.calls.clear_post_processors.overrides]
python = { function = "clear_post_processors", module = "kreuzberg" }
rust = { function = "clear_post_processors", module = "kreuzberg" }
node = { function = "clearPostProcessors", module = "kreuzberg" }
go = { function = "ClearPostProcessors", module = "kreuzberg" }
ruby = { function = "clear_post_processors", module = "Kreuzberg" }
php = { function = "clearPostProcessors", module = "Kreuzberg" }
java = { function = "clearPostProcessors", module = "dev.kreuzberg.Kreuzberg" }
csharp = { function = "ClearPostProcessors", module = "Kreuzberg" }
swift = { function = "clearPostProcessors", module = "Kreuzberg" }
dart = { function = "clearPostProcessors", module = "kreuzberg" }
wasm = { function = "clearPostProcessors", module = "kreuzberg" }
r = { function = "clear_post_processors", module = "kreuzberg" }
elixir = { function = "clear_post_processors", module = "Kreuzberg" }
zig = { function = "clear_post_processors", module = "kreuzberg" }
kotlin_android = { function = "clearAll", class = "PostProcessorBridge" }

[crates.e2e.calls.clear_validators]
# Trait-bridge function — excluded from gen_function surface; bridges emit it directly.
function = ""
module = ""
async = false
returns_result = false
returns_void = true
result_is_simple = true
args = []

[crates.e2e.calls.clear_validators.overrides]
python = { function = "clear_validators", module = "kreuzberg" }
rust = { function = "clear_validators", module = "kreuzberg" }
node = { function = "clearValidators", module = "kreuzberg" }
go = { function = "ClearValidators", module = "kreuzberg" }
ruby = { function = "clear_validators", module = "Kreuzberg" }
php = { function = "clearValidators", module = "Kreuzberg" }
java = { function = "clearValidators", module = "dev.kreuzberg.Kreuzberg" }
csharp = { function = "ClearValidators", module = "Kreuzberg" }
swift = { function = "clearValidators", module = "Kreuzberg" }
dart = { function = "clearValidators", module = "kreuzberg" }
wasm = { function = "clearValidators", module = "kreuzberg" }
r = { function = "clear_validators", module = "kreuzberg" }
elixir = { function = "clear_validators", module = "Kreuzberg" }
zig = { function = "clear_validators", module = "kreuzberg" }
kotlin_android = { function = "clearAll", class = "ValidatorBridge" }

[crates.e2e.calls.unregister_ocr_backend]
# Trait-bridge function — excluded from gen_function surface; bridges emit it directly.
# function/module left empty so alef skips API surface validation.
function = ""
module = ""
async = false
returns_result = false
returns_void = true
result_is_simple = true
args = [{ name = "name", field = "input.name", type = "string" }]

[crates.e2e.calls.unregister_ocr_backend.overrides]
python = { function = "unregister_ocr_backend", module = "kreuzberg" }
rust = { function = "unregister_ocr_backend", module = "kreuzberg" }
node = { function = "unregisterOcrBackend", module = "kreuzberg" }
go = { function = "UnregisterOCRBackend", module = "kreuzberg" }
ruby = { function = "unregister_ocr_backend", module = "Kreuzberg" }
php = { function = "unregisterOcrBackend", module = "Kreuzberg" }
java = { function = "unregisterOcrBackend", module = "dev.kreuzberg.Kreuzberg" }
csharp = { function = "UnregisterOcrBackend", module = "Kreuzberg" }
swift = { function = "unregisterOcrBackend", module = "Kreuzberg" }
dart = { function = "unregisterOcrBackend", module = "kreuzberg" }
wasm = { function = "unregisterOcrBackend", module = "kreuzberg" }
r = { function = "unregister_ocr_backend", module = "kreuzberg" }
elixir = { function = "unregister_ocr_backend", module = "Kreuzberg" }
zig = { function = "unregister_ocr_backend", module = "kreuzberg" }
kotlin_android = { function = "unregister", class = "OcrBackendBridge" }

[crates.e2e.calls.unregister_post_processor]
# Trait-bridge function — excluded from gen_function surface; bridges emit it directly.
function = ""
module = ""
async = false
returns_result = false
returns_void = true
result_is_simple = true
args = [{ name = "name", field = "input.name", type = "string" }]

[crates.e2e.calls.unregister_post_processor.overrides]
python = { function = "unregister_post_processor", module = "kreuzberg" }
rust = { function = "unregister_post_processor", module = "kreuzberg" }
node = { function = "unregisterPostProcessor", module = "kreuzberg" }
go = { function = "UnregisterPostProcessor", module = "kreuzberg" }
ruby = { function = "unregister_post_processor", module = "Kreuzberg" }
php = { function = "unregisterPostProcessor", module = "Kreuzberg" }
java = { function = "unregisterPostProcessor", module = "dev.kreuzberg.Kreuzberg" }
csharp = { function = "UnregisterPostProcessor", module = "Kreuzberg" }
swift = { function = "unregisterPostProcessor", module = "Kreuzberg" }
dart = { function = "unregisterPostProcessor", module = "kreuzberg" }
wasm = { function = "unregisterPostProcessor", module = "kreuzberg" }
r = { function = "unregister_post_processor", module = "kreuzberg" }
elixir = { function = "unregister_post_processor", module = "Kreuzberg" }
zig = { function = "unregister_post_processor", module = "kreuzberg" }
kotlin_android = { function = "unregister", class = "PostProcessorBridge" }
[crates.e2e.calls.unregister_validator]
# Trait-bridge function — excluded from gen_function surface; bridges emit it directly.
# The base function/module are empty; each language entry in `overrides`
# supplies the real name. Takes the registered name as its only argument.
function = ""
module = ""
async = false
returns_result = false
returns_void = true
result_is_simple = true
args = [{ name = "name", field = "input.name", type = "string" }]

# kotlin_android names a bridge `class` instead of a `module`.
[crates.e2e.calls.unregister_validator.overrides]
python = { function = "unregister_validator", module = "kreuzberg" }
rust = { function = "unregister_validator", module = "kreuzberg" }
node = { function = "unregisterValidator", module = "kreuzberg" }
go = { function = "UnregisterValidator", module = "kreuzberg" }
ruby = { function = "unregister_validator", module = "Kreuzberg" }
php = { function = "unregisterValidator", module = "Kreuzberg" }
java = { function = "unregisterValidator", module = "dev.kreuzberg.Kreuzberg" }
csharp = { function = "UnregisterValidator", module = "Kreuzberg" }
swift = { function = "unregisterValidator", module = "Kreuzberg" }
dart = { function = "unregisterValidator", module = "kreuzberg" }
wasm = { function = "unregisterValidator", module = "kreuzberg" }
r = { function = "unregister_validator", module = "kreuzberg" }
elixir = { function = "unregister_validator", module = "Kreuzberg" }
zig = { function = "unregister_validator", module = "kreuzberg" }
kotlin_android = { function = "unregister", class = "ValidatorBridge" }
[crates.e2e.calls.register_ocr_backend]
# Trait-bridge function — excluded from gen_function surface; bridges emit it directly.
# The base function/module are empty; each language entry in `overrides`
# supplies the real name.
function = ""
module = ""
async = false
returns_result = false
returns_void = true
result_is_simple = true
args = []

# kotlin_android names a bridge `class` instead of a `module`; zig also
# pins returns_result = false.
[crates.e2e.calls.register_ocr_backend.overrides]
python = { function = "register_ocr_backend", module = "kreuzberg" }
rust = { function = "register_ocr_backend", module = "kreuzberg" }
node = { function = "registerOcrBackend", module = "kreuzberg" }
go = { function = "RegisterOCRBackend", module = "kreuzberg" }
ruby = { function = "register_ocr_backend", module = "Kreuzberg" }
php = { function = "registerOcrBackend", module = "Kreuzberg" }
java = { function = "registerOcrBackend", module = "dev.kreuzberg.Kreuzberg" }
csharp = { function = "RegisterOcrBackend", module = "Kreuzberg" }
swift = { function = "registerOcrBackend", module = "Kreuzberg" }
dart = { function = "registerOcrBackend", module = "kreuzberg" }
wasm = { function = "registerOcrBackend", module = "kreuzberg" }
elixir = { function = "register_ocr_backend", module = "Kreuzberg" }
zig = { function = "register_ocr_backend", module = "kreuzberg", returns_result = false }
kotlin_android = { function = "register", class = "OcrBackendBridge" }

# Kept as its own table because it carries a nested map.
# NOTE(review): the base `args` list is empty, so `backend` is not a declared
# argument here — confirm where this rename is applied.
[crates.e2e.calls.register_ocr_backend.overrides.r]
function = "register_ocr_backend"
module = "kreuzberg"
arg_name_map = { backend = "r_backend" }
[crates.e2e.calls.register_post_processor]
# Trait-bridge function — excluded from gen_function surface; bridges emit it directly.
# The base function/module are empty; each language entry in `overrides`
# supplies the real name.
function = ""
module = ""
async = false
returns_result = false
returns_void = true
result_is_simple = true
args = []

# zig also pins returns_result = false.
[crates.e2e.calls.register_post_processor.overrides]
python = { function = "register_post_processor", module = "kreuzberg" }
rust = { function = "register_post_processor", module = "kreuzberg" }
node = { function = "registerPostProcessor", module = "kreuzberg" }
go = { function = "RegisterPostProcessor", module = "kreuzberg" }
ruby = { function = "register_post_processor", module = "Kreuzberg" }
php = { function = "registerPostProcessor", module = "Kreuzberg" }
java = { function = "registerPostProcessor", module = "dev.kreuzberg.Kreuzberg" }
csharp = { function = "RegisterPostProcessor", module = "Kreuzberg" }
swift = { function = "registerPostProcessor", module = "Kreuzberg" }
dart = { function = "registerPostProcessor", module = "kreuzberg" }
wasm = { function = "registerPostProcessor", module = "kreuzberg" }
elixir = { function = "register_post_processor", module = "Kreuzberg" }
zig = { function = "register_post_processor", module = "kreuzberg", returns_result = false }

# Kept as its own table because it carries a nested map.
# NOTE(review): the base `args` list is empty, so `processor` is not a declared
# argument here — confirm where this rename is applied.
[crates.e2e.calls.register_post_processor.overrides.r]
function = "register_post_processor"
module = "kreuzberg"
arg_name_map = { processor = "r_backend" }
# Kotlin/Android addresses the bridge through `class` + `function` rather than `module`.
[crates.e2e.calls.register_post_processor.overrides.kotlin_android]
function = "register"
class = "PostProcessorBridge"

[crates.e2e.calls.register_validator]
# Trait-bridge function: excluded from the gen_function surface; the bridges emit it directly.
# `function` and `module` are empty at this level — see the per-language overrides.
result_is_simple = true
function = ""
module = ""
async = false
returns_result = false
returns_void = true
args = []

[crates.e2e.calls.register_validator.overrides.python]
function = "register_validator"
module = "kreuzberg"

[crates.e2e.calls.register_validator.overrides.rust]
function = "register_validator"
module = "kreuzberg"

[crates.e2e.calls.register_validator.overrides.node]
function = "registerValidator"
module = "kreuzberg"

[crates.e2e.calls.register_validator.overrides.go]
function = "RegisterValidator"
module = "kreuzberg"

[crates.e2e.calls.register_validator.overrides.ruby]
function = "register_validator"
module = "Kreuzberg"

[crates.e2e.calls.register_validator.overrides.php]
function = "registerValidator"
module = "Kreuzberg"

[crates.e2e.calls.register_validator.overrides.java]
function = "registerValidator"
module = "dev.kreuzberg.Kreuzberg"

[crates.e2e.calls.register_validator.overrides.csharp]
function = "RegisterValidator"
module = "Kreuzberg"

[crates.e2e.calls.register_validator.overrides.swift]
function = "registerValidator"
module = "Kreuzberg"

[crates.e2e.calls.register_validator.overrides.dart]
function = "registerValidator"
module = "kreuzberg"

[crates.e2e.calls.register_validator.overrides.wasm]
function = "registerValidator"
module = "kreuzberg"

[crates.e2e.calls.register_validator.overrides.r]
function = "register_validator"
module = "kreuzberg"
arg_name_map = { validator = "r_backend" }

[crates.e2e.calls.register_validator.overrides.elixir]
function = "register_validator"
module = "Kreuzberg"

[crates.e2e.calls.register_validator.overrides.zig]
function = "register_validator"
module = "kreuzberg"
returns_result = false

[crates.e2e.calls.register_validator.overrides.kotlin_android]
function = "register"
class = "ValidatorBridge"

[crates.e2e.calls.register_embedding_backend]
# Trait-bridge function: excluded from the gen_function surface; the bridges emit it directly.
# `function` and `module` are empty at this level — see the per-language overrides.
result_is_simple = true
function = ""
module = ""
async = false
returns_result = false
returns_void = true
args = []

[crates.e2e.calls.register_embedding_backend.overrides.python]
function = "register_embedding_backend"
module = "kreuzberg"

[crates.e2e.calls.register_embedding_backend.overrides.rust]
function = "register_embedding_backend"
module = "kreuzberg"

[crates.e2e.calls.register_embedding_backend.overrides.node]
function = "registerEmbeddingBackend"
module = "kreuzberg"

[crates.e2e.calls.register_embedding_backend.overrides.go]
function = "RegisterEmbeddingBackend"
module = "kreuzberg"

[crates.e2e.calls.register_embedding_backend.overrides.ruby]
function = "register_embedding_backend"
module = "Kreuzberg"

[crates.e2e.calls.register_embedding_backend.overrides.php]
function = "registerEmbeddingBackend"
module = "Kreuzberg"

[crates.e2e.calls.register_embedding_backend.overrides.java]
function = "registerEmbeddingBackend"
module = "dev.kreuzberg.Kreuzberg"

[crates.e2e.calls.register_embedding_backend.overrides.csharp]
function = "RegisterEmbeddingBackend"
module = "Kreuzberg"

[crates.e2e.calls.register_embedding_backend.overrides.swift]
function = "registerEmbeddingBackend"
module = "Kreuzberg"

[crates.e2e.calls.register_embedding_backend.overrides.dart]
function = "registerEmbeddingBackend"
module = "kreuzberg"

[crates.e2e.calls.register_embedding_backend.overrides.wasm]
function = "registerEmbeddingBackend"
module = "kreuzberg"

[crates.e2e.calls.register_embedding_backend.overrides.r]
function = "register_embedding_backend"
module = "kreuzberg"
arg_name_map = { backend = "r_backend" }

[crates.e2e.calls.register_embedding_backend.overrides.elixir]
function = "register_embedding_backend"
module = "Kreuzberg"

[crates.e2e.calls.register_embedding_backend.overrides.zig]
function = "register_embedding_backend"
module = "kreuzberg"
returns_result = false

[crates.e2e.calls.register_embedding_backend.overrides.kotlin_android]
function = "register"
class = "EmbeddingBackendBridge"

[crates.e2e.calls.unregister_embedding_backend]
# Trait-bridge function: excluded from the gen_function surface; the bridges emit it directly.
# Takes a single string argument read from the fixture field `input.name`.
result_is_simple = true
function = ""
module = ""
async = false
returns_result = false
returns_void = true
args = [{ name = "name", field = "input.name", type = "string" }]

[crates.e2e.calls.unregister_embedding_backend.overrides.python]
function = "unregister_embedding_backend"
module = "kreuzberg"

[crates.e2e.calls.unregister_embedding_backend.overrides.rust]
function = "unregister_embedding_backend"
module = "kreuzberg"

[crates.e2e.calls.unregister_embedding_backend.overrides.node]
function = "unregisterEmbeddingBackend"
module = "kreuzberg"

[crates.e2e.calls.unregister_embedding_backend.overrides.go]
function = "UnregisterEmbeddingBackend"
module = "kreuzberg"

[crates.e2e.calls.unregister_embedding_backend.overrides.ruby]
function = "unregister_embedding_backend"
module = "Kreuzberg"

[crates.e2e.calls.unregister_embedding_backend.overrides.php]
function = "unregisterEmbeddingBackend"
module = "Kreuzberg"

[crates.e2e.calls.unregister_embedding_backend.overrides.java]
function = "unregisterEmbeddingBackend"
module = "dev.kreuzberg.Kreuzberg"

[crates.e2e.calls.unregister_embedding_backend.overrides.csharp]
function = "UnregisterEmbeddingBackend"
module = "Kreuzberg"

[crates.e2e.calls.unregister_embedding_backend.overrides.swift]
function = "unregisterEmbeddingBackend"
module = "Kreuzberg"

[crates.e2e.calls.unregister_embedding_backend.overrides.dart]
function = "unregisterEmbeddingBackend"
module = "kreuzberg"

[crates.e2e.calls.unregister_embedding_backend.overrides.wasm]
function = "unregisterEmbeddingBackend"
module = "kreuzberg"

[crates.e2e.calls.unregister_embedding_backend.overrides.r]
function = "unregister_embedding_backend"
module = "kreuzberg"

[crates.e2e.calls.unregister_embedding_backend.overrides.elixir]
function = "unregister_embedding_backend"
module = "Kreuzberg"

[crates.e2e.calls.unregister_embedding_backend.overrides.zig]
function = "unregister_embedding_backend"
module = "kreuzberg"

[crates.e2e.calls.unregister_embedding_backend.overrides.kotlin_android]
function = "unregister"
class = "EmbeddingBackendBridge"

[crates.e2e.calls.clear_embedding_backends]
# Trait-bridge function: excluded from the gen_function surface; the bridges emit it directly.
# `function` and `module` are empty at this level — see the per-language overrides.
result_is_simple = true
function = ""
module = ""
async = false
returns_result = false
returns_void = true
args = []

[crates.e2e.calls.clear_embedding_backends.overrides.python]
function = "clear_embedding_backends"
module = "kreuzberg"

[crates.e2e.calls.clear_embedding_backends.overrides.rust]
function = "clear_embedding_backends"
module = "kreuzberg"

[crates.e2e.calls.clear_embedding_backends.overrides.node]
function = "clearEmbeddingBackends"
module = "kreuzberg"

[crates.e2e.calls.clear_embedding_backends.overrides.go]
function = "ClearEmbeddingBackends"
module = "kreuzberg"

[crates.e2e.calls.clear_embedding_backends.overrides.ruby]
function = "clear_embedding_backends"
module = "Kreuzberg"

[crates.e2e.calls.clear_embedding_backends.overrides.php]
function = "clearEmbeddingBackends"
module = "Kreuzberg"

[crates.e2e.calls.clear_embedding_backends.overrides.java]
function = "clearEmbeddingBackends"
module = "dev.kreuzberg.Kreuzberg"

[crates.e2e.calls.clear_embedding_backends.overrides.csharp]
function = "ClearEmbeddingBackends"
module = "Kreuzberg"

[crates.e2e.calls.clear_embedding_backends.overrides.swift]
function = "clearEmbeddingBackends"
module = "Kreuzberg"

[crates.e2e.calls.clear_embedding_backends.overrides.dart]
function = "clearEmbeddingBackends"
module = "kreuzberg"

[crates.e2e.calls.clear_embedding_backends.overrides.wasm]
function = "clearEmbeddingBackends"
module = "kreuzberg"

[crates.e2e.calls.clear_embedding_backends.overrides.r]
function = "clear_embedding_backends"
module = "kreuzberg"

[crates.e2e.calls.clear_embedding_backends.overrides.elixir]
function = "clear_embedding_backends"
module = "Kreuzberg"

[crates.e2e.calls.clear_embedding_backends.overrides.zig]
function = "clear_embedding_backends"
module = "kreuzberg"

# Kotlin/Android addresses the bridge through `class` + `function` rather than `module`.
[crates.e2e.calls.clear_embedding_backends.overrides.kotlin_android]
function = "clearAll"
class = "EmbeddingBackendBridge"

[crates.e2e.calls.register_document_extractor]
# Trait-bridge function: excluded from the gen_function surface; the bridges emit it directly.
# `function` and `module` are empty at this level — see the per-language overrides.
result_is_simple = true
function = ""
module = ""
async = false
returns_result = false
returns_void = true
args = []

[crates.e2e.calls.register_document_extractor.overrides.python]
function = "register_document_extractor"
module = "kreuzberg"

[crates.e2e.calls.register_document_extractor.overrides.rust]
function = "register_document_extractor"
module = "kreuzberg"

[crates.e2e.calls.register_document_extractor.overrides.node]
function = "registerDocumentExtractor"
module = "kreuzberg"

[crates.e2e.calls.register_document_extractor.overrides.go]
function = "RegisterDocumentExtractor"
module = "kreuzberg"

[crates.e2e.calls.register_document_extractor.overrides.ruby]
function = "register_document_extractor"
module = "Kreuzberg"

[crates.e2e.calls.register_document_extractor.overrides.php]
function = "registerDocumentExtractor"
module = "Kreuzberg"

[crates.e2e.calls.register_document_extractor.overrides.java]
function = "registerDocumentExtractor"
module = "dev.kreuzberg.Kreuzberg"

[crates.e2e.calls.register_document_extractor.overrides.csharp]
function = "RegisterDocumentExtractor"
module = "Kreuzberg"

[crates.e2e.calls.register_document_extractor.overrides.swift]
function = "registerDocumentExtractor"
module = "Kreuzberg"

[crates.e2e.calls.register_document_extractor.overrides.dart]
function = "registerDocumentExtractor"
module = "kreuzberg"

[crates.e2e.calls.register_document_extractor.overrides.wasm]
function = "registerDocumentExtractor"
module = "kreuzberg"

[crates.e2e.calls.register_document_extractor.overrides.r]
function = "register_document_extractor"
module = "kreuzberg"
arg_name_map = { extractor = "r_backend" }

[crates.e2e.calls.register_document_extractor.overrides.elixir]
function = "register_document_extractor"
module = "Kreuzberg"

[crates.e2e.calls.register_document_extractor.overrides.zig]
function = "register_document_extractor"
module = "kreuzberg"
returns_result = false

# Kotlin/Android addresses the bridge through `class` + `function` rather than `module`.
[crates.e2e.calls.register_document_extractor.overrides.kotlin_android]
function = "register"
class = "DocumentExtractorBridge"

[crates.e2e.calls.unregister_document_extractor]
# Trait-bridge function: excluded from the gen_function surface; the bridges emit it directly.
# Takes a single string argument read from the fixture field `input.name`.
result_is_simple = true
function = ""
module = ""
async = false
returns_result = false
returns_void = true
args = [{ name = "name", field = "input.name", type = "string" }]

[crates.e2e.calls.unregister_document_extractor.overrides.python]
function = "unregister_document_extractor"
module = "kreuzberg"

[crates.e2e.calls.unregister_document_extractor.overrides.rust]
function = "unregister_document_extractor"
module = "kreuzberg"

[crates.e2e.calls.unregister_document_extractor.overrides.node]
function = "unregisterDocumentExtractor"
module = "kreuzberg"

[crates.e2e.calls.unregister_document_extractor.overrides.go]
function = "UnregisterDocumentExtractor"
module = "kreuzberg"

[crates.e2e.calls.unregister_document_extractor.overrides.ruby]
function = "unregister_document_extractor"
module = "Kreuzberg"

[crates.e2e.calls.unregister_document_extractor.overrides.php]
function = "unregisterDocumentExtractor"
module = "Kreuzberg"

[crates.e2e.calls.unregister_document_extractor.overrides.java]
function = "unregisterDocumentExtractor"
module = "dev.kreuzberg.Kreuzberg"

[crates.e2e.calls.unregister_document_extractor.overrides.csharp]
function = "UnregisterDocumentExtractor"
module = "Kreuzberg"

[crates.e2e.calls.unregister_document_extractor.overrides.swift]
function = "unregisterDocumentExtractor"
module = "Kreuzberg"

[crates.e2e.calls.unregister_document_extractor.overrides.dart]
function = "unregisterDocumentExtractor"
module = "kreuzberg"

[crates.e2e.calls.unregister_document_extractor.overrides.wasm]
function = "unregisterDocumentExtractor"
module = "kreuzberg"

[crates.e2e.calls.unregister_document_extractor.overrides.r]
function = "unregister_document_extractor"
module = "kreuzberg"

[crates.e2e.calls.unregister_document_extractor.overrides.elixir]
function = "unregister_document_extractor"
module = "Kreuzberg"

[crates.e2e.calls.unregister_document_extractor.overrides.zig]
function = "unregister_document_extractor"
module = "kreuzberg"

[crates.e2e.calls.unregister_document_extractor.overrides.kotlin_android]
function = "unregister"
class = "DocumentExtractorBridge"

[crates.e2e.calls.clear_document_extractors]
# Trait-bridge function: excluded from the gen_function surface; the bridges emit it directly.
# `function` and `module` are empty at this level — each language override below
# supplies the concrete symbol name.
result_is_simple = true
function = ""
module = ""
async = false
returns_result = false
returns_void = true
args = []

[crates.e2e.calls.clear_document_extractors.overrides.python]
function = "clear_document_extractors"
module = "kreuzberg"

[crates.e2e.calls.clear_document_extractors.overrides.rust]
function = "clear_document_extractors"
module = "kreuzberg"

[crates.e2e.calls.clear_document_extractors.overrides.node]
function = "clearDocumentExtractors"
module = "kreuzberg"

[crates.e2e.calls.clear_document_extractors.overrides.go]
function = "ClearDocumentExtractors"
module = "kreuzberg"

[crates.e2e.calls.clear_document_extractors.overrides.ruby]
function = "clear_document_extractors"
module = "Kreuzberg"

[crates.e2e.calls.clear_document_extractors.overrides.php]
function = "clearDocumentExtractors"
module = "Kreuzberg"

[crates.e2e.calls.clear_document_extractors.overrides.java]
function = "clearDocumentExtractors"
module = "dev.kreuzberg.Kreuzberg"

[crates.e2e.calls.clear_document_extractors.overrides.csharp]
function = "ClearDocumentExtractors"
module = "Kreuzberg"

[crates.e2e.calls.clear_document_extractors.overrides.swift]
function = "clearDocumentExtractors"
module = "Kreuzberg"

[crates.e2e.calls.clear_document_extractors.overrides.dart]
function = "clearDocumentExtractors"
module = "kreuzberg"

[crates.e2e.calls.clear_document_extractors.overrides.wasm]
function = "clearDocumentExtractors"
module = "kreuzberg"

[crates.e2e.calls.clear_document_extractors.overrides.r]
function = "clear_document_extractors"
module = "kreuzberg"

[crates.e2e.calls.clear_document_extractors.overrides.elixir]
function = "clear_document_extractors"
module = "Kreuzberg"

[crates.e2e.calls.clear_document_extractors.overrides.zig]
function = "clear_document_extractors"
module = "kreuzberg"

# Kotlin/Android addresses the bridge through `class` + `function` rather than `module`.
[crates.e2e.calls.clear_document_extractors.overrides.kotlin_android]
function = "clearAll"
class = "DocumentExtractorBridge"

[crates.e2e.calls.register_renderer]
# Trait-bridge function: excluded from the gen_function surface; the bridges emit it directly.
# `function` and `module` are empty at this level — see the per-language overrides.
result_is_simple = true
function = ""
module = ""
async = false
returns_result = false
returns_void = true
args = []

[crates.e2e.calls.register_renderer.overrides.python]
function = "register_renderer"
module = "kreuzberg"

[crates.e2e.calls.register_renderer.overrides.rust]
function = "register_renderer"
module = "kreuzberg"

[crates.e2e.calls.register_renderer.overrides.node]
function = "registerRenderer"
module = "kreuzberg"

[crates.e2e.calls.register_renderer.overrides.go]
function = "RegisterRenderer"
module = "kreuzberg"

[crates.e2e.calls.register_renderer.overrides.ruby]
function = "register_renderer"
module = "Kreuzberg"

[crates.e2e.calls.register_renderer.overrides.php]
function = "registerRenderer"
module = "Kreuzberg"

[crates.e2e.calls.register_renderer.overrides.java]
function = "registerRenderer"
module = "dev.kreuzberg.Kreuzberg"

[crates.e2e.calls.register_renderer.overrides.csharp]
function = "RegisterRenderer"
module = "Kreuzberg"

[crates.e2e.calls.register_renderer.overrides.swift]
function = "registerRenderer"
module = "Kreuzberg"

[crates.e2e.calls.register_renderer.overrides.dart]
function = "registerRenderer"
module = "kreuzberg"

[crates.e2e.calls.register_renderer.overrides.wasm]
function = "registerRenderer"
module = "kreuzberg"

[crates.e2e.calls.register_renderer.overrides.r]
function = "register_renderer"
module = "kreuzberg"
arg_name_map = { renderer = "r_backend" }

[crates.e2e.calls.register_renderer.overrides.elixir]
function = "register_renderer"
module = "Kreuzberg"

[crates.e2e.calls.register_renderer.overrides.zig]
function = "register_renderer"
module = "kreuzberg"
returns_result = false

[crates.e2e.calls.register_renderer.overrides.kotlin_android]
function = "register"
class = "RendererBridge"

[crates.e2e.calls.unregister_renderer]
# Trait-bridge function: excluded from the gen_function surface; the bridges emit it directly.
# `function` and `module` are empty at this level — each language override below
# supplies the concrete symbol name. The single string argument is read from the
# fixture field `input.name`.
result_is_simple = true
function = ""
module = ""
async = false
returns_result = false
returns_void = true
args = [{ name = "name", field = "input.name", type = "string" }]

[crates.e2e.calls.unregister_renderer.overrides.python]
function = "unregister_renderer"
module = "kreuzberg"

[crates.e2e.calls.unregister_renderer.overrides.rust]
function = "unregister_renderer"
module = "kreuzberg"

[crates.e2e.calls.unregister_renderer.overrides.node]
function = "unregisterRenderer"
module = "kreuzberg"

[crates.e2e.calls.unregister_renderer.overrides.go]
function = "UnregisterRenderer"
module = "kreuzberg"

[crates.e2e.calls.unregister_renderer.overrides.ruby]
function = "unregister_renderer"
module = "Kreuzberg"

[crates.e2e.calls.unregister_renderer.overrides.php]
function = "unregisterRenderer"
module = "Kreuzberg"

[crates.e2e.calls.unregister_renderer.overrides.java]
function = "unregisterRenderer"
module = "dev.kreuzberg.Kreuzberg"

[crates.e2e.calls.unregister_renderer.overrides.csharp]
function = "UnregisterRenderer"
module = "Kreuzberg"

[crates.e2e.calls.unregister_renderer.overrides.swift]
function = "unregisterRenderer"
module = "Kreuzberg"

[crates.e2e.calls.unregister_renderer.overrides.dart]
function = "unregisterRenderer"
module = "kreuzberg"

[crates.e2e.calls.unregister_renderer.overrides.wasm]
function = "unregisterRenderer"
module = "kreuzberg"

[crates.e2e.calls.unregister_renderer.overrides.r]
function = "unregister_renderer"
module = "kreuzberg"

[crates.e2e.calls.unregister_renderer.overrides.elixir]
function = "unregister_renderer"
module = "Kreuzberg"

[crates.e2e.calls.unregister_renderer.overrides.zig]
function = "unregister_renderer"
module = "kreuzberg"

# Kotlin/Android addresses the bridge through `class` + `function` rather than `module`.
[crates.e2e.calls.unregister_renderer.overrides.kotlin_android]
function = "unregister"
class = "RendererBridge"

[crates.e2e.calls.clear_renderers]
# Trait-bridge function: excluded from the gen_function surface; the bridges emit it directly.
# `function` and `module` are empty at this level — each language override below
# supplies the concrete symbol name.
result_is_simple = true
function = ""
module = ""
async = false
returns_result = false
returns_void = true
args = []

[crates.e2e.calls.clear_renderers.overrides.python]
function = "clear_renderers"
module = "kreuzberg"

[crates.e2e.calls.clear_renderers.overrides.rust]
function = "clear_renderers"
module = "kreuzberg"

[crates.e2e.calls.clear_renderers.overrides.node]
function = "clearRenderers"
module = "kreuzberg"

[crates.e2e.calls.clear_renderers.overrides.go]
function = "ClearRenderers"
module = "kreuzberg"

[crates.e2e.calls.clear_renderers.overrides.ruby]
function = "clear_renderers"
module = "Kreuzberg"

[crates.e2e.calls.clear_renderers.overrides.php]
function = "clearRenderers"
module = "Kreuzberg"

[crates.e2e.calls.clear_renderers.overrides.java]
function = "clearRenderers"
module = "dev.kreuzberg.Kreuzberg"

[crates.e2e.calls.clear_renderers.overrides.csharp]
function = "ClearRenderers"
module = "Kreuzberg"

[crates.e2e.calls.clear_renderers.overrides.swift]
function = "clearRenderers"
module = "Kreuzberg"

[crates.e2e.calls.clear_renderers.overrides.dart]
function = "clearRenderers"
module = "kreuzberg"

[crates.e2e.calls.clear_renderers.overrides.wasm]
function = "clearRenderers"
module = "kreuzberg"

[crates.e2e.calls.clear_renderers.overrides.r]
function = "clear_renderers"
module = "kreuzberg"

[crates.e2e.calls.clear_renderers.overrides.elixir]
function = "clear_renderers"
module = "Kreuzberg"

[crates.e2e.calls.clear_renderers.overrides.zig]
function = "clear_renderers"
module = "kreuzberg"

# Kotlin/Android addresses the bridge through `class` + `function` rather than `module`.
[crates.e2e.calls.clear_renderers.overrides.kotlin_android]
function = "clearAll"
class = "RendererBridge"

# Unlike the trait-bridge entries, this call names its function and module directly.
[crates.e2e.calls.embed_texts_async]
result_is_simple = true
result_is_array = true
function = "embed_texts_async"
module = "kreuzberg"
async = true
returns_result = true
skip_languages = ["wasm", "swift"]
options_type = "EmbeddingConfig"
args = [
  { name = "texts", field = "input.texts", type = "json_object", owned = true, element_type = "String" },
  { name = "config", field = "input.config", type = "json_object", optional = true },
]

[crates.e2e.calls.embed_texts_async.overrides.csharp]
result_is_simple = true
options_type = "EmbeddingConfig"
nested_types = { embedding_model = "EmbeddingModelType" }

[crates.e2e.calls.embed_texts_async.overrides.java]
result_is_simple = true
options_type = "EmbeddingConfig"

[crates.e2e.calls.embed_texts_async.overrides.go]
result_is_simple = true
result_is_array = true
options_type = "EmbeddingConfig"

[crates.e2e.calls.embed_texts_async.overrides.php]
result_is_simple = true

[crates.e2e.calls.embed_texts_async.overrides.dart]
result_is_simple = true
options_type = "EmbeddingConfig"

# NOTE(review): "swift" is also listed in this call's skip_languages, so this
# override may never be consulted — confirm whether it is still needed.
[crates.e2e.calls.embed_texts_async.overrides.swift]
options_type = "EmbeddingConfig"

[crates.e2e.calls.get_embedding_preset]
result_is_simple = true
result_is_option = true
function = "get_embedding_preset"
module = "kreuzberg"
async = false
returns_result = false
# The `name` argument is read from the fixture field `input.preset_name`.
args = [{ name = "name", field = "input.preset_name", type = "string" }]

[crates.e2e.calls.list_embedding_presets]
result_is_simple = true
result_is_array = true
function = "list_embedding_presets"
module = "kreuzberg"
async = false
returns_result = false
args = []

[crates.e2e.calls.detect_mime_type]
result_is_simple = true
function = "detect_mime_type"
module = "kreuzberg"
async = false
returns_result = true
args = [
  { name = "path", field = "input.path", type = "string" },
  { name = "check_exists", field = "input.check_exists", type = "bool", optional = true },
]

[crates.e2e.calls.render_pdf_page_to_png]
result_is_bytes = true
result_is_simple = true
function = "render_pdf_page_to_png"
module = "kreuzberg"
async = false
returns_result = true
skip_languages = ["wasm"]
args = [
  { name = "pdf_bytes", field = "input.pdf_bytes", type = "bytes" },
  { name = "page_index", field = "input.page_index", type = "int" },
]

# Each language override appends two extra arguments spelled as that language's
# "no value" literal.
[crates.e2e.calls.render_pdf_page_to_png.overrides.rust]
result_is_bytes = true
result_is_simple = true
extra_args = ["None", "None"]

[crates.e2e.calls.render_pdf_page_to_png.overrides.csharp]
result_is_bytes = true
result_is_simple = true
extra_args = ["null", "null"]

[crates.e2e.calls.render_pdf_page_to_png.overrides.go]
result_is_bytes = true
result_is_simple = true
result_is_array = true
extra_args = ["nil", "nil"]

[crates.e2e.calls.render_pdf_page_to_png.overrides.java]
result_is_bytes = true
result_is_simple = true
extra_args = ["null", "null"]

[crates.e2e.calls.render_pdf_page_to_png.overrides.zig]
result_is_bytes = true
result_is_simple = true
extra_args = ["null", "null"]

[crates.e2e.calls.render_pdf_page_to_png.overrides.r]
result_is_bytes = true
result_is_simple = true
extra_args = ["NULL", "NULL"]

# Swift is the only override whose extra arguments carry labels.
[crates.e2e.calls.render_pdf_page_to_png.overrides.swift]
result_is_bytes = true
result_is_simple = true
extra_args = ["dpi: nil", "password: nil"]

# Python-only overrides: each table sets just `function`, and the value equals
# the call's own key.
[crates.e2e.calls.bold.overrides.python]
function = "bold"

[crates.e2e.calls.italic.overrides.python]
function = "italic"

[crates.e2e.calls.link.overrides.python]
function = "link"

[crates.e2e.calls.code_markup.overrides.python]
function = "code_markup"

[crates.e2e.calls.strikethrough.overrides.python]
function = "strikethrough"

[crates.e2e.calls.underline.overrides.python]
function = "underline"

[crates.e2e.calls.classify_uri.overrides.python]
function = "classify_uri"

[crates.e2e.calls.clean_extracted_text.overrides.python]
function = "clean_extracted_text"

[crates.e2e.calls.escape_html_entities.overrides.python]
function = "escape_html_entities"

[crates.e2e.calls.fix_mojibake.overrides.python]
function = "fix_mojibake"

[crates.e2e.calls.is_valid_utf8.overrides.python]
function = "is_valid_utf8"

[crates.e2e.calls.convert_html_to_markdown.overrides.python]
function = "convert_html_to_markdown"

[crates.e2e.calls.generate_cache_key.overrides.python]
function = "generate_cache_key"

[crates.e2e.calls.validate_cache_key.overrides.python]
function = "validate_cache_key"

[crates.e2e.calls.detect_image_format.overrides.python]
function = "detect_image_format"

[crates.e2e.calls.is_valid_format_field.overrides.python]
function = "is_valid_format_field"

[crates.e2e.calls.normalize_vec.overrides.python]
function = "normalize_vec"

[crates.e2e.calls.list_supported_formats.overrides.python]
function = "list_supported_formats"

[crates.e2e.calls.validate_confidence_fn.overrides.python]
function = "validate_confidence_fn"

[crates.e2e.calls.validate_port.overrides.python]
function = "validate_port"

[crates.e2e.calls.validate_output_format_fn.overrides.python]
function = "validate_output_format_fn"

[crates.e2e.calls.validate_mime_type.overrides.python]
function = "validate_mime_type"

[crates.e2e.calls.validate_tesseract_oem.overrides.python]
function = "validate_tesseract_oem"

[crates.e2e.calls.validate_tesseract_psm.overrides.python]
function = "validate_tesseract_psm"

[crates.e2e.calls.validate_token_reduction_level.overrides.python]
function = "validate_token_reduction_level"

[crates.e2e.calls.validate_language_code.overrides.python]
function = "validate_language_code"

[crates.e2e.calls.validate_ocr_backend.overrides.python]
function = "validate_ocr_backend"

[crates.e2e.calls.validate_binarization_method.overrides.python]
function = "validate_binarization_method"

[crates.e2e.calls.validate_host.overrides.python]
function = "validate_host"

[crates.e2e.calls.extract_doc_text.overrides.python]
function = "extract_doc_text"

[crates.e2e.calls.extract_email_content.overrides.python]
function = "extract_email_content"

[crates.e2e.calls.extract_pptx_from_bytes.overrides.python]
function = "extract_pptx_from_bytes"

[crates.e2e.calls.extract_text_from_pdf.overrides.python]
function = "extract_text_from_pdf"

# Per-language package locations for the e2e suites. Every path is relative and
# climbs two directories before descending into the package.
[crates.e2e.packages.rust]
path = "../../crates/kreuzberg"

[crates.e2e.packages.python]
name = "kreuzberg"
path = "../../packages/python"

[crates.e2e.packages.node]
name = "@kreuzberg/node"
path = "../../crates/kreuzberg-node"

[crates.e2e.packages.go]
module = "github.com/kreuzberg-dev/kreuzberg/v5"
path = "../../packages/go/v5"

[crates.e2e.packages.csharp]
name = "Kreuzberg"
path = "../../packages/csharp/Kreuzberg/Kreuzberg.csproj"

[crates.e2e.packages.java]
module = "dev.kreuzberg"
path = "../../packages/java"

[crates.e2e.packages.ruby]
name = "kreuzberg"
path = "../../packages/ruby"

[crates.e2e.packages.elixir]
module = "Kreuzberg"
path = "../../packages/elixir"

[crates.e2e.packages.php]
name = "kreuzberg-dev/kreuzberg"
path = "../../packages/php"

[crates.e2e.packages.r]
module = "kreuzberg"
path = "../../packages/r"

[crates.e2e.packages.dart]
path = "../../packages/dart"

[crates.e2e.packages.kotlin_android]
module = "dev.kreuzberg"
path = "../../packages/kotlin-android"

[crates.e2e.packages.swift]
path = "../../packages/swift"

[crates.e2e.packages.zig]
path = "../../packages/zig"

# Point the WASM e2e suite at the wasm-pack `nodejs` target. The default `pkg/`
# output is the bundler target, whose JS glue contains `import * as foo from "env"`
# statements (unresolved namespace imports for getrandom/wasi-bound symbols) that
# Node cannot satisfy at import time. The `pkg/nodejs/` variant inlines those
# imports as `require()` calls handled by wasm-bindgen's Node loader.
[crates.e2e.packages.wasm]
name = "@kreuzberg/wasm"
path = "../../crates/kreuzberg-wasm/pkg/nodejs"

# One formatter command per language; each command contains a `{dir}` placeholder.
[crates.e2e.format]
python = "ruff check --fix {dir} && ruff format {dir}"
go = "gofmt -w {dir}"
rust = "(cd {dir} && cargo fmt --all)"

[crates.e2e.fields]
chunks_have_content = "chunks_have_content"

# The key is quoted, so TOML reads it as one key containing a dot rather than
# as a nested table path.
[crates.e2e.fields_c_types]
"extraction_result.metadata" = "Metadata"

[crates.e2e.registry]
output = "test_apps"

[crates.scaffold]
description = "High-performance document intelligence library"
license = "Elastic-2.0"
repository = "https://github.com/kreuzberg-dev/kreuzberg"
homepage = "https://kreuzberg.dev"
# NOTE(review): the author string ends with a trailing space — an "<email>"
# suffix may have been lost; confirm against the package manifests.
authors = ["Na'aman Hirschfeld "]
keywords = ["document", "extraction", "pdf", "ocr", "text"]

# Declared with no keys of its own.
[crates.scaffold.cargo]

[crates.readme]
template_dir = "templates/readme"
snippets_dir = "docs/snippets"
# `{language}` placeholder; several language tables set an explicit `output_path` instead.
output_pattern = "packages/{language}/README.md"
discord_url = "https://discord.gg/xt9WY3GnKR"
banner_url = "https://github.com/user-attachments/assets/419fc06c-8313-4324-b159-4b4d3cfce5c0"

[crates.readme.languages.rust]
template = "root.md"
name = "Kreuzberg"
output_path = "README.md"

[crates.readme.languages.python]
template = "python.md"
name = "Python"
package_name = "kreuzberg"
badge_url = "https://img.shields.io/pypi/v/kreuzberg?label=Python&color=007ec6"
docs_url = "https://kreuzberg.dev/"
description = "Extract text, tables, images, and metadata from 90+ file formats and 300+ programming languages including PDF, Office documents, and images. Native Python bindings with async/await support, multiple OCR backends (Tesseract, EasyOCR, PaddleOCR), and extensible plugin system."
package_manager = ["pip"]
features = { ocr = true, async = true, plugin_system = true, embeddings = true, code_intelligence = true }
ocr_backends = ["tesseract", "easyocr", "paddleocr"]
optional_sections = ["async_vs_sync_performance", "ocr_backends", "system_requirements"]

# Snippet slot -> snippet file, written as a sub-table instead of one long inline table.
[crates.readme.languages.python.snippets]
basic_extraction = "getting-started/basic_usage.md"
async_extraction = "getting-started/extract_file.md"
batch_processing = "getting-started/extract_with_ocr.md"
ocr_configuration = "getting-started/hello_world.md"
table_extraction = "getting-started/read_content.md"

[crates.readme.languages.typescript]
template = "language_package.md"
output_path = "crates/kreuzberg-node/README.md"
name = "TypeScript (Node.js)"
package_name = "@kreuzberg/node"
badge_url = "https://img.shields.io/npm/v/@kreuzberg/node?label=TypeScript&color=007ec6"
docs_url = "https://kreuzberg.dev/"
description = "Extract text, tables, images, and metadata from 90+ file formats and 300+ programming languages including PDF, Office documents, and images. Native NAPI-RS bindings for Node.js with superior performance, async/await support, and TypeScript type definitions."
package_manager = ["pnpm"]
features = { ocr = true, async = true, plugin_system = true, embeddings = true, worker_pools = true, config_discovery = true, code_intelligence = true }
ocr_backends = ["tesseract", "paddleocr"]
optional_sections = [
  "native_vs_wasm_comparison",
  "ocr_support",
  "performance",
  "napi_implementation",
]

# Snippet slot -> snippet file, written as a sub-table instead of one long inline table.
[crates.readme.languages.typescript.snippets]
basic_extraction = "getting-started/basic_usage.md"
async_extraction = "getting-started/extract_file_async.md"
batch_processing = "getting-started/batch_extract_files_sync.md"
ocr_configuration = "config/config_ocr.md"
table_extraction = "api/tables.md"
config_discovery = "config/config_discovery.md"
worker_pool = "advanced/worker_pool.md"

[crates.readme.languages.go]
template = "go.md"
output_path = "packages/go/v5/README.md"
name = "Go"
package_name = "github.com/kreuzberg-dev/kreuzberg/v5"
badge_url = "https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6"
docs_url = "https://kreuzberg.dev/"
description = "Extract text, tables, images, and metadata from 90+ file formats and 300+ programming languages including PDF, Office documents, and images. Go bindings with context-aware async support, idiomatic Go API, and CGO-based native performance."
package_manager = ["go get"]
features = { ocr = true, async = true, plugin_system = true, embeddings = true, code_intelligence = true }
ocr_backends = ["tesseract", "paddleocr"]
optional_sections = ["system_requirements", "ffi_build_instructions"]

# Snippet slot -> snippet file, written as a sub-table instead of one long inline table.
[crates.readme.languages.go.snippets]
basic_extraction = "getting-started/basic_usage.md"
async_extraction = "getting-started/extract_file.md"
batch_processing = "getting-started/extract_with_ocr.md"
ocr_configuration = "getting-started/hello_world.md"

[crates.readme.languages.java]
template = "language_package.md"
name = "Java"
package_name = "dev.kreuzberg:kreuzberg"
badge_url = "https://img.shields.io/maven-central/v/dev.kreuzberg/kreuzberg?label=Java&color=007ec6"
docs_url = "https://kreuzberg.dev/"
description = "Extract text, tables, images, and metadata from 90+ file formats and 300+ programming languages including PDF, Office documents, and images. Java bindings with type-safe API, Foreign Function & Memory API integration, and native performance."
package_manager = ["maven", "gradle"]
features = { ocr = true, async = true, plugin_system = true, embeddings = true, code_intelligence = true }
ocr_backends = ["tesseract", "paddleocr"]
optional_sections = ["maven_gradle_setup", "system_requirements"]

[crates.readme.languages.java.snippets]
basic_extraction = "getting-started/basic_usage.md"
async_extraction = "api/extract_file_async.md"
batch_processing = "api/batch_extract_files_sync.md"
ocr_configuration = "ocr/ocr_extraction.md"
configuration = "config/config_basic.md"

# NOTE(review): `package_name` and the badge use "kreuzberg/kreuzberg", while
# [crates.e2e.packages.php] uses name = "kreuzberg-dev/kreuzberg" — confirm
# which Composer package name is canonical.
[crates.readme.languages.php]
template = "language_package.md"
name = "PHP"
package_name = "kreuzberg/kreuzberg"
badge_url = "https://img.shields.io/packagist/v/kreuzberg/kreuzberg?label=PHP&color=007ec6"
docs_url = "https://kreuzberg.dev/"
description = "Extract text, tables, images, and metadata from 90+ file formats and 300+ programming languages including PDF, Office documents, and images. PHP bindings with modern PHP 8.2+ support and type-safe API."
package_manager = ["composer"]
# PHP is the only language table in this span with `async = false`.
features = { ocr = true, async = false, plugin_system = true, embeddings = true, code_intelligence = true }
ocr_backends = ["tesseract", "paddleocr"]
optional_sections = ["composer_installation", "system_requirements"]

# Snippet slot -> snippet file, written as a sub-table instead of one long inline table.
[crates.readme.languages.php.snippets]
basic_extraction = "quickstart/basic_extraction_oop.php"
batch_processing = "extraction/batch_processing.php"
ocr_configuration = "ocr/basic_ocr.php"

[crates.readme.languages.ruby]
template = "ruby.md"
name = "Ruby"
package_name = "kreuzberg"
badge_url = "https://img.shields.io/gem/v/kreuzberg?label=Ruby&color=007ec6"
docs_url = "https://kreuzberg.dev/"
description = "Extract text, tables, images, and metadata from 90+ file formats and 300+ programming languages including PDF, Office documents, and images. Ruby bindings with idiomatic Ruby API and native performance."
package_manager = ["rubygems", "bundler"]
features = { ocr = true, async = true, plugin_system = true, embeddings = true, code_intelligence = true }
ocr_backends = ["tesseract", "paddleocr"]
optional_sections = ["system_requirements", "development_setup"]

[crates.readme.languages.ruby.snippets]
basic_extraction = "getting-started/basic_usage.md"
async_extraction = "getting-started/extract_file.md"
batch_processing = "getting-started/install_verify.md"
ocr_configuration = "getting-started/extract_with_ocr.md"

[crates.readme.languages.csharp]
template = "language_package.md"
name = "C#"
package_name = "Kreuzberg"
# `%23` in the badge URL is the percent-encoded `#` of "C#".
badge_url = "https://img.shields.io/nuget/v/Kreuzberg?label=C%23&color=007ec6"
docs_url = "https://kreuzberg.dev/"
description = "Extract text, tables, images, and metadata from 90+ file formats and 300+ programming languages including PDF, Office documents, and images. .NET bindings with full type safety, async/await support, and .NET 10.0+ compatibility."
package_manager = ["nuget"] features = { ocr = true, async = true, plugin_system = true, embeddings = true, code_intelligence = true } ocr_backends = ["tesseract", "paddleocr"] optional_sections = ["dotnet_installation", "system_requirements"] snippets = { basic_extraction = "getting-started/basic_usage.cs", async_extraction = "advanced/async_extraction.cs", batch_processing = "advanced/batch_processing.cs", ocr_configuration = "ocr/tesseract_backend.cs" } [crates.readme.languages.elixir] template = "language_package.md" name = "Elixir" package_name = "kreuzberg" badge_url = "https://img.shields.io/hexpm/v/kreuzberg?label=Elixir&color=007ec6" docs_url = "https://kreuzberg.dev/" description = "Extract text, tables, images, and metadata from 90+ file formats and 300+ programming languages including PDF, Office documents, and images. Elixir bindings with native BEAM concurrency, OTP integration, and idiomatic Elixir API." package_manager = ["mix"] features = { ocr = true, async = true, plugin_system = true, embeddings = true, code_intelligence = true } ocr_backends = ["tesseract", "paddleocr"] optional_sections = ["mix_installation", "async_with_tasks", "plugin_system"] snippets = { basic_extraction = "getting-started/basic_usage.exs", async_extraction = "getting-started/extract_file.exs", batch_processing = "core/batch_extract_files_sync.exs", ocr_configuration = "ocr/tesseract_basic.exs", plugin_system = "plugins/word_count_processor.exs" } [crates.readme.languages.wasm] template = "language_package.md" output_path = "crates/kreuzberg-wasm/README.md" name = "WebAssembly" package_name = "@kreuzberg/wasm" badge_url = "https://img.shields.io/npm/v/@kreuzberg/wasm?label=WASM&color=007ec6" docs_url = "https://kreuzberg.dev/" description = "Extract text, tables, images, and metadata from 90+ file formats and 300+ programming languages including PDF, Office documents, and images. 
WebAssembly bindings for browsers, Deno, and Cloudflare Workers with portable deployment and multi-threading support." package_manager = ["pnpm"] features = { ocr = true, async = true, plugin_system = true, embeddings = false, code_intelligence = true } ocr_backends = ["tesseract-wasm"] optional_sections = ["multi_threading", "ocr_support", "performance_comparison"] snippets = { basic_extraction = "getting-started/basic-extract.ts", browser_extraction = "getting-started/browser-file-input.ts", node_extraction = "getting-started/basic-extract.ts", async_extraction = "getting-started/async-extraction.ts", cloudflare_workers = "getting-started/runtime-detection.ts", batch_processing = "getting-started/batch-processing.ts", ocr_configuration = "ocr/enable-ocr.ts" } [crates.readme.languages.r] template = "language_package.md" name = "R" package_name = "kreuzberg" badge_url = "https://img.shields.io/badge/R-kreuzberg-007ec6" docs_url = "https://kreuzberg.dev/" description = "Extract text, tables, images, and metadata from 90+ file formats and 300+ programming languages including PDF, Office documents, and images. R bindings with native R API, data frame integration, and high-performance document extraction." 
package_manager = ["install.packages"] features = { ocr = true, async = false, plugin_system = true, embeddings = true, code_intelligence = true } ocr_backends = ["tesseract", "paddleocr"] optional_sections = ["system_requirements"] snippets = { basic_extraction = "getting-started/basic_extraction.md", async_extraction = "getting-started/extract_file.md", batch_processing = "getting-started/extract_with_ocr.md", ocr_configuration = "ocr/ocr_extraction.md" } [crates.readme.languages.ffi] template = "language_package.md" output_path = "crates/kreuzberg-ffi/README.md" name = "FFI (C/C++)" package_name = "kreuzberg-ffi" badge_url = "https://img.shields.io/badge/C%2FC%2B%2B-kreuzberg--ffi-007ec6" docs_url = "https://kreuzberg.dev/" description = "Extract text, tables, images, and metadata from 90+ file formats and 300+ programming languages including PDF, Office documents, and images. C/C++ FFI bindings providing a stable ABI for native integration, shared library distribution, and cross-language interop." package_manager = ["cargo"] features = { ocr = true, async = true, plugin_system = true, embeddings = true, code_intelligence = true } [crates.readme.languages.kotlin_android] template = "language_package.md" name = "Kotlin (Android)" package_name = "dev.kreuzberg:kreuzberg-android" badge_url = "https://img.shields.io/badge/Android-kreuzberg-3DDC84" docs_url = "https://kreuzberg.dev/" description = "Extract text, tables, images, and metadata from 90+ file formats and 300+ programming languages including PDF, Office documents, and images. Android library (AAR) with bundled jniLibs/arm64-v8a and jniLibs/x86_64 — Gradle automatically picks up the native cdylib for emulator and device builds. Server-side Kotlin/JVM consumers can use the Java binding directly via standard Kotlin/Java interop." 
package_manager = ["gradle", "maven"] features = { ocr = true, async = true, plugin_system = true, embeddings = true, code_intelligence = true } ocr_backends = ["tesseract", "paddleocr"] optional_sections = ["system_requirements"] snippets = { basic_extraction = "api/extract_file_sync.md", async_extraction = "api/extract_file_async.md", batch_processing = "api/batch_extract_files_sync.md", ocr_configuration = "ocr/ocr_extraction.md" } [crates.readme.languages.swift] template = "language_package.md" name = "Swift" package_name = "Kreuzberg" badge_url = "https://img.shields.io/badge/Swift-kreuzberg-007ec6" docs_url = "https://kreuzberg.dev/" description = "Extract text, tables, images, and metadata from 90+ file formats and 300+ programming languages including PDF, Office documents, and images. Swift bindings via swift-bridge for macOS, iOS, and Linux, with native Swift types and async/await support." package_manager = ["spm"] features = { ocr = true, async = true, plugin_system = true, embeddings = true, code_intelligence = true } ocr_backends = ["tesseract", "paddleocr"] optional_sections = ["system_requirements"] snippets = { basic_extraction = "api/extract_file_sync.md", async_extraction = "api/extract_file_async.md", batch_processing = "api/batch_extract_files_sync.md", ocr_configuration = "ocr/ocr_extraction.md" } [crates.readme.languages.dart] template = "language_package.md" name = "Dart / Flutter" package_name = "kreuzberg" badge_url = "https://img.shields.io/badge/Dart-kreuzberg-007ec6" docs_url = "https://kreuzberg.dev/" description = "Extract text, tables, images, and metadata from 90+ file formats and 300+ programming languages including PDF, Office documents, and images. Dart bindings via flutter_rust_bridge for both Flutter apps and pure-Dart server contexts, with isolate-safe Future/Stream APIs." 
package_manager = ["pub"] features = { ocr = true, async = true, plugin_system = true, embeddings = true, code_intelligence = true } ocr_backends = ["tesseract", "paddleocr"] optional_sections = ["system_requirements"] snippets = { basic_extraction = "api/extract_file_sync.md", async_extraction = "api/extract_file_async.md", batch_processing = "api/batch_extract_files_sync.md", ocr_configuration = "ocr/ocr_extraction.md" } [crates.readme.languages.zig] template = "language_package.md" name = "Zig" package_name = "kreuzberg" badge_url = "https://img.shields.io/badge/Zig-kreuzberg-007ec6" docs_url = "https://kreuzberg.dev/" description = "Extract text, tables, images, and metadata from 90+ file formats and 300+ programming languages including PDF, Office documents, and images. Zig bindings consuming the C FFI surface via @cImport, idiomatic error sets, optional types, and slice-based memory management." package_manager = ["zig"] features = { ocr = true, async = false, plugin_system = true, embeddings = true, code_intelligence = true } ocr_backends = ["tesseract", "paddleocr"] optional_sections = ["system_requirements"] snippets = { basic_extraction = "api/extract_file_sync.md", async_extraction = "api/extract_file_async.md", batch_processing = "api/batch_extract_files_sync.md", ocr_configuration = "ocr/ocr_extraction.md" } [[crates.trait_bridges]] trait_name = "OcrBackend" super_trait = "kreuzberg::plugins::Plugin" registry_getter = "kreuzberg::plugins::registry::get_ocr_backend_registry" register_fn = "register_ocr_backend" unregister_fn = "unregister_ocr_backend" clear_fn = "clear_ocr_backends" [[crates.trait_bridges]] trait_name = "PostProcessor" super_trait = "kreuzberg::plugins::Plugin" registry_getter = "kreuzberg::plugins::registry::get_post_processor_registry" register_fn = "register_post_processor" unregister_fn = "unregister_post_processor" clear_fn = "clear_post_processors" [[crates.trait_bridges]] trait_name = "Validator" super_trait = 
"kreuzberg::plugins::Plugin" registry_getter = "kreuzberg::plugins::registry::get_validator_registry" register_fn = "register_validator" unregister_fn = "unregister_validator" clear_fn = "clear_validators" [[crates.trait_bridges]] trait_name = "EmbeddingBackend" super_trait = "kreuzberg::plugins::Plugin" registry_getter = "kreuzberg::plugins::registry::get_embedding_backend_registry" register_fn = "register_embedding_backend" unregister_fn = "unregister_embedding_backend" clear_fn = "clear_embedding_backends" [[crates.trait_bridges]] trait_name = "DocumentExtractor" super_trait = "kreuzberg::plugins::Plugin" registry_getter = "kreuzberg::plugins::registry::get_document_extractor_registry" register_fn = "register_document_extractor" unregister_fn = "unregister_document_extractor" clear_fn = "clear_document_extractors" # `as_sync_extractor` returns `Option<&dyn SyncExtractor>` — a trait-object reference that # cannot traverse the C FFI boundary. Skip it in the FFI vtable and let the default impl # (returns None) take over. ffi_skip_methods = ["as_sync_extractor"] [[crates.trait_bridges]] trait_name = "Renderer" super_trait = "kreuzberg::plugins::Plugin" registry_getter = "kreuzberg::plugins::registry::get_renderer_registry" register_fn = "register_renderer" unregister_fn = "unregister_renderer" clear_fn = "clear_renderers"