# fil/alef.toml (file-viewer header: 3288 lines, 109 KiB, TOML; 2026-06-01 23:40:55 +02:00)
# Workspace-wide settings: the alef version pin and the binding languages
# handled by this workspace. Every language listed here has a matching
# [crates.<language>] table further down in this file.
[workspace]
alef_version = "0.21.0"
languages = [
    "python",
    "node",
    "ruby",
    "php",
    "ffi",
    "go",
    "java",
    "csharp",
    "elixir",
    "wasm",
    "r",
    "dart",
    "kotlin_android",
    "swift",
    "zig",
]
# ---------------------------------------------------------------------------
# Tool selection
# ---------------------------------------------------------------------------
# Force regen: Java bytes marshalling + FormatMetadata JsonUnwrapped fix
# Package managers for the Python and Node packages. The lint/test commands
# later in this file invoke `uv` and `pnpm` directly, matching these choices.
[workspace.tools]
python_package_manager = "uv"
node_package_manager = "pnpm"
# ---------------------------------------------------------------------------
# DTO styles
# ---------------------------------------------------------------------------
# One DTO flavour per target language. `python_output` is configured
# separately from `python` — presumably the style for Python result/output
# types as opposed to inputs; TODO confirm against the alef documentation.
[workspace.dto]
python = "dataclass"
python_output = "typed-dict"
node = "interface"
ruby = "struct"
php = "readonly-class"
elixir = "struct"
go = "struct"
java = "record"
csharp = "record"
r = "list"
# ---------------------------------------------------------------------------
# Generation control
# ---------------------------------------------------------------------------
# Every generation category is switched on at workspace level; per-language
# exceptions live in [workspace.generate_overrides.<language>].
[workspace.generate]
bindings = true
errors = true
configs = true
async_wrappers = true
type_conversions = true
package_metadata = true
public_api = true
# wasm is the only language that opts out of async wrappers.
[workspace.generate_overrides.wasm]
async_wrappers = false
# Formatting is switched off at workspace level.
[workspace.format]
enabled = false
# Disable the default format_generated pass for node (pnpm dlx oxfmt .) which runs
# from the repo root and chokes on vendored HTML fixtures and Helm chart YAML.
# Post-generation formatting is handled by [crates.lint.node] format command instead.
# NOTE(review): [workspace.format] above is already disabled, so this override
# looks redundant unless overrides are evaluated independently — confirm.
[workspace.format_overrides.node]
enabled = false
# Version synchronization
#
# Extra files listed for version sync in addition to whatever alef handles
# by default (presumably every file here carries a version string that must
# track the workspace version — confirm against the alef documentation).
[workspace.sync]
extra_paths = [
    "packages/python/kreuzberg/__init__.py",
    "packages/ruby/lib/kreuzberg/version.rb",
    "packages/ruby/ext/kreuzberg_rb/native/Cargo.toml",
    "crates/kreuzberg-node/package.json",
    "crates/kreuzberg-node/npm/*/package.json",
    "packages/go/v5/go.mod",
    "Cargo.toml",
]
# Core crate definition: identity, error mapping, the Rust source files that
# are scanned for the binding surface, and the cargo features in play.
[[crates]]
name = "kreuzberg"
core_import = "kreuzberg"
error_type = "KreuzbergError"
error_constructor = "kreuzberg::KreuzbergError::Other({msg})"
version_from = "Cargo.toml"
workspace_root = "."
sources = [
    # Public API surface — only files with types/functions re-exported from lib.rs
    "crates/kreuzberg/src/lib.rs",
    "crates/kreuzberg/src/error.rs",
    # Public types (re-exported via `pub use types::*`)
    "crates/kreuzberg/src/types/extraction.rs",
    "crates/kreuzberg/src/types/metadata.rs",
    "crates/kreuzberg/src/types/page.rs",
    "crates/kreuzberg/src/types/tables.rs",
    "crates/kreuzberg/src/types/annotations.rs",
    "crates/kreuzberg/src/types/document_structure.rs",
    "crates/kreuzberg/src/types/ocr_elements.rs",
    "crates/kreuzberg/src/types/formats.rs",
    "crates/kreuzberg/src/types/uri.rs",
    "crates/kreuzberg/src/types/djot.rs",
    "crates/kreuzberg/src/types/internal.rs",
    # Config types (re-exported via `pub use core::config::*`)
    "crates/kreuzberg/src/core/config/mod.rs",
    "crates/kreuzberg/src/core/config/extraction/types.rs",
    "crates/kreuzberg/src/core/config/extraction/core.rs",
    "crates/kreuzberg/src/core/config/extraction/file_config.rs",
    "crates/kreuzberg/src/core/config/ocr.rs",
    "crates/kreuzberg/src/core/config/page.rs",
    "crates/kreuzberg/src/core/config/pdf.rs",
    "crates/kreuzberg/src/core/config/html_output.rs",
    "crates/kreuzberg/src/core/config/layout.rs",
    "crates/kreuzberg/src/core/config/acceleration.rs",
    "crates/kreuzberg/src/core/config/llm.rs",
    "crates/kreuzberg/src/core/config/content_filter.rs",
    "crates/kreuzberg/src/core/config/tree_sitter.rs",
    "crates/kreuzberg/src/core/config/email.rs",
    "crates/kreuzberg/src/core/config/formats.rs",
    # Public MIME/format detection functions
    "crates/kreuzberg/src/core/mime.rs",
    "crates/kreuzberg/src/core/formats.rs",
    # Plugin trait (for trait bridges)
    "crates/kreuzberg/src/plugins/traits.rs",
    "crates/kreuzberg/src/plugins/embedding.rs",
    "crates/kreuzberg/src/plugins/registry/mod.rs",
    # Public module APIs
    "crates/kreuzberg/src/rendering/mod.rs",
    "crates/kreuzberg/src/keywords/mod.rs",
    "crates/kreuzberg/src/chunking/mod.rs",
    "crates/kreuzberg/src/embeddings/mod.rs",
    "crates/kreuzberg/src/language_detection/mod.rs",
    "crates/kreuzberg/src/cache/core.rs",
    # Batch extraction functions (re-exported from lib.rs)
    "crates/kreuzberg/src/core/extractor/mod.rs",
    "crates/kreuzberg/src/core/extractor/sync.rs",
    # PDF rendering (render_pdf_page_to_png — re-exported from lib.rs)
    "crates/kreuzberg/src/pdf/render.rs",
    # PdfMetadata — referenced in types/metadata.rs as Pdf(PdfMetadata) variant
    "crates/kreuzberg/src/pdf/metadata.rs",
    # Office metadata types (used in metadata.rs)
    "crates/kreuzberg/src/extraction/office_metadata/mod.rs",
    "crates/kreuzberg/src/extraction/office_metadata/core_properties.rs",
    "crates/kreuzberg/src/extraction/office_metadata/app_properties.rs",
    "crates/kreuzberg/src/extraction/office_metadata/custom_properties.rs",
    "crates/kreuzberg/src/extraction/office_metadata/odt_properties.rs",
    # OCR/PaddleOCR types (re-exported from lib.rs)
    "crates/kreuzberg/src/ocr/types.rs",
    "crates/kreuzberg/src/paddle_ocr/mod.rs",
    # Plugin registry operations (list_document_extractors etc.)
    "crates/kreuzberg/src/plugins/extractor/mod.rs",
    # OCR backend registry operations (register/unregister/list/clear)
    "crates/kreuzberg/src/plugins/ocr.rs",
    # Renderer trait and registry operations
    "crates/kreuzberg/src/plugins/renderer.rs",
    # Validator registry operations
    "crates/kreuzberg/src/plugins/validator/mod.rs",
    # Post-processor trait and registry operations
    "crates/kreuzberg/src/plugins/processor/trait.rs",
    "crates/kreuzberg/src/plugins/processor/mod.rs",
]
features = [
    "full",
    "pdf",
    "ocr",
    "paddle-ocr",
    "paddle-ocr-types",
    "layout-detection",
    "layout-types",
    "embeddings",
    "embedding-presets",
    "chunking",
    "keywords-yake",
    "keywords-rake",
    "language-detection",
    "html",
    "tree-sitter",
    "office",
    "email",
    "archives",
    "stopwords",
    "auto-rotate",
    "auto-rotate-types",
    "tokio-runtime",
    "api",
    "mcp",
    "liter-llm",
    "quality",
]
# Python binding settings, plus where the generated stubs are written.
[crates.python]
module_name = "_kreuzberg"
# Arc<Mutex<T>> opaque types — codegen doesn't generate .lock() for methods
exclude_types = ["PooledString"]
exclude_functions = ["calculate_quality_score"]

[crates.python.stubs]
output = "packages/python/kreuzberg/"
# Node binding settings and the extra Cargo dependency its generated code needs.
[crates.node]
package_name = "@kreuzberg/node"
# Opaque Arc<Mutex<T>> types — codegen doesn't generate .lock() calls for methods
exclude_types = ["StreamReader", "PooledString"]
exclude_functions = ["calculate_quality_score"]

[crates.node.extra_dependencies]
# Required by alef-emitted #[serde_with::serde_as] attribute on HashMap<_, Vec<u8>>
# fields (e.g. binary cache maps). Not added by alef-backend-napi automatically.
serde_with = "3"
# Ruby binding settings, plus where the generated stubs are written.
[crates.ruby]
gem_name = "kreuzberg"
exclude_functions = ["calculate_quality_score"]

[crates.ruby.stubs]
output = "packages/ruby/sig/"
# PHP extension settings. Each type below is listed exactly once; a type that
# falls under more than one exclusion reason is noted in the later group's
# comment rather than repeated.
[crates.php]
extension_name = "kreuzberg"
exclude_types = [
    # Enum types that don't have From<String> implementation for PHP
    "ChunkerType",
    "OutputFormat",
    "EmbeddingModelType",
    "UriKind",
    "ChunkType",
    # PooledString has methods incompatible with ext-php-rs (buffer_mut, deref_mut, fmt(f))
    # that trigger E0716/E0596/E0507 in the #[php_impl] macro expansion.
    "PooledString",
    # Tagged data enums whose flat-PHP-class From<core> impls call .into() on PathBuf,
    # usize, TableGrid, Vec<(String,String)>, [(u32,u32);4] fields that don't have the
    # required Into<String>/Into<i64> implementations. Tracked upstream in alef-backend-php.
    # (EmbeddingModelType also belongs to this group; it is already excluded above.)
    "ChunkSizing",
    "NodeContent",
    "OcrBoundingGeometry",
]
exclude_functions = [
    "calculate_quality_score",
]
# Elixir (NIF) binding settings.
[crates.elixir]
app_name = "kreuzberg"
exclude_functions = [
    # batch_extract_* take Vec<(PathBuf, Option<FileExtractionConfig>)> or
    # Vec<(Vec<u8>, String, Option<...>)> which codegen maps to Vec<String> —
    # type mismatch; no safe tuple-marshalling across NIF boundary
    "batch_extract_files_sync",
    "batch_extract_bytes_sync",
    "batch_extract_files",
    "batch_extract_bytes",
    "calculate_quality_score",
]
# Trait types cannot be FFI-bound
exclude_types = ["Recyclable"]
# WebAssembly binding settings. This target builds with the reduced
# `wasm-target` feature set, so the types and functions listed below are
# left out of the generated surface.
[crates.wasm]
package_name = "@kreuzberg/wasm"
exclude_types = [
    # ORT/runtime-only types not available in the wasm-target feature set
    "OcrFallbackDecision",
    "OcrProcessor",
    "OcrCacheStats",
    "TessdataManager",
    "PageLayoutResult",
    # MCP parameter types not compatible with wasm-bindgen (Option<JsValue> fields)
    "BatchExtractFilesParams",
]
exclude_functions = [
    # paddle-ocr / layout-detection / embeddings / auto-rotate
    "embed_texts",
    "embed_text",
    "detect_layout",
    "detect_orientation",
    "run_ocr_pipeline",
    "process_ocr_element",
    "layout_runner",
    "record_success_on_current_span",
    "record_error_on_current_span",
    # ocr module (requires full ocr, not ocr-wasm)
    "compute_hash",
    "assemble_ocr_markdown",
    "validate_tesseract_version",
    # paddle_ocr module
    "is_language_supported",
    "language_to_script_family",
    "map_language_code",
    # embeddings module
    "normalize",
    "list_presets",
    # image module (ort-dependent preprocessing)
    "calculate_optimal_dpi",
    "calculate_smart_dpi",
    # ort_discovery
    "ensure_ort_available",
    # pdf layout_runner
    "run_layout_for_page",
    "run_layout_for_pdf",
    # telemetry internals
    "sanitize_path",
    # layout detection
    "config_from_extraction",
    "detect_layout_for_images",
    # Uses excluded types WasmDetectionResult/WasmRecognizedTable
    "recognize_page_tables",
    # Functions using excluded types/modules
    "generate_embeddings_for_chunks",
    "apply_heuristics",
    "greedy_nms",
    # docx extraction functions (internal, not public API)
    "detect_page_breaks_from_docx",
    "detect_table_page_numbers",
    # ooxml embedded object extraction (internal, returns complex types)
    "extract_ooxml_embedded_objects",
    "extract_and_process_embedded_files",
    # markdown utility functions (internal)
    "cells_to_text",
    "cells_to_markdown",
    # Table processing stubs (internal, incomplete implementations)
    "calculate_quality_score",
    "reconstruct_table",
    "table_to_markdown",
    "build_cell_grid",
    "post_process_table",
    "is_well_formed_table",
    "parse_jotdown_attributes",
    "parse_jotdown_inline",
    # Vec<bool> params not supported by wasm-bindgen
    "merge_segments",
    # MCP server functions (not available in WASM)
    "start_mcp_server_http",
    "start_mcp_server_http_with_config",
    # Vec<&str> vs &[String] / texts_refs scope — codegen type mismatch
    "batch_reduce_tokens",
    "chunk_texts_batch",
]
features = ["wasm-target"]

[crates.wasm.extra_dependencies]
async-trait = "0.1"
# C FFI binding settings (symbol prefix, header and library names).
[crates.ffi]
prefix = "kreuzberg"
header_name = "kreuzberg.h"
lib_name = "kreuzberg_ffi"
visitor_callbacks = false
# Plugin trait-bridge error construction. The generated FFI plugin shims
# (`plugin_impl_initialize`, `plugin_impl_shutdown`) need to construct a
# `KreuzbergError` from a runtime String. KreuzbergError::Plugin is a struct
# variant with two fields and cannot be built via `From<String>`, so we
# provide the literal here. The placeholder local `msg` carries the message.
plugin_error_constructor = "kreuzberg::KreuzbergError::Plugin { message: msg, plugin_name: String::new() }"
exclude_functions = ["calculate_quality_score"]

# x86_64-linux-android (emulator) lacks a pyke ORT prebuilt. The C FFI cdylib is
# embedded into Android app builds via jniLibs/x86_64/, so the kreuzberg-ffi
# crate must drop ORT-dependent features on that target. All other targets
# (incl. arm64 Android phones) keep the full ORT-enabled feature set.
# NOTE(review): the equivalent [[crates.dart.target_dep_overrides]] entry also
# sets `default_features = false`; confirm it is intentionally omitted here.
[[crates.ffi.target_dep_overrides]]
cfg = 'all(target_os = "android", target_arch = "x86_64")'
features = ["android-target"]
# Go binding settings.
[crates.go]
module = "github.com/kreuzberg-dev/kreuzberg/v5"
package_name = "kreuzberg"
exclude_functions = ["calculate_quality_score"]
exclude_types = [
    # Internal DTO used only at the FFI boundary (JSON-marshalled). Generated
    # Go bindings don't surface this type to consumers, so trait-bridge stubs
    # must substitute it with json.RawMessage / []byte.
    "InternalDocument",
    # SyncExtractor is a Rust-only marker trait for selecting the sync
    # extraction path; not exposed via the Go bindings — substitute with
    # json.RawMessage in trait-bridge stubs.
    "SyncExtractor",
]
# Java binding settings (Panama FFI style).
[crates.java]
package = "dev.kreuzberg"
ffi_style = "panama"
exclude_types = [
    # Trait-bridge opaque types serialized as JSON strings at FFI boundary
    "InternalDocument",
    "OcrBackendType",
    "ProcessingStage",
    "SyncExtractor",
]
exclude_functions = ["calculate_quality_score"]
# Dart binding settings (`frb` style).
[crates.dart]
pubspec_name = "kreuzberg"
lib_name = "kreuzberg"
style = "frb"
features = ["full"]
stub_methods = ["batch_extract_bytes", "batch_extract_bytes_sync"]
exclude_functions = ["calculate_quality_score"]

# x86_64-linux-android (emulator) and x86_64-apple-ios (deprecated intel iOS
# simulator) both lack a pyke ORT prebuilt and there is no path to one short
# of compiling ORT from source for those triples. All other targets (incl.
# arm64 Android phones and arm64 iOS device/sim via pyke prebuilts) keep
# the full ORT-enabled feature set.
# NOTE(review): only the Android x86_64 override is declared below; confirm
# whether x86_64-apple-ios is handled elsewhere.
[[crates.dart.target_dep_overrides]]
cfg = 'all(target_os = "android", target_arch = "x86_64")'
features = ["android-target"]
default_features = false
# Self-contained Android library project at packages/kotlin-android/. Server-
# side Kotlin/JVM consumers use the Java binding directly (Kotlin interops
# with Java records natively); the Android AAR ships jniLibs/arm64-v8a/ +
# jniLibs/x86_64/ so Gradle embeds the kreuzberg-ffi cdylib into app builds
# without manual setup. Emitted by alef-backend-kotlin-android (alef ≥ 0.16).
[crates.kotlin_android]
package = "dev.kreuzberg"
namespace = "dev.kreuzberg"
artifact_id = "kreuzberg-android"
group_id = "dev.kreuzberg"
abis = ["arm64-v8a", "x86_64"]
exclude_functions = [
    # embed_texts_async creates naming conflict with suspend wrapper of embed_texts.
    # Callers should use embedTextsAsync (suspend) from embed_texts instead.
    "embed_texts_async",
    "calculate_quality_score",
]
# Swift binding settings.
[crates.swift]
module_name = "Kreuzberg"
package_name = "Kreuzberg"
features = ["full"]
# Internal DTO type used by plugin trait-bridges; marshalled as JSON strings at FFI boundary
exclude_types = ["InternalDocument"]
exclude_fields = [
    "ExtractionResult.ocr_internal_document",
    "OcrExtractionResult.internal_document",
]
exclude_functions = ["calculate_quality_score"]
# C# binding settings.
[crates.csharp]
namespace = "Kreuzberg"
# project_file directs `dotnet format` to the .csproj inside packages/csharp/Kreuzberg/
# rather than the parent directory (which has no .sln or .csproj at its root).
project_file = "packages/csharp/Kreuzberg/Kreuzberg.csproj"
exclude_functions = ["calculate_quality_score"]
# R binding settings.
[crates.r]
package_name = "kreuzberg"
exclude_functions = ["calculate_quality_score"]
# Zig binding settings.
[crates.zig]
module_name = "kreuzberg"
exclude_functions = ["calculate_quality_score"]
# Per-language output directories (presumably where generated binding code is
# written, relative to workspace_root — confirm against the alef docs).
# NOTE(review): csharp points at packages/csharp/src/ while
# [crates.csharp].project_file lives under packages/csharp/Kreuzberg/ —
# confirm the two locations are meant to differ.
[crates.output]
python = "crates/kreuzberg-py/src/"
node = "crates/kreuzberg-node/src/"
ruby = "packages/ruby/ext/kreuzberg_rb/src/"
php = "crates/kreuzberg-php/src/"
ffi = "crates/kreuzberg-ffi/src/"
go = "packages/go/v5/"
elixir = "packages/elixir/native/kreuzberg_nif/src/"
wasm = "crates/kreuzberg-wasm/src/"
java = "packages/java/"
csharp = "packages/csharp/src/"
r = "packages/r/src/rust/src/"
kotlin_android = "packages/kotlin-android/"
swift = "packages/swift/Sources/Kreuzberg/"
dart = "packages/dart/lib/src/"
zig = "packages/zig/src/"
# Exclusions that apply to every language, on top of the per-language
# exclude_* lists in the [crates.<language>] tables.
[crates.exclude]
types = [
    # Generic types — alef(skip) source annotation not picked up for generic structs
    # (alef-extract does not propagate cfg_attr through type-parameter bounds).
    "Pool",
    # Internal docx/hwp parser types — kept hidden from bindings (still
    # alef(skip)-annotated in source); serde derives added in Phase 1 but
    # binding surface is unchanged. Listed here to defend against future
    # codegen passes that might re-emit them as transitive container fields.
    "kreuzberg::extraction::docx::parser::Table",
    "kreuzberg::extraction::docx::parser::TableCell",
    "kreuzberg::extraction::docx::parser::TableRow",
    "kreuzberg::extraction::docx::parser::HeaderFooter",
    "kreuzberg::extraction::docx::parser::Note",
    "kreuzberg::extraction::hwp::model::Section",
]
methods = [
    # From<T> trait impls — auto-emitted by derive/trait-bridge
    "ApiError.from",
    # fmt trait impls — auto-emitted by derive, not callable from bindings
    "BBox.fmt",
    "LayoutDetection.fmt",
    "PooledString.fmt",
    # Deref/DerefMut impls for internal pooled types — not part of public API
    "PooledString.deref",
    "PooledString.deref_mut",
    "PooledString.buffer_mut",
    # Internal methods that can't easily be source-annotated (trait impls or unusual patterns)
    "BBox.center",
    "TessdataManager.manifest",
    # Deprecated Rust-only aliases — excluded from bindings
    "LayoutDetection.class",
    "LayoutRegion.class",
]
# Per-language lint recipes. Each `precondition` is a `command -v <tool>`
# probe; `before`, `format`, `check` and `typecheck` are shell commands.
# Languages that omit a key presumably fall back to alef's built-in default
# for that step — TODO confirm.
# NOTE(review): the python precondition probes for `ruff`, while the commands
# declared here only invoke `uv` — presumably ruff is used by a default
# format/check step; confirm.
[crates.lint.python]
precondition = "command -v ruff >/dev/null 2>&1"
before = "cd packages/python && uv sync --no-install-project --no-install-workspace"
typecheck = "cd packages/python && uv run --no-sync mypy ."
# Scoped to crates/kreuzberg-node rather than the repo root.
[crates.lint.node]
precondition = "command -v pnpm >/dev/null 2>&1"
format = "pnpm dlx oxfmt crates/kreuzberg-node"
check = "pnpm dlx oxlint crates/kreuzberg-node"
[crates.lint.ruby]
precondition = "command -v bundle >/dev/null 2>&1"
before = "cd packages/ruby && bundle install"
format = "cd packages/ruby && bundle exec rubocop -a ."
[crates.lint.php]
precondition = "command -v composer >/dev/null 2>&1"
before = "cd packages/php && composer install"
format = "cd packages/php && vendor/bin/php-cs-fixer fix ."
check = "cd packages/php && vendor/bin/phpstan --configuration=phpstan.neon --memory-limit=512M"
# NOTE(review): unlike the other lint tables, go declares no precondition —
# only a pre-step that builds the kreuzberg-ffi crate. Confirm this is intended.
[crates.lint.go]
before = "cargo build --release -p kreuzberg-ffi"
[crates.lint.elixir]
precondition = "command -v mix >/dev/null 2>&1"
before = "cd packages/elixir && mix deps.get"
check = "cd packages/elixir && mix credo"
[crates.lint.java]
precondition = "command -v mvn >/dev/null 2>&1"
check = "mvn -f packages/java/pom.xml spotless:check -q"
[crates.lint.kotlin_android]
precondition = "command -v gradle >/dev/null 2>&1"
format = "cd packages/kotlin-android && gradle ktlintFormat --no-daemon"
check = "cd packages/kotlin-android && gradle ktlintCheck --no-daemon"
[crates.lint.swift]
precondition = "command -v swift >/dev/null 2>&1"
format = "swift format --in-place --recursive packages/swift/Sources"
check = "swift format lint --recursive packages/swift/Sources"
[crates.lint.dart]
precondition = "command -v dart >/dev/null 2>&1"
format = "cd packages/dart && dart format ."
check = "cd packages/dart && dart analyze"
[crates.lint.zig]
precondition = "command -v zig >/dev/null 2>&1"
format = "cd packages/zig && zig fmt src"
check = "cd packages/zig && zig fmt --check src"
# R dependency update recipe. `update` and `upgrade` run the same command:
# install `remotes` from CRAN if it is missing, then update all packages
# without prompting.
[crates.update.r]
# Silence stderr as well as stdout, matching every other precondition probe
# in this file (including [crates.test.r]).
precondition = "command -v Rscript >/dev/null 2>&1"
update = "cd packages/r && Rscript -e \"if (!requireNamespace('remotes', quietly = TRUE)) install.packages('remotes', repos = 'https://cloud.r-project.org'); remotes::update_packages(ask = FALSE)\""
upgrade = "cd packages/r && Rscript -e \"if (!requireNamespace('remotes', quietly = TRUE)) install.packages('remotes', repos = 'https://cloud.r-project.org'); remotes::update_packages(ask = FALSE)\""
[crates.update.kotlin_android]
# kotlin-android has no managed lockfile — dependency versions are pinned
# directly in the alef-generated build.gradle.kts. Updating means running
# alef regen with bumped versions in the upstream alef-backend-kotlin-android,
# not a gradle command. The `dependencyUpdates` task would require applying
# the ben-manes versions plugin which is not part of the generated template.
# Both commands therefore only print a notice.
precondition = "command -v gradle >/dev/null 2>&1"
update = "echo 'kotlin-android: no-op (versions pinned by alef regen)'"
upgrade = "echo 'kotlin-android: no-op (versions pinned by alef regen)'"
# swift: `update` and `upgrade` run the same command.
[crates.update.swift]
precondition = "command -v swift >/dev/null 2>&1"
update = "cd packages/swift && swift package update"
upgrade = "cd packages/swift && swift package update"
# dart: `upgrade` differs from `update` only by adding --major-versions.
[crates.update.dart]
precondition = "command -v dart >/dev/null 2>&1"
update = "cd packages/dart && dart pub upgrade"
upgrade = "cd packages/dart && dart pub upgrade --major-versions"
# zig: `update` and `upgrade` run the same command.
[crates.update.zig]
precondition = "command -v zig >/dev/null 2>&1"
update = "cd packages/zig && zig build --fetch"
upgrade = "cd packages/zig && zig build --fetch"
# Per-language test recipes. Each `precondition` is a `command -v <tool>`
# probe, `before` builds or stages the native artifacts, and `e2e` runs the
# suite under e2e/<language>. Some tables also define `command` (presumably
# the package-level test run as opposed to the e2e run — TODO confirm).
[crates.test.python]
precondition = "command -v uv >/dev/null 2>&1"
# `uv run --no-sync` in the e2e step never refreshes the e2e venv, and `uv sync`
# installs `kreuzberg` as a *copied* (non-editable) snapshot. A stale snapshot
# built before the wrapper modules were generated leaves the venv with only
# `_kreuzberg.abi3.so` + `py.typed`, breaking `from kreuzberg import ...`.
# Force a clean rebuild of that snapshot from current source before testing.
before = "cd packages/python && maturin develop --release && cd ../../e2e/python && uv sync --reinstall-package kreuzberg"
e2e = "cd e2e/python && uv run --no-sync pytest tests/ -q"
[crates.test.node]
precondition = "command -v pnpm >/dev/null 2>&1"
before = "cd crates/kreuzberg-node && napi build --output-dir . --release --platform"
e2e = "cd e2e/node && CI=true pnpm install && pnpm test"
# go: the pre-step also copies the generated C header next to the Go module.
[crates.test.go]
precondition = "command -v go >/dev/null 2>&1"
before = "cargo build --release -p kreuzberg-ffi && mkdir -p packages/go/v5/include && cp crates/kreuzberg-ffi/include/kreuzberg.h packages/go/v5/include/"
e2e = "cd e2e/go && go test ./... -count=1"
[crates.test.ruby]
precondition = "command -v bundle >/dev/null 2>&1"
before = "cd packages/ruby && bundle install && bundle exec rake compile && cd ../../e2e/ruby && bundle install"
e2e = "cd e2e/ruby && bundle exec rspec"
# php: PHP_INI_SCAN_DIR is set to empty and php.ini is passed explicitly via -c.
[crates.test.php]
precondition = "command -v composer >/dev/null 2>&1"
before = "cargo build --release -p kreuzberg-php && cd e2e/php && composer install && bash ../../scripts/setup-php-ext-ini.sh"
e2e = "cd e2e/php && PHP_INI_SCAN_DIR= php -c php.ini vendor/bin/phpunit"
[crates.test.java]
precondition = "command -v mvn >/dev/null 2>&1"
before = "cargo build --release -p kreuzberg-ffi && cd packages/java && mvn -T 1 package -DskipTests -q"
e2e = "cd e2e/java && mvn test -q"
[crates.test.csharp]
precondition = "command -v dotnet >/dev/null 2>&1"
before = "cargo build --release -p kreuzberg-ffi && bash scripts/stage_csharp_native_local.sh"
e2e = "dotnet test e2e/csharp"
# elixir: every mix invocation runs with KREUZBERG_BUILD=1.
[crates.test.elixir]
precondition = "command -v mix >/dev/null 2>&1"
before = "cd packages/elixir && KREUZBERG_BUILD=1 mix deps.get && KREUZBERG_BUILD=1 mix compile && cd ../../e2e/elixir && KREUZBERG_BUILD=1 mix deps.get"
e2e = "cd e2e/elixir && KREUZBERG_BUILD=1 mix test"
# r: the pre-step deletes the previous shared object and entrypoint object
# before rebuilding; `command` and `e2e` run the same Rscript invocation.
[crates.test.r]
precondition = "command -v Rscript >/dev/null 2>&1"
before = "rm -f packages/r/src/kreuzberg.so packages/r/src/entrypoint.o && cd packages/r/src/rust && cargo build --release"
command = "cd e2e/r && Rscript -e \"devtools::load_all('../../packages/r'); testthat::set_max_fails(Inf); testthat::test_dir('tests')\""
e2e = "cd e2e/r && Rscript -e \"devtools::load_all('../../packages/r'); testthat::set_max_fails(Inf); testthat::test_dir('tests')\""
[crates.test.wasm]
precondition = "command -v wasm-pack >/dev/null 2>&1"
# Build the nodejs target into pkg/nodejs/ so the e2e harness (Node-based vitest)
# can resolve the wasi/env imports — wasm-pack inlines them as require() shims
# under --target nodejs. The web target leaves them as ESM `import * as foo from "env"`
# statements that Node cannot resolve. The published bundler target (pkg/*) is built
# separately by the publish pipeline.
before = "wasm-pack build crates/kreuzberg-wasm --release --target nodejs --out-dir pkg/nodejs && cd e2e/wasm && pnpm install --ignore-workspace"
e2e = "cd e2e/wasm && pnpm test"
# rust: no pre-step; the e2e crate is tested directly with cargo.
[crates.test.rust]
precondition = "command -v cargo >/dev/null 2>&1"
e2e = "cd e2e/rust && cargo test"
# Kotlin/Android test recipe. The pre-steps build the kreuzberg-ffi cdylib and
# then expose it under the `libkreuzberg_jni` name via a relative symlink in
# target/release/.
[crates.test.kotlin_android]
precondition = "command -v gradle >/dev/null 2>&1"
before = [
    "cargo build --release -p kreuzberg-ffi",
    # Link only the library flavour that was actually built. `ln -s` succeeds
    # even when its target is missing, so chaining two `ln -sf` calls with `||`
    # never reaches the second one: on Linux that left a dangling .dylib link
    # and no libkreuzberg_jni.so. Still best-effort: this step never fails.
    'for ext in dylib so; do if [ -f target/release/libkreuzberg_ffi.$ext ]; then ln -sf libkreuzberg_ffi.$ext target/release/libkreuzberg_jni.$ext 2>/dev/null || true; fi; done',
]
command = "cd packages/kotlin-android && gradle test --no-daemon"
e2e = "cd e2e/kotlin_android && gradle test --no-daemon"
# swift: builds the kreuzberg-swift crate, then runs the bridge setup script.
[crates.test.swift]
precondition = "command -v swift >/dev/null 2>&1"
before = ["cargo build --release -p kreuzberg-swift", "bash scripts/setup-swift-bridge.sh"]
command = "cd packages/swift && swift test"
e2e = "cd e2e/swift_e2e && swift test"
[crates.test.dart]
# packages/dart is a Flutter plugin (pubspec declares `flutter:` SDK constraint
# plus a `flutter: plugin:` ffiPlugin block), so `dart pub` is rejected by the
# resolver — use `flutter pub` / `flutter test` throughout.
precondition = "command -v flutter >/dev/null 2>&1"
# Pre-step, in order: install flutter_rust_bridge_codegen 2.12.0 if it is not
# on PATH; `flutter pub get`; regenerate the FRB bindings unless the hash of
# src/lib.rs + flutter_rust_bridge.yaml equals the value stored in
# .frb_codegen_hash (the hash file is rewritten after a successful codegen);
# finally build the kreuzberg-dart crate from the repo root. The trailing
# backslashes fold the script into a single shell line.
before = """
{ command -v flutter_rust_bridge_codegen >/dev/null 2>&1 || cargo install flutter_rust_bridge_codegen --version 2.12.0; } && \
cd packages/dart && flutter pub get && cd rust && \
{ \
INPUT_HASH=$(printf '%s' "$(cat src/lib.rs flutter_rust_bridge.yaml)" | shasum | cut -d' ' -f1); \
HASH_FILE=.frb_codegen_hash; \
if [ -f "$HASH_FILE" ] && [ "$(cat "$HASH_FILE" 2>/dev/null)" = "$INPUT_HASH" ]; then \
echo "FRB codegen skipped (inputs unchanged)"; \
else \
flutter_rust_bridge_codegen generate --config-file flutter_rust_bridge.yaml && \
echo "$INPUT_HASH" > "$HASH_FILE"; \
fi; \
} && cd ../../.. && cargo build --release -p kreuzberg-dart
"""
# Both commands point the dynamic-library search variables at target/release
# for `flutter pub get` and again for `flutter test`.
command = "cd packages/dart && DYLD_FALLBACK_LIBRARY_PATH=$PWD/../../target/release LD_LIBRARY_PATH=$PWD/../../target/release FRB_DART_LOAD_EXTERNAL_LIBRARY_NATIVE_LIB_DIR=$PWD/../../target/release flutter pub get && DYLD_FALLBACK_LIBRARY_PATH=$PWD/../../target/release LD_LIBRARY_PATH=$PWD/../../target/release FRB_DART_LOAD_EXTERNAL_LIBRARY_NATIVE_LIB_DIR=$PWD/../../target/release flutter test"
e2e = "cd e2e/dart && DYLD_FALLBACK_LIBRARY_PATH=$PWD/../../target/release LD_LIBRARY_PATH=$PWD/../../target/release FRB_DART_LOAD_EXTERNAL_LIBRARY_NATIVE_LIB_DIR=$PWD/../../target/release flutter pub get && DYLD_FALLBACK_LIBRARY_PATH=$PWD/../../target/release LD_LIBRARY_PATH=$PWD/../../target/release FRB_DART_LOAD_EXTERNAL_LIBRARY_NATIVE_LIB_DIR=$PWD/../../target/release flutter test"
# zig: builds the kreuzberg-ffi crate before running the Zig test suites.
[crates.test.zig]
precondition = "command -v zig >/dev/null 2>&1"
before = "cargo build --release -p kreuzberg-ffi"
command = "cd packages/zig && zig build test"
e2e = "cd e2e/zig && zig build test"
# Publishing settings. `core_crate` is the path of the core Rust crate.
[crates.publish]
core_crate = "crates/kreuzberg"
# vendor_mode = "registry": presumably rewrites path dependencies on the core
# crate to registry dependencies at publish time, as the dart note below
# describes — TODO confirm for ruby/elixir/r.
[crates.publish.languages.ruby]
vendor_mode = "registry"
[crates.publish.languages.elixir]
vendor_mode = "registry"
nif_versions = ["2.16", "2.17"]
[crates.publish.languages.r]
vendor_mode = "registry"
[crates.publish.languages.dart]
# dart/rust/Cargo.toml carries path deps on kreuzberg; rewrite to registry on publish.
# NOTE: resolve_binding_manifest does not yet resolve the Dart manifest path — the rewrite
# is a no-op until alef is updated to handle packages/dart/rust/Cargo.toml.
vendor_mode = "registry"
# ffi: pkg-config and CMake config output are both enabled.
[crates.publish.languages.ffi]
pkg_config = true
cmake_config = true
# End-to-end test generation: fixture directory, output directory, target
# languages, and hints about the result shape (presumably consumed when
# emitting assertions — confirm against the alef documentation).
[crates.e2e]
fixtures = "fixtures"
output = "e2e"
languages = [
    "python",
    "rust",
    "node",
    "go",
    "ruby",
    "elixir",
    "wasm",
    "java",
    "csharp",
    "php",
    "r",
    "dart",
    "kotlin_android",
    "swift",
    "zig",
    "homebrew",
]
fields_optional = [
    "metadata.format",
    "metadata.format.excel",
    "metadata.format.excel.sheet_count",
    "metadata.title",
    "metadata.subject",
    "metadata.authors",
    "metadata.keywords",
    "metadata.language",
    "metadata.created_at",
    "metadata.modified_at",
    "metadata.created_by",
    "metadata.modified_by",
    "metadata.output_format",
    "metadata.category",
    "metadata.tags",
    "metadata.document_version",
    "metadata.abstract_text",
    "chunks",
    "images",
    "pages",
    "elements",
    "djot_content",
    "ocr_elements",
    "document",
    "extraction_method",
    "detected_languages",
    "extracted_keywords",
    "quality_score",
    "annotations",
]
fields_array = [
    "tables",
    "chunks",
    "images",
    "pages",
    "elements",
    "ocr_elements",
    "detected_languages",
    "extracted_keywords",
    "annotations",
    "processing_warnings",
    "metadata.sheet_names",
    "metadata.format.excel.sheet_names",
    "document.node_types",
]
fields_method_calls = ["metadata.format.excel"]
result_fields = ["content", "mime_type", "djot_content", "result"]
# Default e2e call: an async `extract_file` whose arguments are taken from the
# fixture's `input.*` fields. `mime_type` and `config` are optional.
[crates.e2e.call]
function = "extract_file"
module = "kreuzberg"
result_var = "result"
async = true
returns_result = true
args = [
    { name = "path", field = "input.path", type = "file_path" },
    { name = "mime_type", field = "input.mime_type", type = "string", optional = true },
    { name = "config", field = "input.config", type = "json_object", optional = true },
]
# Per-language overrides for the default e2e call.

[crates.e2e.call.overrides.rust]
crate_name = "kreuzberg"
function = "extract_file"

[crates.e2e.call.overrides.python]
module = "kreuzberg"
function = "extract_file"
options_type = "ExtractionConfig"

[crates.e2e.call.overrides.node]
module = "kreuzberg"
function = "extractFile"
options_type = "ExtractionConfig"
arg_order = ["path", "config", "mime_type"]
result_enum_fields = { outputFormat = "OutputFormat", "metadata.format" = "FormatMetadata" }

[crates.e2e.call.overrides.go]
module = "github.com/kreuzberg-dev/kreuzberg/v5"
function = "ExtractFile"
alias = "kreuzberg"
options_type = "ExtractionConfig"

[crates.e2e.call.overrides.java]
class = "dev.kreuzberg.Kreuzberg"
function = "extractFile"
options_type = "ExtractionConfig"
enum_fields = { outputFormat = "OutputFormat" }

[crates.e2e.call.overrides.csharp]
class = "KreuzbergLib"
function = "ExtractFile"
options_type = "ExtractionConfig"

# Config key -> C# type name, written as a sub-table instead of one long
# inline table; the parsed data is the same.
[crates.e2e.call.overrides.csharp.json_object_types]
chunking = "ChunkingConfig"
ocr = "OcrConfig"
images = "ImageExtractionConfig"
html_output = "HtmlOutputConfig"
language_detection = "LanguageDetectionConfig"
postprocessor = "PostProcessorConfig"
acceleration = "AccelerationConfig"
email = "EmailConfig"
pages = "PageConfig"
pdf_options = "PdfConfig"
layout = "LayoutDetectionConfig"
tree_sitter = "TreeSitterConfig"
structured_extraction = "StructuredExtractionConfig"
content_filter = "ContentFilterConfig"
token_reduction = "TokenReductionOptions"

[crates.e2e.call.overrides.csharp.nested_types]
model = "EmbeddingModelType"
embedding = "EmbeddingConfig"
keywords = "KeywordConfig"
pages = "PageConfig"
securityLimits = "SecurityLimits"
treeSitter = "TreeSitterConfig"

[crates.e2e.call.overrides.ruby]
options_type = "Kreuzberg::ExtractionConfig"

[crates.e2e.call.overrides.elixir]
# Elixir Rustler facades don't consistently use keyword opts (some use positional defaults),
# so struct-literal generation causes mismatches. Use JSON strings instead for compatibility.
# handle_struct_type = "ExtractionConfig"

[crates.e2e.call.overrides.wasm]
options_type = "WasmExtractionConfig"
bigint_fields = ["cacheTtlSecs", "extractionTimeoutSecs"]

[crates.e2e.call.overrides.swift]
unnamed_arg_indices = [0, 1, 2]

[crates.e2e.call.overrides.php]
class = "Kreuzberg\\Kreuzberg"
function = "extractFile"
options_type = "ExtractionConfig"

[crates.e2e.call.overrides.r]
function = "extract_file"
options_type = "ExtractionConfig"

[crates.e2e.call.overrides.zig]
function = "extract_file_sync"
async = false
result_is_json_struct = true

[crates.e2e.call.overrides.dart]
# Dart reads files to bytes before passing to Rust; use extractBytes (bytes+mime)
# instead of extractFile (path string). The file_path arg type generates readAsBytesSync().
function = "extractBytes"
arg_name_map = { path = "content" }
# Named e2e call `extract_file`, with its per-language overrides.
[crates.e2e.calls.extract_file]
function = "extract_file"
module = "kreuzberg"
async = true
returns_result = true
# kreuzberg's `chunks` is a plain `Vec<TextChunk>` field on `ExtractionResult`,
# not a streaming channel. Without this opt-out, alef's streaming auto-detect
# triggers on any assertion targeting `chunks` / `chunks.length` and emits
# `range over stream` / `ChatCompletionChunk` codegen.
streaming = false
args = [
    { name = "path", field = "input.path", type = "file_path" },
    { name = "mime_type", field = "input.mime_type", type = "string", optional = true },
    { name = "config", field = "input.config", type = "json_object", optional = true },
]

[crates.e2e.calls.extract_file.overrides.dart]
# Dart can pass file paths through FRB as plain strings. The dart e2e generator
# routes source-code paths (e.g. `code/hello.py`) through extractFile so
# CodeExtractor's path-based extension detection runs; everything else is
# remapped to extractBytes automatically.
async = true

[crates.e2e.calls.extract_file.overrides.swift]
async = false
unnamed_arg_indices = [0, 1, 2]

[crates.e2e.calls.extract_file.overrides.zig]
# Zig binding has no async runtime; redirect to the sync variant and parse JSON.
function = "extract_file_sync"
async = false
result_is_json_struct = true

[crates.e2e.calls.extract_file.overrides.php]
# PHP facade exposes the async variant under the bare name (`extractFile`);
# the codegen emits it automatically — no override needed.
[crates.e2e.calls.extract_bytes]
module = "kreuzberg"
function = "extract_bytes"
returns_result = true
async = true
streaming = false
args = [
    { name = "content", field = "input.data", type = "bytes" },
    { name = "mime_type", field = "input.mime_type", type = "string" },
    { name = "config", field = "input.config", type = "json_object", optional = true },
]

[crates.e2e.calls.extract_bytes.overrides.csharp]
options_via = "from_json"
options_type = "ExtractionConfig"

# Config key -> C# type name, one entry per line.
[crates.e2e.calls.extract_bytes.overrides.csharp.json_object_types]
chunking = "ChunkingConfig"
ocr = "OcrConfig"
images = "ImageExtractionConfig"
html_output = "HtmlOutputConfig"
language_detection = "LanguageDetectionConfig"
postprocessor = "PostProcessorConfig"
acceleration = "AccelerationConfig"
email = "EmailConfig"
pages = "PageConfig"
pdf_options = "PdfConfig"
layout = "LayoutDetectionConfig"
tree_sitter = "TreeSitterConfig"
structured_extraction = "StructuredExtractionConfig"
content_filter = "ContentFilterConfig"
token_reduction = "TokenReductionOptions"

[crates.e2e.calls.extract_bytes.overrides.swift]
unnamed_arg_indices = [0, 1, 2]
async = false

[crates.e2e.calls.extract_bytes.overrides.python]
options_type = "ExtractionConfig"
arg_name_map = { data = "content" }

[crates.e2e.calls.extract_bytes.overrides.r]
arg_name_map = { data = "content" }

[crates.e2e.calls.extract_bytes.overrides.elixir]
# Intentionally empty. The Rustler facade for extract_bytes_async takes
# positional defaults -- func(content, mime_type, config \\ nil) -- rather than
# keyword opts, so the config arg must not be emitted in keyword form.

[crates.e2e.calls.extract_bytes.overrides.php]
# Intentionally empty: the PHP facade exposes the async variant under the bare
# name (`extractBytes`) and the codegen emits it automatically.

[crates.e2e.calls.extract_bytes.overrides.zig]
result_is_json_struct = true
async = false
function = "extract_bytes_sync"
[crates.e2e.calls.extract_file_sync]
module = "kreuzberg"
function = "extract_file_sync"
returns_result = true
async = false
streaming = false
skip_languages = ["wasm"]
args = [
    { name = "path", field = "input.path", type = "file_path" },
    { name = "mime_type", field = "input.mime_type", type = "string", optional = true },
    { name = "config", field = "input.config", type = "json_object", optional = true },
]

[crates.e2e.calls.extract_file_sync.overrides.node]
arg_order = ["path", "config", "mime_type"]
options_type = "ExtractionConfig"

[crates.e2e.calls.extract_file_sync.overrides.csharp]
options_via = "from_json"
options_type = "ExtractionConfig"

# Config key -> C# type name, one entry per line.
[crates.e2e.calls.extract_file_sync.overrides.csharp.json_object_types]
chunking = "ChunkingConfig"
ocr = "OcrConfig"
images = "ImageExtractionConfig"
html_output = "HtmlOutputConfig"
language_detection = "LanguageDetectionConfig"
postprocessor = "PostProcessorConfig"
acceleration = "AccelerationConfig"
email = "EmailConfig"
pages = "PageConfig"
pdf_options = "PdfConfig"
layout = "LayoutDetectionConfig"
tree_sitter = "TreeSitterConfig"
structured_extraction = "StructuredExtractionConfig"
content_filter = "ContentFilterConfig"
token_reduction = "TokenReductionOptions"

[crates.e2e.calls.extract_file_sync.overrides.swift]
unnamed_arg_indices = [0, 1, 2]

[crates.e2e.calls.extract_file_sync.overrides.zig]
result_is_json_struct = true
async = false
function = "extract_file_sync"

[crates.e2e.calls.extract_file_sync.overrides.dart]
# File paths cross FRB as plain strings in Dart, and extractFileSync(path,
# mimeType?, config) is the supported facade. When the dart e2e generator sees a
# `text/x-source-code` path (e.g. `code/hello.py`) it keeps the path-based call
# so that CodeExtractor's extract_file (extension-based language detection)
# runs; every other input is remapped to extractBytesSync.
async = false
[crates.e2e.calls.extract_bytes_sync]
module = "kreuzberg"
function = "extract_bytes_sync"
returns_result = true
async = false
streaming = false
skip_languages = ["wasm"]
args = [
    { name = "content", field = "input.data", type = "bytes" },
    { name = "mime_type", field = "input.mime_type", type = "string" },
    { name = "config", field = "input.config", type = "json_object", optional = true },
]

[crates.e2e.calls.extract_bytes_sync.overrides.csharp]
# NOTE(review): the C# overrides for extract_bytes and extract_file_sync also set
# options_via = "from_json"; this one does not -- confirm that is intentional.
options_type = "ExtractionConfig"

# Config key -> C# type name, one entry per line.
[crates.e2e.calls.extract_bytes_sync.overrides.csharp.json_object_types]
chunking = "ChunkingConfig"
ocr = "OcrConfig"
images = "ImageExtractionConfig"
html_output = "HtmlOutputConfig"
language_detection = "LanguageDetectionConfig"
postprocessor = "PostProcessorConfig"
acceleration = "AccelerationConfig"
email = "EmailConfig"
pages = "PageConfig"
pdf_options = "PdfConfig"
layout = "LayoutDetectionConfig"
tree_sitter = "TreeSitterConfig"
structured_extraction = "StructuredExtractionConfig"
content_filter = "ContentFilterConfig"
token_reduction = "TokenReductionOptions"

[crates.e2e.calls.extract_bytes_sync.overrides.python]
options_type = "ExtractionConfig"
arg_name_map = { data = "content" }

[crates.e2e.calls.extract_bytes_sync.overrides.r]
arg_name_map = { data = "content" }

[crates.e2e.calls.extract_bytes_sync.overrides.swift]
unnamed_arg_indices = [0, 1, 2]

[crates.e2e.calls.extract_bytes_sync.overrides.zig]
result_is_json_struct = true
async = false
function = "extract_bytes_sync"
# Python overrides for the render_* / djot_to_html calls: each one marks the
# result as simple and renames a single argument.
[crates.e2e.calls.render_html.overrides.python]
arg_name_map = { doc = "html" }
result_is_simple = true

[crates.e2e.calls.render_markdown.overrides.python]
arg_name_map = { doc = "html" }
result_is_simple = true

[crates.e2e.calls.render_djot.overrides.python]
arg_name_map = { doc = "html" }
result_is_simple = true

[crates.e2e.calls.render_json.overrides.python]
arg_name_map = { doc = "html" }
result_is_simple = true

[crates.e2e.calls.render_plain.overrides.python]
arg_name_map = { doc = "html" }
result_is_simple = true

[crates.e2e.calls.djot_to_html.overrides.python]
arg_name_map = { djot = "djot_source" }
result_is_simple = true
# chunk_text: per-language options types and extra trailing arguments.
[crates.e2e.calls.chunk_text.overrides.wasm]
options_type = "WasmChunkingConfig"

[crates.e2e.calls.chunk_text.overrides.rust]
extra_args = ["None"]

[crates.e2e.calls.chunk_text.overrides.node]
options_type = "JsChunkingConfig"

[crates.e2e.calls.chunk_text.overrides.csharp]
extra_args = ["null"]
options_type = "ChunkingConfig"

[crates.e2e.calls.chunk_text.overrides.java]
options_type = "ChunkingConfig"
# chunk_semantic: per-language options types and extra trailing arguments.
[crates.e2e.calls.chunk_semantic.overrides.wasm]
options_type = "WasmChunkingConfig"

[crates.e2e.calls.chunk_semantic.overrides.rust]
extra_args = ["None"]

[crates.e2e.calls.chunk_semantic.overrides.node]
options_type = "JsChunkingConfig"

[crates.e2e.calls.chunk_semantic.overrides.csharp]
extra_args = ["null"]
options_type = "ChunkingConfig"

[crates.e2e.calls.chunk_semantic.overrides.java]
options_type = "ChunkingConfig"
# chunk_texts_batch: per-language options types.
[crates.e2e.calls.chunk_texts_batch.overrides.node]
options_type = "JsChunkingConfig"

[crates.e2e.calls.chunk_texts_batch.overrides.csharp]
options_type = "ChunkingConfig"

[crates.e2e.calls.chunk_texts_batch.overrides.java]
options_type = "ChunkingConfig"
# detect_languages: per-language options types.
[crates.e2e.calls.detect_languages.overrides.go]
options_type = "LanguageDetectionConfig"

[crates.e2e.calls.detect_languages.overrides.node]
options_type = "JsLanguageDetectionConfig"

[crates.e2e.calls.detect_languages.overrides.csharp]
options_type = "LanguageDetectionConfig"

[crates.e2e.calls.detect_languages.overrides.java]
options_type = "LanguageDetectionConfig"
[crates.e2e.calls.detect_mime_type_from_bytes]
module = "kreuzberg"
function = "detect_mime_type_from_bytes"
async = false
result_is_simple = true
returns_result = true
args = [
    { name = "content", field = "input.data", type = "bytes" },
]
overrides.swift = { unnamed_arg_indices = [0] }
# Single-language overrides for three text-utility calls.
[crates.e2e.calls.validate_chunking_params.overrides.python]
arg_name_map = { max_characters = "max_chars", overlap = "max_overlap" }

[crates.e2e.calls.normalize_whitespace.overrides.python]
arg_name_map = { text = "s" }
result_is_simple = true

[crates.e2e.calls.dedup_text.overrides.go]
returns_result = true
result_is_simple = true
result_is_array = true
[crates.e2e.calls.get_extensions_for_mime]
module = "kreuzberg"
function = "get_extensions_for_mime"
async = false
returns_result = true
result_is_simple = true
result_is_array = true
args = [
    { name = "mime_type", field = "input.mime_type", type = "string" },
]
[crates.e2e.calls.batch_extract_files]
module = "kreuzberg"
function = "batch_extract_files"
async = true
returns_result = true
result_is_vec = true
skip_languages = ["wasm"]
args = [
    { name = "paths", field = "input.paths", type = "json_object", owned = true, go_type = "BatchFileItem", element_type = "BatchFileItem" },
    { name = "config", field = "input.config", type = "json_object", optional = true },
]

[crates.e2e.calls.batch_extract_files.overrides.csharp]
options_type = "ExtractionConfig"
result_is_vec = true

[crates.e2e.calls.batch_extract_files.overrides.java]
options_type = "ExtractionConfig"
enum_fields = { outputFormat = "OutputFormat" }
result_is_vec = true

[crates.e2e.calls.batch_extract_files.overrides.python]
options_type = "ExtractionConfig"
arg_name_map = { paths = "items" }
result_is_vec = true

[crates.e2e.calls.batch_extract_files.overrides.r]
arg_name_map = { paths = "items" }

[crates.e2e.calls.batch_extract_files.overrides.php]
# Intentionally empty: the PHP facade exposes the async variant under the bare
# name (`batchExtractFiles`).

[crates.e2e.calls.batch_extract_files.overrides.zig]
async = false
function = "batch_extract_files_sync"
[crates.e2e.calls.batch_extract_files_sync]
module = "kreuzberg"
function = "batch_extract_files_sync"
async = false
returns_result = true
result_is_vec = true
skip_languages = ["wasm"]
args = [
    { name = "paths", field = "input.paths", type = "json_object", owned = true, go_type = "BatchFileItem", element_type = "BatchFileItem" },
    { name = "config", field = "input.config", type = "json_object", optional = true },
]

[crates.e2e.calls.batch_extract_files_sync.overrides.csharp]
options_type = "ExtractionConfig"
result_is_vec = true

[crates.e2e.calls.batch_extract_files_sync.overrides.java]
options_type = "ExtractionConfig"
enum_fields = { outputFormat = "OutputFormat" }
result_is_vec = true

[crates.e2e.calls.batch_extract_files_sync.overrides.python]
options_type = "ExtractionConfig"
arg_name_map = { paths = "items" }
result_is_vec = true

[crates.e2e.calls.batch_extract_files_sync.overrides.r]
arg_name_map = { paths = "items" }
# reduce_tokens: every listed language marks the result as simple and names its
# options type.
[crates.e2e.calls.reduce_tokens.overrides.go]
options_type = "TokenReductionConfig"
result_is_simple = true

[crates.e2e.calls.reduce_tokens.overrides.node]
options_type = "JsTokenReductionConfig"
result_is_simple = true

[crates.e2e.calls.reduce_tokens.overrides.csharp]
options_type = "TokenReductionConfig"
result_is_simple = true

[crates.e2e.calls.reduce_tokens.overrides.python]
options_type = "TokenReductionConfig"
result_is_simple = true

[crates.e2e.calls.reduce_tokens.overrides.java]
options_type = "TokenReductionConfig"
result_is_simple = true
# serialize_to_toon / serialize_to_json: node and csharp share the same settings.
[crates.e2e.calls.serialize_to_toon.overrides.node]
options_type = "ExtractionConfig"
result_is_simple = true

[crates.e2e.calls.serialize_to_toon.overrides.csharp]
options_type = "ExtractionConfig"
result_is_simple = true

[crates.e2e.calls.serialize_to_json.overrides.node]
options_type = "ExtractionConfig"
result_is_simple = true

[crates.e2e.calls.serialize_to_json.overrides.csharp]
options_type = "ExtractionConfig"
result_is_simple = true

# detect_or_validate: Go-only override.
[crates.e2e.calls.detect_or_validate.overrides.go]
returns_result = true
[crates.e2e.calls.embed_texts]
module = "kreuzberg"
function = "embed_texts"
async = false
returns_result = true
result_is_simple = true
result_is_array = true
options_type = "EmbeddingConfig"
skip_languages = ["wasm"]
args = [
    { name = "texts", field = "input.texts", type = "json_object", owned = true, element_type = "String" },
    { name = "config", field = "input.config", type = "json_object", optional = true },
]

[crates.e2e.calls.embed_texts.overrides.csharp]
options_type = "EmbeddingConfig"
nested_types = { embedding_model = "EmbeddingModelType" }
result_is_simple = true

[crates.e2e.calls.embed_texts.overrides.java]
options_type = "EmbeddingConfig"
result_is_simple = true

[crates.e2e.calls.embed_texts.overrides.go]
options_type = "EmbeddingConfig"
result_is_array = true
result_is_simple = true

[crates.e2e.calls.embed_texts.overrides.php]
options_type = "EmbeddingConfig"
result_is_simple = true

[crates.e2e.calls.embed_texts.overrides.dart]
options_type = "EmbeddingConfig"
result_is_simple = true

[crates.e2e.calls.embed_texts.overrides.swift]
options_type = "EmbeddingConfig"

[crates.e2e.calls.embed_texts.overrides.zig]
# For Vec<Vec<f32>> Zig returns `[]u8` (JSON), so assertions are rendered against
# the parsed JSON value instead of struct fields.
result_is_json_struct = true
[crates.e2e.calls.batch_extract_bytes]
module = "kreuzberg"
function = "batch_extract_bytes"
async = true
returns_result = true
result_is_vec = true
skip_languages = ["wasm"]
args = [
    { name = "items", field = "input.items", type = "json_object", owned = true, go_type = "BatchBytesItem", element_type = "BatchBytesItem" },
    { name = "config", field = "input.config", type = "json_object", optional = true },
]

[crates.e2e.calls.batch_extract_bytes.overrides.csharp]
options_type = "ExtractionConfig"
result_is_vec = true

[crates.e2e.calls.batch_extract_bytes.overrides.java]
options_type = "ExtractionConfig"
enum_fields = { outputFormat = "OutputFormat" }
result_is_vec = true

[crates.e2e.calls.batch_extract_bytes.overrides.php]
# Intentionally empty: the PHP facade exposes the async variant under the bare
# name (`batchExtractBytes`).

[crates.e2e.calls.batch_extract_bytes.overrides.zig]
async = false
function = "batch_extract_bytes_sync"
[crates.e2e.calls.batch_extract_bytes_sync]
module = "kreuzberg"
function = "batch_extract_bytes_sync"
async = false
returns_result = true
result_is_vec = true
skip_languages = ["wasm"]
args = [
    { name = "items", field = "input.items", type = "json_object", owned = true, go_type = "BatchBytesItem", element_type = "BatchBytesItem" },
    { name = "config", field = "input.config", type = "json_object", optional = true },
]

[crates.e2e.calls.batch_extract_bytes_sync.overrides.csharp]
options_type = "ExtractionConfig"
result_is_vec = true

[crates.e2e.calls.batch_extract_bytes_sync.overrides.java]
options_type = "ExtractionConfig"
enum_fields = { outputFormat = "OutputFormat" }
result_is_vec = true
[crates.e2e.calls.list_document_extractors]
module = "kreuzberg"
function = "list_document_extractors"
args = []
async = false
result_is_simple = true
# Go additionally sets returns_result.
overrides.go = { result_is_simple = true, returns_result = true }
[crates.e2e.calls.list_ocr_backends]
module = "kreuzberg"
function = "list_ocr_backends"
args = []
async = false
result_is_simple = true
# Go additionally sets returns_result.
overrides.go = { result_is_simple = true, returns_result = true }
[crates.e2e.calls.list_post_processors]
module = "kreuzberg"
function = "list_post_processors"
args = []
async = false
result_is_simple = true
# Go additionally sets returns_result.
overrides.go = { result_is_simple = true, returns_result = true }
[crates.e2e.calls.list_validators]
module = "kreuzberg"
function = "list_validators"
args = []
async = false
result_is_simple = true
# Go additionally sets returns_result.
overrides.go = { result_is_simple = true, returns_result = true }
[crates.e2e.calls.list_embedding_backends]
module = "kreuzberg"
function = "list_embedding_backends"
args = []
async = false
result_is_simple = true
# Go additionally sets returns_result.
overrides.go = { result_is_simple = true, returns_result = true }
[crates.e2e.calls.list_renderers]
module = "kreuzberg"
function = "list_renderers"
args = []
async = false
result_is_simple = true
# Go additionally sets returns_result.
overrides.go = { result_is_simple = true, returns_result = true }
[crates.e2e.calls.clear_ocr_backends]
# Trait-bridge function: it is not part of the gen_function surface; the bridges
# emit it directly.
function = ""
module = ""
args = []
async = false
returns_void = true
returns_result = false
result_is_simple = true
# Per-language entry points, one compact record each.
overrides.python = { function = "clear_ocr_backends", module = "kreuzberg" }
overrides.rust = { function = "clear_ocr_backends", module = "kreuzberg" }
overrides.node = { function = "clearOcrBackends", module = "kreuzberg" }
overrides.go = { function = "ClearOCRBackends", module = "kreuzberg" }
overrides.ruby = { function = "clear_ocr_backends", module = "Kreuzberg" }
overrides.php = { function = "clearOcrBackends", module = "Kreuzberg" }
overrides.java = { function = "clearOcrBackends", module = "dev.kreuzberg.Kreuzberg" }
overrides.csharp = { function = "ClearOcrBackends", module = "Kreuzberg" }
overrides.swift = { function = "clearOcrBackends", module = "Kreuzberg" }
overrides.dart = { function = "clearOcrBackends", module = "kreuzberg" }
overrides.wasm = { function = "clearOcrBackends", module = "kreuzberg" }
overrides.r = { function = "clear_ocr_backends", module = "kreuzberg" }
overrides.elixir = { function = "clear_ocr_backends", module = "Kreuzberg" }
overrides.zig = { function = "clear_ocr_backends", module = "kreuzberg" }
overrides.kotlin_android = { function = "clearAll", class = "OcrBackendBridge" }
[crates.e2e.calls.clear_post_processors]
# Trait-bridge function: it is not part of the gen_function surface; the bridges
# emit it directly.
function = ""
module = ""
args = []
async = false
returns_void = true
returns_result = false
result_is_simple = true
# Per-language entry points, one compact record each.
overrides.python = { function = "clear_post_processors", module = "kreuzberg" }
overrides.rust = { function = "clear_post_processors", module = "kreuzberg" }
overrides.node = { function = "clearPostProcessors", module = "kreuzberg" }
overrides.go = { function = "ClearPostProcessors", module = "kreuzberg" }
overrides.ruby = { function = "clear_post_processors", module = "Kreuzberg" }
overrides.php = { function = "clearPostProcessors", module = "Kreuzberg" }
overrides.java = { function = "clearPostProcessors", module = "dev.kreuzberg.Kreuzberg" }
overrides.csharp = { function = "ClearPostProcessors", module = "Kreuzberg" }
overrides.swift = { function = "clearPostProcessors", module = "Kreuzberg" }
overrides.dart = { function = "clearPostProcessors", module = "kreuzberg" }
overrides.wasm = { function = "clearPostProcessors", module = "kreuzberg" }
overrides.r = { function = "clear_post_processors", module = "kreuzberg" }
overrides.elixir = { function = "clear_post_processors", module = "Kreuzberg" }
overrides.zig = { function = "clear_post_processors", module = "kreuzberg" }
overrides.kotlin_android = { function = "clearAll", class = "PostProcessorBridge" }
[crates.e2e.calls.clear_validators]
# Trait-bridge function: it is not part of the gen_function surface; the bridges
# emit it directly.
function = ""
module = ""
args = []
async = false
returns_void = true
returns_result = false
result_is_simple = true
# Per-language entry points, one compact record each.
overrides.python = { function = "clear_validators", module = "kreuzberg" }
overrides.rust = { function = "clear_validators", module = "kreuzberg" }
overrides.node = { function = "clearValidators", module = "kreuzberg" }
overrides.go = { function = "ClearValidators", module = "kreuzberg" }
overrides.ruby = { function = "clear_validators", module = "Kreuzberg" }
overrides.php = { function = "clearValidators", module = "Kreuzberg" }
overrides.java = { function = "clearValidators", module = "dev.kreuzberg.Kreuzberg" }
overrides.csharp = { function = "ClearValidators", module = "Kreuzberg" }
overrides.swift = { function = "clearValidators", module = "Kreuzberg" }
overrides.dart = { function = "clearValidators", module = "kreuzberg" }
overrides.wasm = { function = "clearValidators", module = "kreuzberg" }
overrides.r = { function = "clear_validators", module = "kreuzberg" }
overrides.elixir = { function = "clear_validators", module = "Kreuzberg" }
overrides.zig = { function = "clear_validators", module = "kreuzberg" }
overrides.kotlin_android = { function = "clearAll", class = "ValidatorBridge" }
[crates.e2e.calls.unregister_ocr_backend]
# Trait-bridge function: it is not part of the gen_function surface; the bridges
# emit it directly. function/module stay empty so that alef skips API surface
# validation.
function = ""
module = ""
args = [
    { name = "name", field = "input.name", type = "string" },
]
async = false
returns_void = true
returns_result = false
result_is_simple = true
# Per-language entry points, one compact record each.
overrides.python = { function = "unregister_ocr_backend", module = "kreuzberg" }
overrides.rust = { function = "unregister_ocr_backend", module = "kreuzberg" }
overrides.node = { function = "unregisterOcrBackend", module = "kreuzberg" }
overrides.go = { function = "UnregisterOCRBackend", module = "kreuzberg" }
overrides.ruby = { function = "unregister_ocr_backend", module = "Kreuzberg" }
overrides.php = { function = "unregisterOcrBackend", module = "Kreuzberg" }
overrides.java = { function = "unregisterOcrBackend", module = "dev.kreuzberg.Kreuzberg" }
overrides.csharp = { function = "UnregisterOcrBackend", module = "Kreuzberg" }
overrides.swift = { function = "unregisterOcrBackend", module = "Kreuzberg" }
overrides.dart = { function = "unregisterOcrBackend", module = "kreuzberg" }
overrides.wasm = { function = "unregisterOcrBackend", module = "kreuzberg" }
overrides.r = { function = "unregister_ocr_backend", module = "kreuzberg" }
overrides.elixir = { function = "unregister_ocr_backend", module = "Kreuzberg" }
overrides.zig = { function = "unregister_ocr_backend", module = "kreuzberg" }
overrides.kotlin_android = { function = "unregister", class = "OcrBackendBridge" }
[crates.e2e.calls.unregister_post_processor]
# Trait-bridge function: it is not part of the gen_function surface; the bridges
# emit it directly.
function = ""
module = ""
args = [
    { name = "name", field = "input.name", type = "string" },
]
async = false
returns_void = true
returns_result = false
result_is_simple = true
# Per-language entry points, one compact record each.
overrides.python = { function = "unregister_post_processor", module = "kreuzberg" }
overrides.rust = { function = "unregister_post_processor", module = "kreuzberg" }
overrides.node = { function = "unregisterPostProcessor", module = "kreuzberg" }
overrides.go = { function = "UnregisterPostProcessor", module = "kreuzberg" }
overrides.ruby = { function = "unregister_post_processor", module = "Kreuzberg" }
overrides.php = { function = "unregisterPostProcessor", module = "Kreuzberg" }
overrides.java = { function = "unregisterPostProcessor", module = "dev.kreuzberg.Kreuzberg" }
overrides.csharp = { function = "UnregisterPostProcessor", module = "Kreuzberg" }
overrides.swift = { function = "unregisterPostProcessor", module = "Kreuzberg" }
overrides.dart = { function = "unregisterPostProcessor", module = "kreuzberg" }
overrides.wasm = { function = "unregisterPostProcessor", module = "kreuzberg" }
overrides.r = { function = "unregister_post_processor", module = "kreuzberg" }
overrides.elixir = { function = "unregister_post_processor", module = "Kreuzberg" }
overrides.zig = { function = "unregister_post_processor", module = "kreuzberg" }
overrides.kotlin_android = { function = "unregister", class = "PostProcessorBridge" }
[crates.e2e.calls.unregister_validator]
# Trait-bridge function: it is not part of the gen_function surface; the bridges
# emit it directly.
function = ""
module = ""
args = [
    { name = "name", field = "input.name", type = "string" },
]
async = false
returns_void = true
returns_result = false
result_is_simple = true
# Per-language entry points, one compact record each.
overrides.python = { function = "unregister_validator", module = "kreuzberg" }
overrides.rust = { function = "unregister_validator", module = "kreuzberg" }
overrides.node = { function = "unregisterValidator", module = "kreuzberg" }
overrides.go = { function = "UnregisterValidator", module = "kreuzberg" }
overrides.ruby = { function = "unregister_validator", module = "Kreuzberg" }
overrides.php = { function = "unregisterValidator", module = "Kreuzberg" }
overrides.java = { function = "unregisterValidator", module = "dev.kreuzberg.Kreuzberg" }
overrides.csharp = { function = "UnregisterValidator", module = "Kreuzberg" }
overrides.swift = { function = "unregisterValidator", module = "Kreuzberg" }
overrides.dart = { function = "unregisterValidator", module = "kreuzberg" }
overrides.wasm = { function = "unregisterValidator", module = "kreuzberg" }
overrides.r = { function = "unregister_validator", module = "kreuzberg" }
overrides.elixir = { function = "unregister_validator", module = "Kreuzberg" }
overrides.zig = { function = "unregister_validator", module = "kreuzberg" }
overrides.kotlin_android = { function = "unregister", class = "ValidatorBridge" }
[crates.e2e.calls.register_ocr_backend]
# Trait-bridge function: it is not part of the gen_function surface; the bridges
# emit it directly.
function = ""
module = ""
args = []
async = false
returns_void = true
returns_result = false
result_is_simple = true
# Per-language entry points, one compact record each.
overrides.python = { function = "register_ocr_backend", module = "kreuzberg" }
overrides.rust = { function = "register_ocr_backend", module = "kreuzberg" }
overrides.node = { function = "registerOcrBackend", module = "kreuzberg" }
overrides.go = { function = "RegisterOCRBackend", module = "kreuzberg" }
overrides.ruby = { function = "register_ocr_backend", module = "Kreuzberg" }
overrides.php = { function = "registerOcrBackend", module = "Kreuzberg" }
overrides.java = { function = "registerOcrBackend", module = "dev.kreuzberg.Kreuzberg" }
overrides.csharp = { function = "RegisterOcrBackend", module = "Kreuzberg" }
overrides.swift = { function = "registerOcrBackend", module = "Kreuzberg" }
overrides.dart = { function = "registerOcrBackend", module = "kreuzberg" }
overrides.wasm = { function = "registerOcrBackend", module = "kreuzberg" }
overrides.elixir = { function = "register_ocr_backend", module = "Kreuzberg" }
overrides.zig = { function = "register_ocr_backend", module = "kreuzberg", returns_result = false }
overrides.kotlin_android = { function = "register", class = "OcrBackendBridge" }

# R carries a nested arg_name_map, so it gets a full table instead of an inline record.
[crates.e2e.calls.register_ocr_backend.overrides.r]
function = "register_ocr_backend"
module = "kreuzberg"
arg_name_map = { backend = "r_backend" }
[crates.e2e.calls.register_post_processor]
# Trait-bridge function: it is not part of the gen_function surface; the bridges
# emit it directly.
function = ""
module = ""
args = []
async = false
returns_void = true
returns_result = false
result_is_simple = true
# Per-language entry points, one compact record each.
overrides.python = { function = "register_post_processor", module = "kreuzberg" }
overrides.rust = { function = "register_post_processor", module = "kreuzberg" }
overrides.node = { function = "registerPostProcessor", module = "kreuzberg" }
overrides.go = { function = "RegisterPostProcessor", module = "kreuzberg" }
overrides.ruby = { function = "register_post_processor", module = "Kreuzberg" }
overrides.php = { function = "registerPostProcessor", module = "Kreuzberg" }
overrides.java = { function = "registerPostProcessor", module = "dev.kreuzberg.Kreuzberg" }
overrides.csharp = { function = "RegisterPostProcessor", module = "Kreuzberg" }
overrides.swift = { function = "registerPostProcessor", module = "Kreuzberg" }
overrides.dart = { function = "registerPostProcessor", module = "kreuzberg" }
overrides.wasm = { function = "registerPostProcessor", module = "kreuzberg" }
overrides.elixir = { function = "register_post_processor", module = "Kreuzberg" }
overrides.zig = { function = "register_post_processor", module = "kreuzberg", returns_result = false }
overrides.kotlin_android = { function = "register", class = "PostProcessorBridge" }

# R carries a nested arg_name_map, so it gets a full table instead of an inline record.
[crates.e2e.calls.register_post_processor.overrides.r]
function = "register_post_processor"
module = "kreuzberg"
arg_name_map = { processor = "r_backend" }
[crates.e2e.calls.register_validator]
# Trait-bridge function: it is not part of the gen_function surface; the bridges
# emit it directly.
function = ""
module = ""
args = []
async = false
returns_void = true
returns_result = false
result_is_simple = true
# Per-language entry points, one compact record each.
overrides.python = { function = "register_validator", module = "kreuzberg" }
overrides.rust = { function = "register_validator", module = "kreuzberg" }
overrides.node = { function = "registerValidator", module = "kreuzberg" }
overrides.go = { function = "RegisterValidator", module = "kreuzberg" }
overrides.ruby = { function = "register_validator", module = "Kreuzberg" }
overrides.php = { function = "registerValidator", module = "Kreuzberg" }
overrides.java = { function = "registerValidator", module = "dev.kreuzberg.Kreuzberg" }
overrides.csharp = { function = "RegisterValidator", module = "Kreuzberg" }
overrides.swift = { function = "registerValidator", module = "Kreuzberg" }
overrides.dart = { function = "registerValidator", module = "kreuzberg" }
overrides.wasm = { function = "registerValidator", module = "kreuzberg" }
overrides.elixir = { function = "register_validator", module = "Kreuzberg" }
overrides.zig = { function = "register_validator", module = "kreuzberg", returns_result = false }
overrides.kotlin_android = { function = "register", class = "ValidatorBridge" }

# R carries a nested arg_name_map, so it gets a full table instead of an inline record.
[crates.e2e.calls.register_validator.overrides.r]
function = "register_validator"
module = "kreuzberg"
arg_name_map = { validator = "r_backend" }
# e2e call definition for register_embedding_backend. The base entry leaves
# `function` and `module` empty; each language override below sets `function`
# plus `module` (or `class` for kotlin_android).
[crates.e2e.calls.register_embedding_backend]
# Trait-bridge function — excluded from gen_function surface; bridges emit it directly.
result_is_simple = true
function = ""
module = ""
async = false
returns_result = false
returns_void = true
args = []
[crates.e2e.calls.register_embedding_backend.overrides.python]
function = "register_embedding_backend"
module = "kreuzberg"
[crates.e2e.calls.register_embedding_backend.overrides.rust]
function = "register_embedding_backend"
module = "kreuzberg"
[crates.e2e.calls.register_embedding_backend.overrides.node]
function = "registerEmbeddingBackend"
module = "kreuzberg"
[crates.e2e.calls.register_embedding_backend.overrides.go]
function = "RegisterEmbeddingBackend"
module = "kreuzberg"
[crates.e2e.calls.register_embedding_backend.overrides.ruby]
function = "register_embedding_backend"
module = "Kreuzberg"
[crates.e2e.calls.register_embedding_backend.overrides.php]
function = "registerEmbeddingBackend"
module = "Kreuzberg"
[crates.e2e.calls.register_embedding_backend.overrides.java]
function = "registerEmbeddingBackend"
module = "dev.kreuzberg.Kreuzberg"
[crates.e2e.calls.register_embedding_backend.overrides.csharp]
function = "RegisterEmbeddingBackend"
module = "Kreuzberg"
[crates.e2e.calls.register_embedding_backend.overrides.swift]
function = "registerEmbeddingBackend"
module = "Kreuzberg"
[crates.e2e.calls.register_embedding_backend.overrides.dart]
function = "registerEmbeddingBackend"
module = "kreuzberg"
[crates.e2e.calls.register_embedding_backend.overrides.wasm]
function = "registerEmbeddingBackend"
module = "kreuzberg"
[crates.e2e.calls.register_embedding_backend.overrides.r]
function = "register_embedding_backend"
module = "kreuzberg"
# R additionally remaps the `backend` argument name to `r_backend`.
arg_name_map = { backend = "r_backend" }
[crates.e2e.calls.register_embedding_backend.overrides.elixir]
function = "register_embedding_backend"
module = "Kreuzberg"
[crates.e2e.calls.register_embedding_backend.overrides.zig]
function = "register_embedding_backend"
module = "kreuzberg"
# Repeats the base entry's returns_result = false for Zig.
returns_result = false
# kotlin_android names a `class` instead of a `module`.
[crates.e2e.calls.register_embedding_backend.overrides.kotlin_android]
function = "register"
class = "EmbeddingBackendBridge"
# e2e call definition for unregister_embedding_backend. The base entry leaves
# `function` and `module` empty; each language override below sets `function`
# plus `module` (or `class` for kotlin_android).
[crates.e2e.calls.unregister_embedding_backend]
# Trait-bridge function — excluded from gen_function surface; bridges emit it directly.
result_is_simple = true
function = ""
module = ""
async = false
returns_result = false
returns_void = true
# Single string argument `name`, bound to `input.name`.
args = [{ name = "name", field = "input.name", type = "string" }]
[crates.e2e.calls.unregister_embedding_backend.overrides.python]
function = "unregister_embedding_backend"
module = "kreuzberg"
[crates.e2e.calls.unregister_embedding_backend.overrides.rust]
function = "unregister_embedding_backend"
module = "kreuzberg"
[crates.e2e.calls.unregister_embedding_backend.overrides.node]
function = "unregisterEmbeddingBackend"
module = "kreuzberg"
[crates.e2e.calls.unregister_embedding_backend.overrides.go]
function = "UnregisterEmbeddingBackend"
module = "kreuzberg"
[crates.e2e.calls.unregister_embedding_backend.overrides.ruby]
function = "unregister_embedding_backend"
module = "Kreuzberg"
[crates.e2e.calls.unregister_embedding_backend.overrides.php]
function = "unregisterEmbeddingBackend"
module = "Kreuzberg"
[crates.e2e.calls.unregister_embedding_backend.overrides.java]
function = "unregisterEmbeddingBackend"
module = "dev.kreuzberg.Kreuzberg"
[crates.e2e.calls.unregister_embedding_backend.overrides.csharp]
function = "UnregisterEmbeddingBackend"
module = "Kreuzberg"
[crates.e2e.calls.unregister_embedding_backend.overrides.swift]
function = "unregisterEmbeddingBackend"
module = "Kreuzberg"
[crates.e2e.calls.unregister_embedding_backend.overrides.dart]
function = "unregisterEmbeddingBackend"
module = "kreuzberg"
[crates.e2e.calls.unregister_embedding_backend.overrides.wasm]
function = "unregisterEmbeddingBackend"
module = "kreuzberg"
[crates.e2e.calls.unregister_embedding_backend.overrides.r]
function = "unregister_embedding_backend"
module = "kreuzberg"
[crates.e2e.calls.unregister_embedding_backend.overrides.elixir]
function = "unregister_embedding_backend"
module = "Kreuzberg"
[crates.e2e.calls.unregister_embedding_backend.overrides.zig]
function = "unregister_embedding_backend"
module = "kreuzberg"
# kotlin_android names a `class` instead of a `module`.
[crates.e2e.calls.unregister_embedding_backend.overrides.kotlin_android]
function = "unregister"
class = "EmbeddingBackendBridge"
# e2e call definition for clear_embedding_backends (no arguments). The base entry
# leaves `function` and `module` empty; each language override below sets
# `function` plus `module` (or `class` for kotlin_android).
[crates.e2e.calls.clear_embedding_backends]
# Trait-bridge function — excluded from gen_function surface; bridges emit it directly.
result_is_simple = true
function = ""
module = ""
async = false
returns_result = false
returns_void = true
args = []
[crates.e2e.calls.clear_embedding_backends.overrides.python]
function = "clear_embedding_backends"
module = "kreuzberg"
[crates.e2e.calls.clear_embedding_backends.overrides.rust]
function = "clear_embedding_backends"
module = "kreuzberg"
[crates.e2e.calls.clear_embedding_backends.overrides.node]
function = "clearEmbeddingBackends"
module = "kreuzberg"
[crates.e2e.calls.clear_embedding_backends.overrides.go]
function = "ClearEmbeddingBackends"
module = "kreuzberg"
[crates.e2e.calls.clear_embedding_backends.overrides.ruby]
function = "clear_embedding_backends"
module = "Kreuzberg"
[crates.e2e.calls.clear_embedding_backends.overrides.php]
function = "clearEmbeddingBackends"
module = "Kreuzberg"
[crates.e2e.calls.clear_embedding_backends.overrides.java]
function = "clearEmbeddingBackends"
module = "dev.kreuzberg.Kreuzberg"
[crates.e2e.calls.clear_embedding_backends.overrides.csharp]
function = "ClearEmbeddingBackends"
module = "Kreuzberg"
[crates.e2e.calls.clear_embedding_backends.overrides.swift]
function = "clearEmbeddingBackends"
module = "Kreuzberg"
[crates.e2e.calls.clear_embedding_backends.overrides.dart]
function = "clearEmbeddingBackends"
module = "kreuzberg"
[crates.e2e.calls.clear_embedding_backends.overrides.wasm]
function = "clearEmbeddingBackends"
module = "kreuzberg"
[crates.e2e.calls.clear_embedding_backends.overrides.r]
function = "clear_embedding_backends"
module = "kreuzberg"
[crates.e2e.calls.clear_embedding_backends.overrides.elixir]
function = "clear_embedding_backends"
module = "Kreuzberg"
[crates.e2e.calls.clear_embedding_backends.overrides.zig]
function = "clear_embedding_backends"
module = "kreuzberg"
# kotlin_android names a `class` instead of a `module`; the function is `clearAll`.
[crates.e2e.calls.clear_embedding_backends.overrides.kotlin_android]
function = "clearAll"
class = "EmbeddingBackendBridge"
# e2e call definition for register_document_extractor. The base entry leaves
# `function` and `module` empty; each language override below sets `function`
# plus `module` (or `class` for kotlin_android).
[crates.e2e.calls.register_document_extractor]
# Trait-bridge function — excluded from gen_function surface; bridges emit it directly.
result_is_simple = true
function = ""
module = ""
async = false
returns_result = false
returns_void = true
args = []
[crates.e2e.calls.register_document_extractor.overrides.python]
function = "register_document_extractor"
module = "kreuzberg"
[crates.e2e.calls.register_document_extractor.overrides.rust]
function = "register_document_extractor"
module = "kreuzberg"
[crates.e2e.calls.register_document_extractor.overrides.node]
function = "registerDocumentExtractor"
module = "kreuzberg"
[crates.e2e.calls.register_document_extractor.overrides.go]
function = "RegisterDocumentExtractor"
module = "kreuzberg"
[crates.e2e.calls.register_document_extractor.overrides.ruby]
function = "register_document_extractor"
module = "Kreuzberg"
[crates.e2e.calls.register_document_extractor.overrides.php]
function = "registerDocumentExtractor"
module = "Kreuzberg"
[crates.e2e.calls.register_document_extractor.overrides.java]
function = "registerDocumentExtractor"
module = "dev.kreuzberg.Kreuzberg"
[crates.e2e.calls.register_document_extractor.overrides.csharp]
function = "RegisterDocumentExtractor"
module = "Kreuzberg"
[crates.e2e.calls.register_document_extractor.overrides.swift]
function = "registerDocumentExtractor"
module = "Kreuzberg"
[crates.e2e.calls.register_document_extractor.overrides.dart]
function = "registerDocumentExtractor"
module = "kreuzberg"
[crates.e2e.calls.register_document_extractor.overrides.wasm]
function = "registerDocumentExtractor"
module = "kreuzberg"
[crates.e2e.calls.register_document_extractor.overrides.r]
function = "register_document_extractor"
module = "kreuzberg"
# R additionally remaps the `extractor` argument name to `r_backend`.
arg_name_map = { extractor = "r_backend" }
[crates.e2e.calls.register_document_extractor.overrides.elixir]
function = "register_document_extractor"
module = "Kreuzberg"
[crates.e2e.calls.register_document_extractor.overrides.zig]
function = "register_document_extractor"
module = "kreuzberg"
# Repeats the base entry's returns_result = false for Zig.
returns_result = false
# kotlin_android names a `class` instead of a `module`.
[crates.e2e.calls.register_document_extractor.overrides.kotlin_android]
function = "register"
class = "DocumentExtractorBridge"
# e2e call definition for unregister_document_extractor. The base entry leaves
# `function` and `module` empty; each language override below sets `function`
# plus `module` (or `class` for kotlin_android).
[crates.e2e.calls.unregister_document_extractor]
# Trait-bridge function — excluded from gen_function surface; bridges emit it directly.
result_is_simple = true
function = ""
module = ""
async = false
returns_result = false
returns_void = true
# Single string argument `name`, bound to `input.name`.
args = [{ name = "name", field = "input.name", type = "string" }]
[crates.e2e.calls.unregister_document_extractor.overrides.python]
function = "unregister_document_extractor"
module = "kreuzberg"
[crates.e2e.calls.unregister_document_extractor.overrides.rust]
function = "unregister_document_extractor"
module = "kreuzberg"
[crates.e2e.calls.unregister_document_extractor.overrides.node]
function = "unregisterDocumentExtractor"
module = "kreuzberg"
[crates.e2e.calls.unregister_document_extractor.overrides.go]
function = "UnregisterDocumentExtractor"
module = "kreuzberg"
[crates.e2e.calls.unregister_document_extractor.overrides.ruby]
function = "unregister_document_extractor"
module = "Kreuzberg"
[crates.e2e.calls.unregister_document_extractor.overrides.php]
function = "unregisterDocumentExtractor"
module = "Kreuzberg"
[crates.e2e.calls.unregister_document_extractor.overrides.java]
function = "unregisterDocumentExtractor"
module = "dev.kreuzberg.Kreuzberg"
[crates.e2e.calls.unregister_document_extractor.overrides.csharp]
function = "UnregisterDocumentExtractor"
module = "Kreuzberg"
[crates.e2e.calls.unregister_document_extractor.overrides.swift]
function = "unregisterDocumentExtractor"
module = "Kreuzberg"
[crates.e2e.calls.unregister_document_extractor.overrides.dart]
function = "unregisterDocumentExtractor"
module = "kreuzberg"
[crates.e2e.calls.unregister_document_extractor.overrides.wasm]
function = "unregisterDocumentExtractor"
module = "kreuzberg"
[crates.e2e.calls.unregister_document_extractor.overrides.r]
function = "unregister_document_extractor"
module = "kreuzberg"
[crates.e2e.calls.unregister_document_extractor.overrides.elixir]
function = "unregister_document_extractor"
module = "Kreuzberg"
[crates.e2e.calls.unregister_document_extractor.overrides.zig]
function = "unregister_document_extractor"
module = "kreuzberg"
# kotlin_android names a `class` instead of a `module`.
[crates.e2e.calls.unregister_document_extractor.overrides.kotlin_android]
function = "unregister"
class = "DocumentExtractorBridge"
# e2e call definition for clear_document_extractors (no arguments). The base
# entry leaves `function` and `module` empty; each language override below sets
# `function` plus `module` (or `class` for kotlin_android).
[crates.e2e.calls.clear_document_extractors]
# Trait-bridge function — excluded from gen_function surface; bridges emit it directly.
result_is_simple = true
function = ""
module = ""
async = false
returns_result = false
returns_void = true
args = []
[crates.e2e.calls.clear_document_extractors.overrides.python]
function = "clear_document_extractors"
module = "kreuzberg"
[crates.e2e.calls.clear_document_extractors.overrides.rust]
function = "clear_document_extractors"
module = "kreuzberg"
[crates.e2e.calls.clear_document_extractors.overrides.node]
function = "clearDocumentExtractors"
module = "kreuzberg"
[crates.e2e.calls.clear_document_extractors.overrides.go]
function = "ClearDocumentExtractors"
module = "kreuzberg"
[crates.e2e.calls.clear_document_extractors.overrides.ruby]
function = "clear_document_extractors"
module = "Kreuzberg"
[crates.e2e.calls.clear_document_extractors.overrides.php]
function = "clearDocumentExtractors"
module = "Kreuzberg"
[crates.e2e.calls.clear_document_extractors.overrides.java]
function = "clearDocumentExtractors"
module = "dev.kreuzberg.Kreuzberg"
[crates.e2e.calls.clear_document_extractors.overrides.csharp]
function = "ClearDocumentExtractors"
module = "Kreuzberg"
[crates.e2e.calls.clear_document_extractors.overrides.swift]
function = "clearDocumentExtractors"
module = "Kreuzberg"
[crates.e2e.calls.clear_document_extractors.overrides.dart]
function = "clearDocumentExtractors"
module = "kreuzberg"
[crates.e2e.calls.clear_document_extractors.overrides.wasm]
function = "clearDocumentExtractors"
module = "kreuzberg"
[crates.e2e.calls.clear_document_extractors.overrides.r]
function = "clear_document_extractors"
module = "kreuzberg"
[crates.e2e.calls.clear_document_extractors.overrides.elixir]
function = "clear_document_extractors"
module = "Kreuzberg"
[crates.e2e.calls.clear_document_extractors.overrides.zig]
function = "clear_document_extractors"
module = "kreuzberg"
# kotlin_android names a `class` instead of a `module`; the function is `clearAll`.
[crates.e2e.calls.clear_document_extractors.overrides.kotlin_android]
function = "clearAll"
class = "DocumentExtractorBridge"
# e2e call definition for register_renderer. The base entry leaves `function`
# and `module` empty; each language override below sets `function` plus `module`
# (or `class` for kotlin_android).
[crates.e2e.calls.register_renderer]
# Trait-bridge function — excluded from gen_function surface; bridges emit it directly.
result_is_simple = true
function = ""
module = ""
async = false
returns_result = false
returns_void = true
args = []
[crates.e2e.calls.register_renderer.overrides.python]
function = "register_renderer"
module = "kreuzberg"
[crates.e2e.calls.register_renderer.overrides.rust]
function = "register_renderer"
module = "kreuzberg"
[crates.e2e.calls.register_renderer.overrides.node]
function = "registerRenderer"
module = "kreuzberg"
[crates.e2e.calls.register_renderer.overrides.go]
function = "RegisterRenderer"
module = "kreuzberg"
[crates.e2e.calls.register_renderer.overrides.ruby]
function = "register_renderer"
module = "Kreuzberg"
[crates.e2e.calls.register_renderer.overrides.php]
function = "registerRenderer"
module = "Kreuzberg"
[crates.e2e.calls.register_renderer.overrides.java]
function = "registerRenderer"
module = "dev.kreuzberg.Kreuzberg"
[crates.e2e.calls.register_renderer.overrides.csharp]
function = "RegisterRenderer"
module = "Kreuzberg"
[crates.e2e.calls.register_renderer.overrides.swift]
function = "registerRenderer"
module = "Kreuzberg"
[crates.e2e.calls.register_renderer.overrides.dart]
function = "registerRenderer"
module = "kreuzberg"
[crates.e2e.calls.register_renderer.overrides.wasm]
function = "registerRenderer"
module = "kreuzberg"
[crates.e2e.calls.register_renderer.overrides.r]
function = "register_renderer"
module = "kreuzberg"
# R additionally remaps the `renderer` argument name to `r_backend`.
arg_name_map = { renderer = "r_backend" }
[crates.e2e.calls.register_renderer.overrides.elixir]
function = "register_renderer"
module = "Kreuzberg"
[crates.e2e.calls.register_renderer.overrides.zig]
function = "register_renderer"
module = "kreuzberg"
# Repeats the base entry's returns_result = false for Zig.
returns_result = false
# kotlin_android names a `class` instead of a `module`.
[crates.e2e.calls.register_renderer.overrides.kotlin_android]
function = "register"
class = "RendererBridge"
# e2e call definition for unregister_renderer. The base entry leaves `function`
# and `module` empty; each language override below sets `function` plus `module`
# (or `class` for kotlin_android).
[crates.e2e.calls.unregister_renderer]
# Trait-bridge function — excluded from gen_function surface; bridges emit it directly.
result_is_simple = true
function = ""
module = ""
async = false
returns_result = false
returns_void = true
# Single string argument `name`, bound to `input.name`.
args = [{ name = "name", field = "input.name", type = "string" }]
[crates.e2e.calls.unregister_renderer.overrides.python]
function = "unregister_renderer"
module = "kreuzberg"
[crates.e2e.calls.unregister_renderer.overrides.rust]
function = "unregister_renderer"
module = "kreuzberg"
[crates.e2e.calls.unregister_renderer.overrides.node]
function = "unregisterRenderer"
module = "kreuzberg"
[crates.e2e.calls.unregister_renderer.overrides.go]
function = "UnregisterRenderer"
module = "kreuzberg"
[crates.e2e.calls.unregister_renderer.overrides.ruby]
function = "unregister_renderer"
module = "Kreuzberg"
[crates.e2e.calls.unregister_renderer.overrides.php]
function = "unregisterRenderer"
module = "Kreuzberg"
[crates.e2e.calls.unregister_renderer.overrides.java]
function = "unregisterRenderer"
module = "dev.kreuzberg.Kreuzberg"
[crates.e2e.calls.unregister_renderer.overrides.csharp]
function = "UnregisterRenderer"
module = "Kreuzberg"
[crates.e2e.calls.unregister_renderer.overrides.swift]
function = "unregisterRenderer"
module = "Kreuzberg"
[crates.e2e.calls.unregister_renderer.overrides.dart]
function = "unregisterRenderer"
module = "kreuzberg"
[crates.e2e.calls.unregister_renderer.overrides.wasm]
function = "unregisterRenderer"
module = "kreuzberg"
[crates.e2e.calls.unregister_renderer.overrides.r]
function = "unregister_renderer"
module = "kreuzberg"
[crates.e2e.calls.unregister_renderer.overrides.elixir]
function = "unregister_renderer"
module = "Kreuzberg"
[crates.e2e.calls.unregister_renderer.overrides.zig]
function = "unregister_renderer"
module = "kreuzberg"
# kotlin_android names a `class` instead of a `module`.
[crates.e2e.calls.unregister_renderer.overrides.kotlin_android]
function = "unregister"
class = "RendererBridge"
# e2e call definition for clear_renderers (no arguments). The base entry leaves
# `function` and `module` empty; each language override below sets `function`
# plus `module` (or `class` for kotlin_android).
[crates.e2e.calls.clear_renderers]
# Trait-bridge function — excluded from gen_function surface; bridges emit it directly.
result_is_simple = true
function = ""
module = ""
async = false
returns_result = false
returns_void = true
args = []
[crates.e2e.calls.clear_renderers.overrides.python]
function = "clear_renderers"
module = "kreuzberg"
[crates.e2e.calls.clear_renderers.overrides.rust]
function = "clear_renderers"
module = "kreuzberg"
[crates.e2e.calls.clear_renderers.overrides.node]
function = "clearRenderers"
module = "kreuzberg"
[crates.e2e.calls.clear_renderers.overrides.go]
function = "ClearRenderers"
module = "kreuzberg"
[crates.e2e.calls.clear_renderers.overrides.ruby]
function = "clear_renderers"
module = "Kreuzberg"
[crates.e2e.calls.clear_renderers.overrides.php]
function = "clearRenderers"
module = "Kreuzberg"
[crates.e2e.calls.clear_renderers.overrides.java]
function = "clearRenderers"
module = "dev.kreuzberg.Kreuzberg"
[crates.e2e.calls.clear_renderers.overrides.csharp]
function = "ClearRenderers"
module = "Kreuzberg"
[crates.e2e.calls.clear_renderers.overrides.swift]
function = "clearRenderers"
module = "Kreuzberg"
[crates.e2e.calls.clear_renderers.overrides.dart]
function = "clearRenderers"
module = "kreuzberg"
[crates.e2e.calls.clear_renderers.overrides.wasm]
function = "clearRenderers"
module = "kreuzberg"
[crates.e2e.calls.clear_renderers.overrides.r]
function = "clear_renderers"
module = "kreuzberg"
[crates.e2e.calls.clear_renderers.overrides.elixir]
function = "clear_renderers"
module = "Kreuzberg"
[crates.e2e.calls.clear_renderers.overrides.zig]
function = "clear_renderers"
module = "kreuzberg"
# kotlin_android names a `class` instead of a `module`; the function is `clearAll`.
[crates.e2e.calls.clear_renderers.overrides.kotlin_android]
function = "clearAll"
class = "RendererBridge"
# e2e call definition for embed_texts_async: an async call whose result is
# flagged as a simple array. Takes an owned list of strings (`texts`) and an
# optional `config` object typed as EmbeddingConfig.
[crates.e2e.calls.embed_texts_async]
result_is_simple = true
result_is_array = true
function = "embed_texts_async"
module = "kreuzberg"
async = true
returns_result = true
# NOTE(review): swift is listed here and also has an override table further
# down — confirm whether that override is still needed.
skip_languages = ["wasm", "swift"]
options_type = "EmbeddingConfig"
args = [
    { name = "texts", field = "input.texts", type = "json_object", owned = true, element_type = "String" },
    { name = "config", field = "input.config", type = "json_object", optional = true },
]

[crates.e2e.calls.embed_texts_async.overrides.csharp]
result_is_simple = true
options_type = "EmbeddingConfig"
nested_types.embedding_model = "EmbeddingModelType"

[crates.e2e.calls.embed_texts_async.overrides.java]
result_is_simple = true
options_type = "EmbeddingConfig"

[crates.e2e.calls.embed_texts_async.overrides.go]
result_is_simple = true
result_is_array = true
options_type = "EmbeddingConfig"

[crates.e2e.calls.embed_texts_async.overrides.php]
result_is_simple = true

[crates.e2e.calls.embed_texts_async.overrides.dart]
result_is_simple = true
options_type = "EmbeddingConfig"

[crates.e2e.calls.embed_texts_async.overrides.swift]
options_type = "EmbeddingConfig"
# e2e call definition for get_embedding_preset: synchronous, result flagged as
# optional. The `name` argument is bound to `input.preset_name`.
[crates.e2e.calls.get_embedding_preset]
result_is_simple = true
result_is_option = true
function = "get_embedding_preset"
module = "kreuzberg"
async = false
returns_result = false
args = [
    { name = "name", field = "input.preset_name", type = "string" },
]
# e2e call definition for list_embedding_presets: synchronous, no arguments,
# result flagged as a simple array.
[crates.e2e.calls.list_embedding_presets]
result_is_simple = true
result_is_array = true
function = "list_embedding_presets"
module = "kreuzberg"
async = false
returns_result = false
args = []
# e2e call definition for detect_mime_type: synchronous, returns_result = true.
# `path` is required; `check_exists` is an optional bool.
[crates.e2e.calls.detect_mime_type]
result_is_simple = true
function = "detect_mime_type"
module = "kreuzberg"
async = false
returns_result = true
args = [
{ name = "path", field = "input.path", type = "string" },
{ name = "check_exists", field = "input.check_exists", type = "bool", optional = true },
]
# e2e call definition for render_pdf_page_to_png: synchronous, result flagged as
# bytes, skipped for wasm. Takes the PDF bytes and an integer page index.
[crates.e2e.calls.render_pdf_page_to_png]
result_is_bytes = true
result_is_simple = true
function = "render_pdf_page_to_png"
module = "kreuzberg"
async = false
returns_result = true
skip_languages = ["wasm"]
args = [
{ name = "pdf_bytes", field = "input.pdf_bytes", type = "bytes" },
{ name = "page_index", field = "input.page_index", type = "int" },
]
# The overrides below each add two `extra_args`, written as that language's
# null/none spelling. Judging by the labelled swift form they are presumably
# `dpi` and `password` — confirm against the function signature.
[crates.e2e.calls.render_pdf_page_to_png.overrides.rust]
result_is_bytes = true
result_is_simple = true
extra_args = ["None", "None"]
[crates.e2e.calls.render_pdf_page_to_png.overrides.csharp]
result_is_bytes = true
result_is_simple = true
extra_args = ["null", "null"]
[crates.e2e.calls.render_pdf_page_to_png.overrides.go]
result_is_bytes = true
result_is_simple = true
# Go is the only override here that also sets result_is_array.
result_is_array = true
extra_args = ["nil", "nil"]
[crates.e2e.calls.render_pdf_page_to_png.overrides.java]
result_is_bytes = true
result_is_simple = true
extra_args = ["null", "null"]
[crates.e2e.calls.render_pdf_page_to_png.overrides.zig]
result_is_bytes = true
result_is_simple = true
extra_args = ["null", "null"]
[crates.e2e.calls.render_pdf_page_to_png.overrides.r]
result_is_bytes = true
result_is_simple = true
extra_args = ["NULL", "NULL"]
[crates.e2e.calls.render_pdf_page_to_png.overrides.swift]
result_is_bytes = true
result_is_simple = true
# Swift passes the extra arguments with labels.
extra_args = ["dpi: nil", "password: nil"]
# Python-only overrides. Each table sets `function` to the same identifier as its
# call key. No base entry for these calls appears in this part of the file.
# NOTE(review): confirm the base entries are defined elsewhere, or that these
# implicit parents are intended.
[crates.e2e.calls.bold.overrides.python]
function = "bold"
[crates.e2e.calls.italic.overrides.python]
function = "italic"
[crates.e2e.calls.link.overrides.python]
function = "link"
[crates.e2e.calls.code_markup.overrides.python]
function = "code_markup"
[crates.e2e.calls.strikethrough.overrides.python]
function = "strikethrough"
[crates.e2e.calls.underline.overrides.python]
function = "underline"
[crates.e2e.calls.classify_uri.overrides.python]
function = "classify_uri"
[crates.e2e.calls.clean_extracted_text.overrides.python]
function = "clean_extracted_text"
[crates.e2e.calls.escape_html_entities.overrides.python]
function = "escape_html_entities"
[crates.e2e.calls.fix_mojibake.overrides.python]
function = "fix_mojibake"
[crates.e2e.calls.is_valid_utf8.overrides.python]
function = "is_valid_utf8"
[crates.e2e.calls.convert_html_to_markdown.overrides.python]
function = "convert_html_to_markdown"
[crates.e2e.calls.generate_cache_key.overrides.python]
function = "generate_cache_key"
[crates.e2e.calls.validate_cache_key.overrides.python]
function = "validate_cache_key"
[crates.e2e.calls.detect_image_format.overrides.python]
function = "detect_image_format"
[crates.e2e.calls.is_valid_format_field.overrides.python]
function = "is_valid_format_field"
[crates.e2e.calls.normalize_vec.overrides.python]
function = "normalize_vec"
[crates.e2e.calls.list_supported_formats.overrides.python]
function = "list_supported_formats"
[crates.e2e.calls.validate_confidence_fn.overrides.python]
function = "validate_confidence_fn"
[crates.e2e.calls.validate_port.overrides.python]
function = "validate_port"
[crates.e2e.calls.validate_output_format_fn.overrides.python]
function = "validate_output_format_fn"
[crates.e2e.calls.validate_mime_type.overrides.python]
function = "validate_mime_type"
[crates.e2e.calls.validate_tesseract_oem.overrides.python]
function = "validate_tesseract_oem"
[crates.e2e.calls.validate_tesseract_psm.overrides.python]
function = "validate_tesseract_psm"
[crates.e2e.calls.validate_token_reduction_level.overrides.python]
function = "validate_token_reduction_level"
[crates.e2e.calls.validate_language_code.overrides.python]
function = "validate_language_code"
[crates.e2e.calls.validate_ocr_backend.overrides.python]
function = "validate_ocr_backend"
[crates.e2e.calls.validate_binarization_method.overrides.python]
function = "validate_binarization_method"
[crates.e2e.calls.validate_host.overrides.python]
function = "validate_host"
[crates.e2e.calls.extract_doc_text.overrides.python]
function = "extract_doc_text"
[crates.e2e.calls.extract_email_content.overrides.python]
function = "extract_email_content"
[crates.e2e.calls.extract_pptx_from_bytes.overrides.python]
function = "extract_pptx_from_bytes"
[crates.e2e.calls.extract_text_from_pdf.overrides.python]
function = "extract_text_from_pdf"
# Per-language package locations for the e2e suites. Every `path` is relative
# and starts with `../../`; some entries also carry a `name` or `module`
# identifier for the package.
[crates.e2e.packages.rust]
path = "../../crates/kreuzberg"
[crates.e2e.packages.python]
name = "kreuzberg"
path = "../../packages/python"
# The node package is taken from the crate directory, not from packages/.
[crates.e2e.packages.node]
name = "@kreuzberg/node"
path = "../../crates/kreuzberg-node"
[crates.e2e.packages.go]
module = "github.com/kreuzberg-dev/kreuzberg/v5"
path = "../../packages/go/v5"
# Unlike the other entries, the C# path points at a project file, not a directory.
[crates.e2e.packages.csharp]
name = "Kreuzberg"
path = "../../packages/csharp/Kreuzberg/Kreuzberg.csproj"
[crates.e2e.packages.java]
module = "dev.kreuzberg"
path = "../../packages/java"
[crates.e2e.packages.ruby]
name = "kreuzberg"
path = "../../packages/ruby"
[crates.e2e.packages.elixir]
module = "Kreuzberg"
path = "../../packages/elixir"
# NOTE(review): the PHP README settings in this file use
# package_name = "kreuzberg/kreuzberg", which differs from the name below —
# confirm which one matches the published Composer package.
[crates.e2e.packages.php]
name = "kreuzberg-dev/kreuzberg"
path = "../../packages/php"
[crates.e2e.packages.r]
module = "kreuzberg"
path = "../../packages/r"
[crates.e2e.packages.dart]
path = "../../packages/dart"
[crates.e2e.packages.kotlin_android]
module = "dev.kreuzberg"
path = "../../packages/kotlin-android"
[crates.e2e.packages.swift]
path = "../../packages/swift"
[crates.e2e.packages.zig]
path = "../../packages/zig"
# Point WASM e2e at the wasm-pack `nodejs` target. The default `pkg/` is the
# bundler target whose JS glue uses `import * as foo from "env"` statements
# (unresolved namespace imports for getrandom/wasi-bound symbols), which Node
# cannot satisfy at import time. The `pkg/nodejs/` variant inlines those
# imports as `require()` calls handled by wasm-bindgen's Node loader.
[crates.e2e.packages.wasm]
name = "@kreuzberg/wasm"
path = "../../crates/kreuzberg-wasm/pkg/nodejs"
# Formatter command lines for python, go and rust. `{dir}` is a placeholder in
# each command (presumably replaced with the generated e2e directory — confirm).
[crates.e2e.format]
python = "ruff check --fix {dir} && ruff format {dir}"
go = "gofmt -w {dir}"
# Runs in a subshell so the `cd` does not leak into later commands.
rust = "(cd {dir} && cargo fmt --all)"
# Field alias table; the single entry maps the name to itself.
[crates.e2e.fields]
chunks_have_content = "chunks_have_content"
# The key is quoted on purpose: it is one key containing a dot, not a nested
# `extraction_result` table.
[crates.e2e.fields_c_types]
"extraction_result.metadata" = "Metadata"
[crates.e2e.registry]
output = "test_apps"
# Package metadata (description, license, URLs, authors, keywords).
[crates.scaffold]
description = "High-performance document intelligence library"
license = "Elastic-2.0"
repository = "https://github.com/kreuzberg-dev/kreuzberg"
homepage = "https://kreuzberg.dev"
authors = ["Na'aman Hirschfeld <naaman@kreuzberg.dev>"]
keywords = ["document", "extraction", "pdf", "ocr", "text"]
# Present but empty: no cargo-specific scaffold settings are given.
[crates.scaffold.cargo]
# Shared README generation settings. `output_pattern` contains a `{language}`
# placeholder; several language tables in this file set their own `output_path`.
[crates.readme]
template_dir = "templates/readme"
snippets_dir = "docs/snippets"
output_pattern = "packages/{language}/README.md"
discord_url = "https://discord.gg/xt9WY3GnKR"
banner_url = "https://github.com/user-attachments/assets/419fc06c-8313-4324-b159-4b4d3cfce5c0"
# The `rust` entry uses the `root.md` template and writes to the top-level README.md.
[crates.readme.languages.rust]
template = "root.md"
name = "Kreuzberg"
output_path = "README.md"
# README settings for the Python package.
[crates.readme.languages.python]
template = "python.md"
name = "Python"
package_name = "kreuzberg"
badge_url = "https://img.shields.io/pypi/v/kreuzberg?label=Python&color=007ec6"
docs_url = "https://kreuzberg.dev/"
# Folded with line-ending backslashes; the value is a single line.
description = """\
Extract text, tables, images, and metadata from 90+ file formats and 300+ programming languages \
including PDF, Office documents, and images. \
Native Python bindings with async/await support, multiple OCR backends (Tesseract, EasyOCR, PaddleOCR), and extensible plugin system.\
"""
package_manager = ["pip"]
features.ocr = true
features.async = true
features.plugin_system = true
features.embeddings = true
features.code_intelligence = true
ocr_backends = ["tesseract", "easyocr", "paddleocr"]
optional_sections = [
    "async_vs_sync_performance",
    "ocr_backends",
    "system_requirements",
]
# NOTE(review): several snippet file names do not match their keys
# (batch_processing -> extract_with_ocr.md, ocr_configuration -> hello_world.md,
# table_extraction -> read_content.md) — confirm these mappings are intended.
snippets.basic_extraction = "getting-started/basic_usage.md"
snippets.async_extraction = "getting-started/extract_file.md"
snippets.batch_processing = "getting-started/extract_with_ocr.md"
snippets.ocr_configuration = "getting-started/hello_world.md"
snippets.table_extraction = "getting-started/read_content.md"
# README settings for the TypeScript (Node.js) package; written to the
# kreuzberg-node crate directory via an explicit `output_path`.
[crates.readme.languages.typescript]
template = "language_package.md"
output_path = "crates/kreuzberg-node/README.md"
name = "TypeScript (Node.js)"
package_name = "@kreuzberg/node"
badge_url = "https://img.shields.io/npm/v/@kreuzberg/node?label=TypeScript&color=007ec6"
docs_url = "https://kreuzberg.dev/"
# Folded with line-ending backslashes; the value is a single line.
description = """\
Extract text, tables, images, and metadata from 90+ file formats and 300+ programming languages \
including PDF, Office documents, and images. \
Native NAPI-RS bindings for Node.js with superior performance, async/await support, and TypeScript type definitions.\
"""
package_manager = ["pnpm"]
features.ocr = true
features.async = true
features.plugin_system = true
features.embeddings = true
features.worker_pools = true
features.config_discovery = true
features.code_intelligence = true
ocr_backends = ["tesseract", "paddleocr"]
optional_sections = [
    "native_vs_wasm_comparison",
    "ocr_support",
    "performance",
    "napi_implementation",
]
snippets.basic_extraction = "getting-started/basic_usage.md"
snippets.async_extraction = "getting-started/extract_file_async.md"
snippets.batch_processing = "getting-started/batch_extract_files_sync.md"
snippets.ocr_configuration = "config/config_ocr.md"
snippets.table_extraction = "api/tables.md"
snippets.config_discovery = "config/config_discovery.md"
snippets.worker_pool = "advanced/worker_pool.md"
# README settings for the Go module; written under packages/go/v5 via an
# explicit `output_path`.
[crates.readme.languages.go]
template = "go.md"
output_path = "packages/go/v5/README.md"
name = "Go"
package_name = "github.com/kreuzberg-dev/kreuzberg/v5"
badge_url = "https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6"
docs_url = "https://kreuzberg.dev/"
# Folded with line-ending backslashes; the value is a single line.
description = """\
Extract text, tables, images, and metadata from 90+ file formats and 300+ programming languages \
including PDF, Office documents, and images. \
Go bindings with context-aware async support, idiomatic Go API, and CGO-based native performance.\
"""
package_manager = ["go get"]
features.ocr = true
features.async = true
features.plugin_system = true
features.embeddings = true
features.code_intelligence = true
ocr_backends = ["tesseract", "paddleocr"]
optional_sections = [
    "system_requirements",
    "ffi_build_instructions",
]
# NOTE(review): two snippet file names do not match their keys
# (batch_processing -> extract_with_ocr.md, ocr_configuration -> hello_world.md)
# — confirm these mappings are intended.
snippets.basic_extraction = "getting-started/basic_usage.md"
snippets.async_extraction = "getting-started/extract_file.md"
snippets.batch_processing = "getting-started/extract_with_ocr.md"
snippets.ocr_configuration = "getting-started/hello_world.md"
# README settings for the Java package (Maven coordinates dev.kreuzberg:kreuzberg).
[crates.readme.languages.java]
template = "language_package.md"
name = "Java"
package_name = "dev.kreuzberg:kreuzberg"
badge_url = "https://img.shields.io/maven-central/v/dev.kreuzberg/kreuzberg?label=Java&color=007ec6"
docs_url = "https://kreuzberg.dev/"
# Folded with line-ending backslashes; the value is a single line.
description = """\
Extract text, tables, images, and metadata from 90+ file formats and 300+ programming languages \
including PDF, Office documents, and images. \
Java bindings with type-safe API, Foreign Function & Memory API integration, and native performance.\
"""
package_manager = ["maven", "gradle"]
features.ocr = true
features.async = true
features.plugin_system = true
features.embeddings = true
features.code_intelligence = true
ocr_backends = ["tesseract", "paddleocr"]
optional_sections = [
    "maven_gradle_setup",
    "system_requirements",
]
snippets.basic_extraction = "getting-started/basic_usage.md"
snippets.async_extraction = "api/extract_file_async.md"
snippets.batch_processing = "api/batch_extract_files_sync.md"
snippets.ocr_configuration = "ocr/ocr_extraction.md"
snippets.configuration = "config/config_basic.md"
# README settings for the PHP package: shared language template, Packagist
# name, badge/docs links, feature flags, and named snippet paths.
[crates.readme.languages.php]
template = "language_package.md"
name = "PHP"
package_name = "kreuzberg/kreuzberg"
badge_url = "https://img.shields.io/packagist/v/kreuzberg/kreuzberg?label=PHP&color=007ec6"
docs_url = "https://kreuzberg.dev/"
description = "Extract text, tables, images, and metadata from 90+ file formats and 300+ programming languages including PDF, Office documents, and images. PHP bindings with modern PHP 8.2+ support and type-safe API."
package_manager = ["composer"]
features.ocr = true
# async is disabled here, and no async_extraction snippet is listed below.
features.async = false
features.plugin_system = true
features.embeddings = true
features.code_intelligence = true
ocr_backends = ["tesseract", "paddleocr"]
optional_sections = ["composer_installation", "system_requirements"]
snippets.basic_extraction = "quickstart/basic_extraction_oop.php"
snippets.batch_processing = "extraction/batch_processing.php"
snippets.ocr_configuration = "ocr/basic_ocr.php"
# README settings for the Ruby gem: dedicated ruby.md template, gem name,
# badge/docs links, feature flags, and named snippet paths.
[crates.readme.languages.ruby]
template = "ruby.md"
name = "Ruby"
package_name = "kreuzberg"
badge_url = "https://img.shields.io/gem/v/kreuzberg?label=Ruby&color=007ec6"
docs_url = "https://kreuzberg.dev/"
description = "Extract text, tables, images, and metadata from 90+ file formats and 300+ programming languages including PDF, Office documents, and images. Ruby bindings with idiomatic Ruby API and native performance."
package_manager = ["rubygems", "bundler"]
features.ocr = true
features.async = true
features.plugin_system = true
features.embeddings = true
features.code_intelligence = true
ocr_backends = ["tesseract", "paddleocr"]
optional_sections = ["system_requirements", "development_setup"]
snippets.basic_extraction = "getting-started/basic_usage.md"
snippets.async_extraction = "getting-started/extract_file.md"
# NOTE(review): batch_processing maps to install_verify.md — confirm this
# target is intended.
snippets.batch_processing = "getting-started/install_verify.md"
snippets.ocr_configuration = "getting-started/extract_with_ocr.md"
# README settings for the C# package: shared language template, NuGet name,
# badge/docs links, feature flags, and named snippet paths.
[crates.readme.languages.csharp]
template = "language_package.md"
name = "C#"
package_name = "Kreuzberg"
# "%23" is the URL-encoded "#" in the badge label.
badge_url = "https://img.shields.io/nuget/v/Kreuzberg?label=C%23&color=007ec6"
docs_url = "https://kreuzberg.dev/"
description = "Extract text, tables, images, and metadata from 90+ file formats and 300+ programming languages including PDF, Office documents, and images. .NET bindings with full type safety, async/await support, and .NET 10.0+ compatibility."
package_manager = ["nuget"]
features.ocr = true
features.async = true
features.plugin_system = true
features.embeddings = true
features.code_intelligence = true
ocr_backends = ["tesseract", "paddleocr"]
optional_sections = ["dotnet_installation", "system_requirements"]
snippets.basic_extraction = "getting-started/basic_usage.cs"
snippets.async_extraction = "advanced/async_extraction.cs"
snippets.batch_processing = "advanced/batch_processing.cs"
snippets.ocr_configuration = "ocr/tesseract_backend.cs"
# README settings for the Elixir package: shared language template, Hex name,
# badge/docs links, feature flags, and named snippet paths.
[crates.readme.languages.elixir]
template = "language_package.md"
name = "Elixir"
package_name = "kreuzberg"
badge_url = "https://img.shields.io/hexpm/v/kreuzberg?label=Elixir&color=007ec6"
docs_url = "https://kreuzberg.dev/"
description = "Extract text, tables, images, and metadata from 90+ file formats and 300+ programming languages including PDF, Office documents, and images. Elixir bindings with native BEAM concurrency, OTP integration, and idiomatic Elixir API."
package_manager = ["mix"]
features.ocr = true
features.async = true
features.plugin_system = true
features.embeddings = true
features.code_intelligence = true
ocr_backends = ["tesseract", "paddleocr"]
optional_sections = ["mix_installation", "async_with_tasks", "plugin_system"]
snippets.basic_extraction = "getting-started/basic_usage.exs"
snippets.async_extraction = "getting-started/extract_file.exs"
snippets.batch_processing = "core/batch_extract_files_sync.exs"
snippets.ocr_configuration = "ocr/tesseract_basic.exs"
snippets.plugin_system = "plugins/word_count_processor.exs"
# README settings for the WebAssembly package: shared language template,
# output location, npm name, badge/docs links, feature flags, and named
# snippet paths.
[crates.readme.languages.wasm]
template = "language_package.md"
output_path = "crates/kreuzberg-wasm/README.md"
name = "WebAssembly"
package_name = "@kreuzberg/wasm"
badge_url = "https://img.shields.io/npm/v/@kreuzberg/wasm?label=WASM&color=007ec6"
docs_url = "https://kreuzberg.dev/"
description = "Extract text, tables, images, and metadata from 90+ file formats and 300+ programming languages including PDF, Office documents, and images. WebAssembly bindings for browsers, Deno, and Cloudflare Workers with portable deployment and multi-threading support."
package_manager = ["pnpm"]
features.ocr = true
features.async = true
features.plugin_system = true
# The only language entry in this section with embeddings turned off.
features.embeddings = false
features.code_intelligence = true
ocr_backends = ["tesseract-wasm"]
optional_sections = ["multi_threading", "ocr_support", "performance_comparison"]
# basic_extraction and node_extraction intentionally share one snippet path
# as written; both values are kept verbatim.
snippets.basic_extraction = "getting-started/basic-extract.ts"
snippets.browser_extraction = "getting-started/browser-file-input.ts"
snippets.node_extraction = "getting-started/basic-extract.ts"
snippets.async_extraction = "getting-started/async-extraction.ts"
snippets.cloudflare_workers = "getting-started/runtime-detection.ts"
snippets.batch_processing = "getting-started/batch-processing.ts"
snippets.ocr_configuration = "ocr/enable-ocr.ts"
# README settings for the R package: shared language template, package name,
# badge/docs links, feature flags, and named snippet paths.
[crates.readme.languages.r]
template = "language_package.md"
name = "R"
package_name = "kreuzberg"
badge_url = "https://img.shields.io/badge/R-kreuzberg-007ec6"
docs_url = "https://kreuzberg.dev/"
description = "Extract text, tables, images, and metadata from 90+ file formats and 300+ programming languages including PDF, Office documents, and images. R bindings with native R API, data frame integration, and high-performance document extraction."
package_manager = ["install.packages"]
features.ocr = true
# NOTE(review): async is false, yet an async_extraction snippet is still
# listed below — confirm whether the snippet is used or should be dropped.
features.async = false
features.plugin_system = true
features.embeddings = true
features.code_intelligence = true
ocr_backends = ["tesseract", "paddleocr"]
optional_sections = ["system_requirements"]
snippets.basic_extraction = "getting-started/basic_extraction.md"
snippets.async_extraction = "getting-started/extract_file.md"
# NOTE(review): batch_processing maps to extract_with_ocr.md — confirm this
# target is intended.
snippets.batch_processing = "getting-started/extract_with_ocr.md"
snippets.ocr_configuration = "ocr/ocr_extraction.md"
# README settings for the C/C++ FFI crate: shared language template, output
# location, crate name, badge/docs links, and feature flags.
# NOTE(review): unlike the other language entries in this section, no
# ocr_backends, optional_sections, or snippets are set — confirm that the
# omission is deliberate.
[crates.readme.languages.ffi]
template = "language_package.md"
output_path = "crates/kreuzberg-ffi/README.md"
name = "FFI (C/C++)"
package_name = "kreuzberg-ffi"
# "%2F" and "%2B" are URL-encoded "/" and "+"; "--" is as written in the badge path.
badge_url = "https://img.shields.io/badge/C%2FC%2B%2B-kreuzberg--ffi-007ec6"
docs_url = "https://kreuzberg.dev/"
description = "Extract text, tables, images, and metadata from 90+ file formats and 300+ programming languages including PDF, Office documents, and images. C/C++ FFI bindings providing a stable ABI for native integration, shared library distribution, and cross-language interop."
package_manager = ["cargo"]
features.ocr = true
features.async = true
features.plugin_system = true
features.embeddings = true
features.code_intelligence = true
# README settings for the Kotlin (Android) package: shared language template,
# Maven coordinates, badge/docs links, feature flags, and named snippet paths.
[crates.readme.languages.kotlin_android]
template = "language_package.md"
name = "Kotlin (Android)"
package_name = "dev.kreuzberg:kreuzberg-android"
badge_url = "https://img.shields.io/badge/Android-kreuzberg-3DDC84"
docs_url = "https://kreuzberg.dev/"
description = "Extract text, tables, images, and metadata from 90+ file formats and 300+ programming languages including PDF, Office documents, and images. Android library (AAR) with bundled jniLibs/arm64-v8a and jniLibs/x86_64 — Gradle automatically picks up the native cdylib for emulator and device builds. Server-side Kotlin/JVM consumers can use the Java binding directly via standard Kotlin/Java interop."
package_manager = ["gradle", "maven"]
features.ocr = true
features.async = true
features.plugin_system = true
features.embeddings = true
features.code_intelligence = true
ocr_backends = ["tesseract", "paddleocr"]
optional_sections = ["system_requirements"]
snippets.basic_extraction = "api/extract_file_sync.md"
snippets.async_extraction = "api/extract_file_async.md"
snippets.batch_processing = "api/batch_extract_files_sync.md"
snippets.ocr_configuration = "ocr/ocr_extraction.md"
# README settings for the Swift package: shared language template, package
# name, badge/docs links, feature flags, and named snippet paths.
[crates.readme.languages.swift]
template = "language_package.md"
name = "Swift"
package_name = "Kreuzberg"
badge_url = "https://img.shields.io/badge/Swift-kreuzberg-007ec6"
docs_url = "https://kreuzberg.dev/"
description = "Extract text, tables, images, and metadata from 90+ file formats and 300+ programming languages including PDF, Office documents, and images. Swift bindings via swift-bridge for macOS, iOS, and Linux, with native Swift types and async/await support."
package_manager = ["spm"]
features.ocr = true
features.async = true
features.plugin_system = true
features.embeddings = true
features.code_intelligence = true
ocr_backends = ["tesseract", "paddleocr"]
optional_sections = ["system_requirements"]
snippets.basic_extraction = "api/extract_file_sync.md"
snippets.async_extraction = "api/extract_file_async.md"
snippets.batch_processing = "api/batch_extract_files_sync.md"
snippets.ocr_configuration = "ocr/ocr_extraction.md"
# README settings for the Dart / Flutter package: shared language template,
# package name, badge/docs links, feature flags, and named snippet paths.
[crates.readme.languages.dart]
template = "language_package.md"
name = "Dart / Flutter"
package_name = "kreuzberg"
badge_url = "https://img.shields.io/badge/Dart-kreuzberg-007ec6"
docs_url = "https://kreuzberg.dev/"
description = "Extract text, tables, images, and metadata from 90+ file formats and 300+ programming languages including PDF, Office documents, and images. Dart bindings via flutter_rust_bridge for both Flutter apps and pure-Dart server contexts, with isolate-safe Future/Stream APIs."
package_manager = ["pub"]
features.ocr = true
features.async = true
features.plugin_system = true
features.embeddings = true
features.code_intelligence = true
ocr_backends = ["tesseract", "paddleocr"]
optional_sections = ["system_requirements"]
snippets.basic_extraction = "api/extract_file_sync.md"
snippets.async_extraction = "api/extract_file_async.md"
snippets.batch_processing = "api/batch_extract_files_sync.md"
snippets.ocr_configuration = "ocr/ocr_extraction.md"
# README settings for the Zig package: shared language template, package
# name, badge/docs links, feature flags, and named snippet paths.
[crates.readme.languages.zig]
template = "language_package.md"
name = "Zig"
package_name = "kreuzberg"
badge_url = "https://img.shields.io/badge/Zig-kreuzberg-007ec6"
docs_url = "https://kreuzberg.dev/"
description = "Extract text, tables, images, and metadata from 90+ file formats and 300+ programming languages including PDF, Office documents, and images. Zig bindings consuming the C FFI surface via @cImport, idiomatic error sets, optional types, and slice-based memory management."
package_manager = ["zig"]
features.ocr = true
# NOTE(review): async is false, yet an async_extraction snippet is still
# listed below — confirm whether the snippet is used or should be dropped.
features.async = false
features.plugin_system = true
features.embeddings = true
features.code_intelligence = true
ocr_backends = ["tesseract", "paddleocr"]
optional_sections = ["system_requirements"]
snippets.basic_extraction = "api/extract_file_sync.md"
snippets.async_extraction = "api/extract_file_async.md"
snippets.batch_processing = "api/batch_extract_files_sync.md"
snippets.ocr_configuration = "ocr/ocr_extraction.md"
# Trait bridge entry for `OcrBackend`: names its super-trait, the path of the
# registry getter, and the register / unregister / clear function names.
[[crates.trait_bridges]]
trait_name = "OcrBackend"
super_trait = "kreuzberg::plugins::Plugin"
registry_getter = "kreuzberg::plugins::registry::get_ocr_backend_registry"
register_fn = "register_ocr_backend"
unregister_fn = "unregister_ocr_backend"
clear_fn = "clear_ocr_backends"
# Trait bridge entry for `PostProcessor`: names its super-trait, the path of
# the registry getter, and the register / unregister / clear function names.
[[crates.trait_bridges]]
trait_name = "PostProcessor"
super_trait = "kreuzberg::plugins::Plugin"
registry_getter = "kreuzberg::plugins::registry::get_post_processor_registry"
register_fn = "register_post_processor"
unregister_fn = "unregister_post_processor"
clear_fn = "clear_post_processors"
# Trait bridge entry for `Validator`: names its super-trait, the path of the
# registry getter, and the register / unregister / clear function names.
[[crates.trait_bridges]]
trait_name = "Validator"
super_trait = "kreuzberg::plugins::Plugin"
registry_getter = "kreuzberg::plugins::registry::get_validator_registry"
register_fn = "register_validator"
unregister_fn = "unregister_validator"
clear_fn = "clear_validators"
# Trait bridge entry for `EmbeddingBackend`: names its super-trait, the path
# of the registry getter, and the register / unregister / clear function names.
[[crates.trait_bridges]]
trait_name = "EmbeddingBackend"
super_trait = "kreuzberg::plugins::Plugin"
registry_getter = "kreuzberg::plugins::registry::get_embedding_backend_registry"
register_fn = "register_embedding_backend"
unregister_fn = "unregister_embedding_backend"
clear_fn = "clear_embedding_backends"
# Trait bridge entry for `DocumentExtractor`: names its super-trait, the path
# of the registry getter, and the register / unregister / clear function names.
# It is the only bridge entry in this part of the file that sets
# `ffi_skip_methods`.
[[crates.trait_bridges]]
trait_name = "DocumentExtractor"
super_trait = "kreuzberg::plugins::Plugin"
registry_getter = "kreuzberg::plugins::registry::get_document_extractor_registry"
register_fn = "register_document_extractor"
unregister_fn = "unregister_document_extractor"
clear_fn = "clear_document_extractors"
# `as_sync_extractor` returns `Option<&dyn SyncExtractor>` — a trait-object reference that
# cannot traverse the C FFI boundary. Skip it in the FFI vtable and let the default impl
# (returns None) take over.
ffi_skip_methods = ["as_sync_extractor"]
# Trait bridge entry for `Renderer`: names its super-trait, the path of the
# registry getter, and the register / unregister / clear function names.
[[crates.trait_bridges]]
trait_name = "Renderer"
super_trait = "kreuzberg::plugins::Plugin"
registry_getter = "kreuzberg::plugins::registry::get_renderer_registry"
register_fn = "register_renderer"
unregister_fn = "unregister_renderer"
clear_fn = "clear_renderers"