# Packaging, lint and type-check configuration for the `kreuzberg` Python
# bindings. The extension module is built with maturin from the Rust crate
# referenced by `tool.maturin.manifest-path`.

[build-system]
build-backend = "maturin"
requires = [ "maturin>=1,<2" ]

[project]
name = "kreuzberg"
version = "5.0.0rc3"
description = "High-performance document intelligence library"
keywords = [ "document", "extraction", "ocr", "pdf", "text" ]
license = "Elastic-2.0"
license-files = [ "LICENSE" ]
# NOTE(review): the name ends with a trailing space — possibly the remains of
# a stripped "<email>" suffix. Confirm the intended value (or add an `email`
# key) before the next release.
authors = [ { name = "Na'aman Hirschfeld " } ]
requires-python = ">=3.10"
classifiers = [
  "Programming Language :: Python :: 3 :: Only",
  "Programming Language :: Python :: 3.10",
  "Programming Language :: Python :: 3.11",
  "Programming Language :: Python :: 3.12",
  "Programming Language :: Python :: 3.13",
  "Programming Language :: Python :: 3.14",
]
# Project URLs must live under the `urls` table; a bare `homepage` key is not
# part of the `[project]` metadata schema.
urls.repository = "https://github.com/kreuzberg-dev/kreuzberg"
urls.homepage = "https://kreuzberg.dev"

[dependency-groups]
dev = [ "mypy>=1.19", "ruff>=0.14.8" ]

[tool.maturin]
module-name = "kreuzberg._kreuzberg"
manifest-path = "../../crates/kreuzberg-py/Cargo.toml"
# abi3-py310 produces a single wheel per platform that loads on Python 3.10+,
# avoiding a per-Python-version build matrix.
features = [ "pyo3/extension-module", "pyo3/abi3-py310" ]
python-packages = [ "kreuzberg" ]
# Bundle the core Rust crate so `pip install` can build from sdist on
# platforms without a precompiled wheel (e.g. Alpine/musl). Without this
# the workspace [patch.crates-io] (when present) points at a path that is
# missing from the tarball and the source build fails.
include = [
  { path = "../../crates/kreuzberg/**/*", format = "sdist" },
]

[tool.ruff]
target-version = "py310"
line-length = 120
format.docstring-code-line-length = 120
format.docstring-code-format = true
lint.select = [ "ALL" ]
lint.ignore = [
  "ANN401",
  "ASYNC109",
  "ASYNC110",
  "BLE001",
  "COM812",
  "D100",
  "D104",
  "D107",
  "D205",
  "E501",
  "EM",
  "FBT",
  "FIX",
  "ISC001",
  "PD011",
  "PGH003",
  "PLR2004",
  "PLW0603",
  "S104",
  "S110",
  "S603",
  "TD",
  "TRY",
]
lint.per-file-ignores."kreuzberg/__init__.py" = [ "I001" ]
# The alef Python codegen still emits cosmetic warnings on the wrapper
# modules: api.py keeps the legacy `from typing import AsyncIterator` and a
# single-line import block, options.py carries `# noqa: TC001` / `F401`
# markers that turn out unused on every regen, and __init__.py star-imports
# re-sort with a different convention. Silence these specific rules on the
# wrappers until the codegen is updated to emit ruff-clean output.
lint.per-file-ignores."kreuzberg/api.py" = [ "F401", "I001", "UP035" ]
lint.per-file-ignores."kreuzberg/options.py" = [ "F401", "RUF100" ]
lint.per-file-ignores."tests/**" = [ "ANN", "D103", "PLR2004", "S101" ]
lint.mccabe.max-complexity = 15
lint.pydocstyle.convention = "google"
lint.pylint.max-args = 10
lint.pylint.max-branches = 15
lint.pylint.max-returns = 10

[tool.mypy]
python_version = "3.10"
strict = true
show_error_codes = true
implicit_reexport = false
namespace_packages = true
overrides = [
  # The alef-emitted `api.py` wrapper has a structural mismatch between its
  # `options.*` dataclass signatures and the `_internal_bindings.*` pyclass
  # types pyo3 accepts/returns at runtime. pyo3 reconciles them dynamically via
  # FromPyObject — the Python e2e suite exercises the runtime path — but mypy
  # sees only the static-type discrepancy. Disable the four error codes the
  # discrepancy raises until the codegen emits matching `_to_rust_*` calls and
  # casts the return values.
  { module = "kreuzberg.api", disable_error_code = [ "call-arg", "arg-type", "return-value", "attr-defined" ] },
]