# Build configuration: the package is built through the maturin backend.
[build-system]
build-backend = "maturin"
requires = [ "maturin>=1.13.1,<2" ]

# Dependency groups. `dev` is the development toolchain; every `bench-*`
# group pulls in one third-party extraction library; `doc` holds the
# documentation tooling.
[dependency-groups]
dev = [
  "covdefaults>=2.3.0",
  "httpx>=0.28.1",
  "maturin>=1.13.1",
  "mypy>=2.1.0",
  "prek>=0.3.13",
  "pyproject-fmt>=2.21.2",
  "pytest>=9.0.3",
  "pytest-asyncio>=1.3.0",
  "pytest-cov>=7.1.0",
  "pytest-mock>=3.15.1",
  "pytest-rerunfailures>=16.2",
  "pytest-timeout>=2.4.0",
  "ruff>=0.15.13",
  "types-pillow>=10.2",
  "types-psutil>=7.2.2.20260508",
  "validate-pyproject>=0.24.1",
]
bench-docling = [ "docling>=2.64.1" ]
bench-markitdown = [ "markitdown[all]>=0.1.4" ]
bench-mineru = [ "mineru[pipeline]>=2.6.7", "onnxruntime" ]
bench-pdfminer = [ "pdfminer-six>=20231228" ]
bench-pdfplumber = [ "pdfplumber>=0.11.4" ]
bench-pdftotext = [ "pdftotext>=2.2.2" ]
bench-playa-pdf = [ "playa-pdf>=0.6.0" ]
bench-pymupdf4llm = [ "pillow>=10.0.0", "pymupdf-layout>=0.0.1", "pymupdf4llm>=0.0.17" ]
bench-pypdf = [ "pypdf>=4.0.0" ]
bench-unstructured = [ "unstructured[all-docs]>=0.18.21" ]
doc = [ "mkdocstrings>=1.0.4", "mkdocstrings-python>=2.0.3", "zensical" ]

[tool.uv]
workspace.members = [ "packages/python" ]
# All bench-* groups are declared as one conflict set, i.e. they are treated
# as mutually exclusive and are not resolved together.
conflicts = [
  [
    { package = "kreuzberg", group = "bench-docling" },
    { package = "kreuzberg", group = "bench-markitdown" },
    { package = "kreuzberg", group = "bench-mineru" },
    { package = "kreuzberg", group = "bench-pdfminer" },
    { package = "kreuzberg", group = "bench-pdfplumber" },
    { package = "kreuzberg", group = "bench-pdftotext" },
    { package = "kreuzberg", group = "bench-playa-pdf" },
    { package = "kreuzberg", group = "bench-pymupdf4llm" },
    { package = "kreuzberg", group = "bench-pypdf" },
    { package = "kreuzberg", group = "bench-unstructured" },
  ],
]
# Files whose changes invalidate the cached build of the project; the Rust
# sources and Cargo manifests are listed alongside pyproject.toml.
cache-keys = [
  { file = "pyproject.toml" },
  { file = "Cargo.toml" },
  { file = "Cargo.lock" },
  { file = "crates/**/*.rs" },
  { file = "crates/**/Cargo.toml" },
]

[tool.ruff]
target-version = "py310"
line-length = 120
src = [ "packages/python/*" ]
extend-exclude = [ "benchmarks", "crates", "docs/snippets", "scripts", "test_documents" ]
format.docstring-code-line-length = 120
format.docstring-code-format = true
# Every rule is enabled; the lists below opt out globally or per path.
lint.select = [ "ALL" ]
lint.ignore = [
  "ANN401",
  "ASYNC109",
  "ASYNC110",
  "BLE001",
  "COM812",
  "D100",
  "D104",
  "D107",
  "D205",
  "E501",
  "EM",
  "FBT",
  "FIX", # We allow todo and fixme comments
  "ISC001",
  "PD011",
  "PGH003",
  "PLR2004",
  "PLW0603",
  "S104",
  "S110",
  "S603",
  "TD", # We allow todo and fixme comments
  "TRY",
]
lint.per-file-ignores."**/*.pyi" = [ "PYI021" ]
lint.per-file-ignores."**/benchmarks/**/*.*" = [
  "BLE001",
  "C901",
  "D101",
  "D102",
  "D103",
  "D105",
  "PERF203",
  "PLC0415",
  "PLR0912",
  "PLR0913",
  "PLR0915",
  "S101",
  "S110",
  "SLF001",
]
lint.per-file-ignores."**/tests/**/*.*" = [
  "A005",
  "ANN",
  "ARG001",
  "ARG002",
  "ASYNC230",
  "BLE001",
  "D",
  "N806",
  "N815",
  "PD",
  "PGH003",
  "PLC",
  "PLR0915",
  "PLR2004",
  "PT006",
  "PT007",
  "PT013",
  "PT017",
  "PT031",
  "RUF012",
  "S",
  "SIM117",
  "SLF001",
]
lint.per-file-ignores."e2e/python/**/*.py" = [ "A001", "B017", "PT011" ]
lint.per-file-ignores."e2e/python/conftest.py" = [ "ANN001", "ANN201", "ARG001" ]
lint.per-file-ignores."packages/python/build.py" = [ "INP001" ]
lint.per-file-ignores."packages/python/tests/e2e/*_test.py" = [ "T201" ]
lint.per-file-ignores."scripts/**/*.py" = [
  "C901",
  "D101",
  "D102",
  "D103",
  "PERF401",
  "PLC0415",
  "PLR0912",
  "PLR0915",
  "PLW2901",
  "SIM105",
]
lint.per-file-ignores."tools/benchmark-harness/scripts/*.py" = [
  "ANN001",
  "ANN201",
  "ANN202",
  "ARG001",
  "BLE001",
  "C901",
  "D103",
  "E741",
  "EXE001",
  "EXE002",
  "F841",
  "INP001",
  "N818",
  "PERF203",
  "PERF401",
  "PIE810",
  "PLC0206",
  "PLC0415",
  "PLR0911",
  "PLR0912",
  "PLR0915",
  "PLR1714",
  "PLW1510",
  "PLW2901",
  "PTH110",
  "PTH112",
  "PTH118",
  "PTH119",
  "PTH122",
  "PTH123",
  "RET504",
  "S314",
  "S607",
  "SIM108",
  "SIM115",
  "T201",
]
lint.per-file-ignores."tools/perf/*.py" = [ "D103", "T201" ]
lint.isort.known-first-party = [ "kreuzberg" ]
lint.mccabe.max-complexity = 15
lint.pydocstyle.convention = "google"
lint.pylint.max-args = 10
lint.pylint.max-branches = 15
lint.pylint.max-returns = 10
[tool.pyproject-fmt]
keep_full_version = true
max_supported_python = "3.14"

[tool.mypy]
packages = [ "kreuzberg" ]
exclude = [ "examples", "benchmarks", "crates", "e2e/smoke", "test_apps" ]
python_version = "3.10"
disable_error_code = [ "import-untyped", "untyped-decorator" ]
implicit_reexport = false
show_error_codes = true
strict = true
namespace_packages = true

# Modules that are neither followed nor type-checked: their imports are
# skipped and any errors attributed to them are ignored.
[[tool.mypy.overrides]]
module = [
  "keybert",
  "easyocr",
  "torch",
  "kreuzberg._internal_bindings",
  "docling.*",
  "markitdown",
  "unstructured.*",
  "httpx",
]
ignore_missing_imports = true
ignore_errors = true
follow_imports = "skip"

[[tool.mypy.overrides]]
module = [ "numpy", "numpy.*" ]
ignore_missing_imports = true

[[tool.mypy.overrides]]
module = [ "tests.test_apps.python.*" ]
ignore_errors = true
follow_imports = "skip"

[[tool.mypy.overrides]]
module = [ "examples.python.*" ]
ignore_errors = true
follow_imports = "skip"

[tool.pytest]
ini_options.timeout = 300 # seconds
ini_options.testpaths = [ "packages/python/tests" ]
ini_options.norecursedirs = [ "dist", "build", "*.egg" ]
ini_options.markers = [
  "slow: marks tests as slow (deselect with '-m \"not slow\"')",
  "integration: marks tests as integration tests (requires running services)",
]
ini_options.asyncio_mode = "auto"
ini_options.asyncio_default_fixture_loop_scope = "function"
# Format is action:message:category. pytest compiles the message field of
# configured filters as a regular expression, so literal parentheses must be
# escaped; unescaped they form a regex group and the filter never matches a
# warning text that contains "(" or ")".
ini_options.filterwarnings = [
  "ignore:Exception ignored in:pytest.PytestUnraisableExceptionWarning",
  "ignore:pkg_resources is deprecated as an API:DeprecationWarning",
  "ignore:ast.Num is deprecated and will be removed in Python 3.14:DeprecationWarning",
  "ignore:datetime.datetime.utcfromtimestamp\\(\\) is deprecated:DeprecationWarning",
  "ignore:Deprecated call to `pkg_resources.declare_namespace\\('google'\\)`:DeprecationWarning",
]

[tool.coverage]
run.branch = true
run.omit = [ "benchmarks/*", "packages/python/tests/*", "scripts/*" ]
run.plugins = [ "covdefaults" ]
run.source = [ "packages/python/kreuzberg" ]
# Regular expressions; a source line matching any of them is excluded from
# the coverage report.
report.exclude_lines = [
  "@(abc\\.)?abstractmethod",
  "class .*\\bProtocol\\):",
  "def __repr__",
  "except ImportError:",
  "if __name__ == .__main__.:",
  "if sys.version_info",
  "if TYPE_CHECKING:",
  "pragma: no cover",
  "raise NotImplementedError",
]
report.fail_under = 90 # percent
report.omit = [ "packages/python/kreuzberg/ocr/easyocr.py" ]