Nomad changes
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s

This commit is contained in:
Henrik Jess Nielsen
2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions

275
pyproject.toml Normal file
View File

@@ -0,0 +1,275 @@
# Build backend: maturin, any 1.x release from 1.13.1 upward (2.x is excluded).
[build-system]
build-backend = "maturin"
requires = [ "maturin>=1.13.1,<2" ]
# Dependency groups: `dev` tooling, one `bench-*` group per third-party
# extraction library, and `doc` for documentation tooling.
[dependency-groups]
# NOTE(review): maturin is only lower-bounded here, while [build-system]
# also caps it below 2 — confirm the difference is intentional.
dev = [
"covdefaults>=2.3.0",
"httpx>=0.28.1",
"maturin>=1.13.1",
"mypy>=2.1.0",
"prek>=0.3.13",
"pyproject-fmt>=2.21.2",
"pytest>=9.0.3",
"pytest-asyncio>=1.3.0",
"pytest-cov>=7.1.0",
"pytest-mock>=3.15.1",
"pytest-rerunfailures>=16.2",
"pytest-timeout>=2.4.0",
"ruff>=0.15.13",
"types-pillow>=10.2",
"types-psutil>=7.2.2.20260508",
"validate-pyproject>=0.24.1",
]
# One group per benchmarked library. All ten bench-* groups are listed as
# mutually conflicting in [tool.uv] `conflicts`.
bench-docling = [ "docling>=2.64.1" ]
bench-markitdown = [ "markitdown[all]>=0.1.4" ]
# NOTE(review): "onnxruntime" carries no version bound, unlike every other
# bench requirement — confirm this is intentional.
bench-mineru = [ "mineru[pipeline]>=2.6.7", "onnxruntime" ]
bench-pdfminer = [ "pdfminer-six>=20231228" ]
bench-pdfplumber = [ "pdfplumber>=0.11.4" ]
bench-pdftotext = [ "pdftotext>=2.2.2" ]
bench-playa-pdf = [ "playa-pdf>=0.6.0" ]
bench-pymupdf4llm = [ "pillow>=10.0.0", "pymupdf-layout>=0.0.1", "pymupdf4llm>=0.0.17" ]
bench-pypdf = [ "pypdf>=4.0.0" ]
bench-unstructured = [ "unstructured[all-docs]>=0.18.21" ]
# NOTE(review): "zensical" is also unbounded — confirm.
doc = [ "mkdocstrings>=1.0.4", "mkdocstrings-python>=2.0.3", "zensical" ]
# uv settings for the workspace root.
[tool.uv]
# The only workspace member is the Python package directory.
workspace.members = [ "packages/python" ]
# A single conflict set: the bench-* dependency groups of the `kreuzberg`
# package are declared mutually incompatible with one another, so they are
# not expected to be installed into the same environment.
conflicts = [
[
{ package = "kreuzberg", group = "bench-docling" },
{ package = "kreuzberg", group = "bench-markitdown" },
{ package = "kreuzberg", group = "bench-mineru" },
{ package = "kreuzberg", group = "bench-pdfminer" },
{ package = "kreuzberg", group = "bench-pdfplumber" },
{ package = "kreuzberg", group = "bench-pdftotext" },
{ package = "kreuzberg", group = "bench-playa-pdf" },
{ package = "kreuzberg", group = "bench-pymupdf4llm" },
{ package = "kreuzberg", group = "bench-pypdf" },
{ package = "kreuzberg", group = "bench-unstructured" },
],
]
# Inputs that key uv's build cache: this file plus the Rust manifests,
# lockfile, and sources under crates/.
cache-keys = [
{ file = "pyproject.toml" },
{ file = "Cargo.toml" },
{ file = "Cargo.lock" },
{ file = "crates/**/*.rs" },
{ file = "crates/**/Cargo.toml" },
]
# Ruff core settings: Python 3.10 target, 120-column lines for both code and
# code examples embedded in docstrings.
[tool.ruff]
target-version = "py310"
line-length = 120
src = [ "packages/python/*" ]
# NOTE(review): "benchmarks" and "scripts" are excluded here, yet
# lint.per-file-ignores in this table also carries entries for benchmarks/ and
# scripts/ paths. Presumably those entries only take effect when files are
# passed to ruff explicitly — confirm they are still needed.
extend-exclude = [ "benchmarks", "crates", "docs/snippets", "scripts", "test_documents" ]
format.docstring-code-line-length = 120
format.docstring-code-format = true
# Opt-out model: every rule is selected, then the codes and prefixes listed in
# lint.ignore are switched off project-wide.
lint.select = [ "ALL" ]
lint.ignore = [
"ANN401",
"ASYNC109",
"ASYNC110",
"BLE001",
"COM812",
"D100",
"D104",
"D107",
"D205",
"E501",
"EM",
"FBT",
"FIX", # We allow todo and fixme comments
"ISC001",
"PD011",
"PGH003",
"PLR2004",
"PLW0603",
"S104",
"S110",
"S603",
"TD", # We allow todo and fixme comments
"TRY",
]
# Path-specific suppressions, applied in addition to lint.ignore. Each quoted
# key is a glob pattern; the value lists the rule codes/prefixes turned off
# for matching files.

# Type stubs.
lint.per-file-ignores."**/*.pyi" = [ "PYI021" ]
# Benchmark code: complexity, docstring, and assert rules are relaxed.
lint.per-file-ignores."**/benchmarks/**/*.*" = [
"BLE001",
"C901",
"D101",
"D102",
"D103",
"D105",
"PERF203",
"PLC0415",
"PLR0912",
"PLR0913",
"PLR0915",
"S101",
"S110",
"SLF001",
]
# Test code: whole rule families (ANN, D, PD, PLC, S) are disabled.
lint.per-file-ignores."**/tests/**/*.*" = [
"A005",
"ANN",
"ARG001",
"ARG002",
"ASYNC230",
"BLE001",
"D",
"N806",
"N815",
"PD",
"PGH003",
"PLC",
"PLR0915",
"PLR2004",
"PT006",
"PT007",
"PT013",
"PT017",
"PT031",
"RUF012",
"S",
"SIM117",
"SLF001",
]
# End-to-end suites and individual build/test helpers.
lint.per-file-ignores."e2e/python/**/*.py" = [ "A001", "B017", "PT011" ]
lint.per-file-ignores."e2e/python/conftest.py" = [ "ANN001", "ANN201", "ARG001" ]
lint.per-file-ignores."packages/python/build.py" = [ "INP001" ]
lint.per-file-ignores."packages/python/tests/e2e/*_test.py" = [ "T201" ]
# Repository scripts.
lint.per-file-ignores."scripts/**/*.py" = [
"C901",
"D101",
"D102",
"D103",
"PERF401",
"PLC0415",
"PLR0912",
"PLR0915",
"PLW2901",
"SIM105",
]
# Benchmark-harness scripts: the broadest exemption list in this file.
lint.per-file-ignores."tools/benchmark-harness/scripts/*.py" = [
"ANN001",
"ANN201",
"ANN202",
"ARG001",
"BLE001",
"C901",
"D103",
"E741",
"EXE001",
"EXE002",
"F841",
"INP001",
"N818",
"PERF203",
"PERF401",
"PIE810",
"PLC0206",
"PLC0415",
"PLR0911",
"PLR0912",
"PLR0915",
"PLR1714",
"PLW1510",
"PLW2901",
"PTH110",
"PTH112",
"PTH118",
"PTH119",
"PTH122",
"PTH123",
"RET504",
"S314",
"S607",
"SIM108",
"SIM115",
"T201",
]
# Performance tooling scripts.
lint.per-file-ignores."tools/perf/*.py" = [ "D103", "T201" ]
# Per-plugin lint settings: `kreuzberg` is sorted as first-party by isort,
# docstrings follow the Google convention, and the complexity/size limits
# below replace ruff's built-in thresholds.
lint.isort.known-first-party = [ "kreuzberg" ]
lint.mccabe.max-complexity = 15
lint.pydocstyle.convention = "google"
lint.pylint.max-args = 10
lint.pylint.max-branches = 15
lint.pylint.max-returns = 10
# pyproject-fmt is the formatter for this file (it is also a `dev` dependency).
# Presumably keep_full_version stops it from shortening version specifiers
# such as "10.0.0" — confirm against the pyproject-fmt documentation.
[tool.pyproject-fmt]
keep_full_version = true
max_supported_python = "3.14"
# mypy: strict checking of the `kreuzberg` package against Python 3.10.
[tool.mypy]
packages = [ "kreuzberg" ]
exclude = [ "examples", "benchmarks", "crates", "e2e/smoke", "test_apps" ]
python_version = "3.10"
# These two error codes stay disabled globally even though `strict` is on.
disable_error_code = [ "import-untyped", "untyped-decorator" ]
implicit_reexport = false
show_error_codes = true
strict = true
namespace_packages = true
# Per-module overrides of the strict [tool.mypy] settings.

# Third-party packages plus the `kreuzberg._internal_bindings` module: missing
# imports are tolerated, all errors are suppressed, and imports are not followed.
[[tool.mypy.overrides]]
module = [
"keybert",
"easyocr",
"torch",
"kreuzberg._internal_bindings",
"docling.*",
"markitdown",
"unstructured.*",
"httpx",
]
ignore_missing_imports = true
ignore_errors = true
follow_imports = "skip"
# numpy: only missing-import errors are tolerated; other errors still report.
[[tool.mypy.overrides]]
module = [ "numpy", "numpy.*" ]
ignore_missing_imports = true
# Test apps and example code: errors suppressed and imports not followed.
[[tool.mypy.overrides]]
module = [ "tests.test_apps.python.*" ]
ignore_errors = true
follow_imports = "skip"
[[tool.mypy.overrides]]
module = [ "examples.python.*" ]
ignore_errors = true
follow_imports = "skip"
# pytest configuration. The dotted `ini_options.*` keys below are equivalent
# to a [tool.pytest.ini_options] table.
[tool.pytest]
# Per-test timeout in seconds, enforced by pytest-timeout (a `dev` dependency).
ini_options.timeout = 300
ini_options.testpaths = [ "packages/python/tests" ]
ini_options.norecursedirs = [ "dist", "build", "*.egg" ]
ini_options.markers = [
"slow: marks tests as slow (deselect with '-m \"not slow\"')",
"integration: marks tests as integration tests (requires running services)",
]
# pytest-asyncio: async tests are collected without an explicit marker and
# async fixtures get a fresh event loop per test function.
ini_options.asyncio_mode = "auto"
ini_options.asyncio_default_fixture_loop_scope = "function"
# Each entry is `action:message:category`. The message field is a regular
# expression matched against the start of the warning text, so literal
# parentheses must be escaped; an unescaped `()` is an empty regex group and
# the filter would never match the real message.
ini_options.filterwarnings = [
"ignore:Exception ignored in:pytest.PytestUnraisableExceptionWarning",
"ignore:pkg_resources is deprecated as an API:DeprecationWarning",
"ignore:ast.Num is deprecated and will be removed in Python 3.14:DeprecationWarning",
"ignore:datetime.datetime.utcfromtimestamp\\(\\) is deprecated:DeprecationWarning",
"ignore:Deprecated call to `pkg_resources.declare_namespace\\('google'\\)`:DeprecationWarning",
]
# coverage.py configuration: branch coverage of the Python package, with the
# covdefaults plugin loaded.
[tool.coverage]
run.branch = true
run.omit = [ "benchmarks/*", "packages/python/tests/*", "scripts/*" ]
run.plugins = [ "covdefaults" ]
run.source = [ "packages/python/kreuzberg" ]
# Regular expressions; a line matching any of them is left out of the report.
# In the `__main__` pattern the two `.` wildcards stand in for the quote
# characters, so either quoting style matches.
report.exclude_lines = [
"@(abc\\.)?abstractmethod",
"class .*\\bProtocol\\):",
"def __repr__",
"except ImportError:",
"if __name__ == .__main__.:",
"if sys.version_info",
"if TYPE_CHECKING:",
"pragma: no cover",
"raise NotImplementedError",
]
# The run fails when total coverage is below 90 percent.
report.fail_under = 90
# The EasyOCR backend module is left out of the report entirely.
report.omit = [ "packages/python/kreuzberg/ocr/easyocr.py" ]