This commit is contained in:
1
e2e/python/__init__.py
generated
Normal file
1
e2e/python/__init__.py
generated
Normal file
@@ -0,0 +1 @@
|
||||
|
||||
18
e2e/python/conftest.py
generated
Normal file
18
e2e/python/conftest.py
generated
Normal file
@@ -0,0 +1,18 @@
|
||||
# This file is auto-generated by alef — DO NOT EDIT.
|
||||
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
# To regenerate: alef generate
|
||||
# To verify freshness: alef verify --exit-code
|
||||
# Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
"""Pytest configuration for e2e tests."""
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
# Ensure the package is importable.
|
||||
# The kreuzberg package is expected to be installed in the current environment.
|
||||
|
||||
# The generated tests open fixtures through relative paths such as
# "pdf/fake_memo.pdf". Those paths only resolve when the working directory is
# the "test_documents" folder located three directory levels above this file,
# so switch there at import time. When that folder is absent the working
# directory is left untouched.
_TEST_DOCUMENTS = Path(__file__).parent.parent.parent.joinpath("test_documents")
if _TEST_DOCUMENTS.is_dir():
    os.chdir(_TEST_DOCUMENTS)
|
||||
50
e2e/python/pyproject.toml
generated
Normal file
50
e2e/python/pyproject.toml
generated
Normal file
@@ -0,0 +1,50 @@
|
||||
[build-system]
|
||||
build-backend = "setuptools.build_meta"
|
||||
requires = [
|
||||
"setuptools>=68",
|
||||
"wheel",
|
||||
]
|
||||
|
||||
[project]
|
||||
name = "kreuzberg-e2e"
|
||||
version = "0.0.0"
|
||||
description = "End-to-end tests"
|
||||
requires-python = ">=3.10"
|
||||
classifiers = [
|
||||
"Programming Language :: Python :: 3 :: Only",
|
||||
"Programming Language :: Python :: 3.10",
|
||||
"Programming Language :: Python :: 3.11",
|
||||
"Programming Language :: Python :: 3.12",
|
||||
"Programming Language :: Python :: 3.13",
|
||||
"Programming Language :: Python :: 3.14",
|
||||
]
|
||||
dependencies = [
|
||||
"kreuzberg",
|
||||
"pytest>=7.4",
|
||||
"pytest-asyncio>=0.23",
|
||||
"pytest-timeout>=2.1",
|
||||
]
|
||||
|
||||
[tool.setuptools]
|
||||
packages = []
|
||||
|
||||
[tool.uv]
|
||||
sources.kreuzberg = { path = "../../packages/python" }
|
||||
|
||||
[tool.ruff]
|
||||
line-length = 120
|
||||
lint.ignore = ["PLR2004"]
|
||||
lint.per-file-ignores."tests/**" = [
|
||||
"B017",
|
||||
"PT011",
|
||||
"S101",
|
||||
"S108",
|
||||
]
|
||||
|
||||
[tool.pytest]
|
||||
ini_options.asyncio_mode = "auto"
|
||||
ini_options.testpaths = ["tests"]
|
||||
ini_options.python_files = "test_*.py"
|
||||
ini_options.python_functions = "test_*"
|
||||
ini_options.addopts = "-v --strict-markers --tb=short"
|
||||
ini_options.timeout = 300
|
||||
1
e2e/python/tests/__init__.py
generated
Normal file
1
e2e/python/tests/__init__.py
generated
Normal file
@@ -0,0 +1 @@
|
||||
|
||||
64
e2e/python/tests/test_async.py
generated
Normal file
64
e2e/python/tests/test_async.py
generated
Normal file
@@ -0,0 +1,64 @@
|
||||
# This file is auto-generated by alef — DO NOT EDIT.
|
||||
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
# To regenerate: alef generate
|
||||
# To verify freshness: alef verify --exit-code
|
||||
# Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
"""E2e tests for category: async."""
|
||||
|
||||
from pathlib import Path
|
||||
import pytest # noqa: F401
|
||||
from kreuzberg import extract_bytes, ExtractionConfig
|
||||
|
||||
|
||||
def _alef_e2e_text(value: object) -> str:
    """Return ``str(value)``, mapping ``None`` to the empty string."""
    if value is None:
        return ""
    return str(value)
|
||||
|
||||
|
||||
def _alef_e2e_item_texts(item: object) -> tuple[str, ...]:
    """Collect the text forms of *item* as an eight-element tuple.

    The tuple holds, in order: the text of *item* itself, the text of its
    ``kind``, ``name``, ``source``, ``alias``, ``text`` and ``signature``
    attributes (a missing or ``None`` attribute yields ``""``), and the
    space-joined string forms of its ``items`` attribute when that attribute
    is a ``list`` (otherwise ``""``).
    """
    children = getattr(item, "items", None)
    if isinstance(children, list):
        joined_children = " ".join(str(child) for child in children)
    else:
        joined_children = ""

    own_text = _alef_e2e_text(item)
    attribute_texts = tuple(
        _alef_e2e_text(getattr(item, attr, None))
        for attr in ("kind", "name", "source", "alias", "text", "signature")
    )
    return (own_text, *attribute_texts, joined_children)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_async_extract_bytes() -> None:
    """Extract a PDF from in-memory bytes with async ``extract_bytes`` and check MIME type and length."""
    # Relative fixture path: resolved against the current working directory.
    pdf_bytes = Path("pdf/fake_memo.pdf").read_bytes()

    extraction = await extract_bytes(pdf_bytes, "application/pdf", None)

    assert extraction.mime_type.strip() == "application/pdf"  # noqa: S101
    assert len(extraction.content) >= 50  # noqa: S101
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_async_extract_bytes_empty_mime() -> None:
    """Check that async ``extract_bytes`` raises when given an empty MIME type.

    The fixture is read and the config is built before entering
    ``pytest.raises`` so that a setup failure (for example a missing
    ``text/plain.txt``) fails the test instead of being mistaken for the
    expected extraction error.
    """
    content = Path("text/plain.txt").read_bytes()
    mime_type = ""
    config = ExtractionConfig()

    # Only the call under test may satisfy the expected exception.
    with pytest.raises(Exception):  # noqa: B017
        await extract_bytes(content, mime_type, config)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_async_extract_bytes_invalid_mime() -> None:
    """Check that async ``extract_bytes`` raises for the MIME type ``application/x-nonexistent``.

    The fixture is read and the config is built before entering
    ``pytest.raises`` so that a setup failure (for example a missing
    ``text/plain.txt``) fails the test instead of being mistaken for the
    expected extraction error.
    """
    content = Path("text/plain.txt").read_bytes()
    mime_type = "application/x-nonexistent"
    config = ExtractionConfig()

    # Only the call under test may satisfy the expected exception.
    with pytest.raises(Exception):  # noqa: B017
        await extract_bytes(content, mime_type, config)
|
||||
109
e2e/python/tests/test_batch.py
generated
Normal file
109
e2e/python/tests/test_batch.py
generated
Normal file
@@ -0,0 +1,109 @@
|
||||
# This file is auto-generated by alef — DO NOT EDIT.
|
||||
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
# To regenerate: alef generate
|
||||
# To verify freshness: alef verify --exit-code
|
||||
# Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
"""E2e tests for category: batch."""
|
||||
|
||||
import pytest # noqa: F401
|
||||
from kreuzberg import batch_extract_bytes_sync, batch_extract_bytes, batch_extract_files, batch_extract_files_sync, BatchBytesItem, BatchFileItem
|
||||
|
||||
|
||||
def _alef_e2e_text(value: object) -> str:
    """Return ``str(value)``, mapping ``None`` to the empty string."""
    if value is None:
        return ""
    return str(value)
|
||||
|
||||
|
||||
def _alef_e2e_item_texts(item: object) -> tuple[str, ...]:
    """Collect the text forms of *item* as an eight-element tuple.

    The tuple holds, in order: the text of *item* itself, the text of its
    ``kind``, ``name``, ``source``, ``alias``, ``text`` and ``signature``
    attributes (a missing or ``None`` attribute yields ``""``), and the
    space-joined string forms of its ``items`` attribute when that attribute
    is a ``list`` (otherwise ``""``).
    """
    children = getattr(item, "items", None)
    if isinstance(children, list):
        joined_children = " ".join(str(child) for child in children)
    else:
        joined_children = ""

    own_text = _alef_e2e_text(item)
    attribute_texts = tuple(
        _alef_e2e_text(getattr(item, attr, None))
        for attr in ("kind", "name", "source", "alias", "text", "signature")
    )
    return (own_text, *attribute_texts, joined_children)
|
||||
|
||||
|
||||
def test_batch_bytes_invalid_mime() -> None:
    """Run ``batch_extract_bytes_sync`` on one item tagged ``application/x-nonexistent``.

    The return value is discarded; the test passes as long as the call does
    not raise.
    """
    unsupported_item = BatchBytesItem(content=b"Hello", mime_type="application/x-nonexistent")

    batch_extract_bytes_sync([unsupported_item], None)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_batch_extract_bytes_happy() -> None:
    """Extract a plain-text item and an HTML item in one async batch; expect at least one result."""
    plain_item = BatchBytesItem(content=b"Hello, world!", mime_type="text/plain")
    html_item = BatchBytesItem(content=b"<html><body>Test</body></html>", mime_type="text/html")

    results = await batch_extract_bytes([plain_item, html_item], None)

    assert len(results) >= 1  # noqa: S101
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_batch_extract_bytes_mixed_format() -> None:
    """Run async ``batch_extract_bytes`` on one item tagged ``application/x-unknown``.

    The return value is discarded; the test passes as long as the call does
    not raise.
    """
    unknown_item = BatchBytesItem(content=b"PDF placeholder", mime_type="application/x-unknown")

    await batch_extract_bytes([unknown_item], None)
|
||||
|
||||
|
||||
def test_batch_extract_bytes_sync_empty_list() -> None:
    """Pass an empty batch to ``batch_extract_bytes_sync`` and expect an empty result."""
    results = batch_extract_bytes_sync([], None)

    assert len(results) == 0  # noqa: S101
|
||||
|
||||
|
||||
def test_batch_extract_bytes_sync_invalid_mime() -> None:
    """Run ``batch_extract_bytes_sync`` on one item tagged ``application/x-unknown``.

    The return value is discarded; the test passes as long as the call does
    not raise.
    """
    unknown_item = BatchBytesItem(content=b"data", mime_type="application/x-unknown")

    batch_extract_bytes_sync([unknown_item], None)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_batch_file_async_basic() -> None:
    """Run async ``batch_extract_files`` on a PDF fixture and a text fixture.

    The return value is discarded; the test passes as long as the call does
    not raise.
    """
    fixtures = [
        BatchFileItem(path="pdf/fake_memo.pdf"),
        BatchFileItem(path="text/fake_text.txt"),
    ]

    await batch_extract_files(fixtures, None)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_batch_file_async_not_found() -> None:
    """Run async ``batch_extract_files`` on the path ``/nonexistent/a.pdf``.

    The return value is discarded; the test passes as long as the call does
    not raise.
    """
    missing = [BatchFileItem(path="/nonexistent/a.pdf")]

    await batch_extract_files(missing, None)
|
||||
|
||||
|
||||
def test_batch_file_not_found() -> None:
    """Run ``batch_extract_files_sync`` on two paths under ``/nonexistent``.

    The return value is discarded; the test passes as long as the call does
    not raise.
    """
    missing = [
        BatchFileItem(path="/nonexistent/a.pdf"),
        BatchFileItem(path="/nonexistent/b.txt"),
    ]

    batch_extract_files_sync(missing, None)
|
||||
|
||||
|
||||
def test_batch_file_partial() -> None:
    """Run ``batch_extract_files_sync`` on one fixture path and one ``/nonexistent`` path.

    The return value is discarded; the test passes as long as the call does
    not raise.
    """
    mixed = [
        BatchFileItem(path="text/plain.txt"),
        BatchFileItem(path="/nonexistent/missing.pdf"),
    ]

    batch_extract_files_sync(mixed, None)
|
||||
|
||||
|
||||
def test_batch_file_sync_basic() -> None:
    """Run ``batch_extract_files_sync`` on a PDF fixture and a text fixture.

    The return value is discarded; the test passes as long as the call does
    not raise.
    """
    fixtures = [
        BatchFileItem(path="pdf/fake_memo.pdf"),
        BatchFileItem(path="text/fake_text.txt"),
    ]

    batch_extract_files_sync(fixtures, None)
|
||||
41
e2e/python/tests/test_code.py
generated
Normal file
41
e2e/python/tests/test_code.py
generated
Normal file
@@ -0,0 +1,41 @@
|
||||
# This file is auto-generated by alef — DO NOT EDIT.
|
||||
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
# To regenerate: alef generate
|
||||
# To verify freshness: alef verify --exit-code
|
||||
# Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
"""E2e tests for category: code."""
|
||||
|
||||
import pytest # noqa: F401
|
||||
from kreuzberg import extract_file_sync
|
||||
|
||||
|
||||
def _alef_e2e_text(value: object) -> str:
    """Return ``str(value)``, mapping ``None`` to the empty string."""
    if value is None:
        return ""
    return str(value)
|
||||
|
||||
|
||||
def _alef_e2e_item_texts(item: object) -> tuple[str, ...]:
    """Collect the text forms of *item* as an eight-element tuple.

    The tuple holds, in order: the text of *item* itself, the text of its
    ``kind``, ``name``, ``source``, ``alias``, ``text`` and ``signature``
    attributes (a missing or ``None`` attribute yields ``""``), and the
    space-joined string forms of its ``items`` attribute when that attribute
    is a ``list`` (otherwise ``""``).
    """
    children = getattr(item, "items", None)
    if isinstance(children, list):
        joined_children = " ".join(str(child) for child in children)
    else:
        joined_children = ""

    own_text = _alef_e2e_text(item)
    attribute_texts = tuple(
        _alef_e2e_text(getattr(item, attr, None))
        for attr in ("kind", "name", "source", "alias", "text", "signature")
    )
    return (own_text, *attribute_texts, joined_children)
|
||||
|
||||
|
||||
def test_code_shebang_detection() -> None:
    """Extract ``code/script.sh`` with an explicit source-code MIME type and check its content.

    NOTE(review): the generated description mentions shebang detection via
    bytes input, but the body calls ``extract_file_sync`` with a path -- confirm
    against the fixture definition.
    """
    source_mime = "text/x-source-code"

    extraction = extract_file_sync("code/script.sh", source_mime, None)

    assert extraction.mime_type.strip() == "text/x-source-code"  # noqa: S101
    assert len(extraction.content) >= 10  # noqa: S101
    # Both words must be present in the extracted text.
    assert "build" in extraction.content  # noqa: S101
    assert "clean" in extraction.content  # noqa: S101
|
||||
227
e2e/python/tests/test_contract.py
generated
Normal file
227
e2e/python/tests/test_contract.py
generated
Normal file
@@ -0,0 +1,227 @@
|
||||
# This file is auto-generated by alef — DO NOT EDIT.
|
||||
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
# To regenerate: alef generate
|
||||
# To verify freshness: alef verify --exit-code
|
||||
# Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
"""E2e tests for category: contract."""
|
||||
|
||||
from pathlib import Path
|
||||
import pytest # noqa: F401
|
||||
from kreuzberg import extract_file, extract_file_sync, extract_bytes_sync, ExtractionConfig
|
||||
|
||||
|
||||
def _alef_e2e_text(value: object) -> str:
    """Return ``str(value)``, mapping ``None`` to the empty string."""
    if value is None:
        return ""
    return str(value)
|
||||
|
||||
|
||||
def _alef_e2e_item_texts(item: object) -> tuple[str, ...]:
    """Collect the text forms of *item* as an eight-element tuple.

    The tuple holds, in order: the text of *item* itself, the text of its
    ``kind``, ``name``, ``source``, ``alias``, ``text`` and ``signature``
    attributes (a missing or ``None`` attribute yields ``""``), and the
    space-joined string forms of its ``items`` attribute when that attribute
    is a ``list`` (otherwise ``""``).
    """
    children = getattr(item, "items", None)
    if isinstance(children, list):
        joined_children = " ".join(str(child) for child in children)
    else:
        joined_children = ""

    own_text = _alef_e2e_text(item)
    attribute_texts = tuple(
        _alef_e2e_text(getattr(item, attr, None))
        for attr in ("kind", "name", "source", "alias", "text", "signature")
    )
    return (own_text, *attribute_texts, joined_children)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_api_batch_bytes_async() -> None:
    """Extract ``pdf/fake_memo.pdf`` through async ``extract_file`` and check MIME type and content.

    NOTE(review): the test name refers to the batch-bytes API, yet the body
    calls ``extract_file`` -- confirm against the fixture this test is
    generated from.
    """
    extraction = await extract_file("pdf/fake_memo.pdf", None, None)

    assert extraction.mime_type.strip() == "application/pdf"  # noqa: S101
    assert len(extraction.content) >= 10  # noqa: S101
    # At least one of the expected snippets must appear in the extracted text.
    expected_snippets = ("May 5, 2023", "Mallori")
    assert any(snippet in extraction.content for snippet in expected_snippets)  # noqa: S101
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_api_batch_bytes_with_configs_async() -> None:
    """Extract ``pdf/fake_memo.pdf`` through async ``extract_file`` with markdown output configured.

    NOTE(review): the test name refers to batch-bytes extraction with per-file
    configs, yet the body calls ``extract_file`` with a single config --
    confirm against the fixture this test is generated from.
    """
    markdown_config = ExtractionConfig(output_format="markdown")

    extraction = await extract_file("pdf/fake_memo.pdf", None, markdown_config)

    assert extraction.mime_type.strip() == "application/pdf"  # noqa: S101
    assert len(extraction.content) >= 10  # noqa: S101
    # skipped: field 'metadata.output_format' not available on result type
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_api_batch_file_async() -> None:
    """Extract ``pdf/fake_memo.pdf`` through async ``extract_file`` and check MIME type and content.

    NOTE(review): the test name refers to the batch-file API, yet the body
    calls ``extract_file`` -- confirm against the fixture this test is
    generated from.
    """
    extraction = await extract_file("pdf/fake_memo.pdf", None, None)

    assert extraction.mime_type.strip() == "application/pdf"  # noqa: S101
    assert len(extraction.content) >= 10  # noqa: S101
    # At least one of the expected snippets must appear in the extracted text.
    expected_snippets = ("May 5, 2023", "Mallori")
    assert any(snippet in extraction.content for snippet in expected_snippets)  # noqa: S101
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_api_batch_file_with_configs_async() -> None:
    """Extract ``pdf/fake_memo.pdf`` through async ``extract_file`` with markdown output configured.

    NOTE(review): the test name refers to batch-file extraction with per-file
    configs, yet the body calls ``extract_file`` with a single config --
    confirm against the fixture this test is generated from.
    """
    markdown_config = ExtractionConfig(output_format="markdown")

    extraction = await extract_file("pdf/fake_memo.pdf", None, markdown_config)

    assert extraction.mime_type.strip() == "application/pdf"  # noqa: S101
    assert len(extraction.content) >= 10  # noqa: S101
    # skipped: field 'metadata.output_format' not available on result type
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_api_extract_bytes_async() -> None:
    """Extract ``pdf/fake_memo.pdf`` through async ``extract_file`` and check MIME type and content.

    NOTE(review): the test name refers to ``extract_bytes``, yet the body
    calls ``extract_file`` with a path -- confirm against the fixture this
    test is generated from.
    """
    extraction = await extract_file("pdf/fake_memo.pdf", None, None)

    assert extraction.mime_type.strip() == "application/pdf"  # noqa: S101
    assert len(extraction.content) >= 10  # noqa: S101
    # At least one of the expected snippets must appear in the extracted text.
    expected_snippets = ("May 5, 2023", "Mallori")
    assert any(snippet in extraction.content for snippet in expected_snippets)  # noqa: S101
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_api_extract_file_async() -> None:
    """Extract ``pdf/fake_memo.pdf`` through async ``extract_file`` and check MIME type and content."""
    extraction = await extract_file("pdf/fake_memo.pdf", None, None)

    assert extraction.mime_type.strip() == "application/pdf"  # noqa: S101
    assert len(extraction.content) >= 10  # noqa: S101
    # At least one of the expected snippets must appear in the extracted text.
    expected_snippets = ("May 5, 2023", "Mallori")
    assert any(snippet in extraction.content for snippet in expected_snippets)  # noqa: S101
|
||||
|
||||
|
||||
def test_config_chunking_prepend_heading_context() -> None:
    """Extract a markdown fixture with heading-context chunking enabled and check every chunk.

    Each chunk must have truthy ``content`` and a ``metadata.heading_context``
    that is not ``None``; in addition at least one chunk must exist and the
    first chunk's heading context must be truthy.
    """
    chunking_options = {
        "chunker_type": "markdown",
        "max_chars": 300,
        "max_overlap": 50,
        "prepend_heading_context": True,
    }
    config = ExtractionConfig(chunking=chunking_options)

    result = extract_file_sync("markdown/extraction_test.md", None, config)

    assert len(result.content) >= 10  # noqa: S101
    # The generator reports one assertion as skipped ("field 'chunks' not
    # available on result type"); the assertions below still read result.chunks.
    chunks = result.chunks or []
    assert all(chunk.content for chunk in chunks)  # noqa: S101
    assert all(  # noqa: S101
        chunk.metadata and chunk.metadata.heading_context is not None for chunk in chunks
    )
    # Non-vacuous check: there is a first chunk and it carries a heading context.
    assert bool(chunks and (chunks[0].metadata and chunks[0].metadata.heading_context))  # noqa: S101
|
||||
|
||||
|
||||
def test_config_document_structure_with_headings() -> None:
    """Extract ``docx/fake.docx`` with ``include_document_structure=True`` and check the MIME type."""
    docx_mime = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
    structure_config = ExtractionConfig(include_document_structure=True)

    extraction = extract_file_sync("docx/fake.docx", None, structure_config)

    assert extraction.mime_type.strip() == docx_mime  # noqa: S101
    # skipped: field 'document' not available on result type
    # skipped: field 'document.nodes' not available on result type
|
||||
|
||||
|
||||
def test_config_element_types() -> None:
    """Extract ``docx/unit_test_headers.docx`` with ``result_format="element_based"`` and check the MIME type."""
    accepted_mimes = ("application/vnd.openxmlformats-officedocument.wordprocessingml.document",)
    element_config = ExtractionConfig(result_format="element_based")

    extraction = extract_file_sync("docx/unit_test_headers.docx", None, element_config)

    # Substring match: the reported MIME type must contain an accepted value.
    assert any(mime in extraction.mime_type for mime in accepted_mimes)  # noqa: S101
    # skipped: field 'elements' not available on result type
|
||||
|
||||
|
||||
def test_config_extraction_timeout() -> None:
    """Extract ``pdf/fake_memo.pdf`` with ``extraction_timeout_secs=300`` and check MIME type and length."""
    timeout_config = ExtractionConfig(extraction_timeout_secs=300)

    extraction = extract_file_sync("pdf/fake_memo.pdf", None, timeout_config)

    assert extraction.mime_type.strip() == "application/pdf"  # noqa: S101
    assert len(extraction.content) >= 10  # noqa: S101
|
||||
|
||||
|
||||
def test_config_keywords() -> None:
    """Extract ``pdf/fake_memo.pdf`` with a YAKE keyword config and check MIME type and length."""
    keyword_options = {"algorithm": "yake", "max_keywords": 10}
    keyword_config = ExtractionConfig(keywords=keyword_options)

    extraction = extract_file_sync("pdf/fake_memo.pdf", None, keyword_config)

    assert extraction.mime_type.strip() == "application/pdf"  # noqa: S101
    assert len(extraction.content) >= 10  # noqa: S101
    # skipped (2 assertions): field 'keywords' not available on Python ExtractionResult
|
||||
|
||||
|
||||
def test_config_pages() -> None:
    """Extract ``pdf/fake_memo.pdf`` with page extraction and page markers enabled.

    Besides the MIME type and minimum length, the extracted text must contain
    the substring ``PAGE``.
    """
    page_options = {"extract_pages": True, "insert_page_markers": True}
    page_config = ExtractionConfig(pages=page_options)

    extraction = extract_file_sync("pdf/fake_memo.pdf", None, page_config)

    assert extraction.mime_type.strip() == "application/pdf"  # noqa: S101
    assert len(extraction.content) >= 10  # noqa: S101
    assert any(marker in extraction.content for marker in ("PAGE",))  # noqa: S101
|
||||
|
||||
|
||||
def test_config_quality_enabled() -> None:
    """Extract ``pdf/fake_memo.pdf`` with ``enable_quality_processing=True`` and check MIME type and length."""
    quality_config = ExtractionConfig(enable_quality_processing=True)

    extraction = extract_file_sync("pdf/fake_memo.pdf", None, quality_config)

    assert extraction.mime_type.strip() == "application/pdf"  # noqa: S101
    assert len(extraction.content) >= 10  # noqa: S101
    # skipped (3 assertions): field 'quality_score' not available on result type
|
||||
|
||||
|
||||
def test_config_security_limits() -> None:
    """Extract ``archives/documents.zip`` with custom ``security_limits`` and check MIME type and length."""
    limits = {
        "max_archive_size": 104857600,
        "max_compression_ratio": 50,
        "max_files_in_archive": 100,
    }
    limits_config = ExtractionConfig(security_limits=limits)

    extraction = extract_file_sync("archives/documents.zip", None, limits_config)

    # Substring match against either accepted ZIP MIME type.
    zip_mimes = ("application/zip", "application/x-zip-compressed")
    assert any(mime in extraction.mime_type for mime in zip_mimes)  # noqa: S101
    assert len(extraction.content) >= 10  # noqa: S101
|
||||
|
||||
|
||||
def test_config_tree_sitter() -> None:
    """Extract ``code/hello.py`` with a ``tree_sitter`` config and check MIME type and length."""
    process_flags = {
        "comments": False,
        "diagnostics": False,
        "docstrings": False,
        "exports": True,
        "imports": True,
        "structure": True,
        "symbols": False,
    }
    tree_sitter_options = {
        "groups": ["web"],
        "languages": ["python", "rust"],
        "process": process_flags,
    }
    tree_sitter_config = ExtractionConfig(tree_sitter=tree_sitter_options)

    extraction = extract_file_sync("code/hello.py", None, tree_sitter_config)

    assert extraction.mime_type.strip() == "text/x-source-code"  # noqa: S101
    assert len(extraction.content) >= 5  # noqa: S101
|
||||
|
||||
|
||||
def test_output_format_bytes_markdown() -> None:
    """Extract PDF bytes via ``extract_bytes_sync`` with markdown output configured."""
    # Relative fixture path: resolved against the current working directory.
    pdf_bytes = Path("pdf/fake_memo.pdf").read_bytes()
    markdown_config = ExtractionConfig(output_format="markdown")

    extraction = extract_bytes_sync(pdf_bytes, "application/pdf", markdown_config)

    assert extraction.mime_type.strip() == "application/pdf"  # noqa: S101
    assert len(extraction.content) >= 10  # noqa: S101
    # skipped: field 'metadata.output_format' not available on result type
|
||||
|
||||
|
||||
def test_output_format_markdown() -> None:
    """Extract ``pdf/fake_memo.pdf`` via ``extract_file_sync`` with markdown output configured."""
    markdown_config = ExtractionConfig(output_format="markdown")

    extraction = extract_file_sync("pdf/fake_memo.pdf", None, markdown_config)

    assert extraction.mime_type.strip() == "application/pdf"  # noqa: S101
    assert len(extraction.content) >= 10  # noqa: S101
    # skipped: field 'metadata.output_format' not available on result type
|
||||
58
e2e/python/tests/test_detection.py
generated
Normal file
58
e2e/python/tests/test_detection.py
generated
Normal file
@@ -0,0 +1,58 @@
|
||||
# This file is auto-generated by alef — DO NOT EDIT.
|
||||
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
# To regenerate: alef generate
|
||||
# To verify freshness: alef verify --exit-code
|
||||
# Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
"""E2e tests for category: detection."""
|
||||
|
||||
from pathlib import Path
|
||||
import pytest # noqa: F401
|
||||
from kreuzberg import detect_mime_type_from_bytes, get_extensions_for_mime
|
||||
|
||||
|
||||
def _alef_e2e_text(value: object) -> str:
    """Return ``str(value)``, mapping ``None`` to the empty string."""
    if value is None:
        return ""
    return str(value)
|
||||
|
||||
|
||||
def _alef_e2e_item_texts(item: object) -> tuple[str, ...]:
    """Collect the text forms of *item* as an eight-element tuple.

    The tuple holds, in order: the text of *item* itself, the text of its
    ``kind``, ``name``, ``source``, ``alias``, ``text`` and ``signature``
    attributes (a missing or ``None`` attribute yields ``""``), and the
    space-joined string forms of its ``items`` attribute when that attribute
    is a ``list`` (otherwise ``""``).
    """
    children = getattr(item, "items", None)
    if isinstance(children, list):
        joined_children = " ".join(str(child) for child in children)
    else:
        joined_children = ""

    own_text = _alef_e2e_text(item)
    attribute_texts = tuple(
        _alef_e2e_text(getattr(item, attr, None))
        for attr in ("kind", "name", "source", "alias", "text", "signature")
    )
    return (own_text, *attribute_texts, joined_children)
|
||||
|
||||
|
||||
def test_detect_mime_bytes_html() -> None:
    """Call ``detect_mime_type_from_bytes`` on the bytes of ``html/html.html``.

    The detected value is discarded; the test passes as long as the call does
    not raise.
    """
    html_bytes = Path("html/html.html").read_bytes()

    detect_mime_type_from_bytes(html_bytes)
|
||||
|
||||
|
||||
def test_detect_mime_bytes_pdf() -> None:
    """Call ``detect_mime_type_from_bytes`` on the bytes of ``pdf/fake_memo.pdf``.

    The detected value is discarded; the test passes as long as the call does
    not raise.
    """
    pdf_bytes = Path("pdf/fake_memo.pdf").read_bytes()

    detect_mime_type_from_bytes(pdf_bytes)
|
||||
|
||||
|
||||
def test_detect_mime_bytes_png() -> None:
    """Call ``detect_mime_type_from_bytes`` on the bytes of ``images/test_hello_world.png``.

    The detected value is discarded; the test passes as long as the call does
    not raise.
    """
    png_bytes = Path("images/test_hello_world.png").read_bytes()

    detect_mime_type_from_bytes(png_bytes)
|
||||
|
||||
|
||||
def test_get_extensions_unknown_mime() -> None:
    """Check that ``get_extensions_for_mime`` raises for ``application/x-totally-unknown``."""
    unknown_mime = "application/x-totally-unknown"

    with pytest.raises(Exception):  # noqa: B017
        get_extensions_for_mime(unknown_mime)
|
||||
39
e2e/python/tests/test_document_extractor_management.py
generated
Normal file
39
e2e/python/tests/test_document_extractor_management.py
generated
Normal file
@@ -0,0 +1,39 @@
|
||||
# This file is auto-generated by alef — DO NOT EDIT.
|
||||
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
# To regenerate: alef generate
|
||||
# To verify freshness: alef verify --exit-code
|
||||
# Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
"""E2e tests for category: document_extractor_management."""
|
||||
|
||||
import pytest # noqa: F401
|
||||
from kreuzberg import clear_document_extractors, list_document_extractors
|
||||
|
||||
|
||||
def _alef_e2e_text(value: object) -> str:
    """Return ``str(value)``, mapping ``None`` to the empty string."""
    if value is None:
        return ""
    return str(value)
|
||||
|
||||
|
||||
def _alef_e2e_item_texts(item: object) -> tuple[str, ...]:
    """Collect the text forms of *item* as an eight-element tuple.

    The tuple holds, in order: the text of *item* itself, the text of its
    ``kind``, ``name``, ``source``, ``alias``, ``text`` and ``signature``
    attributes (a missing or ``None`` attribute yields ``""``), and the
    space-joined string forms of its ``items`` attribute when that attribute
    is a ``list`` (otherwise ``""``).
    """
    children = getattr(item, "items", None)
    if isinstance(children, list):
        joined_children = " ".join(str(child) for child in children)
    else:
        joined_children = ""

    own_text = _alef_e2e_text(item)
    attribute_texts = tuple(
        _alef_e2e_text(getattr(item, attr, None))
        for attr in ("kind", "name", "source", "alias", "text", "signature")
    )
    return (own_text, *attribute_texts, joined_children)
|
||||
|
||||
|
||||
def test_document_extractors_clear() -> None:
    """Call ``clear_document_extractors`` and require only that it does not raise.

    The return value is discarded and no follow-up check of the extractor
    list is made here.
    """
    clear_document_extractors()
|
||||
|
||||
|
||||
def test_extractors_list() -> None:
    """Call ``list_document_extractors`` and require only that it does not raise.

    The returned value is discarded without inspection.
    """
    list_document_extractors()
|
||||
59
e2e/python/tests/test_embed_async_pending.py
generated
Normal file
59
e2e/python/tests/test_embed_async_pending.py
generated
Normal file
@@ -0,0 +1,59 @@
|
||||
# This file is auto-generated by alef — DO NOT EDIT.
|
||||
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
# To regenerate: alef generate
|
||||
# To verify freshness: alef verify --exit-code
|
||||
# Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
"""E2e tests for category: embed_async_pending."""
|
||||
|
||||
import pytest # noqa: F401
|
||||
from kreuzberg import embed_texts_async, ExtractionConfig
|
||||
|
||||
|
||||
def _alef_e2e_text(value: object) -> str:
    """Return ``str(value)``, mapping ``None`` to the empty string."""
    if value is None:
        return ""
    return str(value)
|
||||
|
||||
|
||||
def _alef_e2e_item_texts(item: object) -> tuple[str, ...]:
    """Collect the text forms of *item* as an eight-element tuple.

    The tuple holds, in order: the text of *item* itself, the text of its
    ``kind``, ``name``, ``source``, ``alias``, ``text`` and ``signature``
    attributes (a missing or ``None`` attribute yields ``""``), and the
    space-joined string forms of its ``items`` attribute when that attribute
    is a ``list`` (otherwise ``""``).
    """
    children = getattr(item, "items", None)
    if isinstance(children, list):
        joined_children = " ".join(str(child) for child in children)
    else:
        joined_children = ""

    own_text = _alef_e2e_text(item)
    attribute_texts = tuple(
        _alef_e2e_text(getattr(item, attr, None))
        for attr in ("kind", "name", "source", "alias", "text", "signature")
    )
    return (own_text, *attribute_texts, joined_children)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_embed_texts_async_empty_input() -> None:
    """Pass an empty text list to ``embed_texts_async`` and expect an empty result."""
    embeddings = await embed_texts_async([], None)

    assert len(embeddings) == 0  # noqa: S101
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_embed_texts_async_happy() -> None:
    """Embed two short texts with ``embed_texts_async`` and expect at least two results."""
    inputs = ["First", "Second"]

    embeddings = await embed_texts_async(inputs, None)

    assert len(embeddings) >= 2  # noqa: S101
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_embed_texts_async_preset_switch() -> None:
    """Call ``embed_texts_async`` with a config selecting the ``balanced`` preset model.

    The return value is discarded; the test passes as long as the call does
    not raise.
    """
    preset_model = {"name": "balanced", "type": "preset"}
    preset_config = ExtractionConfig(model=preset_model)

    await embed_texts_async(["Text"], preset_config)
|
||||
37
e2e/python/tests/test_embed_extra.py
generated
Normal file
37
e2e/python/tests/test_embed_extra.py
generated
Normal file
@@ -0,0 +1,37 @@
|
||||
# This file is auto-generated by alef — DO NOT EDIT.
|
||||
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
# To regenerate: alef generate
|
||||
# To verify freshness: alef verify --exit-code
|
||||
# Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
"""E2e tests for category: embed_extra."""
|
||||
|
||||
import pytest # noqa: F401
|
||||
from kreuzberg import embed_texts, ExtractionConfig
|
||||
|
||||
|
||||
def _alef_e2e_text(value: object) -> str:
    """Return ``str(value)``, mapping ``None`` to the empty string."""
    if value is None:
        return ""
    return str(value)
|
||||
|
||||
|
||||
def _alef_e2e_item_texts(item: object) -> tuple[str, ...]:
    """Collect the text forms of *item* as an eight-element tuple.

    The tuple holds, in order: the text of *item* itself, the text of its
    ``kind``, ``name``, ``source``, ``alias``, ``text`` and ``signature``
    attributes (a missing or ``None`` attribute yields ``""``), and the
    space-joined string forms of its ``items`` attribute when that attribute
    is a ``list`` (otherwise ``""``).
    """
    children = getattr(item, "items", None)
    if isinstance(children, list):
        joined_children = " ".join(str(child) for child in children)
    else:
        joined_children = ""

    own_text = _alef_e2e_text(item)
    attribute_texts = tuple(
        _alef_e2e_text(getattr(item, attr, None))
        for attr in ("kind", "name", "source", "alias", "text", "signature")
    )
    return (own_text, *attribute_texts, joined_children)
|
||||
|
||||
|
||||
def test_embed_texts_batch() -> None:
    """Call ``embed_texts`` on two texts with a config selecting the ``balanced`` preset model.

    The return value is discarded; the test passes as long as the call does
    not raise.
    """
    preset_model = {"name": "balanced", "type": "preset"}
    preset_config = ExtractionConfig(model=preset_model)

    embed_texts(["Hello", "World"], preset_config)
|
||||
39
e2e/python/tests/test_embedding_backend_management.py
generated
Normal file
39
e2e/python/tests/test_embedding_backend_management.py
generated
Normal file
@@ -0,0 +1,39 @@
|
||||
# This file is auto-generated by alef — DO NOT EDIT.
|
||||
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
# To regenerate: alef generate
|
||||
# To verify freshness: alef verify --exit-code
|
||||
# Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
"""E2e tests for category: embedding_backend_management."""
|
||||
|
||||
import pytest # noqa: F401
|
||||
from kreuzberg import clear_embedding_backends, list_embedding_backends
|
||||
|
||||
|
||||
def _alef_e2e_text(value: object) -> str:
    """Return ``str(value)``, mapping ``None`` to the empty string."""
    if value is None:
        return ""
    return str(value)
|
||||
|
||||
|
||||
def _alef_e2e_item_texts(item: object) -> tuple[str, ...]:
    """Collect the text forms of *item* as an eight-element tuple.

    The tuple holds, in order: the text of *item* itself, the text of its
    ``kind``, ``name``, ``source``, ``alias``, ``text`` and ``signature``
    attributes (a missing or ``None`` attribute yields ``""``), and the
    space-joined string forms of its ``items`` attribute when that attribute
    is a ``list`` (otherwise ``""``).
    """
    children = getattr(item, "items", None)
    if isinstance(children, list):
        joined_children = " ".join(str(child) for child in children)
    else:
        joined_children = ""

    own_text = _alef_e2e_text(item)
    attribute_texts = tuple(
        _alef_e2e_text(getattr(item, attr, None))
        for attr in ("kind", "name", "source", "alias", "text", "signature")
    )
    return (own_text, *attribute_texts, joined_children)
|
||||
|
||||
|
||||
def test_embedding_backends_clear() -> None:
    """Call ``clear_embedding_backends`` and expect it not to raise.

    NOTE(review): the original description promised "verify list is empty",
    but nothing is asserted about the registry contents afterwards.
    """
    _ = clear_embedding_backends()
|
||||
|
||||
|
||||
def test_embedding_backends_list() -> None:
    """List all registered embedding backends.

    The returned value is discarded; the test only checks that the call
    does not raise.
    """
    _ = list_embedding_backends()
|
||||
66
e2e/python/tests/test_embeddings.py
generated
Normal file
66
e2e/python/tests/test_embeddings.py
generated
Normal file
@@ -0,0 +1,66 @@
|
||||
# This file is auto-generated by alef — DO NOT EDIT.
|
||||
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
# To regenerate: alef generate
|
||||
# To verify freshness: alef verify --exit-code
|
||||
# Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
"""E2e tests for category: embeddings."""
|
||||
|
||||
import pytest # noqa: F401
|
||||
from kreuzberg import embed_texts, get_embedding_preset, list_embedding_presets, ExtractionConfig
|
||||
|
||||
|
||||
def _alef_e2e_text(value: object) -> str:
|
||||
return "" if value is None else str(value)
|
||||
|
||||
|
||||
def _alef_e2e_item_texts(item: object) -> tuple[str, ...]:
    """Collect the searchable text forms of ``item``.

    Returns an 8-tuple: ``str(item)``; the ``kind``, ``name``, ``source``,
    ``alias``, ``text`` and ``signature`` attributes (a missing or ``None``
    attribute becomes ``""``); and finally the space-joined ``str()`` of each
    entry of ``item.items`` when that attribute is a ``list`` (``""``
    otherwise).
    """
    raw_items = getattr(item, "items", None)
    # Only a real list is joined; anything else (e.g. a bound method) yields "".
    items_text = " ".join(str(value) for value in raw_items) if isinstance(raw_items, list) else ""
    return (
        _alef_e2e_text(item),
        _alef_e2e_text(getattr(item, "kind", None)),
        _alef_e2e_text(getattr(item, "name", None)),
        _alef_e2e_text(getattr(item, "source", None)),
        _alef_e2e_text(getattr(item, "alias", None)),
        _alef_e2e_text(getattr(item, "text", None)),
        _alef_e2e_text(getattr(item, "signature", None)),
        items_text,
    )
|
||||
|
||||
|
||||
def test_embed_texts_different_preset() -> None:
    """embed_texts: multilingual preset.

    Embeds two strings with the "multilingual" preset and checks that the
    result has at least two entries.
    """
    texts = ["Hello world", "Test"]
    config = ExtractionConfig(model={"name": "multilingual", "type": "preset"})

    result = embed_texts(texts, config)
    assert len(result) >= 2  # noqa: S101
|
||||
|
||||
|
||||
def test_get_embedding_preset_known() -> None:
    """get_embedding_preset: known preset.

    Looks up "balanced"; the result is discarded, so only the absence of an
    exception is checked.
    """
    name = "balanced"

    _ = get_embedding_preset(name)
|
||||
|
||||
|
||||
def test_get_embedding_preset_nominal() -> None:
    """get_embedding_preset: nominal case.

    Looks up "balanced"; the result is discarded, so only the absence of an
    exception is checked.
    """
    name = "balanced"

    _ = get_embedding_preset(name)
|
||||
|
||||
|
||||
def test_get_embedding_preset_unknown() -> None:
    """get_embedding_preset: unknown preset yields a falsy result.

    The lookup of a name that is not expected to exist must not raise; the
    returned value is asserted to be falsy.
    """
    name = "nonexistent-xyz"

    result = get_embedding_preset(name)
    assert not result  # noqa: S101
|
||||
|
||||
|
||||
def test_list_embedding_presets_sanity() -> None:
    """list_embedding_presets: returns at least one (result must be truthy)."""
    result = list_embedding_presets()
    assert result  # noqa: S101
|
||||
75
e2e/python/tests/test_error.py
generated
Normal file
75
e2e/python/tests/test_error.py
generated
Normal file
@@ -0,0 +1,75 @@
|
||||
# This file is auto-generated by alef — DO NOT EDIT.
|
||||
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
# To regenerate: alef generate
|
||||
# To verify freshness: alef verify --exit-code
|
||||
# Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
"""E2e tests for category: error."""
|
||||
|
||||
from pathlib import Path
|
||||
import pytest # noqa: F401
|
||||
from kreuzberg import extract_bytes_sync, ExtractionConfig
|
||||
|
||||
|
||||
def _alef_e2e_text(value: object) -> str:
|
||||
return "" if value is None else str(value)
|
||||
|
||||
|
||||
def _alef_e2e_item_texts(item: object) -> tuple[str, ...]:
    """Collect the searchable text forms of ``item``.

    Returns an 8-tuple: ``str(item)``; the ``kind``, ``name``, ``source``,
    ``alias``, ``text`` and ``signature`` attributes (a missing or ``None``
    attribute becomes ``""``); and finally the space-joined ``str()`` of each
    entry of ``item.items`` when that attribute is a ``list`` (``""``
    otherwise).
    """
    raw_items = getattr(item, "items", None)
    # Only a real list is joined; anything else (e.g. a bound method) yields "".
    items_text = " ".join(str(value) for value in raw_items) if isinstance(raw_items, list) else ""
    return (
        _alef_e2e_text(item),
        _alef_e2e_text(getattr(item, "kind", None)),
        _alef_e2e_text(getattr(item, "name", None)),
        _alef_e2e_text(getattr(item, "source", None)),
        _alef_e2e_text(getattr(item, "alias", None)),
        _alef_e2e_text(getattr(item, "text", None)),
        _alef_e2e_text(getattr(item, "signature", None)),
        items_text,
    )
|
||||
|
||||
|
||||
def test_error_empty_bytes() -> None:
    """Graceful handling of empty bytes (should not error).

    Reads the ``text/empty.txt`` fixture and extracts it as ``text/plain``;
    the result is discarded, so the test passes if nothing raises.
    """
    content = Path("text/empty.txt").read_bytes()
    mime_type = "text/plain"
    config = ExtractionConfig()

    _ = extract_bytes_sync(content, mime_type, config)
|
||||
|
||||
|
||||
def test_error_empty_mime() -> None:
    """Error when extracting with empty MIME type."""
    # Setup happens outside the ``raises`` block so that a missing fixture
    # file (FileNotFoundError) cannot be mistaken for the expected error.
    content = Path("text/plain.txt").read_bytes()
    mime_type = ""
    config = ExtractionConfig()

    with pytest.raises(Exception):  # noqa: B017
        extract_bytes_sync(content, mime_type, config)
|
||||
|
||||
|
||||
def test_error_extract_bytes_conflicting_ocr() -> None:
    """extract_bytes force+disable OCR."""
    # Read the fixture outside the ``raises`` block so that a missing file
    # (FileNotFoundError) cannot be mistaken for the expected error.
    content = Path("text/fake_text.txt").read_bytes()
    mime_type = "text/plain"

    with pytest.raises(Exception):  # noqa: B017
        # The config stays inside the block: it is not visible here whether
        # the conflicting flags are rejected at construction or at extraction.
        config = ExtractionConfig(disable_ocr=True, force_ocr=True)
        extract_bytes_sync(content, mime_type, config)
|
||||
|
||||
|
||||
def test_error_invalid_mime_format() -> None:
    """Error when extracting with invalid MIME type format."""
    # Setup happens outside the ``raises`` block so that a missing fixture
    # file (FileNotFoundError) cannot be mistaken for the expected error.
    content = Path("text/plain.txt").read_bytes()
    mime_type = "not-a-mime"
    config = ExtractionConfig()

    with pytest.raises(Exception):  # noqa: B017
        extract_bytes_sync(content, mime_type, config)
|
||||
|
||||
|
||||
def test_error_unsupported_mime() -> None:
    """Error when extracting with unsupported MIME type."""
    # Setup happens outside the ``raises`` block so that a missing fixture
    # file (FileNotFoundError) cannot be mistaken for the expected error.
    content = Path("text/plain.txt").read_bytes()
    mime_type = "application/x-nonexistent"
    config = ExtractionConfig()

    with pytest.raises(Exception):  # noqa: B017
        extract_bytes_sync(content, mime_type, config)
|
||||
75
e2e/python/tests/test_format_specific.py
generated
Normal file
75
e2e/python/tests/test_format_specific.py
generated
Normal file
@@ -0,0 +1,75 @@
|
||||
# This file is auto-generated by alef — DO NOT EDIT.
|
||||
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
# To regenerate: alef generate
|
||||
# To verify freshness: alef verify --exit-code
|
||||
# Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
"""E2e tests for category: format_specific."""
|
||||
|
||||
from pathlib import Path
|
||||
import pytest # noqa: F401
|
||||
from kreuzberg import extract_bytes_sync, extract_file_sync
|
||||
|
||||
|
||||
def _alef_e2e_text(value: object) -> str:
|
||||
return "" if value is None else str(value)
|
||||
|
||||
|
||||
def _alef_e2e_item_texts(item: object) -> tuple[str, ...]:
    """Collect the searchable text forms of ``item``.

    Returns an 8-tuple: ``str(item)``; the ``kind``, ``name``, ``source``,
    ``alias``, ``text`` and ``signature`` attributes (a missing or ``None``
    attribute becomes ``""``); and finally the space-joined ``str()`` of each
    entry of ``item.items`` when that attribute is a ``list`` (``""``
    otherwise).
    """
    raw_items = getattr(item, "items", None)
    # Only a real list is joined; anything else (e.g. a bound method) yields "".
    items_text = " ".join(str(value) for value in raw_items) if isinstance(raw_items, list) else ""
    return (
        _alef_e2e_text(item),
        _alef_e2e_text(getattr(item, "kind", None)),
        _alef_e2e_text(getattr(item, "name", None)),
        _alef_e2e_text(getattr(item, "source", None)),
        _alef_e2e_text(getattr(item, "alias", None)),
        _alef_e2e_text(getattr(item, "text", None)),
        _alef_e2e_text(getattr(item, "signature", None)),
        items_text,
    )
|
||||
|
||||
|
||||
def test_format_docx_standalone() -> None:
    """Standalone DOCX extraction using extract_bytes_sync.

    Passes ``None`` as the config argument and checks that at least 20
    characters of content are extracted from ``docx/fake.docx``.
    """
    content = Path("docx/fake.docx").read_bytes()
    mime_type = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"

    result = extract_bytes_sync(content, mime_type, None)
    assert len(result.content) >= 20  # noqa: S101
|
||||
|
||||
|
||||
def test_format_hwpx_standalone() -> None:
    """Standalone HWPX extraction using extract_bytes_sync.

    Passes ``None`` as the config argument and checks both the minimum
    content length and the presence of the expected greeting text.
    """
    content = Path("hwpx/simple.hwpx").read_bytes()
    mime_type = "application/haansofthwpx"

    result = extract_bytes_sync(content, mime_type, None)
    assert len(result.content) >= 20  # noqa: S101
    assert "Hello from HWPX" in result.content  # noqa: S101
|
||||
|
||||
|
||||
def test_format_pdf_text() -> None:
    """Standalone PDF text extraction using extract_bytes_sync.

    Passes ``None`` as the config argument; the content must be at least 50
    characters long and contain at least one of the expected words.
    """
    content = Path("pdf/fake_memo.pdf").read_bytes()
    mime_type = "application/pdf"

    result = extract_bytes_sync(content, mime_type, None)
    assert len(result.content) >= 50  # noqa: S101
    assert any(v in result.content for v in ["Mallori", "May"])  # noqa: S101
|
||||
|
||||
|
||||
def test_format_pptx() -> None:
    """PPTX presentation extraction using extract_file_sync.

    The result is discarded; the test passes if extraction does not raise.
    """
    path = "pptx/simple.pptx"
    mime_type = "application/vnd.openxmlformats-officedocument.presentationml.presentation"

    _ = extract_file_sync(path, mime_type, None)
|
||||
|
||||
|
||||
def test_format_xlsx() -> None:
    """XLSX spreadsheet extraction using extract_file_sync.

    The result is discarded; the test passes if extraction does not raise.
    """
    path = "xlsx/stanley_cups.xlsx"
    mime_type = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"

    _ = extract_file_sync(path, mime_type, None)
|
||||
54
e2e/python/tests/test_mime_utilities.py
generated
Normal file
54
e2e/python/tests/test_mime_utilities.py
generated
Normal file
@@ -0,0 +1,54 @@
|
||||
# This file is auto-generated by alef — DO NOT EDIT.
|
||||
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
# To regenerate: alef generate
|
||||
# To verify freshness: alef verify --exit-code
|
||||
# Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
"""E2e tests for category: mime_utilities."""
|
||||
|
||||
from pathlib import Path
|
||||
import pytest # noqa: F401
|
||||
from kreuzberg import detect_mime_type_from_bytes, get_extensions_for_mime
|
||||
|
||||
|
||||
def _alef_e2e_text(value: object) -> str:
|
||||
return "" if value is None else str(value)
|
||||
|
||||
|
||||
def _alef_e2e_item_texts(item: object) -> tuple[str, ...]:
    """Collect the searchable text forms of ``item``.

    Returns an 8-tuple: ``str(item)``; the ``kind``, ``name``, ``source``,
    ``alias``, ``text`` and ``signature`` attributes (a missing or ``None``
    attribute becomes ``""``); and finally the space-joined ``str()`` of each
    entry of ``item.items`` when that attribute is a ``list`` (``""``
    otherwise).
    """
    raw_items = getattr(item, "items", None)
    # Only a real list is joined; anything else (e.g. a bound method) yields "".
    items_text = " ".join(str(value) for value in raw_items) if isinstance(raw_items, list) else ""
    return (
        _alef_e2e_text(item),
        _alef_e2e_text(getattr(item, "kind", None)),
        _alef_e2e_text(getattr(item, "name", None)),
        _alef_e2e_text(getattr(item, "source", None)),
        _alef_e2e_text(getattr(item, "alias", None)),
        _alef_e2e_text(getattr(item, "text", None)),
        _alef_e2e_text(getattr(item, "signature", None)),
        items_text,
    )
|
||||
|
||||
|
||||
def test_mime_detect_bytes() -> None:
    """Detect MIME type from file bytes (the result must contain "pdf")."""
    content = Path("pdf/fake_memo.pdf").read_bytes()

    result = detect_mime_type_from_bytes(content)
    assert "pdf" in result  # noqa: S101
|
||||
|
||||
|
||||
def test_mime_detect_image() -> None:
    """Detect MIME type from PNG image bytes (the result must contain "png")."""
    content = Path("images/test_hello_world.png").read_bytes()

    result = detect_mime_type_from_bytes(content)
    assert "png" in result  # noqa: S101
|
||||
|
||||
|
||||
def test_mime_get_extensions() -> None:
    """Get file extensions for a MIME type (``application/pdf`` must yield "pdf")."""
    mime_type = "application/pdf"

    result = get_extensions_for_mime(mime_type)
    assert "pdf" in result  # noqa: S101
|
||||
46
e2e/python/tests/test_ocr_backend_management.py
generated
Normal file
46
e2e/python/tests/test_ocr_backend_management.py
generated
Normal file
@@ -0,0 +1,46 @@
|
||||
# This file is auto-generated by alef — DO NOT EDIT.
|
||||
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
# To regenerate: alef generate
|
||||
# To verify freshness: alef verify --exit-code
|
||||
# Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
"""E2e tests for category: ocr_backend_management."""
|
||||
|
||||
import pytest # noqa: F401
|
||||
from kreuzberg import clear_ocr_backends, list_ocr_backends, unregister_ocr_backend
|
||||
|
||||
|
||||
def _alef_e2e_text(value: object) -> str:
|
||||
return "" if value is None else str(value)
|
||||
|
||||
|
||||
def _alef_e2e_item_texts(item: object) -> tuple[str, ...]:
    """Collect the searchable text forms of ``item``.

    Returns an 8-tuple: ``str(item)``; the ``kind``, ``name``, ``source``,
    ``alias``, ``text`` and ``signature`` attributes (a missing or ``None``
    attribute becomes ``""``); and finally the space-joined ``str()`` of each
    entry of ``item.items`` when that attribute is a ``list`` (``""``
    otherwise).
    """
    raw_items = getattr(item, "items", None)
    # Only a real list is joined; anything else (e.g. a bound method) yields "".
    items_text = " ".join(str(value) for value in raw_items) if isinstance(raw_items, list) else ""
    return (
        _alef_e2e_text(item),
        _alef_e2e_text(getattr(item, "kind", None)),
        _alef_e2e_text(getattr(item, "name", None)),
        _alef_e2e_text(getattr(item, "source", None)),
        _alef_e2e_text(getattr(item, "alias", None)),
        _alef_e2e_text(getattr(item, "text", None)),
        _alef_e2e_text(getattr(item, "signature", None)),
        items_text,
    )
|
||||
|
||||
|
||||
def test_ocr_backends_clear() -> None:
    """Call ``clear_ocr_backends`` and expect it not to raise.

    NOTE(review): the original description promised "verify list is empty",
    but nothing is asserted about the registry contents afterwards.
    """
    _ = clear_ocr_backends()
|
||||
|
||||
|
||||
def test_ocr_backends_list() -> None:
    """List all registered OCR backends.

    The returned value is discarded; the test only checks that the call
    does not raise.
    """
    _ = list_ocr_backends()
|
||||
|
||||
|
||||
def test_ocr_backends_unregister() -> None:
    """Unregister nonexistent OCR backend gracefully.

    The call must not raise for a name that this test never registered.
    """
    name = "nonexistent-backend-xyz"

    _ = unregister_ocr_backend(name)
|
||||
47
e2e/python/tests/test_pdf.py
generated
Normal file
47
e2e/python/tests/test_pdf.py
generated
Normal file
@@ -0,0 +1,47 @@
|
||||
# This file is auto-generated by alef — DO NOT EDIT.
|
||||
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
# To regenerate: alef generate
|
||||
# To verify freshness: alef verify --exit-code
|
||||
# Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
"""E2e tests for category: pdf."""
|
||||
|
||||
from pathlib import Path
|
||||
import pytest # noqa: F401
|
||||
from kreuzberg import render_pdf_page_to_png
|
||||
|
||||
|
||||
def _alef_e2e_text(value: object) -> str:
|
||||
return "" if value is None else str(value)
|
||||
|
||||
|
||||
def _alef_e2e_item_texts(item: object) -> tuple[str, ...]:
    """Collect the searchable text forms of ``item``.

    Returns an 8-tuple: ``str(item)``; the ``kind``, ``name``, ``source``,
    ``alias``, ``text`` and ``signature`` attributes (a missing or ``None``
    attribute becomes ``""``); and finally the space-joined ``str()`` of each
    entry of ``item.items`` when that attribute is a ``list`` (``""``
    otherwise).
    """
    raw_items = getattr(item, "items", None)
    # Only a real list is joined; anything else (e.g. a bound method) yields "".
    items_text = " ".join(str(value) for value in raw_items) if isinstance(raw_items, list) else ""
    return (
        _alef_e2e_text(item),
        _alef_e2e_text(getattr(item, "kind", None)),
        _alef_e2e_text(getattr(item, "name", None)),
        _alef_e2e_text(getattr(item, "source", None)),
        _alef_e2e_text(getattr(item, "alias", None)),
        _alef_e2e_text(getattr(item, "text", None)),
        _alef_e2e_text(getattr(item, "signature", None)),
        items_text,
    )
|
||||
|
||||
|
||||
def test_render_pdf_page_first() -> None:
    """render_pdf_page_to_png: first page.

    Renders page index 0 of ``pdf/fake_memo.pdf`` and checks that the result
    has a length of at least 100.
    """
    pdf_bytes = Path("pdf/fake_memo.pdf").read_bytes()
    page_index = 0

    result = render_pdf_page_to_png(pdf_bytes, page_index)
    assert len(result) >= 100  # noqa: S101
|
||||
|
||||
|
||||
def test_render_pdf_page_out_of_range() -> None:
    """render_pdf_page_to_png: page out of range."""
    # Read the fixture outside the ``raises`` block so that a missing file
    # (FileNotFoundError) cannot be mistaken for the expected error.
    pdf_bytes = Path("pdf/fake_memo.pdf").read_bytes()
    page_index = 999

    with pytest.raises(Exception):  # noqa: B017
        render_pdf_page_to_png(pdf_bytes, page_index)
|
||||
176
e2e/python/tests/test_plugin_api.py
generated
Normal file
176
e2e/python/tests/test_plugin_api.py
generated
Normal file
@@ -0,0 +1,176 @@
|
||||
# This file is auto-generated by alef — DO NOT EDIT.
|
||||
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
# To regenerate: alef generate
|
||||
# To verify freshness: alef verify --exit-code
|
||||
# Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
"""E2e tests for category: plugin_api."""
|
||||
|
||||
import pytest # noqa: F401
|
||||
from kreuzberg import register_document_extractor, register_embedding_backend, register_ocr_backend, register_post_processor, register_renderer, register_validator, unregister_document_extractor, unregister_embedding_backend, unregister_post_processor, unregister_renderer, unregister_validator, unregister_ocr_backend
|
||||
|
||||
|
||||
def _alef_e2e_text(value: object) -> str:
|
||||
return "" if value is None else str(value)
|
||||
|
||||
|
||||
def _alef_e2e_item_texts(item: object) -> tuple[str, ...]:
    """Collect the searchable text forms of ``item``.

    Returns an 8-tuple: ``str(item)``; the ``kind``, ``name``, ``source``,
    ``alias``, ``text`` and ``signature`` attributes (a missing or ``None``
    attribute becomes ``""``); and finally the space-joined ``str()`` of each
    entry of ``item.items`` when that attribute is a ``list`` (``""``
    otherwise).
    """
    raw_items = getattr(item, "items", None)
    # Only a real list is joined; anything else (e.g. a bound method) yields "".
    items_text = " ".join(str(value) for value in raw_items) if isinstance(raw_items, list) else ""
    return (
        _alef_e2e_text(item),
        _alef_e2e_text(getattr(item, "kind", None)),
        _alef_e2e_text(getattr(item, "name", None)),
        _alef_e2e_text(getattr(item, "source", None)),
        _alef_e2e_text(getattr(item, "alias", None)),
        _alef_e2e_text(getattr(item, "text", None)),
        _alef_e2e_text(getattr(item, "signature", None)),
        items_text,
    )
|
||||
|
||||
|
||||
def test_register_document_extractor_trait_bridge() -> None:
    """register_document_extractor: trait bridge.

    Registers a stub object named "test-extractor" and then unregisters it;
    the test passes if neither call raises.
    """

    # Stub plugin with placeholder method bodies.
    class _TestStub_register_document_extractor_trait_bridge:
        def name(self):
            return "test-extractor"

        def initialize(self):
            pass

        def shutdown(self):
            pass

        async def extract_bytes(self, _p0, _p1, _p2):
            return {}

        def supported_mime_types(self):
            return []

    _ = register_document_extractor(_TestStub_register_document_extractor_trait_bridge())
    # Remove the stub again, using the same name that ``name()`` returns.
    unregister_document_extractor("test-extractor")
|
||||
|
||||
|
||||
def test_register_embedding_backend_trait_bridge() -> None:
    """register_embedding_backend: trait bridge.

    Registers a stub object named "test-embedding-backend" and then
    unregisters it; the test passes if neither call raises.
    """

    # Stub plugin with placeholder method bodies.
    class _TestStub_register_embedding_backend_trait_bridge:
        def name(self):
            return "test-embedding-backend"

        def initialize(self):
            pass

        def shutdown(self):
            pass

        def dimensions(self):
            return 1

        async def embed(self, _p0):
            return []

    _ = register_embedding_backend(_TestStub_register_embedding_backend_trait_bridge())
    # Remove the stub again, using the same name that ``name()`` returns.
    unregister_embedding_backend("test-embedding-backend")
|
||||
|
||||
|
||||
def test_register_ocr_backend_trait_bridge() -> None:
    """register_ocr_backend: trait bridge.

    Registers a stub object named "test-backend" and then unregisters it;
    the test passes if neither call raises.
    """

    # Stub plugin with placeholder method bodies.
    class _TestStub_register_ocr_backend_trait_bridge:
        def name(self):
            return "test-backend"

        def initialize(self):
            pass

        def shutdown(self):
            pass

        async def process_image(self, _p0, _p1):
            return {}

        def supports_language(self, _p0):
            return False

        def backend_type(self):
            return {}

    _ = register_ocr_backend(_TestStub_register_ocr_backend_trait_bridge())
    # Remove the stub again, using the same name that ``name()`` returns.
    unregister_ocr_backend("test-backend")
|
||||
|
||||
|
||||
def test_register_post_processor_trait_bridge() -> None:
    """register_post_processor: trait bridge.

    Registers a stub object named "test-processor" and then unregisters it;
    the test passes if neither call raises.
    """

    # Stub plugin with placeholder method bodies.
    class _TestStub_register_post_processor_trait_bridge:
        def name(self):
            return "test-processor"

        def initialize(self):
            pass

        def shutdown(self):
            pass

        async def process(self, _p0, _p1):
            return None

        def processing_stage(self):
            return {}

    _ = register_post_processor(_TestStub_register_post_processor_trait_bridge())
    # Remove the stub again, using the same name that ``name()`` returns.
    unregister_post_processor("test-processor")
|
||||
|
||||
|
||||
def test_register_renderer_trait_bridge() -> None:
    """register_renderer: trait bridge.

    Registers a stub object named "test-renderer" and then unregisters it;
    the test passes if neither call raises.
    """

    # Stub plugin with placeholder method bodies.
    class _TestStub_register_renderer_trait_bridge:
        def name(self):
            return "test-renderer"

        def initialize(self):
            pass

        def shutdown(self):
            pass

        def render(self, _p0):
            return ""

    _ = register_renderer(_TestStub_register_renderer_trait_bridge())
    # Remove the stub again, using the same name that ``name()`` returns.
    unregister_renderer("test-renderer")
|
||||
|
||||
|
||||
def test_register_validator_trait_bridge() -> None:
    """register_validator: trait bridge.

    Registers a stub object named "test-validator" and then unregisters it;
    the test passes if neither call raises.
    """

    # Stub plugin with placeholder method bodies.
    class _TestStub_register_validator_trait_bridge:
        def name(self):
            return "test-validator"

        def initialize(self):
            pass

        def shutdown(self):
            pass

        async def validate(self, _p0, _p1):
            return None

    _ = register_validator(_TestStub_register_validator_trait_bridge())
    # Remove the stub again, using the same name that ``name()`` returns.
    unregister_validator("test-validator")
|
||||
|
||||
|
||||
def test_unregister_document_extractor_after_register() -> None:
    """unregister_document_extractor.

    Calls the function with "test-extractor" and only checks that it does
    not raise; nothing is registered inside this test.
    """
    name = "test-extractor"

    _ = unregister_document_extractor(name)
|
||||
|
||||
|
||||
def test_unregister_embedding_backend_after_register() -> None:
    """unregister_embedding_backend.

    Calls the function with "test-embedding-backend" and only checks that it
    does not raise; nothing is registered inside this test.
    """
    name = "test-embedding-backend"

    _ = unregister_embedding_backend(name)
|
||||
|
||||
|
||||
def test_unregister_post_processor_after_register() -> None:
    """unregister_post_processor.

    Calls the function with "test-processor" and only checks that it does
    not raise; nothing is registered inside this test.
    """
    name = "test-processor"

    _ = unregister_post_processor(name)
|
||||
|
||||
|
||||
def test_unregister_renderer_after_register() -> None:
    """unregister_renderer.

    Calls the function with "test-renderer" and only checks that it does not
    raise; nothing is registered inside this test.
    """
    name = "test-renderer"

    _ = unregister_renderer(name)
|
||||
|
||||
|
||||
def test_unregister_validator_after_register() -> None:
    """unregister_validator.

    Calls the function with "test-validator" and only checks that it does
    not raise; nothing is registered inside this test.
    """
    name = "test-validator"

    _ = unregister_validator(name)
|
||||
39
e2e/python/tests/test_post_processor_management.py
generated
Normal file
39
e2e/python/tests/test_post_processor_management.py
generated
Normal file
@@ -0,0 +1,39 @@
|
||||
# This file is auto-generated by alef — DO NOT EDIT.
|
||||
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
# To regenerate: alef generate
|
||||
# To verify freshness: alef verify --exit-code
|
||||
# Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
"""E2e tests for category: post_processor_management."""
|
||||
|
||||
import pytest # noqa: F401
|
||||
from kreuzberg import clear_post_processors, list_post_processors
|
||||
|
||||
|
||||
def _alef_e2e_text(value: object) -> str:
|
||||
return "" if value is None else str(value)
|
||||
|
||||
|
||||
def _alef_e2e_item_texts(item: object) -> tuple[str, ...]:
    """Collect the searchable text forms of ``item``.

    Returns an 8-tuple: ``str(item)``; the ``kind``, ``name``, ``source``,
    ``alias``, ``text`` and ``signature`` attributes (a missing or ``None``
    attribute becomes ``""``); and finally the space-joined ``str()`` of each
    entry of ``item.items`` when that attribute is a ``list`` (``""``
    otherwise).
    """
    raw_items = getattr(item, "items", None)
    # Only a real list is joined; anything else (e.g. a bound method) yields "".
    items_text = " ".join(str(value) for value in raw_items) if isinstance(raw_items, list) else ""
    return (
        _alef_e2e_text(item),
        _alef_e2e_text(getattr(item, "kind", None)),
        _alef_e2e_text(getattr(item, "name", None)),
        _alef_e2e_text(getattr(item, "source", None)),
        _alef_e2e_text(getattr(item, "alias", None)),
        _alef_e2e_text(getattr(item, "text", None)),
        _alef_e2e_text(getattr(item, "signature", None)),
        items_text,
    )
|
||||
|
||||
|
||||
def test_post_processors_clear() -> None:
    """Call ``clear_post_processors`` and expect it not to raise.

    NOTE(review): the original description promised "verify list is empty",
    but nothing is asserted about the registry contents afterwards.
    """
    _ = clear_post_processors()
|
||||
|
||||
|
||||
def test_post_processors_list() -> None:
    """List all registered post-processors.

    The returned value is discarded; the test only checks that the call
    does not raise.
    """
    _ = list_post_processors()
|
||||
59
e2e/python/tests/test_registry.py
generated
Normal file
59
e2e/python/tests/test_registry.py
generated
Normal file
@@ -0,0 +1,59 @@
|
||||
# This file is auto-generated by alef — DO NOT EDIT.
|
||||
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
# To regenerate: alef generate
|
||||
# To verify freshness: alef verify --exit-code
|
||||
# Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
"""E2e tests for category: registry."""
|
||||
|
||||
import pytest # noqa: F401
|
||||
from kreuzberg import list_document_extractors, list_embedding_backends, list_ocr_backends, list_post_processors, list_renderers, list_validators
|
||||
|
||||
|
||||
def _alef_e2e_text(value: object) -> str:
|
||||
return "" if value is None else str(value)
|
||||
|
||||
|
||||
def _alef_e2e_item_texts(item: object) -> tuple[str, ...]:
    """Collect the searchable text forms of ``item``.

    Returns an 8-tuple: ``str(item)``; the ``kind``, ``name``, ``source``,
    ``alias``, ``text`` and ``signature`` attributes (a missing or ``None``
    attribute becomes ``""``); and finally the space-joined ``str()`` of each
    entry of ``item.items`` when that attribute is a ``list`` (``""``
    otherwise).
    """
    raw_items = getattr(item, "items", None)
    # Only a real list is joined; anything else (e.g. a bound method) yields "".
    items_text = " ".join(str(value) for value in raw_items) if isinstance(raw_items, list) else ""
    return (
        _alef_e2e_text(item),
        _alef_e2e_text(getattr(item, "kind", None)),
        _alef_e2e_text(getattr(item, "name", None)),
        _alef_e2e_text(getattr(item, "source", None)),
        _alef_e2e_text(getattr(item, "alias", None)),
        _alef_e2e_text(getattr(item, "text", None)),
        _alef_e2e_text(getattr(item, "signature", None)),
        items_text,
    )
|
||||
|
||||
|
||||
def test_list_document_extractors() -> None:
    """List document extractors; only checks that the call does not raise."""
    _ = list_document_extractors()
|
||||
|
||||
|
||||
def test_list_embedding_backends() -> None:
    """List embedding backends; only checks that the call does not raise."""
    _ = list_embedding_backends()
|
||||
|
||||
|
||||
def test_list_ocr_backends() -> None:
    """List OCR backends; only checks that the call does not raise."""
    _ = list_ocr_backends()
|
||||
|
||||
|
||||
def test_list_post_processors() -> None:
    """List post-processors; only checks that the call does not raise."""
    _ = list_post_processors()
|
||||
|
||||
|
||||
def test_list_renderers() -> None:
    """List renderers; only checks that the call does not raise."""
    _ = list_renderers()
|
||||
|
||||
|
||||
def test_list_validators() -> None:
    """List validators; only checks that the call does not raise."""
    _ = list_validators()
|
||||
50
e2e/python/tests/test_registry_operations.py
generated
Normal file
50
e2e/python/tests/test_registry_operations.py
generated
Normal file
@@ -0,0 +1,50 @@
|
||||
# This file is auto-generated by alef — DO NOT EDIT.
|
||||
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
# To regenerate: alef generate
|
||||
# To verify freshness: alef verify --exit-code
|
||||
# Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
"""E2e tests for category: registry_operations."""
|
||||
|
||||
import pytest # noqa: F401
|
||||
from kreuzberg import get_extensions_for_mime
|
||||
|
||||
|
||||
def _alef_e2e_text(value: object) -> str:
|
||||
return "" if value is None else str(value)
|
||||
|
||||
|
||||
def _alef_e2e_item_texts(item: object) -> tuple[str, ...]:
    """Collect the searchable text forms of ``item``.

    Returns an 8-tuple: ``str(item)``; the ``kind``, ``name``, ``source``,
    ``alias``, ``text`` and ``signature`` attributes (a missing or ``None``
    attribute becomes ``""``); and finally the space-joined ``str()`` of each
    entry of ``item.items`` when that attribute is a ``list`` (``""``
    otherwise).
    """
    raw_items = getattr(item, "items", None)
    # Only a real list is joined; anything else (e.g. a bound method) yields "".
    items_text = " ".join(str(value) for value in raw_items) if isinstance(raw_items, list) else ""
    return (
        _alef_e2e_text(item),
        _alef_e2e_text(getattr(item, "kind", None)),
        _alef_e2e_text(getattr(item, "name", None)),
        _alef_e2e_text(getattr(item, "source", None)),
        _alef_e2e_text(getattr(item, "alias", None)),
        _alef_e2e_text(getattr(item, "text", None)),
        _alef_e2e_text(getattr(item, "signature", None)),
        items_text,
    )
|
||||
|
||||
|
||||
def test_extensions_docx() -> None:
    """Get file extensions for DOCX MIME type.

    The result is discarded; the test passes if the lookup does not raise.
    """
    mime_type = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"

    _ = get_extensions_for_mime(mime_type)
|
||||
|
||||
|
||||
def test_extensions_html() -> None:
    """Get file extensions for HTML MIME type.

    The result is discarded; the test passes if the lookup does not raise.
    """
    mime_type = "text/html"

    _ = get_extensions_for_mime(mime_type)
|
||||
|
||||
|
||||
def test_extensions_pdf() -> None:
    """Get file extensions for PDF MIME type.

    The result is discarded; the test passes if the lookup does not raise.
    """
    mime_type = "application/pdf"

    _ = get_extensions_for_mime(mime_type)
|
||||
39
e2e/python/tests/test_renderer_management.py
generated
Normal file
39
e2e/python/tests/test_renderer_management.py
generated
Normal file
@@ -0,0 +1,39 @@
|
||||
# This file is auto-generated by alef — DO NOT EDIT.
|
||||
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
# To regenerate: alef generate
|
||||
# To verify freshness: alef verify --exit-code
|
||||
# Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
"""E2e tests for category: renderer_management."""
|
||||
|
||||
import pytest # noqa: F401
|
||||
from kreuzberg import clear_renderers, list_renderers
|
||||
|
||||
|
||||
def _alef_e2e_text(value: object) -> str:
|
||||
return "" if value is None else str(value)
|
||||
|
||||
|
||||
def _alef_e2e_item_texts(item: object) -> tuple[str, ...]:
    """Collect the searchable text forms of ``item``.

    Returns an 8-tuple: ``str(item)``; the ``kind``, ``name``, ``source``,
    ``alias``, ``text`` and ``signature`` attributes (a missing or ``None``
    attribute becomes ``""``); and finally the space-joined ``str()`` of each
    entry of ``item.items`` when that attribute is a ``list`` (``""``
    otherwise).
    """
    raw_items = getattr(item, "items", None)
    # Only a real list is joined; anything else (e.g. a bound method) yields "".
    items_text = " ".join(str(value) for value in raw_items) if isinstance(raw_items, list) else ""
    return (
        _alef_e2e_text(item),
        _alef_e2e_text(getattr(item, "kind", None)),
        _alef_e2e_text(getattr(item, "name", None)),
        _alef_e2e_text(getattr(item, "source", None)),
        _alef_e2e_text(getattr(item, "alias", None)),
        _alef_e2e_text(getattr(item, "text", None)),
        _alef_e2e_text(getattr(item, "signature", None)),
        items_text,
    )
|
||||
|
||||
|
||||
def test_renderers_clear() -> None:
    """Call ``clear_renderers`` and expect it not to raise.

    NOTE(review): the original description promised "verify list is empty",
    but nothing is asserted about the registry contents afterwards.
    """
    _ = clear_renderers()
|
||||
|
||||
|
||||
def test_renderers_list() -> None:
    """List all registered renderers.

    The returned value is discarded; the test only checks that the call
    does not raise.
    """
    _ = list_renderers()
|
||||
148
e2e/python/tests/test_smoke.py
generated
Normal file
148
e2e/python/tests/test_smoke.py
generated
Normal file
@@ -0,0 +1,148 @@
|
||||
# This file is auto-generated by alef — DO NOT EDIT.
|
||||
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
# To regenerate: alef generate
|
||||
# To verify freshness: alef verify --exit-code
|
||||
# Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
"""E2e tests for category: smoke."""
|
||||
|
||||
from pathlib import Path
|
||||
import pytest # noqa: F401
|
||||
from kreuzberg import extract_bytes, extract_file, ExtractionConfig
|
||||
|
||||
|
||||
def _alef_e2e_text(value: object) -> str:
|
||||
return "" if value is None else str(value)
|
||||
|
||||
|
||||
def _alef_e2e_item_texts(item: object) -> tuple[str, ...]:
    """Collect the searchable text forms of ``item``.

    Returns an 8-tuple: ``str(item)``; the ``kind``, ``name``, ``source``,
    ``alias``, ``text`` and ``signature`` attributes (a missing or ``None``
    attribute becomes ``""``); and finally the space-joined ``str()`` of each
    entry of ``item.items`` when that attribute is a ``list`` (``""``
    otherwise).
    """
    raw_items = getattr(item, "items", None)
    # Only a real list is joined; anything else (e.g. a bound method) yields "".
    items_text = " ".join(str(value) for value in raw_items) if isinstance(raw_items, list) else ""
    return (
        _alef_e2e_text(item),
        _alef_e2e_text(getattr(item, "kind", None)),
        _alef_e2e_text(getattr(item, "name", None)),
        _alef_e2e_text(getattr(item, "source", None)),
        _alef_e2e_text(getattr(item, "alias", None)),
        _alef_e2e_text(getattr(item, "text", None)),
        _alef_e2e_text(getattr(item, "signature", None)),
        items_text,
    )
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_ocr_image_png() -> None:
    """OCR: PNG image extraction with OCR enabled.

    In WASM this exercises the Uint8Array bridge parameter and Promise await
    in the generated OcrBackend bridge.

    Checks the reported MIME type, that some content was produced, and that
    at least one of the expected words appears in it.
    """
    content = Path("images/test_hello_world.png").read_bytes()
    mime_type = "image/png"
    config = ExtractionConfig()

    result = await extract_bytes(content, mime_type, config)
    assert result.mime_type.strip() == "image/png"  # noqa: S101
    assert len(result.content) >= 1  # noqa: S101
    assert any(v in result.content for v in ["Hello", "World", "hello", "world"])  # noqa: S101
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_smoke_docx_basic() -> None:
    """Smoke test: DOCX with formatted text.

    Checks the reported MIME type, a minimum content length of 20, and that
    at least one of the expected words appears in the content.
    """
    path = "docx/fake.docx"
    mime_type = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
    config = ExtractionConfig()

    result = await extract_file(path, mime_type, config)
    assert result.mime_type.strip() == "application/vnd.openxmlformats-officedocument.wordprocessingml.document"  # noqa: S101
    assert len(result.content) >= 20  # noqa: S101
    assert any(v in result.content for v in ["Lorem", "ipsum", "document", "text"])  # noqa: S101
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_smoke_html_basic() -> None:
    """Smoke test: HTML table extraction.

    Checks the reported MIME type, a minimum content length of 10, and that
    at least one of the expected strings appears in the content.
    """
    path = "html/simple_table.html"
    mime_type = "text/html"
    config = ExtractionConfig()

    result = await extract_file(path, mime_type, config)
    assert result.mime_type.strip() == "text/html"  # noqa: S101
    assert len(result.content) >= 10  # noqa: S101
    assert any(v in result.content for v in ["Sample Data Table", "Laptop", "Electronics", "Product"])  # noqa: S101
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_smoke_image_png() -> None:
    """Smoke case: extract a PNG with OCR disabled (metadata only).

    No explicit MIME type is passed (``None``); the test only checks that the
    result reports ``image/png``.
    """
    no_ocr = ExtractionConfig(disable_ocr=True)

    result = await extract_file("images/sample.png", None, no_ocr)

    assert result.mime_type.strip() == "image/png"  # noqa: S101
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_smoke_json_basic() -> None:
    """Smoke case: extract a JSON file and check MIME type and minimum length."""
    json_mime = "application/json"

    result = await extract_file("json/simple.json", json_mime, ExtractionConfig())

    assert result.mime_type.strip() == json_mime  # noqa: S101
    assert len(result.content) >= 5  # noqa: S101
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_smoke_pdf_basic() -> None:
    """Smoke case: extract simple text from a PDF memo."""
    pdf_mime = "application/pdf"

    result = await extract_file("pdf/fake_memo.pdf", pdf_mime, ExtractionConfig())

    assert result.mime_type.strip() == pdf_mime  # noqa: S101
    extracted = result.content
    assert len(extracted) >= 50  # noqa: S101
    # Either the memo date or its salutation must be present.
    assert any(phrase in extracted for phrase in ("May 5, 2023", "To Whom it May Concern"))  # noqa: S101
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_smoke_txt_basic() -> None:
    """Smoke case: extract a plain text file and check MIME type and minimum length."""
    text_mime = "text/plain"

    result = await extract_file("text/report.txt", text_mime, ExtractionConfig())

    assert result.mime_type.strip() == text_mime  # noqa: S101
    assert len(result.content) >= 5  # noqa: S101
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_smoke_xlsx_basic() -> None:
    """Smoke case: extract an XLSX workbook with basic spreadsheet data and tables."""
    xlsx_mime = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"

    result = await extract_file("xlsx/stanley_cups.xlsx", xlsx_mime, ExtractionConfig())

    assert result.mime_type.strip() == xlsx_mime  # noqa: S101
    assert len(result.content) >= 100  # noqa: S101
    # Every one of these strings must be present, checked in this order so the
    # first missing one is what the failure reports.
    required_strings = (
        "Team",
        "Location",
        "Stanley Cups",
        "Blues",
        "Flyers",
        "Maple Leafs",
        "STL",
        "PHI",
        "TOR",
    )
    for required in required_strings:
        assert required in result.content  # noqa: S101
    # skipped: field 'tables' not available on result type
    # skipped: field 'metadata.format.excel.sheet_count' not available on result type
    # skipped: field 'metadata.format.excel.sheet_names' not available on result type
|
||||
39
e2e/python/tests/test_validator_management.py
generated
Normal file
39
e2e/python/tests/test_validator_management.py
generated
Normal file
@@ -0,0 +1,39 @@
|
||||
# This file is auto-generated by alef — DO NOT EDIT.
|
||||
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
# To regenerate: alef generate
|
||||
# To verify freshness: alef verify --exit-code
|
||||
# Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
"""E2e tests for category: validator_management."""
|
||||
|
||||
import pytest # noqa: F401
|
||||
from kreuzberg import clear_validators, list_validators
|
||||
|
||||
|
||||
def _alef_e2e_text(value: object) -> str:
|
||||
return "" if value is None else str(value)
|
||||
|
||||
|
||||
def _alef_e2e_item_texts(item: object) -> tuple[str, ...]:
|
||||
raw_items = getattr(item, "items", None)
|
||||
items_text = " ".join(str(value) for value in raw_items) if isinstance(raw_items, list) else ""
|
||||
return (
|
||||
_alef_e2e_text(item),
|
||||
_alef_e2e_text(getattr(item, "kind", None)),
|
||||
_alef_e2e_text(getattr(item, "name", None)),
|
||||
_alef_e2e_text(getattr(item, "source", None)),
|
||||
_alef_e2e_text(getattr(item, "alias", None)),
|
||||
_alef_e2e_text(getattr(item, "text", None)),
|
||||
_alef_e2e_text(getattr(item, "signature", None)),
|
||||
items_text,
|
||||
)
|
||||
|
||||
|
||||
def test_validators_clear() -> None:
    """Clear all validators and verify list is empty."""
    _ = clear_validators()
    # The case promises an empty registry after clearing, so actually check it
    # instead of only exercising the call.
    # NOTE(review): assumes list_validators() returns a sized collection
    # (e.g. a list of names) that is falsy when empty; confirm against the binding.
    assert not list_validators()  # noqa: S101
|
||||
|
||||
|
||||
def test_validators_list() -> None:
    """Call ``list_validators`` and require only that it does not raise."""
    # The return value is intentionally discarded; no assertion is made on it.
    list_validators()
|
||||
208
e2e/python/uv.lock
generated
Normal file
208
e2e/python/uv.lock
generated
Normal file
@@ -0,0 +1,208 @@
|
||||
version = 1
|
||||
revision = 3
|
||||
requires-python = ">=3.10"
|
||||
|
||||
[[package]]
|
||||
name = "backports-asyncio-runner"
|
||||
version = "1.2.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/8e/ff/70dca7d7cb1cbc0edb2c6cc0c38b65cba36cccc491eca64cabd5fe7f8670/backports_asyncio_runner-1.2.0.tar.gz", hash = "sha256:a5aa7b2b7d8f8bfcaa2b57313f70792df84e32a2a746f585213373f900b42162", size = 69893, upload-time = "2025-07-02T02:27:15.685Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/a0/59/76ab57e3fe74484f48a53f8e337171b4a2349e506eabe136d7e01d059086/backports_asyncio_runner-1.2.0-py3-none-any.whl", hash = "sha256:0da0a936a8aeb554eccb426dc55af3ba63bcdc69fa1a600b5bb305413a4477b5", size = 12313, upload-time = "2025-07-02T02:27:14.263Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "colorama"
|
||||
version = "0.4.6"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697, upload-time = "2022-10-25T02:36:22.414Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "exceptiongroup"
|
||||
version = "1.3.1"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "typing-extensions", marker = "python_full_version < '3.13'" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/50/79/66800aadf48771f6b62f7eb014e352e5d06856655206165d775e675a02c9/exceptiongroup-1.3.1.tar.gz", hash = "sha256:8b412432c6055b0b7d14c310000ae93352ed6754f70fa8f7c34141f91c4e3219", size = 30371, upload-time = "2025-11-21T23:01:54.787Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/8a/0e/97c33bf5009bdbac74fd2beace167cab3f978feb69cc36f1ef79360d6c4e/exceptiongroup-1.3.1-py3-none-any.whl", hash = "sha256:a7a39a3bd276781e98394987d3a5701d0c4edffb633bb7a5144577f82c773598", size = 16740, upload-time = "2025-11-21T23:01:53.443Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "iniconfig"
|
||||
version = "2.3.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/72/34/14ca021ce8e5dfedc35312d08ba8bf51fdd999c576889fc2c24cb97f4f10/iniconfig-2.3.0.tar.gz", hash = "sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730", size = 20503, upload-time = "2025-10-18T21:55:43.219Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484, upload-time = "2025-10-18T21:55:41.639Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "kreuzberg"
|
||||
version = "5.0.0rc3"
|
||||
source = { directory = "../../packages/python" }
|
||||
|
||||
[package.metadata]
|
||||
|
||||
[package.metadata.requires-dev]
|
||||
dev = [
|
||||
{ name = "mypy", specifier = ">=1.19" },
|
||||
{ name = "ruff", specifier = ">=0.14.8" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "kreuzberg-e2e"
|
||||
version = "0.0.0"
|
||||
source = { editable = "." }
|
||||
dependencies = [
|
||||
{ name = "kreuzberg" },
|
||||
{ name = "pytest" },
|
||||
{ name = "pytest-asyncio" },
|
||||
{ name = "pytest-timeout" },
|
||||
]
|
||||
|
||||
[package.metadata]
|
||||
requires-dist = [
|
||||
{ name = "kreuzberg", directory = "../../packages/python" },
|
||||
{ name = "pytest", specifier = ">=7.4" },
|
||||
{ name = "pytest-asyncio", specifier = ">=0.23" },
|
||||
{ name = "pytest-timeout", specifier = ">=2.1" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "packaging"
|
||||
version = "26.2"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/d7/f1/e7a6dd94a8d4a5626c03e4e99c87f241ba9e350cd9e6d75123f992427270/packaging-26.2.tar.gz", hash = "sha256:ff452ff5a3e828ce110190feff1178bb1f2ea2281fa2075aadb987c2fb221661", size = 228134, upload-time = "2026-04-24T20:15:23.917Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/df/b2/87e62e8c3e2f4b32e5fe99e0b86d576da1312593b39f47d8ceef365e95ed/packaging-26.2-py3-none-any.whl", hash = "sha256:5fc45236b9446107ff2415ce77c807cee2862cb6fac22b8a73826d0693b0980e", size = 100195, upload-time = "2026-04-24T20:15:22.081Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pluggy"
|
||||
version = "1.6.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/f9/e2/3e91f31a7d2b083fe6ef3fa267035b518369d9511ffab804f839851d2779/pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3", size = 69412, upload-time = "2025-05-15T12:30:07.975Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pygments"
|
||||
version = "2.20.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/c3/b2/bc9c9196916376152d655522fdcebac55e66de6603a76a02bca1b6414f6c/pygments-2.20.0.tar.gz", hash = "sha256:6757cd03768053ff99f3039c1a36d6c0aa0b263438fcab17520b30a303a82b5f", size = 4955991, upload-time = "2026-03-29T13:29:33.898Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/f4/7e/a72dd26f3b0f4f2bf1dd8923c85f7ceb43172af56d63c7383eb62b332364/pygments-2.20.0-py3-none-any.whl", hash = "sha256:81a9e26dd42fd28a23a2d169d86d7ac03b46e2f8b59ed4698fb4785f946d0176", size = 1231151, upload-time = "2026-03-29T13:29:30.038Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pytest"
|
||||
version = "9.0.3"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "colorama", marker = "sys_platform == 'win32'" },
|
||||
{ name = "exceptiongroup", marker = "python_full_version < '3.11'" },
|
||||
{ name = "iniconfig" },
|
||||
{ name = "packaging" },
|
||||
{ name = "pluggy" },
|
||||
{ name = "pygments" },
|
||||
{ name = "tomli", marker = "python_full_version < '3.11'" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/7d/0d/549bd94f1a0a402dc8cf64563a117c0f3765662e2e668477624baeec44d5/pytest-9.0.3.tar.gz", hash = "sha256:b86ada508af81d19edeb213c681b1d48246c1a91d304c6c81a427674c17eb91c", size = 1572165, upload-time = "2026-04-07T17:16:18.027Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/d4/24/a372aaf5c9b7208e7112038812994107bc65a84cd00e0354a88c2c77a617/pytest-9.0.3-py3-none-any.whl", hash = "sha256:2c5efc453d45394fdd706ade797c0a81091eccd1d6e4bccfcd476e2b8e0ab5d9", size = 375249, upload-time = "2026-04-07T17:16:16.13Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pytest-asyncio"
|
||||
version = "1.3.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "backports-asyncio-runner", marker = "python_full_version < '3.11'" },
|
||||
{ name = "pytest" },
|
||||
{ name = "typing-extensions", marker = "python_full_version < '3.13'" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/90/2c/8af215c0f776415f3590cac4f9086ccefd6fd463befeae41cd4d3f193e5a/pytest_asyncio-1.3.0.tar.gz", hash = "sha256:d7f52f36d231b80ee124cd216ffb19369aa168fc10095013c6b014a34d3ee9e5", size = 50087, upload-time = "2025-11-10T16:07:47.256Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/e5/35/f8b19922b6a25bc0880171a2f1a003eaeb93657475193ab516fd87cac9da/pytest_asyncio-1.3.0-py3-none-any.whl", hash = "sha256:611e26147c7f77640e6d0a92a38ed17c3e9848063698d5c93d5aa7aa11cebff5", size = 15075, upload-time = "2025-11-10T16:07:45.537Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pytest-timeout"
|
||||
version = "2.4.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "pytest" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/ac/82/4c9ecabab13363e72d880f2fb504c5f750433b2b6f16e99f4ec21ada284c/pytest_timeout-2.4.0.tar.gz", hash = "sha256:7e68e90b01f9eff71332b25001f85c75495fc4e3a836701876183c4bcfd0540a", size = 17973, upload-time = "2025-05-05T19:44:34.99Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/fa/b6/3127540ecdf1464a00e5a01ee60a1b09175f6913f0644ac748494d9c4b21/pytest_timeout-2.4.0-py3-none-any.whl", hash = "sha256:c42667e5cdadb151aeb5b26d114aff6bdf5a907f176a007a30b940d3d865b5c2", size = 14382, upload-time = "2025-05-05T19:44:33.502Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tomli"
|
||||
version = "2.4.1"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/22/de/48c59722572767841493b26183a0d1cc411d54fd759c5607c4590b6563a6/tomli-2.4.1.tar.gz", hash = "sha256:7c7e1a961a0b2f2472c1ac5b69affa0ae1132c39adcb67aba98568702b9cc23f", size = 17543, upload-time = "2026-03-25T20:22:03.828Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/f4/11/db3d5885d8528263d8adc260bb2d28ebf1270b96e98f0e0268d32b8d9900/tomli-2.4.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f8f0fc26ec2cc2b965b7a3b87cd19c5c6b8c5e5f436b984e85f486d652285c30", size = 154704, upload-time = "2026-03-25T20:21:10.473Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/6d/f7/675db52c7e46064a9aa928885a9b20f4124ecb9bc2e1ce74c9106648d202/tomli-2.4.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4ab97e64ccda8756376892c53a72bd1f964e519c77236368527f758fbc36a53a", size = 149454, upload-time = "2026-03-25T20:21:12.036Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/61/71/81c50943cf953efa35bce7646caab3cf457a7d8c030b27cfb40d7235f9ee/tomli-2.4.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:96481a5786729fd470164b47cdb3e0e58062a496f455ee41b4403be77cb5a076", size = 237561, upload-time = "2026-03-25T20:21:13.098Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/48/c1/f41d9cb618acccca7df82aaf682f9b49013c9397212cb9f53219e3abac37/tomli-2.4.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5a881ab208c0baf688221f8cecc5401bd291d67e38a1ac884d6736cbcd8247e9", size = 243824, upload-time = "2026-03-25T20:21:14.569Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/22/e4/5a816ecdd1f8ca51fb756ef684b90f2780afc52fc67f987e3c61d800a46d/tomli-2.4.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:47149d5bd38761ac8be13a84864bf0b7b70bc051806bc3669ab1cbc56216b23c", size = 242227, upload-time = "2026-03-25T20:21:15.712Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/6b/49/2b2a0ef529aa6eec245d25f0c703e020a73955ad7edf73e7f54ddc608aa5/tomli-2.4.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ec9bfaf3ad2df51ace80688143a6a4ebc09a248f6ff781a9945e51937008fcbc", size = 247859, upload-time = "2026-03-25T20:21:17.001Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/83/bd/6c1a630eaca337e1e78c5903104f831bda934c426f9231429396ce3c3467/tomli-2.4.1-cp311-cp311-win32.whl", hash = "sha256:ff2983983d34813c1aeb0fa89091e76c3a22889ee83ab27c5eeb45100560c049", size = 97204, upload-time = "2026-03-25T20:21:18.079Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/42/59/71461df1a885647e10b6bb7802d0b8e66480c61f3f43079e0dcd315b3954/tomli-2.4.1-cp311-cp311-win_amd64.whl", hash = "sha256:5ee18d9ebdb417e384b58fe414e8d6af9f4e7a0ae761519fb50f721de398dd4e", size = 108084, upload-time = "2026-03-25T20:21:18.978Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/b8/83/dceca96142499c069475b790e7913b1044c1a4337e700751f48ed723f883/tomli-2.4.1-cp311-cp311-win_arm64.whl", hash = "sha256:c2541745709bad0264b7d4705ad453b76ccd191e64aa6f0fc66b69a293a45ece", size = 95285, upload-time = "2026-03-25T20:21:20.309Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/c1/ba/42f134a3fe2b370f555f44b1d72feebb94debcab01676bf918d0cb70e9aa/tomli-2.4.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:c742f741d58a28940ce01d58f0ab2ea3ced8b12402f162f4d534dfe18ba1cd6a", size = 155924, upload-time = "2026-03-25T20:21:21.626Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/dc/c7/62d7a17c26487ade21c5422b646110f2162f1fcc95980ef7f63e73c68f14/tomli-2.4.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7f86fd587c4ed9dd76f318225e7d9b29cfc5a9d43de44e5754db8d1128487085", size = 150018, upload-time = "2026-03-25T20:21:23.002Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/5c/05/79d13d7c15f13bdef410bdd49a6485b1c37d28968314eabee452c22a7fda/tomli-2.4.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ff18e6a727ee0ab0388507b89d1bc6a22b138d1e2fa56d1ad494586d61d2eae9", size = 244948, upload-time = "2026-03-25T20:21:24.04Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/10/90/d62ce007a1c80d0b2c93e02cab211224756240884751b94ca72df8a875ca/tomli-2.4.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:136443dbd7e1dee43c68ac2694fde36b2849865fa258d39bf822c10e8068eac5", size = 253341, upload-time = "2026-03-25T20:21:25.177Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/1a/7e/caf6496d60152ad4ed09282c1885cca4eea150bfd007da84aea07bcc0a3e/tomli-2.4.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:5e262d41726bc187e69af7825504c933b6794dc3fbd5945e41a79bb14c31f585", size = 248159, upload-time = "2026-03-25T20:21:26.364Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/99/e7/c6f69c3120de34bbd882c6fba7975f3d7a746e9218e56ab46a1bc4b42552/tomli-2.4.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:5cb41aa38891e073ee49d55fbc7839cfdb2bc0e600add13874d048c94aadddd1", size = 253290, upload-time = "2026-03-25T20:21:27.46Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/d6/2f/4a3c322f22c5c66c4b836ec58211641a4067364f5dcdd7b974b4c5da300c/tomli-2.4.1-cp312-cp312-win32.whl", hash = "sha256:da25dc3563bff5965356133435b757a795a17b17d01dbc0f42fb32447ddfd917", size = 98141, upload-time = "2026-03-25T20:21:28.492Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/24/22/4daacd05391b92c55759d55eaee21e1dfaea86ce5c571f10083360adf534/tomli-2.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:52c8ef851d9a240f11a88c003eacb03c31fc1c9c4ec64a99a0f922b93874fda9", size = 108847, upload-time = "2026-03-25T20:21:29.386Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/68/fd/70e768887666ddd9e9f5d85129e84910f2db2796f9096aa02b721a53098d/tomli-2.4.1-cp312-cp312-win_arm64.whl", hash = "sha256:f758f1b9299d059cc3f6546ae2af89670cb1c4d48ea29c3cacc4fe7de3058257", size = 95088, upload-time = "2026-03-25T20:21:30.677Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/07/06/b823a7e818c756d9a7123ba2cda7d07bc2dd32835648d1a7b7b7a05d848d/tomli-2.4.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:36d2bd2ad5fb9eaddba5226aa02c8ec3fa4f192631e347b3ed28186d43be6b54", size = 155866, upload-time = "2026-03-25T20:21:31.65Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/14/6f/12645cf7f08e1a20c7eb8c297c6f11d31c1b50f316a7e7e1e1de6e2e7b7e/tomli-2.4.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:eb0dc4e38e6a1fd579e5d50369aa2e10acfc9cace504579b2faabb478e76941a", size = 149887, upload-time = "2026-03-25T20:21:33.028Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/5c/e0/90637574e5e7212c09099c67ad349b04ec4d6020324539297b634a0192b0/tomli-2.4.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c7f2c7f2b9ca6bdeef8f0fa897f8e05085923eb091721675170254cbc5b02897", size = 243704, upload-time = "2026-03-25T20:21:34.51Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/10/8f/d3ddb16c5a4befdf31a23307f72828686ab2096f068eaf56631e136c1fdd/tomli-2.4.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f3c6818a1a86dd6dca7ddcaaf76947d5ba31aecc28cb1b67009a5877c9a64f3f", size = 251628, upload-time = "2026-03-25T20:21:36.012Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/e3/f1/dbeeb9116715abee2485bf0a12d07a8f31af94d71608c171c45f64c0469d/tomli-2.4.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:d312ef37c91508b0ab2cee7da26ec0b3ed2f03ce12bd87a588d771ae15dcf82d", size = 247180, upload-time = "2026-03-25T20:21:37.136Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/d3/74/16336ffd19ed4da28a70959f92f506233bd7cfc2332b20bdb01591e8b1d1/tomli-2.4.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:51529d40e3ca50046d7606fa99ce3956a617f9b36380da3b7f0dd3dd28e68cb5", size = 251674, upload-time = "2026-03-25T20:21:38.298Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/16/f9/229fa3434c590ddf6c0aa9af64d3af4b752540686cace29e6281e3458469/tomli-2.4.1-cp313-cp313-win32.whl", hash = "sha256:2190f2e9dd7508d2a90ded5ed369255980a1bcdd58e52f7fe24b8162bf9fedbd", size = 97976, upload-time = "2026-03-25T20:21:39.316Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/6a/1e/71dfd96bcc1c775420cb8befe7a9d35f2e5b1309798f009dca17b7708c1e/tomli-2.4.1-cp313-cp313-win_amd64.whl", hash = "sha256:8d65a2fbf9d2f8352685bc1364177ee3923d6baf5e7f43ea4959d7d8bc326a36", size = 108755, upload-time = "2026-03-25T20:21:40.248Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/83/7a/d34f422a021d62420b78f5c538e5b102f62bea616d1d75a13f0a88acb04a/tomli-2.4.1-cp313-cp313-win_arm64.whl", hash = "sha256:4b605484e43cdc43f0954ddae319fb75f04cc10dd80d830540060ee7cd0243cd", size = 95265, upload-time = "2026-03-25T20:21:41.219Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/3c/fb/9a5c8d27dbab540869f7c1f8eb0abb3244189ce780ba9cd73f3770662072/tomli-2.4.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:fd0409a3653af6c147209d267a0e4243f0ae46b011aa978b1080359fddc9b6cf", size = 155726, upload-time = "2026-03-25T20:21:42.23Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/62/05/d2f816630cc771ad836af54f5001f47a6f611d2d39535364f148b6a92d6b/tomli-2.4.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:a120733b01c45e9a0c34aeef92bf0cf1d56cfe81ed9d47d562f9ed591a9828ac", size = 149859, upload-time = "2026-03-25T20:21:43.386Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/ce/48/66341bdb858ad9bd0ceab5a86f90eddab127cf8b046418009f2125630ecb/tomli-2.4.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:559db847dc486944896521f68d8190be1c9e719fced785720d2216fe7022b662", size = 244713, upload-time = "2026-03-25T20:21:44.474Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/df/6d/c5fad00d82b3c7a3ab6189bd4b10e60466f22cfe8a08a9394185c8a8111c/tomli-2.4.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:01f520d4f53ef97964a240a035ec2a869fe1a37dde002b57ebc4417a27ccd853", size = 252084, upload-time = "2026-03-25T20:21:45.62Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/00/71/3a69e86f3eafe8c7a59d008d245888051005bd657760e96d5fbfb0b740c2/tomli-2.4.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7f94b27a62cfad8496c8d2513e1a222dd446f095fca8987fceef261225538a15", size = 247973, upload-time = "2026-03-25T20:21:46.937Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/67/50/361e986652847fec4bd5e4a0208752fbe64689c603c7ae5ea7cb16b1c0ca/tomli-2.4.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:ede3e6487c5ef5d28634ba3f31f989030ad6af71edfb0055cbbd14189ff240ba", size = 256223, upload-time = "2026-03-25T20:21:48.467Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/8c/9a/b4173689a9203472e5467217e0154b00e260621caa227b6fa01feab16998/tomli-2.4.1-cp314-cp314-win32.whl", hash = "sha256:3d48a93ee1c9b79c04bb38772ee1b64dcf18ff43085896ea460ca8dec96f35f6", size = 98973, upload-time = "2026-03-25T20:21:49.526Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/14/58/640ac93bf230cd27d002462c9af0d837779f8773bc03dee06b5835208214/tomli-2.4.1-cp314-cp314-win_amd64.whl", hash = "sha256:88dceee75c2c63af144e456745e10101eb67361050196b0b6af5d717254dddf7", size = 109082, upload-time = "2026-03-25T20:21:50.506Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/d5/2f/702d5e05b227401c1068f0d386d79a589bb12bf64c3d2c72ce0631e3bc49/tomli-2.4.1-cp314-cp314-win_arm64.whl", hash = "sha256:b8c198f8c1805dc42708689ed6864951fd2494f924149d3e4bce7710f8eb5232", size = 96490, upload-time = "2026-03-25T20:21:51.474Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/45/4b/b877b05c8ba62927d9865dd980e34a755de541eb65fffba52b4cc495d4d2/tomli-2.4.1-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:d4d8fe59808a54658fcc0160ecfb1b30f9089906c50b23bcb4c69eddc19ec2b4", size = 164263, upload-time = "2026-03-25T20:21:52.543Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/24/79/6ab420d37a270b89f7195dec5448f79400d9e9c1826df982f3f8e97b24fd/tomli-2.4.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:7008df2e7655c495dd12d2a4ad038ff878d4ca4b81fccaf82b714e07eae4402c", size = 160736, upload-time = "2026-03-25T20:21:53.674Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/02/e0/3630057d8eb170310785723ed5adcdfb7d50cb7e6455f85ba8a3deed642b/tomli-2.4.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1d8591993e228b0c930c4bb0db464bdad97b3289fb981255d6c9a41aedc84b2d", size = 270717, upload-time = "2026-03-25T20:21:55.129Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/7a/b4/1613716072e544d1a7891f548d8f9ec6ce2faf42ca65acae01d76ea06bb0/tomli-2.4.1-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:734e20b57ba95624ecf1841e72b53f6e186355e216e5412de414e3c51e5e3c41", size = 278461, upload-time = "2026-03-25T20:21:56.228Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/05/38/30f541baf6a3f6df77b3df16b01ba319221389e2da59427e221ef417ac0c/tomli-2.4.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:8a650c2dbafa08d42e51ba0b62740dae4ecb9338eefa093aa5c78ceb546fcd5c", size = 274855, upload-time = "2026-03-25T20:21:57.653Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/77/a3/ec9dd4fd2c38e98de34223b995a3b34813e6bdadf86c75314c928350ed14/tomli-2.4.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:504aa796fe0569bb43171066009ead363de03675276d2d121ac1a4572397870f", size = 283144, upload-time = "2026-03-25T20:21:59.089Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/ef/be/605a6261cac79fba2ec0c9827e986e00323a1945700969b8ee0b30d85453/tomli-2.4.1-cp314-cp314t-win32.whl", hash = "sha256:b1d22e6e9387bf4739fbe23bfa80e93f6b0373a7f1b96c6227c32bef95a4d7a8", size = 108683, upload-time = "2026-03-25T20:22:00.214Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/12/64/da524626d3b9cc40c168a13da8335fe1c51be12c0a63685cc6db7308daae/tomli-2.4.1-cp314-cp314t-win_amd64.whl", hash = "sha256:2c1c351919aca02858f740c6d33adea0c5deea37f9ecca1cc1ef9e884a619d26", size = 121196, upload-time = "2026-03-25T20:22:01.169Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/5a/cd/e80b62269fc78fc36c9af5a6b89c835baa8af28ff5ad28c7028d60860320/tomli-2.4.1-cp314-cp314t-win_arm64.whl", hash = "sha256:eab21f45c7f66c13f2a9e0e1535309cee140182a9cdae1e041d02e47291e8396", size = 100393, upload-time = "2026-03-25T20:22:02.137Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/7b/61/cceae43728b7de99d9b847560c262873a1f6c98202171fd5ed62640b494b/tomli-2.4.1-py3-none-any.whl", hash = "sha256:0d85819802132122da43cb86656f8d1f8c6587d54ae7dcaf30e90533028b49fe", size = 14583, upload-time = "2026-03-25T20:22:03.012Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "typing-extensions"
|
||||
version = "4.15.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/72/94/1a15dd82efb362ac84269196e94cf00f187f7ed21c242792a923cdb1c61f/typing_extensions-4.15.0.tar.gz", hash = "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466", size = 109391, upload-time = "2025-08-25T13:49:26.313Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/18/67/36e9267722cc04a6b9f15c7f3441c2363321a3ea07da7ae0c0707beb2a9c/typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548", size = 44614, upload-time = "2025-08-25T13:49:24.86Z" },
|
||||
]
|
||||
Reference in New Issue
Block a user