Files
fil/e2e/python/tests/test_batch.py

110 lines
3.9 KiB
Python
Raw Permalink Normal View History

2026-06-01 23:40:55 +02:00
# This file is auto-generated by alef — DO NOT EDIT.
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
# To regenerate: alef generate
# To verify freshness: alef verify --exit-code
# Issues & docs: https://github.com/kreuzberg-dev/alef
"""E2e tests for category: batch."""
import pytest # noqa: F401
from kreuzberg import batch_extract_bytes_sync, batch_extract_bytes, batch_extract_files, batch_extract_files_sync, BatchBytesItem, BatchFileItem
def _alef_e2e_text(value: object) -> str:
    """Convert ``value`` to text, mapping ``None`` to the empty string.

    Args:
        value: Any object, or ``None``.

    Returns:
        ``""`` when ``value`` is ``None``; otherwise ``str(value)``.
    """
    if value is None:
        return ""
    return str(value)
def _alef_e2e_item_texts(item: object) -> tuple[str, ...]:
    """Collect the textual views of ``item`` into a fixed-order tuple.

    The tuple holds, in order: the text of ``item`` itself; the text of its
    ``kind``, ``name``, ``source``, ``alias``, ``text`` and ``signature``
    attributes (a missing or ``None`` attribute yields ``""``); and finally
    the space-joined ``str()`` of every entry in ``item.items`` when that
    attribute is a ``list`` (``""`` otherwise).

    Args:
        item: The object to inspect; attributes are read with ``getattr``.

    Returns:
        An eight-element tuple of strings as described above.
    """
    children = getattr(item, "items", None)
    if isinstance(children, list):
        joined_children = " ".join(str(child) for child in children)
    else:
        joined_children = ""

    own_text = _alef_e2e_text(item)
    attribute_names = ("kind", "name", "source", "alias", "text", "signature")
    attribute_texts = tuple(
        _alef_e2e_text(getattr(item, attribute, None))
        for attribute in attribute_names
    )
    return (own_text, *attribute_texts, joined_children)
def test_batch_bytes_invalid_mime() -> None:
    """Run batch_extract_bytes_sync on one item with a nonexistent MIME type."""
    # b"Hello" is the same value as bytes([72, 101, 108, 108, 111]).
    bad_item = BatchBytesItem(
        content=b"Hello",
        mime_type="application/x-nonexistent",
    )
    batch = [bad_item]
    _ = batch_extract_bytes_sync(batch, None)
@pytest.mark.asyncio
async def test_batch_extract_bytes_happy() -> None:
    """Await batch_extract_bytes on a plain-text and an HTML item; expect at least one result."""
    # The bytes literals equal the original integer lists byte-for-byte.
    plain_item = BatchBytesItem(
        content=b"Hello, world!",
        mime_type="text/plain",
    )
    html_item = BatchBytesItem(
        content=b"<html><body>Test</body></html>",
        mime_type="text/html",
    )
    batch = [plain_item, html_item]
    extracted = await batch_extract_bytes(batch, None)
    assert len(extracted) >= 1  # noqa: S101
@pytest.mark.asyncio
async def test_batch_extract_bytes_mixed_format() -> None:
    """Await batch_extract_bytes on a single item tagged with an unknown MIME type."""
    # b"PDF placeholder" equals the original integer list byte-for-byte.
    unknown_item = BatchBytesItem(
        content=b"PDF placeholder",
        mime_type="application/x-unknown",
    )
    batch = [unknown_item]
    _ = await batch_extract_bytes(batch, None)
def test_batch_extract_bytes_sync_empty_list() -> None:
    """Run batch_extract_bytes_sync on an empty batch; expect an empty result."""
    empty_batch = []
    extracted = batch_extract_bytes_sync(empty_batch, None)
    assert len(extracted) == 0  # noqa: S101
def test_batch_extract_bytes_sync_invalid_mime() -> None:
    """Run batch_extract_bytes_sync on one item tagged with an unknown MIME type."""
    # b"data" equals bytes([100, 97, 116, 97]).
    unknown_item = BatchBytesItem(
        content=b"data",
        mime_type="application/x-unknown",
    )
    batch = [unknown_item]
    _ = batch_extract_bytes_sync(batch, None)
@pytest.mark.asyncio
async def test_batch_file_async_basic() -> None:
    """Await batch_extract_files on a PDF path and a text path."""
    pdf_item = BatchFileItem(path="pdf/fake_memo.pdf")
    text_item = BatchFileItem(path="text/fake_text.txt")
    batch = [pdf_item, text_item]
    _ = await batch_extract_files(batch, None)
@pytest.mark.asyncio
async def test_batch_file_async_not_found() -> None:
    """Await batch_extract_files on a single nonexistent path."""
    missing_item = BatchFileItem(path="/nonexistent/a.pdf")
    batch = [missing_item]
    _ = await batch_extract_files(batch, None)
def test_batch_file_not_found() -> None:
    """Run batch_extract_files_sync on two nonexistent paths."""
    missing_pdf = BatchFileItem(path="/nonexistent/a.pdf")
    missing_txt = BatchFileItem(path="/nonexistent/b.txt")
    batch = [missing_pdf, missing_txt]
    _ = batch_extract_files_sync(batch, None)
def test_batch_file_partial() -> None:
    """Run batch_extract_files_sync on a relative text path plus a nonexistent path."""
    text_item = BatchFileItem(path="text/plain.txt")
    missing_item = BatchFileItem(path="/nonexistent/missing.pdf")
    batch = [text_item, missing_item]
    _ = batch_extract_files_sync(batch, None)
def test_batch_file_sync_basic() -> None:
    """Run batch_extract_files_sync on a PDF path and a text path."""
    pdf_item = BatchFileItem(path="pdf/fake_memo.pdf")
    text_item = BatchFileItem(path="text/fake_text.txt")
    batch = [pdf_item, text_item]
    _ = batch_extract_files_sync(batch, None)