# This file is auto-generated by alef — DO NOT EDIT.
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
# To regenerate: alef generate
# To verify freshness: alef verify --exit-code
# Issues & docs: https://github.com/kreuzberg-dev/alef

"""E2e tests for category: batch."""

|
|
import pytest # noqa: F401
|
||
|
|
from kreuzberg import batch_extract_bytes_sync, batch_extract_bytes, batch_extract_files, batch_extract_files_sync, BatchBytesItem, BatchFileItem
|
||
|
|
|
||
|
|
|
||
|
|
def _alef_e2e_text(value: object) -> str:
    """Return ``value`` as text, mapping ``None`` to the empty string.

    Args:
        value: Any object, or ``None``.

    Returns:
        ``""`` when ``value`` is ``None``; otherwise ``str(value)``.
        Falsy non-``None`` values (``0``, ``False``, ``""``) are still
        passed through ``str``.
    """
    if value is None:
        return ""
    return str(value)
|
||
|
|
|
||
|
|
|
||
|
|
def _alef_e2e_item_texts(item: object) -> tuple[str, ...]:
    """Collect the text representations of ``item`` into one tuple.

    Args:
        item: Any object. Attributes are read with ``getattr`` and a
            ``None`` default, so a missing attribute is not an error.

    Returns:
        An 8-tuple of strings, in this order:

        1. the text of ``item`` itself,
        2. the text of its ``kind``, ``name``, ``source``, ``alias``,
           ``text`` and ``signature`` attributes (``""`` for each one that
           is missing or ``None``),
        3. the ``str`` of every element of ``item.items`` joined by single
           spaces, or ``""`` when ``item.items`` is not a ``list``.
    """
    nested = getattr(item, "items", None)
    # Only a real ``list`` is flattened; any other value (including a
    # bound method such as ``dict.items``) yields an empty string.
    nested_text = " ".join(map(str, nested)) if isinstance(nested, list) else ""

    attribute_texts = tuple(
        _alef_e2e_text(getattr(item, attribute, None))
        for attribute in ("kind", "name", "source", "alias", "text", "signature")
    )
    return (_alef_e2e_text(item), *attribute_texts, nested_text)
|
||
|
|
|
||
|
|
|
||
|
|
def test_batch_bytes_invalid_mime() -> None:
    """batch_extract_bytes_sync with a MIME type of ``application/x-nonexistent``.

    Submits a single item and discards the result, so the test fails only
    if the call itself raises.
    """
    # b"Hello" is the same five bytes as bytes([72, 101, 108, 108, 111]).
    items = [BatchBytesItem(content=b"Hello", mime_type="application/x-nonexistent")]

    # Second argument is None — presumably "use the default config"; confirm
    # against the kreuzberg API.
    _ = batch_extract_bytes_sync(items, None)
|
||
|
|
|
||
|
|
|
||
|
|
@pytest.mark.asyncio
async def test_batch_extract_bytes_happy() -> None:
    """batch_extract_bytes: happy path with mixed inputs.

    Submits one ``text/plain`` item and one ``text/html`` item and checks
    that the awaited result contains at least one entry.
    """
    items = [
        BatchBytesItem(content=b"Hello, world!", mime_type="text/plain"),
        BatchBytesItem(content=b"<html><body>Test</body></html>", mime_type="text/html"),
    ]

    # Second argument is None — presumably "use the default config"; confirm
    # against the kreuzberg API.
    result = await batch_extract_bytes(items, None)
    assert len(result) >= 1  # noqa: S101
|
||
|
|
|
||
|
|
|
||
|
|
@pytest.mark.asyncio
async def test_batch_extract_bytes_mixed_format() -> None:
    """batch_extract_bytes: handles unsupported MIME gracefully.

    Submits a single item typed ``application/x-unknown`` and discards the
    result, so the test fails only if awaiting the call raises.
    """
    items = [BatchBytesItem(content=b"PDF placeholder", mime_type="application/x-unknown")]

    _ = await batch_extract_bytes(items, None)
|
||
|
|
|
||
|
|
|
||
|
|
def test_batch_extract_bytes_sync_empty_list() -> None:
    """batch_extract_bytes_sync: an empty batch yields an empty result."""
    # Annotated so type checkers do not have to infer the element type of
    # an empty literal.
    items: list[BatchBytesItem] = []

    result = batch_extract_bytes_sync(items, None)
    assert len(result) == 0  # noqa: S101
|
||
|
|
|
||
|
|
|
||
|
|
def test_batch_extract_bytes_sync_invalid_mime() -> None:
    """batch_extract_bytes_sync: unsupported MIME.

    Submits a single item typed ``application/x-unknown`` and discards the
    result, so the test fails only if the call itself raises.
    """
    # b"data" is the same four bytes as bytes([100, 97, 116, 97]).
    items = [BatchBytesItem(content=b"data", mime_type="application/x-unknown")]

    _ = batch_extract_bytes_sync(items, None)
|
||
|
|
|
||
|
|
|
||
|
|
@pytest.mark.asyncio
async def test_batch_file_async_basic() -> None:
    """Extract text from multiple files asynchronously.

    Awaits ``batch_extract_files`` on a PDF path and a text path and
    discards the result, so the test fails only if the call raises.
    """
    # Relative paths: presumably resolved against the shared test-documents
    # directory by the test harness — confirm in conftest.
    paths = [
        BatchFileItem(path="pdf/fake_memo.pdf"),
        BatchFileItem(path="text/fake_text.txt"),
    ]

    _ = await batch_extract_files(paths, None)
|
||
|
|
|
||
|
|
|
||
|
|
@pytest.mark.asyncio
async def test_batch_file_async_not_found() -> None:
    """batch_extract_files (async) with a path under ``/nonexistent``.

    The result is discarded, so the test fails only if awaiting the call
    raises.
    """
    paths = [BatchFileItem(path="/nonexistent/a.pdf")]

    _ = await batch_extract_files(paths, None)
|
||
|
|
|
||
|
|
|
||
|
|
def test_batch_file_not_found() -> None:
    """batch_extract_files_sync with two paths under ``/nonexistent``.

    The result is discarded, so the test fails only if the call itself
    raises.
    """
    paths = [
        BatchFileItem(path="/nonexistent/a.pdf"),
        BatchFileItem(path="/nonexistent/b.txt"),
    ]

    _ = batch_extract_files_sync(paths, None)
|
||
|
|
|
||
|
|
|
||
|
|
def test_batch_file_partial() -> None:
    """batch_extract_files_sync with a mixed batch.

    Combines a relative text path with a path under ``/nonexistent``. The
    result is discarded, so the test fails only if the call itself raises.
    """
    paths = [
        BatchFileItem(path="text/plain.txt"),
        BatchFileItem(path="/nonexistent/missing.pdf"),
    ]

    _ = batch_extract_files_sync(paths, None)
|
||
|
|
|
||
|
|
|
||
|
|
def test_batch_file_sync_basic() -> None:
    """Extract text from multiple files synchronously.

    Calls ``batch_extract_files_sync`` on a PDF path and a text path and
    discards the result, so the test fails only if the call raises.
    """
    # Relative paths: presumably resolved against the shared test-documents
    # directory by the test harness — confirm in conftest.
    paths = [
        BatchFileItem(path="pdf/fake_memo.pdf"),
        BatchFileItem(path="text/fake_text.txt"),
    ]

    _ = batch_extract_files_sync(paths, None)
|