This commit is contained in:
109
e2e/python/tests/test_batch.py
generated
Normal file
109
e2e/python/tests/test_batch.py
generated
Normal file
@@ -0,0 +1,109 @@
|
||||
# This file is auto-generated by alef — DO NOT EDIT.
|
||||
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
# To regenerate: alef generate
|
||||
# To verify freshness: alef verify --exit-code
|
||||
# Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
"""E2e tests for category: batch."""
|
||||
|
||||
import pytest # noqa: F401
|
||||
from kreuzberg import batch_extract_bytes_sync, batch_extract_bytes, batch_extract_files, batch_extract_files_sync, BatchBytesItem, BatchFileItem
|
||||
|
||||
|
||||
def _alef_e2e_text(value: object) -> str:
    """Return ``str(value)``, mapping ``None`` to the empty string.

    Args:
        value: Any object, or ``None``.

    Returns:
        ``""`` when ``value`` is ``None``; otherwise ``str(value)``.
    """
    return "" if value is None else str(value)
|
||||
|
||||
|
||||
def _alef_e2e_item_texts(item: object) -> tuple[str, ...]:
    """Collect the text renderings of ``item`` and of selected attributes.

    Args:
        item: Any object. Attributes that are missing are treated as ``None``.

    Returns:
        An 8-tuple of strings, in this order: ``str(item)``; the ``kind``,
        ``name``, ``source``, ``alias``, ``text`` and ``signature``
        attributes (each ``""`` when absent or ``None``); and finally the
        space-joined ``str()`` of every element of ``item.items``, or ``""``
        when ``item.items`` is not a ``list``.
    """
    raw_items = getattr(item, "items", None)
    # Only a real list is joined; any other value (including a bound
    # ``items`` method, e.g. on a dict) yields the empty string.
    if isinstance(raw_items, list):
        items_text = " ".join(str(value) for value in raw_items)
    else:
        items_text = ""

    # Order matters: it fixes the position of each field in the result.
    attribute_names = ("kind", "name", "source", "alias", "text", "signature")
    return (
        _alef_e2e_text(item),
        *(_alef_e2e_text(getattr(item, attribute, None)) for attribute in attribute_names),
        items_text,
    )
|
||||
|
||||
|
||||
def test_batch_bytes_invalid_mime() -> None:
    """batch_extract_bytes_sync invalid MIME.

    Submits one item whose MIME type is ``application/x-nonexistent``.
    The result is discarded and nothing is asserted, so the test passes
    whenever the call returns without raising.
    """
    items = [
        BatchBytesItem(
            content=bytes([72, 101, 108, 108, 111]),  # ASCII "Hello"
            mime_type="application/x-nonexistent",
        )
    ]

    # NOTE(review): the second argument is presumably the extraction config
    # (None = defaults) — confirm against the kreuzberg API.
    _ = batch_extract_bytes_sync(items, None)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_batch_extract_bytes_happy() -> None:
    """batch_extract_bytes: happy path with mixed inputs.

    Submits one ``text/plain`` item and one ``text/html`` item and checks
    that the awaited result contains at least one entry.
    """
    items = [
        BatchBytesItem(
            # ASCII "Hello, world!"
            content=bytes([72, 101, 108, 108, 111, 44, 32, 119, 111, 114, 108, 100, 33]),
            mime_type="text/plain",
        ),
        BatchBytesItem(
            # ASCII "<html><body>Test</body></html>"
            content=bytes(
                [
                    60, 104, 116, 109, 108, 62,
                    60, 98, 111, 100, 121, 62,
                    84, 101, 115, 116,
                    60, 47, 98, 111, 100, 121, 62,
                    60, 47, 104, 116, 109, 108, 62,
                ]
            ),
            mime_type="text/html",
        ),
    ]

    # NOTE(review): the second argument is presumably the extraction config
    # (None = defaults) — confirm against the kreuzberg API.
    result = await batch_extract_bytes(items, None)
    assert len(result) >= 1  # noqa: S101
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_batch_extract_bytes_mixed_format() -> None:
    """batch_extract_bytes: handles unsupported MIME gracefully.

    Submits a single item with MIME type ``application/x-unknown``. The
    result is discarded and nothing is asserted, so "gracefully" here means
    only that awaiting the call does not raise.
    """
    items = [
        BatchBytesItem(
            # ASCII "PDF placeholder"
            content=bytes([80, 68, 70, 32, 112, 108, 97, 99, 101, 104, 111, 108, 100, 101, 114]),
            mime_type="application/x-unknown",
        )
    ]

    # NOTE(review): the second argument is presumably the extraction config
    # (None = defaults) — confirm against the kreuzberg API.
    _ = await batch_extract_bytes(items, None)
|
||||
|
||||
|
||||
def test_batch_extract_bytes_sync_empty_list() -> None:
    """batch_extract_bytes_sync: empty batch.

    Passes an empty list of items and checks that the result is empty too.
    """
    items: list[BatchBytesItem] = []

    # NOTE(review): the second argument is presumably the extraction config
    # (None = defaults) — confirm against the kreuzberg API.
    result = batch_extract_bytes_sync(items, None)
    assert len(result) == 0  # noqa: S101
|
||||
|
||||
|
||||
def test_batch_extract_bytes_sync_invalid_mime() -> None:
    """batch_extract_bytes_sync: unsupported MIME.

    Submits one item with MIME type ``application/x-unknown``. The result
    is discarded and nothing is asserted, so the test passes whenever the
    call returns without raising.
    """
    items = [
        BatchBytesItem(
            content=bytes([100, 97, 116, 97]),  # ASCII "data"
            mime_type="application/x-unknown",
        )
    ]

    # NOTE(review): the second argument is presumably the extraction config
    # (None = defaults) — confirm against the kreuzberg API.
    _ = batch_extract_bytes_sync(items, None)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_batch_file_async_basic() -> None:
    """Extract text from multiple files asynchronously.

    Submits two file items (a PDF and a text file). The result is discarded
    and nothing is asserted, so the test passes whenever awaiting the call
    does not raise.
    """
    # NOTE(review): both paths are relative — presumably resolved against a
    # fixture/working directory set up outside this file; confirm.
    paths = [
        BatchFileItem(path="pdf/fake_memo.pdf"),
        BatchFileItem(path="text/fake_text.txt"),
    ]

    _ = await batch_extract_files(paths, None)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_batch_file_async_not_found() -> None:
    """batch_extract_file async nonexistent.

    Submits a single item pointing at ``/nonexistent/a.pdf``. The result is
    discarded and nothing is asserted, so the test passes whenever awaiting
    the call does not raise.
    """
    paths = [BatchFileItem(path="/nonexistent/a.pdf")]

    # NOTE(review): the second argument is presumably the extraction config
    # (None = defaults) — confirm against the kreuzberg API.
    _ = await batch_extract_files(paths, None)
|
||||
|
||||
|
||||
def test_batch_file_not_found() -> None:
    """batch_extract_file_sync nonexistent.

    Submits two items that both point under ``/nonexistent``. The result is
    discarded and nothing is asserted, so the test passes whenever the call
    returns without raising.
    """
    paths = [
        BatchFileItem(path="/nonexistent/a.pdf"),
        BatchFileItem(path="/nonexistent/b.txt"),
    ]

    # NOTE(review): the second argument is presumably the extraction config
    # (None = defaults) — confirm against the kreuzberg API.
    _ = batch_extract_files_sync(paths, None)
|
||||
|
||||
|
||||
def test_batch_file_partial() -> None:
    """batch_extract_file_sync mixed.

    Submits one relative path (``text/plain.txt``) together with one path
    under ``/nonexistent``. The result is discarded and nothing is asserted,
    so the test passes whenever the call returns without raising.
    """
    # NOTE(review): "text/plain.txt" is relative — presumably an existing
    # fixture resolved against a directory set up outside this file; confirm.
    paths = [
        BatchFileItem(path="text/plain.txt"),
        BatchFileItem(path="/nonexistent/missing.pdf"),
    ]

    _ = batch_extract_files_sync(paths, None)
|
||||
|
||||
|
||||
def test_batch_file_sync_basic() -> None:
    """Extract text from multiple files synchronously.

    Submits two file items (a PDF and a text file). The result is discarded
    and nothing is asserted, so the test passes whenever the call returns
    without raising.
    """
    # NOTE(review): both paths are relative — presumably resolved against a
    # fixture/working directory set up outside this file; confirm.
    paths = [
        BatchFileItem(path="pdf/fake_memo.pdf"),
        BatchFileItem(path="text/fake_text.txt"),
    ]

    _ = batch_extract_files_sync(paths, None)
|
||||
Reference in New Issue
Block a user