# This file is auto-generated by alef — DO NOT EDIT.
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
# To regenerate: alef generate
# To verify freshness: alef verify --exit-code
# Issues & docs: https://github.com/kreuzberg-dev/alef
"""E2e tests for category: batch."""

import pytest  # noqa: F401
from kreuzberg import (
    batch_extract_bytes_sync,
    batch_extract_bytes,
    batch_extract_files,
    batch_extract_files_sync,
    BatchBytesItem,
    BatchFileItem,
)


def _alef_e2e_text(value: object) -> str:
    """Return ``str(value)``, mapping ``None`` to the empty string."""
    if value is None:
        return ""
    return str(value)


def _alef_e2e_item_texts(item: object) -> tuple[str, ...]:
    """Collect the text forms of *item* and a fixed set of its attributes.

    The returned tuple holds, in order: the text of *item* itself, the text of
    its ``kind``, ``name``, ``source``, ``alias``, ``text`` and ``signature``
    attributes (a missing or ``None`` attribute yields ``""``), and finally the
    space-joined ``str`` of every element of ``item.items`` when that attribute
    is a ``list`` (otherwise ``""``).
    """
    nested = getattr(item, "items", None)
    if isinstance(nested, list):
        joined = " ".join(map(str, nested))
    else:
        joined = ""

    own_text = _alef_e2e_text(item)
    attr_texts = tuple(
        _alef_e2e_text(getattr(item, attr, None))
        for attr in ("kind", "name", "source", "alias", "text", "signature")
    )
    return (own_text, *attr_texts, joined)


def test_batch_bytes_invalid_mime() -> None:
    """Smoke test: batch_extract_bytes_sync with MIME type application/x-nonexistent.

    No assertion is made on the result; the test passes when the call returns
    without raising.
    """
    payload = BatchBytesItem(content=b"Hello", mime_type="application/x-nonexistent")
    batch_extract_bytes_sync([payload], None)


@pytest.mark.asyncio
async def test_batch_extract_bytes_happy() -> None:
    """batch_extract_bytes yields at least one result for a text + HTML batch."""
    plain_item = BatchBytesItem(content=b"Hello, world!", mime_type="text/plain")
    html_item = BatchBytesItem(
        content=b"<html><body>Test</body></html>",
        mime_type="text/html",
    )
    extracted = await batch_extract_bytes([plain_item, html_item], None)
    assert len(extracted) >= 1  # noqa: S101


@pytest.mark.asyncio
async def test_batch_extract_bytes_mixed_format() -> None:
    """Smoke test: batch_extract_bytes with MIME type application/x-unknown.

    No assertion is made on the result; the test passes when the awaited call
    completes without raising.
    """
    payload = BatchBytesItem(content=b"PDF placeholder", mime_type="application/x-unknown")
    await batch_extract_bytes([payload], None)


def test_batch_extract_bytes_sync_empty_list() -> None:
    """batch_extract_bytes_sync returns an empty result for an empty batch."""
    extracted = batch_extract_bytes_sync([], None)
    assert len(extracted) == 0  # noqa: S101


def test_batch_extract_bytes_sync_invalid_mime() -> None:
    """Smoke test: batch_extract_bytes_sync with MIME type application/x-unknown.

    No assertion is made on the result; the test passes when the call returns
    without raising.
    """
    payload = BatchBytesItem(content=b"data", mime_type="application/x-unknown")
    batch_extract_bytes_sync([payload], None)


@pytest.mark.asyncio
async def test_batch_file_async_basic() -> None:
    """Smoke test: await batch_extract_files on a PDF path and a text path."""
    targets = [
        BatchFileItem(path="pdf/fake_memo.pdf"),
        BatchFileItem(path="text/fake_text.txt"),
    ]
    await batch_extract_files(targets, None)


@pytest.mark.asyncio
async def test_batch_file_async_not_found() -> None:
    """Smoke test: await batch_extract_files on the path /nonexistent/a.pdf."""
    targets = [BatchFileItem(path="/nonexistent/a.pdf")]
    await batch_extract_files(targets, None)


def test_batch_file_not_found() -> None:
    """Smoke test: batch_extract_files_sync on two paths under /nonexistent."""
    targets = [
        BatchFileItem(path="/nonexistent/a.pdf"),
        BatchFileItem(path="/nonexistent/b.txt"),
    ]
    batch_extract_files_sync(targets, None)


def test_batch_file_partial() -> None:
    """Smoke test: batch_extract_files_sync on text/plain.txt plus a /nonexistent path."""
    targets = [
        BatchFileItem(path="text/plain.txt"),
        BatchFileItem(path="/nonexistent/missing.pdf"),
    ]
    batch_extract_files_sync(targets, None)


def test_batch_file_sync_basic() -> None:
    """Smoke test: batch_extract_files_sync on a PDF path and a text path."""
    targets = [
        BatchFileItem(path="pdf/fake_memo.pdf"),
        BatchFileItem(path="text/fake_text.txt"),
    ]
    batch_extract_files_sync(targets, None)