// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// ignore_for_file: unused_local_variable
import 'package:test/test.dart';
import 'dart:io';
import 'package:kreuzberg/kreuzberg.dart';
import 'package:kreuzberg/src/kreuzberg_bridge_generated/frb_generated.dart' show RustLib;

// E2e tests for category: contract

/// MIME type passed to the bridge for the PDF fixture.
const _pdfMime = 'application/pdf';

/// MIME type passed to the bridge for the DOCX fixtures.
const _docxMime =
    'application/vnd.openxmlformats-officedocument.wordprocessingml.document';

/// Path of the PDF fixture most tests extract, relative to the working
/// directory (which `setUpAll` moves into the fixtures directory when present).
const _memoPdf = 'pdf/fake_memo.pdf';

/// Matches a single ASCII upper-case letter; shared so it is built only once.
final _asciiUpperCase = RegExp(r'[A-Z]');

/// Renders [value] as text, mapping enum-like values to a snake_case name.
///
/// * `null` yields the empty string.
/// * Strings, numbers and booleans are returned as their plain text. They are
///   never enums, so a `.` inside them (a file name, a decimal point) must not
///   be mistaken for the `EnumName.variantName` separator.
/// * [Enum] values yield their variant name converted with [_camelToSnake],
///   e.g. `toolCalls` -> `tool_calls`.
/// * Any other object falls back to inspecting `toString()`: when it splits
///   into exactly two parts around a `.`, the second part is converted with
///   [_camelToSnake]; otherwise the text is returned unchanged.
String _alefE2eText(Object? value) {
  if (value == null) return '';
  if (value is String) return value;
  if (value is num || value is bool) return value.toString();
  if (value is Enum) return _camelToSnake(value.name);

  final text = value.toString();
  final parts = text.split('.');
  return parts.length == 2 ? _camelToSnake(parts[1]) : text;
}

/// Converts a camelCase identifier to snake_case.
///
/// Every ASCII upper-case letter is lower-cased and, unless it is the first
/// character of [camel], preceded by an underscore. All other characters are
/// copied through unchanged.
String _camelToSnake(String camel) {
  return camel.replaceAllMapped(_asciiUpperCase, (match) {
    final lowered = camel[match.start].toLowerCase();
    return match.start == 0 ? lowered : '_$lowered';
  });
}

/// Builds the explicit [ExtractionConfig] shared by several tests.
///
/// Only [includeDocumentStructure] differs between the call sites; every other
/// field carries the same literal value in each of them.
ExtractionConfig _baselineConfig({bool includeDocumentStructure = false}) {
  return ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: includeDocumentStructure,
    useLayoutForMarkdown: false,
    maxArchiveDepth: 3,
  );
}

/// Asserts that the trimmed text form of [actual] equals [expected].
void _expectMimeType(Object? actual, String expected) {
  expect(actual.toString().trim(), equals(expected));
}

void main() {
  setUpAll(() async {
    await RustLib.init();
    // Run from the fixtures directory so tests can use relative fixture paths.
    final fixturesPath =
        Platform.environment['FIXTURES_DIR'] ?? '../../test_documents';
    final fixturesDir = Directory(fixturesPath);
    if (fixturesDir.existsSync()) Directory.current = fixturesDir;
  });

  test('Tests async batch bytes extraction API (batch_extract_bytes)', () async {
    final result = await KreuzbergBridge.extractBytes(
      File(_memoPdf).readAsBytesSync(),
      _pdfMime,
    );
    _expectMimeType(result.mimeType, _pdfMime);
    expect(result.content.length, greaterThanOrEqualTo(10));
    expect(
      result.content,
      anyOf(contains('May 5, 2023'), contains('Mallori')),
    );
  });

  test('Tests async batch bytes extraction with per-file configs (batch_extract_bytes with file_configs parameter)', () async {
    final config = await createExtractionConfigFromJson(
      json: '{"output_format":"markdown"}',
    );
    final result = await KreuzbergBridge.extractBytes(
      File(_memoPdf).readAsBytesSync(),
      _pdfMime,
      config,
    );
    _expectMimeType(result.mimeType, _pdfMime);
    expect(result.content.length, greaterThanOrEqualTo(10));
    // skipped: field 'metadata.output_format' not available on dart result type
  });

  test('Tests async batch file extraction API (batch_extract_file)', () async {
    final result = await KreuzbergBridge.extractBytes(
      File(_memoPdf).readAsBytesSync(),
      _pdfMime,
    );
    _expectMimeType(result.mimeType, _pdfMime);
    expect(result.content.length, greaterThanOrEqualTo(10));
    expect(
      result.content,
      anyOf(contains('May 5, 2023'), contains('Mallori')),
    );
  });

  test('Tests async batch file extraction with per-file configs (batch_extract_files with file_configs parameter)', () async {
    final config = await createExtractionConfigFromJson(
      json: '{"output_format":"markdown"}',
    );
    final result = await KreuzbergBridge.extractBytes(
      File(_memoPdf).readAsBytesSync(),
      _pdfMime,
      config,
    );
    _expectMimeType(result.mimeType, _pdfMime);
    expect(result.content.length, greaterThanOrEqualTo(10));
    // skipped: field 'metadata.output_format' not available on dart result type
  });

  test('Tests async bytes extraction API (extract_bytes)', () async {
    final result = await KreuzbergBridge.extractBytes(
      File(_memoPdf).readAsBytesSync(),
      _pdfMime,
    );
    _expectMimeType(result.mimeType, _pdfMime);
    expect(result.content.length, greaterThanOrEqualTo(10));
    expect(
      result.content,
      anyOf(contains('May 5, 2023'), contains('Mallori')),
    );
  });

  test('Tests async file extraction API (extract_file)', () async {
    final result = await KreuzbergBridge.extractBytes(
      File(_memoPdf).readAsBytesSync(),
      _pdfMime,
    );
    _expectMimeType(result.mimeType, _pdfMime);
    expect(result.content.length, greaterThanOrEqualTo(10));
    expect(
      result.content,
      anyOf(contains('May 5, 2023'), contains('Mallori')),
    );
  });

  test('Tests markdown chunker prepends heading hierarchy to chunk content', () async {
    final config = await createExtractionConfigFromJson(
      json: '{"chunking":{"chunker_type":"markdown","max_chars":300,"max_overlap":50,"prepend_heading_context":true}}',
    );
    final result = await KreuzbergBridge.extractBytesSync(
      File('markdown/extraction_test.md').readAsBytesSync(),
      'text/markdown',
      config,
    );
    expect(result.content.length, greaterThanOrEqualTo(10));
    // skipped: field 'chunks' not available on dart result type
    // skipped: field 'chunks_have_content' not available on dart result type
    // skipped: field 'chunks_have_heading_context' not available on dart result type
    // skipped: field 'first_chunk_starts_with_heading' not available on dart result type
  });

  test('Tests document structure with DOCX heading-driven nesting', () async {
    final result = await KreuzbergBridge.extractBytesSync(
      File('docx/fake.docx').readAsBytesSync(),
      _docxMime,
      _baselineConfig(includeDocumentStructure: true),
    );
    _expectMimeType(result.mimeType, _docxMime);
    // skipped: field 'document' not available on dart result type
    // skipped: field 'document.nodes' not available on dart result type
  });

  test('Tests element-based result format with element type assertions on DOCX', () async {
    final config = await createExtractionConfigFromJson(
      json: '{"result_format":"element_based"}',
    );
    final result = await KreuzbergBridge.extractBytesSync(
      File('docx/unit_test_headers.docx').readAsBytesSync(),
      _docxMime,
      config,
    );
    expect(result.mimeType, contains(_docxMime));
    // skipped: field 'elements' not available on dart result type
  });

  test('Tests that extraction_timeout_secs config field is accepted and does not affect fast extractions', () async {
    final result = await KreuzbergBridge.extractBytesSync(
      File(_memoPdf).readAsBytesSync(),
      _pdfMime,
      _baselineConfig(),
    );
    _expectMimeType(result.mimeType, _pdfMime);
    expect(result.content.length, greaterThanOrEqualTo(10));
  });

  test('Tests keyword extraction via YAKE algorithm', () async {
    final config = await createExtractionConfigFromJson(
      json: '{"keywords":{"algorithm":"yake","max_keywords":10}}',
    );
    final result = await KreuzbergBridge.extractBytesSync(
      File(_memoPdf).readAsBytesSync(),
      _pdfMime,
      config,
    );
    _expectMimeType(result.mimeType, _pdfMime);
    expect(result.content.length, greaterThanOrEqualTo(10));
    // skipped: field 'keywords' not available on dart result type
    // skipped: field 'keywords' not available on dart result type
  });

  test('Tests page extraction and page marker configuration', () async {
    final config = await createExtractionConfigFromJson(
      json: '{"pages":{"extract_pages":true,"insert_page_markers":true}}',
    );
    final result = await KreuzbergBridge.extractBytesSync(
      File(_memoPdf).readAsBytesSync(),
      _pdfMime,
      config,
    );
    _expectMimeType(result.mimeType, _pdfMime);
    expect(result.content.length, greaterThanOrEqualTo(10));
    expect(result.content, contains('PAGE'));
  });

  test('Tests quality scoring produces a score value in [0.0, 1.0]', () async {
    final result = await KreuzbergBridge.extractBytesSync(
      File(_memoPdf).readAsBytesSync(),
      _pdfMime,
      _baselineConfig(),
    );
    _expectMimeType(result.mimeType, _pdfMime);
    expect(result.content.length, greaterThanOrEqualTo(10));
    // skipped: field 'quality_score' not available on dart result type
    // skipped: field 'quality_score' not available on dart result type
    // skipped: field 'quality_score' not available on dart result type
  });

  test('Tests archive extraction with custom security limits', () async {
    final config = await createExtractionConfigFromJson(
      json: '{"security_limits":{"max_archive_size":104857600,"max_compression_ratio":50,"max_files_in_archive":100}}',
    );
    final result = await KreuzbergBridge.extractBytesSync(
      File('archives/documents.zip').readAsBytesSync(),
      'application/zip',
      config,
    );
    expect(
      result.mimeType,
      anyOf(
        contains('application/zip'),
        contains('application/x-zip-compressed'),
      ),
    );
    expect(result.content.length, greaterThanOrEqualTo(10));
  });

  test('Tests tree-sitter configuration round-trip', () async {
    final config = await createExtractionConfigFromJson(
      json: '{"tree_sitter":{"groups":["web"],"languages":["python","rust"],"process":{"comments":false,"diagnostics":false,"docstrings":false,"exports":true,"imports":true,"structure":true,"symbols":false}}}',
    );
    final result = await KreuzbergBridge.extractFileSync(
      'code/hello.py',
      'text/x-source-code',
      config,
    );
    _expectMimeType(result.mimeType, 'text/x-source-code');
    expect(result.content.length, greaterThanOrEqualTo(5));
  });

  test('Tests markdown output format via bytes extraction API', () async {
    final config = await createExtractionConfigFromJson(
      json: '{"output_format":"markdown"}',
    );
    final result = await KreuzbergBridge.extractBytesSync(
      File(_memoPdf).readAsBytesSync(),
      _pdfMime,
      config,
    );
    _expectMimeType(result.mimeType, _pdfMime);
    expect(result.content.length, greaterThanOrEqualTo(10));
    // skipped: field 'metadata.output_format' not available on dart result type
  });

  test('Tests Markdown output format', () async {
    final config = await createExtractionConfigFromJson(
      json: '{"output_format":"markdown"}',
    );
    final result = await KreuzbergBridge.extractBytesSync(
      File(_memoPdf).readAsBytesSync(),
      _pdfMime,
      config,
    );
    _expectMimeType(result.mimeType, _pdfMime);
    expect(result.content.length, greaterThanOrEqualTo(10));
    // skipped: field 'metadata.output_format' not available on dart result type
  });
}