Files
fil/e2e/dart/test/contract_test.dart

185 lines
12 KiB
Dart
Raw Permalink Normal View History

2026-06-01 23:40:55 +02:00
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// ignore_for_file: unused_local_variable
import 'package:test/test.dart';
import 'dart:io';
import 'package:kreuzberg/kreuzberg.dart';
import 'package:kreuzberg/src/kreuzberg_bridge_generated/frb_generated.dart' show RustLib;
// E2e tests for category: contract
/// Renders [value] as the plain text used when comparing against fixture
/// expectations.
///
/// * `null` becomes the empty string.
/// * Enum values become their variant name converted to snake_case, e.g. a
///   variant named `toolCalls` yields `tool_calls` (for serde compatibility).
/// * Strings and numbers are returned as-is.
/// * Any other object falls back to a heuristic on its `toString()`: when it
///   has the shape `Prefix.suffix` (exactly one dot), the suffix is converted
///   to snake_case; otherwise the `toString()` result is returned unchanged.
String _alefE2eText(Object? value) {
  if (value == null) return '';

  // Genuine enums: read the variant name directly instead of parsing the
  // 'EnumName.variantName' text produced by toString().
  if (value is Enum) return _camelToSnake(value.name);

  // Plain text and numbers can legitimately contain a single '.', such as
  // 'report.pdf' or 0.75. They must never be mistaken for 'EnumName.variant',
  // which would silently drop everything before the dot.
  if (value is String) return value;
  if (value is num) return value.toString();

  // Fallback for other objects whose toString() looks like 'Prefix.suffix'.
  final str = value.toString();
  final parts = str.split('.');
  if (parts.length == 2) {
    return _camelToSnake(parts[1]);
  }
  return str;
}
/// Converts a camelCase identifier to snake_case.
///
/// Every ASCII uppercase letter is lower-cased and, unless it is the very
/// first character, preceded by an underscore. All other characters are
/// copied through untouched, so `'toolCalls'` becomes `'tool_calls'` and
/// `'stop'` stays `'stop'`.
String _camelToSnake(String camel) {
  return camel.replaceAllMapped(RegExp(r'[A-Z]'), (match) {
    // No separator in front of a leading capital.
    final separator = match.start > 0 ? '_' : '';
    final lowered = camel[match.start].toLowerCase();
    return '$separator$lowered';
  });
}
/// Registers the end-to-end tests for the `contract` category.
///
/// Each case runs an extraction through [KreuzbergBridge] against a fixture
/// file and checks the reported MIME type and/or the extracted content.
void main() {
  // Literals shared by many of the cases below.
  const pdfMime = 'application/pdf';
  const docxMime = 'application/vnd.openxmlformats-officedocument.wordprocessingml.document';
  const memoPdf = 'pdf/fake_memo.pdf';
  const markdownOutputJson = '{"output_format":"markdown"}';

  // Builds the explicit unified / plain-text configuration used by several
  // cases; only the document-structure flag differs between them.
  ExtractionConfig unifiedPlainConfig({required bool includeDocumentStructure}) {
    return ExtractionConfig(
      useCache: true,
      enableQualityProcessing: true,
      forceOcr: false,
      disableOcr: false,
      resultFormat: ResultFormat.unified,
      outputFormat: OutputFormat.plain(),
      includeDocumentStructure: includeDocumentStructure,
      useLayoutForMarkdown: false,
      maxArchiveDepth: 3,
    );
  }

  setUpAll(() async {
    await RustLib.init();
    // Fixture paths in the tests are relative, so switch the working
    // directory to the fixtures root when that directory exists.
    final fixturesPath = Platform.environment['FIXTURES_DIR'] ?? '../../test_documents';
    final fixturesDir = Directory(fixturesPath);
    if (fixturesDir.existsSync()) {
      Directory.current = fixturesDir;
    }
  });

  test('Tests async batch bytes extraction API (batch_extract_bytes)', () async {
    final bytes = File(memoPdf).readAsBytesSync();
    final result = await KreuzbergBridge.extractBytes(bytes, pdfMime);
    expect(result.mimeType.toString().trim(), equals(pdfMime));
    expect(result.content.length, greaterThanOrEqualTo(10));
    final hasMemoText = result.content.contains('May 5, 2023') || result.content.contains('Mallori');
    expect(hasMemoText, isTrue);
  });

  test('Tests async batch bytes extraction with per-file configs (batch_extract_bytes with file_configs parameter)', () async {
    final config = await createExtractionConfigFromJson(json: markdownOutputJson);
    final bytes = File(memoPdf).readAsBytesSync();
    final result = await KreuzbergBridge.extractBytes(bytes, pdfMime, config);
    expect(result.mimeType.toString().trim(), equals(pdfMime));
    expect(result.content.length, greaterThanOrEqualTo(10));
    // skipped: field 'metadata.output_format' not available on dart result type
  });

  test('Tests async batch file extraction API (batch_extract_file)', () async {
    final bytes = File(memoPdf).readAsBytesSync();
    final result = await KreuzbergBridge.extractBytes(bytes, pdfMime);
    expect(result.mimeType.toString().trim(), equals(pdfMime));
    expect(result.content.length, greaterThanOrEqualTo(10));
    final hasMemoText = result.content.contains('May 5, 2023') || result.content.contains('Mallori');
    expect(hasMemoText, isTrue);
  });

  test('Tests async batch file extraction with per-file configs (batch_extract_files with file_configs parameter)', () async {
    final config = await createExtractionConfigFromJson(json: markdownOutputJson);
    final bytes = File(memoPdf).readAsBytesSync();
    final result = await KreuzbergBridge.extractBytes(bytes, pdfMime, config);
    expect(result.mimeType.toString().trim(), equals(pdfMime));
    expect(result.content.length, greaterThanOrEqualTo(10));
    // skipped: field 'metadata.output_format' not available on dart result type
  });

  test('Tests async bytes extraction API (extract_bytes)', () async {
    final bytes = File(memoPdf).readAsBytesSync();
    final result = await KreuzbergBridge.extractBytes(bytes, pdfMime);
    expect(result.mimeType.toString().trim(), equals(pdfMime));
    expect(result.content.length, greaterThanOrEqualTo(10));
    final hasMemoText = result.content.contains('May 5, 2023') || result.content.contains('Mallori');
    expect(hasMemoText, isTrue);
  });

  test('Tests async file extraction API (extract_file)', () async {
    final bytes = File(memoPdf).readAsBytesSync();
    final result = await KreuzbergBridge.extractBytes(bytes, pdfMime);
    expect(result.mimeType.toString().trim(), equals(pdfMime));
    expect(result.content.length, greaterThanOrEqualTo(10));
    final hasMemoText = result.content.contains('May 5, 2023') || result.content.contains('Mallori');
    expect(hasMemoText, isTrue);
  });

  test('Tests markdown chunker prepends heading hierarchy to chunk content', () async {
    final config = await createExtractionConfigFromJson(json: '{"chunking":{"chunker_type":"markdown","max_chars":300,"max_overlap":50,"prepend_heading_context":true}}');
    final bytes = File('markdown/extraction_test.md').readAsBytesSync();
    final result = await KreuzbergBridge.extractBytesSync(bytes, 'text/markdown', config);
    expect(result.content.length, greaterThanOrEqualTo(10));
    // skipped: field 'chunks' not available on dart result type
    // skipped: field 'chunks_have_content' not available on dart result type
    // skipped: field 'chunks_have_heading_context' not available on dart result type
    // skipped: field 'first_chunk_starts_with_heading' not available on dart result type
  });

  test('Tests document structure with DOCX heading-driven nesting', () async {
    final bytes = File('docx/fake.docx').readAsBytesSync();
    final config = unifiedPlainConfig(includeDocumentStructure: true);
    final result = await KreuzbergBridge.extractBytesSync(bytes, docxMime, config);
    expect(result.mimeType.toString().trim(), equals(docxMime));
    // skipped: field 'document' not available on dart result type
    // skipped: field 'document.nodes' not available on dart result type
  });

  test('Tests element-based result format with element type assertions on DOCX', () async {
    final config = await createExtractionConfigFromJson(json: '{"result_format":"element_based"}');
    final bytes = File('docx/unit_test_headers.docx').readAsBytesSync();
    final result = await KreuzbergBridge.extractBytesSync(bytes, docxMime, config);
    expect(result.mimeType.contains(docxMime), isTrue);
    // skipped: field 'elements' not available on dart result type
  });

  test('Tests that extraction_timeout_secs config field is accepted and does not affect fast extractions', () async {
    final bytes = File(memoPdf).readAsBytesSync();
    final config = unifiedPlainConfig(includeDocumentStructure: false);
    final result = await KreuzbergBridge.extractBytesSync(bytes, pdfMime, config);
    expect(result.mimeType.toString().trim(), equals(pdfMime));
    expect(result.content.length, greaterThanOrEqualTo(10));
  });

  test('Tests keyword extraction via YAKE algorithm', () async {
    final config = await createExtractionConfigFromJson(json: '{"keywords":{"algorithm":"yake","max_keywords":10}}');
    final bytes = File(memoPdf).readAsBytesSync();
    final result = await KreuzbergBridge.extractBytesSync(bytes, pdfMime, config);
    expect(result.mimeType.toString().trim(), equals(pdfMime));
    expect(result.content.length, greaterThanOrEqualTo(10));
    // skipped (2 assertions): field 'keywords' not available on dart result type
  });

  test('Tests page extraction and page marker configuration', () async {
    final config = await createExtractionConfigFromJson(json: '{"pages":{"extract_pages":true,"insert_page_markers":true}}');
    final bytes = File(memoPdf).readAsBytesSync();
    final result = await KreuzbergBridge.extractBytesSync(bytes, pdfMime, config);
    expect(result.mimeType.toString().trim(), equals(pdfMime));
    expect(result.content.length, greaterThanOrEqualTo(10));
    final hasPageMarker = result.content.contains('PAGE');
    expect(hasPageMarker, isTrue);
  });

  test('Tests quality scoring produces a score value in [0.0, 1.0]', () async {
    final bytes = File(memoPdf).readAsBytesSync();
    final config = unifiedPlainConfig(includeDocumentStructure: false);
    final result = await KreuzbergBridge.extractBytesSync(bytes, pdfMime, config);
    expect(result.mimeType.toString().trim(), equals(pdfMime));
    expect(result.content.length, greaterThanOrEqualTo(10));
    // skipped (3 assertions): field 'quality_score' not available on dart result type
  });

  test('Tests archive extraction with custom security limits', () async {
    final config = await createExtractionConfigFromJson(json: '{"security_limits":{"max_archive_size":104857600,"max_compression_ratio":50,"max_files_in_archive":100}}');
    final bytes = File('archives/documents.zip').readAsBytesSync();
    final result = await KreuzbergBridge.extractBytesSync(bytes, 'application/zip', config);
    final isZipMime = result.mimeType.contains('application/zip') || result.mimeType.contains('application/x-zip-compressed');
    expect(isZipMime, isTrue);
    expect(result.content.length, greaterThanOrEqualTo(10));
  });

  test('Tests tree-sitter configuration round-trip', () async {
    final config = await createExtractionConfigFromJson(json: '{"tree_sitter":{"groups":["web"],"languages":["python","rust"],"process":{"comments":false,"diagnostics":false,"docstrings":false,"exports":true,"imports":true,"structure":true,"symbols":false}}}');
    final result = await KreuzbergBridge.extractFileSync('code/hello.py', 'text/x-source-code', config);
    expect(result.mimeType.toString().trim(), equals('text/x-source-code'));
    expect(result.content.length, greaterThanOrEqualTo(5));
  });

  test('Tests markdown output format via bytes extraction API', () async {
    final config = await createExtractionConfigFromJson(json: markdownOutputJson);
    final bytes = File(memoPdf).readAsBytesSync();
    final result = await KreuzbergBridge.extractBytesSync(bytes, pdfMime, config);
    expect(result.mimeType.toString().trim(), equals(pdfMime));
    expect(result.content.length, greaterThanOrEqualTo(10));
    // skipped: field 'metadata.output_format' not available on dart result type
  });

  test('Tests Markdown output format', () async {
    final config = await createExtractionConfigFromJson(json: markdownOutputJson);
    final bytes = File(memoPdf).readAsBytesSync();
    final result = await KreuzbergBridge.extractBytesSync(bytes, pdfMime, config);
    expect(result.mimeType.toString().trim(), equals(pdfMime));
    expect(result.content.length, greaterThanOrEqualTo(10));
    // skipped: field 'metadata.output_format' not available on dart result type
  });
}