118 lines
6.1 KiB
Dart
118 lines
6.1 KiB
Dart
|
|
// This file is auto-generated by alef — DO NOT EDIT.
|
||
|
|
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||
|
|
// To regenerate: alef generate
|
||
|
|
// To verify freshness: alef verify --exit-code
|
||
|
|
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||
|
|
// ignore_for_file: unused_local_variable
|
||
|
|
|
||
|
|
import 'package:test/test.dart';
|
||
|
|
import 'dart:io';
|
||
|
|
import 'package:kreuzberg/kreuzberg.dart';
|
||
|
|
import 'package:kreuzberg/src/kreuzberg_bridge_generated/frb_generated.dart' show RustLib;
|
||
|
|
|
||
|
|
// E2e tests for category: smoke
|
||
|
|
|
||
|
|
/// Renders [value] as the text form used when comparing against expected
/// fixture values.
///
/// * `null` becomes the empty string.
/// * Dart enums become their variant name converted to snake_case
///   (e.g. `toolCalls` -> `tool_calls`) for serde compatibility.
/// * Strings, numbers and booleans are returned via `toString()` untouched.
/// * Any other object falls back to a `toString()` heuristic: when the text
///   has exactly two dot-separated parts (the `EnumName.variantName` shape),
///   the second part is converted to snake_case; otherwise the text is
///   returned as-is.
String _alefE2eText(Object? value) {
  if (value == null) return '';

  // Real enums expose the variant name directly, so there is no need to parse
  // `toString()` output (which an enhanced enum may override).
  if (value is Enum) return _camelToSnake(value.name);

  final str = value.toString();

  // Plain scalars are never enums. Without this guard a value such as
  // 'image.png' or 1.5 matches the two-part heuristic below and is mangled
  // into 'png' / '5'.
  if (value is String || value is num || value is bool) return str;

  // Fallback for enum-like objects that are not Dart enums:
  // 'EnumName.variantName' -> 'variant_name'.
  final parts = str.split('.');
  if (parts.length == 2) return _camelToSnake(parts[1]);

  return str;
}
|
||
|
|
|
||
|
|
/// Converts a camelCase identifier to snake_case.
///
/// Every ASCII uppercase letter is lowercased and, unless it is the first
/// character of [camel], prefixed with an underscore. All other characters
/// are copied through unchanged, e.g. `toolCalls` -> `tool_calls` and
/// `stop` -> `stop`.
String _camelToSnake(String camel) {
  return camel.replaceAllMapped(RegExp(r'[A-Z]'), (upper) {
    final lowered = camel[upper.start].toLowerCase();
    // A leading capital gets no separator in front of it.
    return upper.start == 0 ? lowered : '_$lowered';
  });
}
|
||
|
|
|
||
|
|
/// Registers the end-to-end tests for the `smoke` category.
///
/// Each test reads a fixture file, passes its bytes and MIME type to
/// [KreuzbergBridge.extractBytes], and checks the reported MIME type plus,
/// where applicable, a minimum content length and expected substrings.
void main() {
  setUpAll(() async {
    // Initialise the generated Rust bridge once, before any test body runs.
    await RustLib.init();

    // Fixture paths in the tests are relative, so switch the working
    // directory to the fixtures root when it exists. FIXTURES_DIR overrides
    // the default location; a missing directory is deliberately not an error
    // here.
    final fixturesPath =
        Platform.environment['FIXTURES_DIR'] ?? '../../test_documents';
    final fixturesDir = Directory(fixturesPath);
    if (fixturesDir.existsSync()) Directory.current = fixturesDir;
  });

  test(
      'OCR: PNG image extraction with OCR enabled. In WASM this exercises the Uint8Array bridge parameter and Promise await in the generated OcrBackend bridge.',
      () async {
    final result = await KreuzbergBridge.extractBytes(
      File('images/test_hello_world.png').readAsBytesSync(),
      'image/png',
    );
    expect(result.mimeType.toString().trim(), equals('image/png'));
    expect(result.content.length, greaterThanOrEqualTo(1));
    // Any one alternative is enough. `anyOf` prints the actual content on
    // failure instead of a bare `false`.
    expect(
      result.content,
      anyOf(
        contains('Hello'),
        contains('World'),
        contains('hello'),
        contains('world'),
      ),
    );
  });

  test('Smoke test: DOCX with formatted text', () async {
    final result = await KreuzbergBridge.extractBytes(
      File('docx/fake.docx').readAsBytesSync(),
      'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
    );
    expect(
      result.mimeType.toString().trim(),
      equals(
        'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
      ),
    );
    expect(result.content.length, greaterThanOrEqualTo(20));
    expect(
      result.content,
      anyOf(
        contains('Lorem'),
        contains('ipsum'),
        contains('document'),
        contains('text'),
      ),
    );
  });

  test('Smoke test: HTML table extraction', () async {
    final result = await KreuzbergBridge.extractBytes(
      File('html/simple_table.html').readAsBytesSync(),
      'text/html',
    );
    expect(result.mimeType.toString().trim(), equals('text/html'));
    expect(result.content.length, greaterThanOrEqualTo(10));
    expect(
      result.content,
      anyOf(
        contains('Sample Data Table'),
        contains('Laptop'),
        contains('Electronics'),
        contains('Product'),
      ),
    );
  });

  test('Smoke test: PNG image (without OCR, metadata only)', () async {
    // OCR is explicitly disabled, so only the MIME type is asserted.
    final result = await KreuzbergBridge.extractBytes(
      File('images/sample.png').readAsBytesSync(),
      'image/png',
      ExtractionConfig(
        useCache: true,
        enableQualityProcessing: true,
        forceOcr: false,
        disableOcr: true,
        resultFormat: ResultFormat.unified,
        outputFormat: OutputFormat.plain(),
        includeDocumentStructure: false,
        useLayoutForMarkdown: false,
        maxArchiveDepth: 3,
      ),
    );
    expect(result.mimeType.toString().trim(), equals('image/png'));
  });

  test('Smoke test: JSON file extraction', () async {
    final result = await KreuzbergBridge.extractBytes(
      File('json/simple.json').readAsBytesSync(),
      'application/json',
    );
    expect(result.mimeType.toString().trim(), equals('application/json'));
    expect(result.content.length, greaterThanOrEqualTo(5));
  });

  test('Smoke test: PDF with simple text extraction', () async {
    final result = await KreuzbergBridge.extractBytes(
      File('pdf/fake_memo.pdf').readAsBytesSync(),
      'application/pdf',
    );
    expect(result.mimeType.toString().trim(), equals('application/pdf'));
    expect(result.content.length, greaterThanOrEqualTo(50));
    expect(
      result.content,
      anyOf(contains('May 5, 2023'), contains('To Whom it May Concern')),
    );
  });

  test('Smoke test: Plain text file', () async {
    final result = await KreuzbergBridge.extractBytes(
      File('text/report.txt').readAsBytesSync(),
      'text/plain',
    );
    expect(result.mimeType.toString().trim(), equals('text/plain'));
    expect(result.content.length, greaterThanOrEqualTo(5));
  });

  test('Smoke test: XLSX with basic spreadsheet data including tables',
      () async {
    final result = await KreuzbergBridge.extractBytes(
      File('xlsx/stanley_cups.xlsx').readAsBytesSync(),
      'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
    );
    expect(
      result.mimeType.toString().trim(),
      equals(
        'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
      ),
    );
    expect(result.content.length, greaterThanOrEqualTo(100));
    // Every one of these must be present, so each gets its own expectation.
    expect(result.content, contains('Team'));
    expect(result.content, contains('Location'));
    expect(result.content, contains('Stanley Cups'));
    expect(result.content, contains('Blues'));
    expect(result.content, contains('Flyers'));
    expect(result.content, contains('Maple Leafs'));
    expect(result.content, contains('STL'));
    expect(result.content, contains('PHI'));
    expect(result.content, contains('TOR'));
    // skipped: field 'tables' not available on dart result type
    // skipped: field 'metadata.format.excel.sheet_count' not available on dart result type
    // skipped: field 'metadata.format.excel.sheet_names' not available on dart result type
  });
}
|