Nomad changes
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s

This commit is contained in:
Henrik Jess Nielsen
2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions

View File

@@ -0,0 +1,28 @@
```dart title="Dart"
import 'dart:convert';
import 'dart:typed_data';
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts two in-memory documents with one batch call and prints a
/// short summary for every result.
Future<void> main() async {
  // UTF-8 encoded payloads: one plain-text document and one HTML document.
  final plainText = Uint8List.fromList(utf8.encode('Hello, world!'));
  final html = Uint8List.fromList(utf8.encode('<html>test</html>'));

  // Only the HTML item carries a per-item config (it forces OCR).
  final batch = <BatchBytesItem>[
    BatchBytesItem(content: plainText, mimeType: 'text/plain'),
    BatchBytesItem(
      content: html,
      mimeType: 'text/html',
      config: const FileExtractionConfig(forceOcr: true),
    ),
  ];

  // The entrypoint has sync semantics, but flutter_rust_bridge still hands
  // back a Future on the Dart side, so it has to be awaited.
  final extracted = await KreuzbergBridge.batchExtractBytesSync(batch);

  print('Processed ${extracted.length} items');
  for (final item in extracted) {
    print('${item.mimeType}: ${item.content.length} chars');
  }
}
```

View File

@@ -0,0 +1,21 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts two files with one batch call and prints a short summary for
/// every result.
Future<void> main() async {
  // Extracted without a per-item config.
  const plainPdf = BatchFileItem(path: 'doc1.pdf');

  // Extracted with a per-item config that forces OCR.
  final scannedPdf = BatchFileItem(
    path: 'scan.pdf',
    config: FileExtractionConfig(forceOcr: true),
  );

  // The entrypoint has sync semantics, but flutter_rust_bridge still hands
  // back a Future on the Dart side, so it has to be awaited.
  final extracted = await KreuzbergBridge.batchExtractFilesSync(
    <BatchFileItem>[plainPdf, scannedPdf],
  );

  print('Processed ${extracted.length} files');
  for (final item in extracted) {
    print('${item.mimeType}: ${item.content.length} chars');
  }
}
```

View File

@@ -0,0 +1,43 @@
```dart title="Dart"
import 'dart:convert';
import 'dart:io';
import 'package:http/http.dart' as http;
/// Uploads `document.pdf` to a local `/extract` endpoint together with
/// chunking settings, then prints the size of each returned chunk.
///
/// Throws an [HttpException] when the server answers with a status code of
/// 400 or above.
Future<void> main() async {
  final pdfBytes = await File('document.pdf').readAsBytes();

  // Multipart POST: the document goes in the `file` part, the chunking
  // settings are sent as a JSON-encoded form field.
  final upload = http.MultipartRequest(
    'POST',
    Uri.parse('http://localhost:8000/extract'),
  );
  upload.files.add(
    http.MultipartFile.fromBytes('file', pdfBytes, filename: 'document.pdf'),
  );
  upload.fields['chunking'] = jsonEncode({
    'max_characters': 800,
    'overlap': 100,
  });

  // Buffer the streamed response so status and body can be read directly.
  final response = await http.Response.fromStream(await upload.send());
  if (response.statusCode >= 400) {
    throw HttpException('Server returned ${response.statusCode}: ${response.body}');
  }

  final payload = jsonDecode(response.body) as Map<String, dynamic>;
  final chunks = payload['chunks'] as List<dynamic>?;
  if (chunks == null) {
    // Nothing to report when the response carries no `chunks` entry.
    return;
  }

  print('${chunks.length} chunks');
  for (final entry in chunks) {
    // A missing or null `content` is treated as an empty string.
    final text = (entry as Map<String, dynamic>)['content'] as String? ?? '';
    print(' ${text.length} chars');
  }
}
```

View File

@@ -0,0 +1,31 @@
```dart title="Dart"
import 'dart:convert';
import 'dart:io';
import 'package:http/http.dart' as http;
/// Uploads `document.pdf` to a local `/extract` endpoint and prints the
/// `content` field of the JSON response.
///
/// Throws an [HttpException] when the server answers with a status code of
/// 400 or above.
Future<void> main() async {
  final pdfBytes = await File('document.pdf').readAsBytes();

  // Multipart POST with the document in the `file` part.
  final upload = http.MultipartRequest(
    'POST',
    Uri.parse('http://localhost:8000/extract'),
  );
  upload.files.add(
    http.MultipartFile.fromBytes('file', pdfBytes, filename: 'document.pdf'),
  );

  // Buffer the streamed response so status and body can be read directly.
  final response = await http.Response.fromStream(await upload.send());
  if (response.statusCode >= 400) {
    throw HttpException('Server returned ${response.statusCode}: ${response.body}');
  }

  final payload = jsonDecode(response.body) as Map<String, dynamic>;
  // Falls back to an empty string when `content` is absent or null.
  print(payload['content'] ?? '');
}
```

View File

@@ -0,0 +1,65 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `report.pdf` with an explicit OCR, chunking, image and output
/// configuration, then prints a preview and summary of the result.
Future<void> main() async {
  // OCR: Tesseract on English text.
  const ocrSettings = OcrConfig(
    enabled: true,
    backend: 'tesseract',
    language: 'eng',
    autoRotate: false,
  );

  // Chunking: ~800-character markdown chunks with 100-char overlap.
  const chunkSettings = ChunkingConfig(
    maxCharacters: 800,
    overlap: 100,
    trim: true,
    chunkerType: ChunkerType.markdown,
    sizing: ChunkSizing.characters(),
    prependHeadingContext: true,
  );

  // Image extraction.
  const imageSettings = ImageExtractionConfig(
    extractImages: true,
    targetDpi: 150,
    maxImageDimension: 4096,
    injectPlaceholders: false,
    autoAdjustDpi: true,
    minDpi: 72,
    maxDpi: 300,
    classify: false,
  );

  final config = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    ocr: ocrSettings,
    chunking: chunkSettings,
    images: imageSettings,
    // Output: markdown with full document structure.
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.markdown(),
    includeDocumentStructure: true,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  final extraction = await KreuzbergBridge.extractFile('report.pdf', null, config);

  // Print the total length followed by at most the first 200 characters.
  final text = extraction.content;
  print('Content (${text.length} chars):');
  final previewEnd = text.length < 200 ? text.length : 200;
  print(text.substring(0, previewEnd));

  // The remaining fields are optional; each is reported only when present.
  final chunks = extraction.chunks;
  if (chunks != null) {
    print('\nChunks: ${chunks.length}');
  }

  print('Tables: ${extraction.tables.length}');

  final languages = extraction.detectedLanguages;
  if (languages != null) {
    print('Languages: $languages');
  }

  final method = extraction.extractionMethod;
  if (method != null) {
    print('Extraction method: $method');
  }
}
```

View File

@@ -0,0 +1,15 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `document.pdf` and prints its content, reporting any
/// [Exception] raised by the extraction instead of letting it propagate.
Future<void> main() async {
  try {
    final extraction = await KreuzbergBridge.extractFile('document.pdf', null);
    print(extraction.content);
  } on Exception catch (error) {
    // Every KreuzbergError variant (Io / UnsupportedFormat / Parsing /
    // MissingDependency, ...) is converted by flutter_rust_bridge into a
    // Dart exception whose message preserves the original context.
    print('Extraction failed: $error');
  }
}
```

View File

@@ -0,0 +1,37 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `document.pdf` with an explicit configuration and prints a
/// summary; on failure, prints a message chosen from the exception text.
Future<void> main() async {
  // Maps an exception message to the line that should be printed for it.
  // The checks run in order, so the first matching marker wins.
  String describeFailure(String message) {
    if (message.contains('UnsupportedFormat')) {
      return 'Unsupported format: $message';
    }
    if (message.contains('MissingDependency')) {
      return 'Install the required dependency: $message';
    }
    if (message.contains('Parsing')) {
      return 'Corrupt or invalid document: $message';
    }
    return 'Extraction failed: $message';
  }

  final config = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.markdown(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  try {
    final extraction = await KreuzbergBridge.extractFile('document.pdf', null, config);
    print('Extracted ${extraction.content.length} chars');
    print('MIME: ${extraction.mimeType}');

    // Detected languages are optional and only reported when present.
    final languages = extraction.detectedLanguages;
    if (languages != null) {
      print('Languages: $languages');
    }
  } on Exception catch (error) {
    print(describeFailure(error.toString()));
  }
}
```

View File

@@ -0,0 +1,14 @@
```dart title="Dart"
import 'dart:io';
import 'dart:typed_data';
import 'package:kreuzberg/kreuzberg.dart';
/// Reads `document.pdf` into memory, extracts it as `application/pdf`, and
/// prints the content and MIME type of the result.
Future<void> main() async {
  final pdfBytes = await File('document.pdf').readAsBytes();
  final extraction = await KreuzbergBridge.extractBytes(
    pdfBytes,
    'application/pdf',
  );

  print(extraction.content);
  print('MIME type: ${extraction.mimeType}');
}
```

View File

@@ -0,0 +1,16 @@
```dart title="Dart"
import 'dart:io';
import 'dart:typed_data';
import 'package:kreuzberg/kreuzberg.dart';
/// Reads `document.pdf` into memory, extracts it through the `*Sync`
/// entrypoint, and prints the content and MIME type of the result.
Future<void> main() async {
  final pdfBytes = await File('document.pdf').readAsBytes();

  // flutter_rust_bridge surfaces every call as a Future, so even this
  // *Sync entrypoint has to be awaited from Dart.
  final extraction = await KreuzbergBridge.extractBytesSync(
    pdfBytes,
    'application/pdf',
  );

  print(extraction.content);
  print('MIME type: ${extraction.mimeType}');
}
```

View File

@@ -0,0 +1,11 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `document.pdf` and prints its content, MIME type and table
/// count.
Future<void> main() async {
  final extraction = await KreuzbergBridge.extractFile('document.pdf', null);

  print(extraction.content);
  print('MIME type: ${extraction.mimeType}');
  print('Tables: ${extraction.tables.length}');
}
```

View File

@@ -0,0 +1,13 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `document.pdf` through the `*Sync` entrypoint and prints its
/// content, MIME type and table count.
Future<void> main() async {
  // flutter_rust_bridge surfaces every call as a Future, so even this
  // *Sync entrypoint has to be awaited from Dart.
  final extraction = await KreuzbergBridge.extractFileSync(
    'document.pdf',
    null,
  );

  print(extraction.content);
  print('MIME type: ${extraction.mimeType}');
  print('Tables: ${extraction.tables.length}');
}
```