Nomad changes
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s

This commit is contained in:
Henrik Jess Nielsen
2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions

View File

@@ -0,0 +1,44 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `document.pdf` with text chunking and page extraction configured,
/// then prints a short preview and the page range of every chunk whose
/// metadata carries both a first and a last page.
Future<void> main() async {
  final config = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    chunking: const ChunkingConfig(
      maxCharacters: 500,
      overlap: 50,
      trim: true,
      chunkerType: ChunkerType.text,
      sizing: ChunkSizing.characters(),
      prependHeadingContext: false,
    ),
    pages: const PageConfig(
      extractPages: true,
      insertPageMarkers: false,
      markerFormat: '',
    ),
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );
  final result = await KreuzbergBridge.extractFile('document.pdf', null, config);
  final chunks = result.chunks ?? const [];
  for (final chunk in chunks) {
    final first = chunk.metadata.firstPage;
    final last = chunk.metadata.lastPage;
    // Chunks without complete page information are skipped.
    if (first == null || last == null) {
      continue;
    }
    final content = chunk.content;
    // Only append an ellipsis when the preview was actually cut short, so
    // short chunks are not shown as if they had been truncated.
    final preview =
        content.length > 50 ? '${content.substring(0, 50)}...' : content;
    final pageRange = first == last ? 'Page $first' : 'Pages $first-$last';
    print('Chunk: $preview ($pageRange)');
  }
}
```

View File

@@ -0,0 +1,29 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `document.pdf` with the markdown chunker and prints how many
/// chunks were produced.
Future<void> main() async {
  // Chunking settings are kept in their own constant for readability.
  const chunking = ChunkingConfig(
    maxCharacters: 1000,
    overlap: 200,
    trim: true,
    chunkerType: ChunkerType.markdown,
    sizing: ChunkSizing.characters(),
    prependHeadingContext: true,
  );
  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    chunking: chunking,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );
  final result = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    extractionConfig,
  );
  // A missing chunk list is treated as empty.
  final chunks = result.chunks ?? const [];
  print('Chunks: ${chunks.length}');
}
```

View File

@@ -0,0 +1,50 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `research_paper.pdf` with chunking and an embedding preset
/// configured, then prints each chunk's index, byte range, a content preview
/// and, when present, the length of its embedding.
Future<void> main() async {
  final config = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    chunking: const ChunkingConfig(
      maxCharacters: 500,
      overlap: 50,
      trim: true,
      chunkerType: ChunkerType.text,
      sizing: ChunkSizing.characters(),
      prependHeadingContext: false,
      embedding: EmbeddingConfig(
        model: EmbeddingModelType.preset(name: 'balanced'),
        normalize: true,
        batchSize: 32,
        showDownloadProgress: false,
      ),
    ),
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );
  final result =
      await KreuzbergBridge.extractFile('research_paper.pdf', null, config);
  final chunks = result.chunks ?? const [];
  for (final chunk in chunks) {
    final index = chunk.metadata.chunkIndex;
    final total = chunk.metadata.totalChunks;
    final start = chunk.metadata.byteStart;
    final end = chunk.metadata.byteEnd;
    final content = chunk.content;
    // Only append an ellipsis when the preview was actually cut short, so
    // short chunks are not shown as if they had been truncated.
    final preview =
        content.length > 100 ? '${content.substring(0, 100)}...' : content;
    // The index is printed one-based.
    print('Chunk ${index + 1}/$total');
    print('Position: $start-$end');
    print('Content: $preview');
    final embedding = chunk.embedding;
    if (embedding != null) {
      print('Embedding: ${embedding.length} dimensions');
    }
  }
}
```

View File

@@ -0,0 +1,35 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `document.pdf` with chunking plus an embedding preset and prints
/// the number of chunks returned.
Future<void> main() async {
  // Embedding settings nested inside the chunking configuration.
  const embedding = EmbeddingConfig(
    model: EmbeddingModelType.preset(name: 'balanced'),
    normalize: true,
    batchSize: 32,
    showDownloadProgress: false,
  );
  const chunking = ChunkingConfig(
    maxCharacters: 1024,
    overlap: 100,
    trim: true,
    chunkerType: ChunkerType.text,
    sizing: ChunkSizing.characters(),
    prependHeadingContext: false,
    embedding: embedding,
  );
  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    chunking: chunking,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );
  final result = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    extractionConfig,
  );
  // A missing chunk list is treated as empty.
  final chunks = result.chunks ?? const [];
  print('Chunks with embeddings: ${chunks.length}');
}
```

View File

@@ -0,0 +1,29 @@
```dart title="Dart"
import 'package:flutter_rust_bridge/flutter_rust_bridge.dart' show Int64List;
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `document.pdf` with YAKE keyword extraction configured and prints
/// the extracted keywords.
Future<void> main() async {
  // Not const: the n-gram range is built at runtime via Int64List.fromList.
  final keywordConfig = KeywordConfig(
    algorithm: KeywordAlgorithm.yake,
    maxKeywords: 10,
    minScore: 0.3,
    ngramRange: Int64List.fromList(<int>[1, 3]),
    language: 'en',
  );
  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    keywords: keywordConfig,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    useLayoutForMarkdown: false,
    maxArchiveDepth: 3,
  );
  final result = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    extractionConfig,
  );
  print('Keywords: ${result.extractedKeywords}');
}
```

View File

@@ -0,0 +1,33 @@
```dart title="Dart"
import 'package:flutter_rust_bridge/flutter_rust_bridge.dart' show Int64List;
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `research_paper.pdf` with YAKE keyword extraction configured and
/// prints every returned keyword together with its score.
Future<void> main() async {
  // Not const: the n-gram range is built at runtime via Int64List.fromList.
  final keywordConfig = KeywordConfig(
    algorithm: KeywordAlgorithm.yake,
    maxKeywords: 10,
    minScore: 0.3,
    ngramRange: Int64List.fromList(<int>[1, 3]),
  );
  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    keywords: keywordConfig,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    useLayoutForMarkdown: false,
    maxArchiveDepth: 3,
  );
  final result = await KreuzbergBridge.extractFile(
    'research_paper.pdf',
    null,
    extractionConfig,
  );
  final keywords = result.extractedKeywords;
  // Nothing to print when the result carries no keyword list.
  if (keywords == null) {
    return;
  }
  for (final keyword in keywords) {
    print('${keyword.text} (score: ${keyword.score})');
  }
}
```

View File

@@ -0,0 +1,25 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `document.pdf` with language detection enabled (single language,
/// `minConfidence` 0.8) and prints the detected languages.
Future<void> main() async {
  const languageDetection = LanguageDetectionConfig(
    enabled: true,
    minConfidence: 0.8,
    detectMultiple: false,
  );
  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    languageDetection: languageDetection,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );
  final result = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    extractionConfig,
  );
  print('Detected languages: ${result.detectedLanguages}');
}
```

View File

@@ -0,0 +1,25 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `multilingual_document.pdf` with `detectMultiple` switched on and
/// prints the detected languages.
Future<void> main() async {
  const languageDetection = LanguageDetectionConfig(
    enabled: true,
    minConfidence: 0.8,
    detectMultiple: true,
  );
  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    languageDetection: languageDetection,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );
  final result = await KreuzbergBridge.extractFile(
    'multilingual_document.pdf',
    null,
    extractionConfig,
  );
  print('Detected languages: ${result.detectedLanguages}');
}
```

View File

@@ -0,0 +1,20 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `document.pdf` with quality processing enabled and prints the
/// reported quality score.
Future<void> main() async {
  final result = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    // Configuration is passed inline; quality processing is switched on.
    ExtractionConfig(
      useCache: true,
      enableQualityProcessing: true,
      forceOcr: false,
      disableOcr: false,
      resultFormat: ResultFormat.unified,
      outputFormat: OutputFormat.plain(),
      includeDocumentStructure: false,
      maxArchiveDepth: 3,
      useLayoutForMarkdown: false,
    ),
  );
  print('Quality score: ${result.qualityScore}');
}
```

View File

@@ -0,0 +1,30 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `scanned_document.pdf`, reports the quality score (flagging values
/// below 0.5 as low quality) and prints every processing warning.
Future<void> main() async {
  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );
  final result = await KreuzbergBridge.extractFile(
    'scanned_document.pdf',
    null,
    extractionConfig,
  );
  final score = result.qualityScore;
  // The score is only reported when the result actually carries one.
  if (score != null) {
    print(
      score < 0.5
          ? 'Warning: Low quality extraction (${score.toStringAsFixed(2)})'
          : 'Quality score: ${score.toStringAsFixed(2)}',
    );
  }
  for (final warning in result.processingWarnings) {
    print('Warning: $warning');
  }
}
```

View File

@@ -0,0 +1,24 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `document.pdf` with token reduction in `'moderate'` mode and
/// prints the length of the resulting content.
Future<void> main() async {
  const tokenReduction = TokenReductionOptions(
    mode: 'moderate',
    preserveImportantWords: true,
  );
  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    tokenReduction: tokenReduction,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );
  final result = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    extractionConfig,
  );
  print('Reduced content length: ${result.content.length}');
}
```

View File

@@ -0,0 +1,24 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `verbose_document.pdf` with token reduction in `'moderate'` mode
/// and prints the content length afterwards.
Future<void> main() async {
  const tokenReduction = TokenReductionOptions(
    mode: 'moderate',
    preserveImportantWords: true,
  );
  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    tokenReduction: tokenReduction,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );
  final result = await KreuzbergBridge.extractFile(
    'verbose_document.pdf',
    null,
    extractionConfig,
  );
  print('Content length after reduction: ${result.content.length}');
}
```

View File

@@ -0,0 +1,77 @@
```dart title="Dart"
import 'dart:typed_data';
import 'package:kreuzberg/kreuzberg.dart';
/// One entry for a vector store: a chunk of text, its embedding and some
/// string metadata, addressed by [id].
class VectorRecord {
  /// Identifier of the record.
  final String id;

  /// Text content the embedding was computed for.
  final String content;

  /// Embedding vector associated with [content].
  final Float64List embedding;

  /// Additional string key/value information about the record.
  final Map<String, String> metadata;

  /// Creates a record; every field is required.
  VectorRecord({
    required this.id,
    required this.content,
    required this.embedding,
    required this.metadata,
  });
}
/// Extracts [documentPath] with chunking and an embedding preset configured
/// and converts every chunk that has an embedding into a [VectorRecord].
///
/// Record ids have the form `<documentId>_chunk_<index>`, where the index is
/// the chunk's position in the returned chunk list. Chunks without an
/// embedding are left out.
Future<List<VectorRecord>> extractAndVectorize(
  String documentPath,
  String documentId,
) async {
  const embeddingConfig = EmbeddingConfig(
    model: EmbeddingModelType.preset(name: 'balanced'),
    normalize: true,
    batchSize: 32,
    showDownloadProgress: false,
  );
  const chunking = ChunkingConfig(
    maxCharacters: 512,
    overlap: 50,
    trim: true,
    chunkerType: ChunkerType.text,
    sizing: ChunkSizing.characters(),
    prependHeadingContext: false,
    embedding: embeddingConfig,
  );
  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    chunking: chunking,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );
  final result = await KreuzbergBridge.extractFile(
    documentPath,
    null,
    extractionConfig,
  );
  final chunks = result.chunks ?? const [];
  final records = <VectorRecord>[];
  for (var position = 0; position < chunks.length; position++) {
    final chunk = chunks[position];
    final vector = chunk.embedding;
    // Only chunks that carry an embedding become records.
    if (vector != null) {
      records.add(
        VectorRecord(
          id: '${documentId}_chunk_$position',
          content: chunk.content,
          embedding: vector,
          metadata: <String, String>{
            'document_id': documentId,
            'chunk_index': position.toString(),
            'content_length': chunk.content.length.toString(),
          },
        ),
      );
    }
  }
  return records;
}
/// Vectorizes `document.pdf` under the id `doc-001` and prints how many
/// records were produced.
Future<void> main() async {
  const documentPath = 'document.pdf';
  const documentId = 'doc-001';
  final records = await extractAndVectorize(documentPath, documentId);
  print('Vector records: ${records.length}');
}
```

View File

@@ -0,0 +1,28 @@
```dart title="Dart"
import 'dart:convert';
import 'dart:typed_data';
import 'package:kreuzberg/kreuzberg.dart';
/// Batch-extracts two in-memory documents (plain text and HTML) and prints
/// the MIME type and content length of every result.
Future<void> main() async {
  final items = <BatchBytesItem>[
    BatchBytesItem(
      content: Uint8List.fromList(utf8.encode('Hello, world!')),
      mimeType: 'text/plain',
    ),
    // The HTML item carries its own per-item configuration.
    BatchBytesItem(
      content: Uint8List.fromList(utf8.encode('<html>test</html>')),
      mimeType: 'text/html',
      config: const FileExtractionConfig(forceOcr: true),
    ),
  ];
  // Despite the "Sync" name, flutter_rust_bridge hands back a Future in Dart,
  // so the call is awaited.
  final results = await KreuzbergBridge.batchExtractBytesSync(items);
  print('Processed ${results.length} items');
  for (final result in results) {
    print('${result.mimeType}: ${result.content.length} chars');
  }
}
```

View File

@@ -0,0 +1,21 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Batch-extracts two files, the second with a per-file configuration, and
/// prints the MIME type and content length of every result.
Future<void> main() async {
  final items = <BatchFileItem>[
    const BatchFileItem(path: 'doc1.pdf'),
    // Built as a constant like the first item; both constructors are used in
    // const form elsewhere in these examples.
    const BatchFileItem(
      path: 'scan.pdf',
      config: FileExtractionConfig(forceOcr: true),
    ),
  ];
  // Sync semantics — flutter_rust_bridge still returns a Future from Dart.
  final results = await KreuzbergBridge.batchExtractFilesSync(items);
  print('Processed ${results.length} files');
  for (final result in results) {
    print('${result.mimeType}: ${result.content.length} chars');
  }
}
```

View File

@@ -0,0 +1,43 @@
```dart title="Dart"
import 'dart:convert';
import 'dart:io';
import 'package:http/http.dart' as http;
/// Uploads `document.pdf` as a multipart POST to
/// `http://localhost:8000/extract` together with a JSON `chunking` field and
/// prints the number of returned chunks and the length of each one.
///
/// Throws an [HttpException] when the server answers with status 400 or above.
Future<void> main() async {
  final bytes = await File('document.pdf').readAsBytes();
  final upload = http.MultipartFile.fromBytes(
    'file',
    bytes,
    filename: 'document.pdf',
  );
  final request = http.MultipartRequest(
    'POST',
    Uri.parse('http://localhost:8000/extract'),
  );
  request.files.add(upload);
  // The chunking options travel as a JSON-encoded form field.
  request.fields['chunking'] = jsonEncode({
    'max_characters': 800,
    'overlap': 100,
  });
  final response = await http.Response.fromStream(await request.send());
  if (response.statusCode >= 400) {
    throw HttpException('Server returned ${response.statusCode}: ${response.body}');
  }
  final result = jsonDecode(response.body) as Map<String, dynamic>;
  final chunks = result['chunks'] as List<dynamic>?;
  // Nothing to report when the response has no chunk list.
  if (chunks == null) {
    return;
  }
  print('${chunks.length} chunks');
  for (final chunk in chunks) {
    // A missing content entry is treated as an empty string.
    final content = (chunk as Map<String, dynamic>)['content'] as String? ?? '';
    print(' ${content.length} chars');
  }
}
```

View File

@@ -0,0 +1,31 @@
```dart title="Dart"
import 'dart:convert';
import 'dart:io';
import 'package:http/http.dart' as http;
/// Uploads `document.pdf` as a multipart POST to
/// `http://localhost:8000/extract` and prints the `content` entry of the JSON
/// response.
///
/// Throws an [HttpException] when the server answers with status 400 or above.
Future<void> main() async {
  final bytes = await File('document.pdf').readAsBytes();
  final upload = http.MultipartFile.fromBytes(
    'file',
    bytes,
    filename: 'document.pdf',
  );
  final request = http.MultipartRequest(
    'POST',
    Uri.parse('http://localhost:8000/extract'),
  );
  request.files.add(upload);
  final response = await http.Response.fromStream(await request.send());
  if (response.statusCode >= 400) {
    throw HttpException('Server returned ${response.statusCode}: ${response.body}');
  }
  final result = jsonDecode(response.body) as Map<String, dynamic>;
  // An absent content entry is printed as an empty line.
  print(result['content'] ?? '');
}
```

View File

@@ -0,0 +1,65 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `report.pdf` with OCR, markdown chunking and image extraction
/// configured, then prints a content preview of at most 200 characters
/// followed by the chunk count, table count, detected languages and
/// extraction method where those are present.
Future<void> main() async {
  final config = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    // OCR: Tesseract on English text
    forceOcr: false,
    disableOcr: false,
    ocr: const OcrConfig(
      enabled: true,
      backend: 'tesseract',
      language: 'eng',
      autoRotate: false,
    ),
    // Chunking: ~800-character markdown chunks with 100-char overlap
    chunking: const ChunkingConfig(
      maxCharacters: 800,
      overlap: 100,
      trim: true,
      chunkerType: ChunkerType.markdown,
      sizing: ChunkSizing.characters(),
      prependHeadingContext: true,
    ),
    // Image extraction
    images: const ImageExtractionConfig(
      extractImages: true,
      targetDpi: 150,
      maxImageDimension: 4096,
      injectPlaceholders: false,
      autoAdjustDpi: true,
      minDpi: 72,
      maxDpi: 300,
      classify: false,
    ),
    // Output: markdown with full document structure
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.markdown(),
    includeDocumentStructure: true,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );
  final result = await KreuzbergBridge.extractFile('report.pdf', null, config);
  print('Content (${result.content.length} chars):');
  // Clamp the preview end so content shorter than 200 characters does not
  // make substring throw.
  final preview = result.content.substring(
    0,
    result.content.length < 200 ? result.content.length : 200,
  );
  print(preview);
  // Copy the nullable field into a local so the null check promotes it and
  // no `!` assertion is needed.
  final chunks = result.chunks;
  if (chunks != null) {
    print('\nChunks: ${chunks.length}');
  }
  print('Tables: ${result.tables.length}');
  if (result.detectedLanguages != null) {
    print('Languages: ${result.detectedLanguages}');
  }
  if (result.extractionMethod != null) {
    print('Extraction method: ${result.extractionMethod}');
  }
}
```

View File

@@ -0,0 +1,15 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `document.pdf` without an explicit configuration and prints its
/// content, or a failure message when the extraction throws an [Exception].
Future<void> main() async {
  try {
    final result = await KreuzbergBridge.extractFile(
      'document.pdf',
      null,
    );
    print(result.content);
  } on Exception catch (e) {
    // Every KreuzbergError variant (Io / UnsupportedFormat / Parsing /
    // MissingDependency, ...) is turned into a Dart exception by
    // flutter_rust_bridge; its message keeps the original context.
    print('Extraction failed: $e');
  }
}
```

View File

@@ -0,0 +1,37 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `document.pdf` as markdown and prints basic facts about the
/// result. On failure the exception text is matched against known error
/// names to choose a more specific message.
Future<void> main() async {
  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.markdown(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );
  try {
    final result = await KreuzbergBridge.extractFile(
      'document.pdf',
      null,
      extractionConfig,
    );
    print('Extracted ${result.content.length} chars');
    print('MIME: ${result.mimeType}');
    if (result.detectedLanguages != null) {
      print('Languages: ${result.detectedLanguages}');
    }
  } on Exception catch (e) {
    final message = e.toString();
    // The checks run in this order; the first matching name decides the text.
    final report = message.contains('UnsupportedFormat')
        ? 'Unsupported format: $message'
        : message.contains('MissingDependency')
            ? 'Install the required dependency: $message'
            : message.contains('Parsing')
                ? 'Corrupt or invalid document: $message'
                : 'Extraction failed: $message';
    print(report);
  }
}
```

View File

@@ -0,0 +1,14 @@
```dart title="Dart"
import 'dart:io';
import 'dart:typed_data';
import 'package:kreuzberg/kreuzberg.dart';
/// Reads `document.pdf` into memory, extracts it from bytes as
/// `application/pdf` and prints the content and MIME type.
Future<void> main() async {
  final source = File('document.pdf');
  final Uint8List bytes = await source.readAsBytes();
  final result = await KreuzbergBridge.extractBytes(
    bytes,
    'application/pdf',
  );
  print(result.content);
  print('MIME type: ${result.mimeType}');
}
```

View File

@@ -0,0 +1,16 @@
```dart title="Dart"
import 'dart:io';
import 'dart:typed_data';
import 'package:kreuzberg/kreuzberg.dart';
/// Reads `document.pdf` into memory, extracts it through the `*Sync` bytes
/// entrypoint and prints the content and MIME type.
Future<void> main() async {
  final source = File('document.pdf');
  final Uint8List bytes = await source.readAsBytes();
  // flutter_rust_bridge exposes every call as a Future, so the *Sync
  // entrypoints still have to be awaited from Dart.
  final result = await KreuzbergBridge.extractBytesSync(
    bytes,
    'application/pdf',
  );
  print(result.content);
  print('MIME type: ${result.mimeType}');
}
```

View File

@@ -0,0 +1,11 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `document.pdf` without an explicit configuration and prints the
/// content, MIME type and number of tables.
Future<void> main() async {
  final result = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
  );
  print(result.content);
  print('MIME type: ${result.mimeType}');
  print('Tables: ${result.tables.length}');
}
```

View File

@@ -0,0 +1,13 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `document.pdf` through the `*Sync` file entrypoint and prints the
/// content, MIME type and number of tables.
Future<void> main() async {
  // flutter_rust_bridge exposes every call as a Future, so the *Sync
  // entrypoints still have to be awaited from Dart.
  final result = await KreuzbergBridge.extractFileSync(
    'document.pdf',
    null,
  );
  print(result.content);
  print('MIME type: ${result.mimeType}');
  print('Tables: ${result.tables.length}');
}
```

View File

@@ -0,0 +1,64 @@
```dart title="Dart"
import 'package:flutter_rust_bridge/flutter_rust_bridge.dart' show Int64List;
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `document.pdf` with OCR, chunking with embeddings, language
/// detection, keyword extraction, token reduction and post-processing all
/// configured, then prints the content, detected languages and chunk count.
Future<void> main() async {
  const ocr = OcrConfig(
    enabled: true,
    backend: 'tesseract',
    language: 'eng',
    autoRotate: false,
  );
  const embedding = EmbeddingConfig(
    model: EmbeddingModelType.preset(name: 'balanced'),
    normalize: true,
    batchSize: 32,
    showDownloadProgress: false,
  );
  const chunking = ChunkingConfig(
    maxCharacters: 1000,
    overlap: 200,
    trim: true,
    chunkerType: ChunkerType.text,
    sizing: ChunkSizing.characters(),
    prependHeadingContext: false,
    embedding: embedding,
  );
  const languageDetection = LanguageDetectionConfig(
    enabled: true,
    minConfidence: 0.8,
    detectMultiple: false,
  );
  // Not const: the n-gram range is built at runtime via Int64List.fromList.
  final keywordConfig = KeywordConfig(
    algorithm: KeywordAlgorithm.yake,
    maxKeywords: 10,
    minScore: 0.1,
    ngramRange: Int64List.fromList(<int>[1, 3]),
    language: 'en',
  );
  const tokenReduction = TokenReductionOptions(
    mode: 'moderate',
    preserveImportantWords: true,
  );
  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    ocr: ocr,
    chunking: chunking,
    languageDetection: languageDetection,
    keywords: keywordConfig,
    tokenReduction: tokenReduction,
    postprocessor: const PostProcessorConfig(enabled: true),
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    useLayoutForMarkdown: false,
    maxArchiveDepth: 3,
  );
  final result = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    extractionConfig,
  );
  print('Content: ${result.content}');
  if (result.detectedLanguages != null) {
    print('Languages: ${result.detectedLanguages}');
  }
  // A missing chunk list is treated as empty.
  final chunks = result.chunks ?? const [];
  print('Chunks: ${chunks.length}');
}
```

View File

@@ -0,0 +1,32 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `document.pdf` with text chunking and prints the chunk count
/// followed by the content length of every chunk.
Future<void> main() async {
  const chunking = ChunkingConfig(
    maxCharacters: 1000,
    overlap: 200,
    trim: true,
    chunkerType: ChunkerType.text,
    sizing: ChunkSizing.characters(),
    prependHeadingContext: false,
  );
  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    chunking: chunking,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );
  final result = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    extractionConfig,
  );
  // A missing chunk list is treated as empty.
  final chunks = result.chunks ?? const [];
  print('Chunks: ${chunks.length}');
  for (final chunk in chunks) {
    print('Length: ${chunk.content.length}');
  }
}
```

View File

@@ -0,0 +1,20 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `document.pdf` with an explicitly spelled-out configuration and
/// prints the extracted content.
Future<void> main() async {
  final result = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    // Configuration is passed inline.
    ExtractionConfig(
      useCache: true,
      enableQualityProcessing: true,
      forceOcr: false,
      disableOcr: false,
      resultFormat: ResultFormat.unified,
      outputFormat: OutputFormat.plain(),
      includeDocumentStructure: false,
      maxArchiveDepth: 3,
      useLayoutForMarkdown: false,
    ),
  );
  print(result.content);
}
```

View File

@@ -0,0 +1,22 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `document.pdf` with a configuration built in code and prints the
/// extracted content.
Future<void> main() async {
  // Config-file discovery is not exposed by the Dart bindings, so the
  // ExtractionConfig is constructed here and handed to
  // KreuzbergBridge.extractFile explicitly.
  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );
  final result = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    extractionConfig,
  );
  print(result.content);
}
```

View File

@@ -0,0 +1,27 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `scanned.pdf` with the `'tesseract'` OCR backend configured for
/// `'eng'` and prints the content length and number of tables.
Future<void> main() async {
  const ocr = OcrConfig(
    enabled: true,
    backend: 'tesseract',
    language: 'eng',
    autoRotate: false,
  );
  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    ocr: ocr,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );
  final result = await KreuzbergBridge.extractFile(
    'scanned.pdf',
    null,
    extractionConfig,
  );
  print('Content length: ${result.content.length}');
  print('Tables detected: ${result.tables.length}');
}
```

View File

@@ -0,0 +1,56 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `document.pdf` with a fully spelled-out Tesseract configuration
/// (language `'eng+deu'`) plus text chunking and prints the content length.
Future<void> main() async {
  // Tesseract-specific settings, nested into the OCR configuration below.
  const tesseract = TesseractConfig(
    language: 'eng+deu',
    psm: 6,
    outputFormat: 'text',
    oem: 3,
    minConfidence: 0.0,
    enableTableDetection: false,
    tableMinConfidence: 0.5,
    tableColumnThreshold: 20,
    tableRowThresholdRatio: 0.5,
    useCache: true,
    classifyUsePreAdaptedTemplates: false,
    languageModelNgramOn: false,
    tesseditDontBlkrejGoodWds: false,
    tesseditDontRowrejGoodWds: false,
    tesseditEnableDictCorrection: false,
    tesseditCharWhitelist: '',
    tesseditCharBlacklist: '',
    tesseditUsePrimaryParamsModel: false,
    textordSpaceSizeIsVariable: false,
    thresholdingMethod: false,
  );
  const ocr = OcrConfig(
    enabled: true,
    backend: 'tesseract',
    language: 'eng+deu',
    autoRotate: false,
    tesseractConfig: tesseract,
  );
  const chunking = ChunkingConfig(
    maxCharacters: 1000,
    overlap: 200,
    trim: true,
    chunkerType: ChunkerType.text,
    sizing: ChunkSizing.characters(),
    prependHeadingContext: false,
  );
  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    ocr: ocr,
    chunking: chunking,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );
  final result = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    extractionConfig,
  );
  print('Content length: ${result.content.length}');
}
```

View File

@@ -0,0 +1,23 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `document.pdf` with `includeDocumentStructure` switched on and
/// prints the number of document nodes when a document is returned.
Future<void> main() async {
  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: true,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );
  final result = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    extractionConfig,
  );
  final document = result.document;
  // Nothing to print when the result carries no document.
  if (document == null) {
    return;
  }
  print('Document nodes: ${document.nodes.length}');
}
```

View File

@@ -0,0 +1,29 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `document.pdf` in the element-based result format and prints the
/// type and a text preview of at most 100 characters for every element.
Future<void> main() async {
  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    resultFormat: ResultFormat.elementBased,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );
  final result = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    extractionConfig,
  );
  // A missing element list is treated as empty.
  final elements = result.elements ?? const [];
  for (final element in elements) {
    print('Type: ${element.elementType}');
    // Clamp the end index so short texts do not make substring throw.
    final previewEnd = element.text.length < 100 ? element.text.length : 100;
    final preview = element.text.substring(0, previewEnd);
    print('Text: $preview');
    print('---');
  }
}
```

View File

@@ -0,0 +1,35 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `document.pdf` with chunking and an embedding preset (batch size
/// 16, download progress shown) and prints the number of chunks.
Future<void> main() async {
  const embedding = EmbeddingConfig(
    model: EmbeddingModelType.preset(name: 'balanced'),
    normalize: true,
    batchSize: 16,
    showDownloadProgress: true,
  );
  const chunking = ChunkingConfig(
    maxCharacters: 1000,
    overlap: 200,
    trim: true,
    chunkerType: ChunkerType.text,
    sizing: ChunkSizing.characters(),
    prependHeadingContext: false,
    embedding: embedding,
  );
  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    chunking: chunking,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );
  final result = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    extractionConfig,
  );
  // A missing chunk list is treated as empty.
  final chunks = result.chunks ?? const [];
  print('Chunks with embeddings: ${chunks.length}');
}
```

View File

@@ -0,0 +1,25 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `document.pdf` as HTML using the GitHub theme, a `'kb-'` class
/// prefix and embedded CSS, then prints the resulting content.
Future<void> main() async {
  const htmlOutput = HtmlOutputConfig(
    theme: HtmlTheme.gitHub,
    classPrefix: 'kb-',
    embedCss: true,
  );
  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    htmlOutput: htmlOutput,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.html(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );
  final result = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    extractionConfig,
  );
  print(result.content);
}
```

View File

@@ -0,0 +1,29 @@
```dart title="Dart"
import 'package:flutter_rust_bridge/flutter_rust_bridge.dart' show Int64List;
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `document.pdf` with YAKE keyword extraction (`minScore` 0.1) and
/// prints the extracted keywords.
Future<void> main() async {
  // Not const: the n-gram range is built at runtime via Int64List.fromList.
  final keywordConfig = KeywordConfig(
    algorithm: KeywordAlgorithm.yake,
    maxKeywords: 10,
    minScore: 0.1,
    ngramRange: Int64List.fromList(<int>[1, 3]),
    language: 'en',
  );
  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    keywords: keywordConfig,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    useLayoutForMarkdown: false,
    maxArchiveDepth: 3,
  );
  final result = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    extractionConfig,
  );
  print('Keywords: ${result.extractedKeywords}');
}
```

View File

@@ -0,0 +1,25 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `document.pdf` with language detection and `detectMultiple`
/// switched on, then prints the detected languages.
Future<void> main() async {
  const languageDetection = LanguageDetectionConfig(
    enabled: true,
    minConfidence: 0.8,
    detectMultiple: true,
  );
  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    languageDetection: languageDetection,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );
  final result = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    extractionConfig,
  );
  print('Detected languages: ${result.detectedLanguages}');
}
```

View File

@@ -0,0 +1,31 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `document.pdf` with image extraction configured (target DPI 300,
/// DPI bounds 150 to 600) and prints the number of extracted images.
Future<void> main() async {
  const imageExtraction = ImageExtractionConfig(
    extractImages: true,
    targetDpi: 300,
    maxImageDimension: 4096,
    injectPlaceholders: false,
    autoAdjustDpi: true,
    minDpi: 150,
    maxDpi: 600,
    classify: false,
  );
  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    images: imageExtraction,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );
  final result = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    extractionConfig,
  );
  // A missing image list is treated as empty.
  final images = result.images ?? const [];
  print('Extracted images: ${images.length}');
}
```

View File

@@ -0,0 +1,32 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `encrypted.pdf` with PDF options that supply a password and a
/// hierarchy configuration, then prints the title from the result metadata.
Future<void> main() async {
  const hierarchy = HierarchyConfig(
    enabled: true,
    kClusters: 4,
    includeBbox: false,
  );
  const pdfOptions = PdfConfig(
    extractImages: true,
    passwords: <String>['password123'],
    extractMetadata: true,
    extractAnnotations: false,
    allowSingleColumnTables: false,
    hierarchy: hierarchy,
  );
  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    pdfOptions: pdfOptions,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );
  final result = await KreuzbergBridge.extractFile(
    'encrypted.pdf',
    null,
    extractionConfig,
  );
  print('Title: ${result.metadata.title}');
}
```

View File

@@ -0,0 +1,33 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `document.pdf` with a PDF hierarchy configuration and prints how
/// many of the returned pages have a non-null hierarchy.
Future<void> main() async {
  const hierarchy = HierarchyConfig(
    enabled: true,
    kClusters: 5,
    includeBbox: true,
    ocrCoverageThreshold: 0.8,
  );
  const pdfOptions = PdfConfig(
    extractImages: false,
    extractMetadata: true,
    extractAnnotations: false,
    allowSingleColumnTables: false,
    hierarchy: hierarchy,
  );
  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    pdfOptions: pdfOptions,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );
  final result = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    extractionConfig,
  );
  // A missing page list is treated as empty.
  final pages = result.pages ?? const [];
  print('Pages with hierarchy: ${pages.where((p) => p.hierarchy != null).length}');
}
```

View File

@@ -0,0 +1,27 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `document.pdf` with two named post-processors enabled and prints
/// the resulting content.
Future<void> main() async {
  // Only the processors listed here are named in the post-processor config.
  const postprocessor = PostProcessorConfig(
    enabled: true,
    enabledProcessors: <String>[
      'whitespace_normalizer',
      'unicode_normalizer',
    ],
  );
  final config = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    postprocessor: postprocessor,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );
  final extraction = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    config,
  );
  final processed = extraction.content;
  print('Processed content: $processed');
}
```

View File

@@ -0,0 +1,21 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `document.pdf` with quality processing enabled and prints the
/// result's quality score and the number of processing warnings.
Future<void> main() async {
  final config = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );
  final extraction = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    config,
  );
  final score = extraction.qualityScore;
  print('Quality score: $score');
  final warningCount = extraction.processingWarnings.length;
  print('Warnings: $warningCount');
}
```

View File

@@ -0,0 +1,48 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `scanned.pdf` through the `tesseract` OCR backend with a fully
/// spelled-out Tesseract configuration and prints the recognised text.
Future<void> main() async {
  // The same language string is passed to both the OCR config and the
  // Tesseract-specific settings.
  const languages = 'eng+deu';
  const tesseract = TesseractConfig(
    language: languages,
    psm: 6,
    outputFormat: 'text',
    oem: 3,
    minConfidence: 0.0,
    enableTableDetection: false,
    tableMinConfidence: 0.5,
    tableColumnThreshold: 20,
    tableRowThresholdRatio: 0.5,
    useCache: true,
    classifyUsePreAdaptedTemplates: false,
    languageModelNgramOn: false,
    tesseditDontBlkrejGoodWds: false,
    tesseditDontRowrejGoodWds: false,
    tesseditEnableDictCorrection: false,
    tesseditCharWhitelist: '',
    tesseditCharBlacklist: '',
    tesseditUsePrimaryParamsModel: false,
    textordSpaceSizeIsVariable: false,
    thresholdingMethod: false,
  );
  const ocr = OcrConfig(
    enabled: true,
    backend: 'tesseract',
    language: languages,
    autoRotate: false,
    tesseractConfig: tesseract,
  );
  final config = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    ocr: ocr,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );
  final extraction = await KreuzbergBridge.extractFile(
    'scanned.pdf',
    null,
    config,
  );
  final text = extraction.content;
  print('OCR text: $text');
}
```

View File

@@ -0,0 +1,24 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `document.pdf` with token reduction in `moderate` mode and prints
/// the resulting content.
Future<void> main() async {
  const tokenReduction = TokenReductionOptions(
    mode: 'moderate',
    preserveImportantWords: true,
  );
  final config = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    tokenReduction: tokenReduction,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );
  final extraction = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    config,
  );
  final reduced = extraction.content;
  print('Reduced content: $reduced');
}
```

View File

@@ -0,0 +1,12 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `document.pdf` without passing a config and prints the content
/// and MIME type of the result.
Future<void> main() async {
  const path = 'document.pdf';
  // No config argument is passed, so the default ExtractionConfig applies.
  // flutter_rust_bridge exposes every call as a Future, hence the await even
  // for entrypoints that are not async-flavored.
  final extraction = await KreuzbergBridge.extractFile(path, null);
  print(extraction.content);
  final mimeType = extraction.mimeType;
  print('MIME type: $mimeType');
}
```

View File

@@ -0,0 +1,11 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `document.pdf` without a config and prints its content, MIME
/// type, and the number of tables in the result.
Future<void> main() async {
  const path = 'document.pdf';
  final extraction = await KreuzbergBridge.extractFile(path, null);
  print(extraction.content);
  final mimeType = extraction.mimeType;
  print('MIME type: $mimeType');
  final tableCount = extraction.tables.length;
  print('Tables: $tableCount');
}
```

View File

@@ -0,0 +1,28 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `scanned.pdf` with `forceOcr: true` through the `tesseract`
/// backend, then prints the content and the detected languages.
Future<void> main() async {
  final config = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: true,
    disableOcr: false,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
    // OCR settings are built inline rather than in a separate local.
    ocr: OcrConfig(
      enabled: true,
      backend: 'tesseract',
      language: 'eng',
      autoRotate: false,
    ),
  );
  final extraction = await KreuzbergBridge.extractFile(
    'scanned.pdf',
    null,
    config,
  );
  print(extraction.content);
  final languages = extraction.detectedLanguages;
  print('Detected languages: $languages');
}
```

View File

@@ -0,0 +1,9 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Prints a greeting, then extracts `document.pdf` without a config and
/// prints its content.
Future<void> main() async {
  const greeting = 'Hello from kreuzberg!';
  print(greeting);
  const path = 'document.pdf';
  final extraction = await KreuzbergBridge.extractFile(path, null);
  print(extraction.content);
}
```

View File

@@ -0,0 +1,7 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Prints a fixed confirmation message; no kreuzberg API is called.
Future<void> main() async {
  const message = 'kreuzberg loaded successfully';
  print(message);
}
```

View File

@@ -0,0 +1,20 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `document.pdf` without a config and prints the content, every
/// table, and every chunk (when a chunk list is present).
Future<void> main() async {
  final extraction = await KreuzbergBridge.extractFile('document.pdf', null);
  print(extraction.content);
  for (final table in extraction.tables) {
    print('Table: $table');
  }
  // `chunks` is nullable; a missing list simply prints nothing.
  final chunks = extraction.chunks ?? const [];
  for (final chunk in chunks) {
    print('Chunk: $chunk');
  }
}
```

View File

@@ -0,0 +1,47 @@
<!-- snippet:syntax-only -->
```dart title="Dart"
import 'dart:convert';
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `paper.pdf` with a structured-extraction config built from a
/// JSON schema (title, authors, date) and prints the structured output when
/// one is returned.
Future<void> main() async {
  // Schema pieces are named separately; key order matches the encoded JSON.
  final stringType = <String, Object?>{'type': 'string'};
  final properties = <String, Object?>{
    'title': stringType,
    'authors': <String, Object?>{
      'type': 'array',
      'items': stringType,
    },
    'date': stringType,
  };
  final schema = jsonEncode(<String, Object?>{
    'type': 'object',
    'properties': properties,
    'required': <String>['title', 'authors', 'date'],
    'additionalProperties': false,
  });
  final structuredExtraction = StructuredExtractionConfig(
    schema: schema,
    schemaName: 'paper_metadata',
    strict: true,
    llm: const LlmConfig(model: 'openai/gpt-4o-mini'),
  );
  final config = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    structuredExtraction: structuredExtraction,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );
  final extraction = await KreuzbergBridge.extractFile(
    'paper.pdf',
    null,
    config,
  );
  final structured = extraction.structuredOutput;
  if (structured == null) {
    return;
  }
  print(structured);
}
```

View File

@@ -0,0 +1,33 @@
<!-- snippet:syntax-only -->
```dart title="Dart"
import 'dart:convert';
import 'dart:io';
/// Starts `kreuzberg mcp` as a child process, writes one `tools/call` request
/// for the `extract_file` tool to its stdin, and prints the first line the
/// process writes to stdout.
Future<void> main() async {
  final process = await Process.start('kreuzberg', <String>['mcp']);
  // MCP messages are JSON-RPC 2.0. Without `jsonrpc` the message is not a
  // valid request, and without `id` it is a notification, which a server
  // never answers, so the read below would find no response line.
  // NOTE(review): MCP sessions normally begin with an `initialize` handshake
  // before `tools/call`; confirm whether this server accepts a bare call.
  final request = <String, Object?>{
    'jsonrpc': '2.0',
    'id': 1,
    'method': 'tools/call',
    'params': <String, Object?>{
      'name': 'extract_file',
      'arguments': <String, Object?>{
        'path': 'document.pdf',
        'async': true,
      },
    },
  };
  // One JSON document per line, then close stdin so the child sees EOF.
  process.stdin.writeln(jsonEncode(request));
  await process.stdin.flush();
  await process.stdin.close();
  // Only the first stdout line is read; `first` throws a StateError if the
  // process exits without writing any line.
  final line = await process.stdout
      .transform(utf8.decoder)
      .transform(const LineSplitter())
      .first;
  print(line);
  await process.exitCode;
}
```

View File

@@ -0,0 +1,15 @@
<!-- snippet:syntax-only -->
```dart title="Dart"
import 'dart:io';
/// Launches `kreuzberg mcp` with this process's stdio inherited, waits for it
/// to finish, and exits with the child's exit code.
Future<void> main() async {
  final server = await Process.start(
    'kreuzberg',
    <String>['mcp'],
    mode: ProcessStartMode.inheritStdio,
  );
  // Propagate the child's exit status as this program's own.
  exit(await server.exitCode);
}
```

View File

@@ -0,0 +1,32 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `document.pdf` with language detection enabled
/// (`detectMultiple: false`) and prints the first detected language, or a
/// fallback message when none is reported.
Future<void> main() async {
  final config = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
    languageDetection: LanguageDetectionConfig(
      enabled: true,
      minConfidence: 0.5,
      detectMultiple: false,
    ),
  );
  final extraction = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    config,
  );
  final languages = extraction.detectedLanguages;
  // A null list and an empty list are both treated as "nothing detected".
  if (languages == null || languages.isEmpty) {
    print('No language detected');
  } else {
    print('Primary language: ${languages.first}');
  }
}
```

View File

@@ -0,0 +1,36 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `multilingual.pdf` with language detection enabled
/// (`detectMultiple: true`) and prints each detected language, or a fallback
/// message when none is reported.
Future<void> main() async {
  final config = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
    languageDetection: LanguageDetectionConfig(
      enabled: true,
      minConfidence: 0.3,
      detectMultiple: true,
    ),
  );
  final extraction = await KreuzbergBridge.extractFile(
    'multilingual.pdf',
    null,
    config,
  );
  final languages = extraction.detectedLanguages;
  // A null list and an empty list are both treated as "nothing detected".
  if (languages != null && languages.isNotEmpty) {
    print('Detected ${languages.length} language(s):');
    for (final code in languages) {
      print(' - $code');
    }
  } else {
    print('No languages detected');
  }
}
```

View File

@@ -0,0 +1,38 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `document.pdf` without a config and prints every metadata field
/// that is non-null, followed by all entries of `metadata.additional`.
Future<void> main() async {
  final extraction = await KreuzbergBridge.extractFile('document.pdf', null);
  final metadata = extraction.metadata;

  // Each nullable field is copied to a local so the null check promotes it
  // and no `!` assertion is needed.
  final title = metadata.title;
  if (title != null) {
    print('Title: $title');
  }
  final subject = metadata.subject;
  if (subject != null) {
    print('Subject: $subject');
  }
  final authors = metadata.authors;
  if (authors != null) {
    print('Authors: ${authors.join(', ')}');
  }
  final keywords = metadata.keywords;
  if (keywords != null) {
    print('Keywords: ${keywords.join(', ')}');
  }
  final language = metadata.language;
  if (language != null) {
    print('Language: $language');
  }
  final createdAt = metadata.createdAt;
  if (createdAt != null) {
    print('Created: $createdAt');
  }
  final modifiedAt = metadata.modifiedAt;
  if (modifiedAt != null) {
    print('Modified: $modifiedAt');
  }
  final durationMs = metadata.extractionDurationMs;
  if (durationMs != null) {
    print('Extraction took: $durationMs ms');
  }
  for (final extra in metadata.additional.entries) {
    print('Additional[${extra.key}]: ${extra.value}');
  }
}
```

View File

@@ -0,0 +1,31 @@
```dart title="Dart"
import 'dart:convert';
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `document.pdf` without a config and prints, for the first three
/// page boundaries in the result metadata, the page number, its byte range,
/// and a preview of at most 100 characters of that page's text.
///
/// Prints a message and returns early when the metadata has no page
/// structure or no boundaries.
Future<void> main() async {
  final result = await KreuzbergBridge.extractFile('document.pdf', null);
  final pages = result.metadata.pages;
  if (pages == null) {
    print('No page structure available');
    return;
  }
  final boundaries = pages.boundaries;
  if (boundaries == null || boundaries.isEmpty) {
    print('No page boundaries available');
    return;
  }
  // `byteStart`/`byteEnd` are byte offsets, but Dart `String` indices count
  // UTF-16 code units, so `substring` with them returns the wrong text (or
  // throws a RangeError) as soon as the content has non-ASCII characters.
  // Slice the UTF-8 bytes instead and decode each slice.
  // NOTE(review): assumes the offsets index the UTF-8 encoding of
  // `result.content` — confirm against the binding's PageBoundary docs.
  final contentBytes = utf8.encode(result.content);
  for (final boundary in boundaries.take(3)) {
    final start = boundary.byteStart.toInt();
    final end = boundary.byteEnd.toInt();
    // `allowMalformed` keeps the preview printable even if an offset were to
    // fall inside a multi-byte sequence.
    final pageText = utf8.decode(
      contentBytes.sublist(start, end),
      allowMalformed: true,
    );
    final previewEnd = pageText.length < 100 ? pageText.length : 100;
    print('Page ${boundary.pageNumber}:');
    print(' Byte range: $start-$end');
    print(' Preview: ${pageText.substring(0, previewEnd)}...');
  }
}
```

View File

@@ -0,0 +1,38 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `document.pdf` with `extractPages: true` and prints, for each
/// returned page, its number, content length, table count, and image count.
Future<void> main() async {
  final config = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
    // A marker format is supplied although `insertPageMarkers` is false.
    pages: PageConfig(
      extractPages: true,
      insertPageMarkers: false,
      markerFormat: '<!-- page {page} -->',
    ),
  );
  final extraction = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    config,
  );
  final perPage = extraction.pages;
  if (perPage == null) {
    print('No per-page content available');
    return;
  }
  for (final entry in perPage) {
    final number = entry.pageNumber;
    print('Page $number:');
    final chars = entry.content.length;
    print(' Content: $chars chars');
    final tableCount = entry.tables.length;
    print(' Tables: $tableCount');
    final imageCount = entry.images.length;
    print(' Images: $imageCount');
  }
}
```

View File

@@ -0,0 +1,20 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
Future<void> main() async {
final result = await KreuzbergBridge.extractFile('document.pdf', null);
for (final table in result.tables) {
print('Table on page ${table.pageNumber} with ${table.cells.length} rows');
print(table.markdown);
for (final row in table.cells) {
print(row);
}
if (table.boundingBox != null) {
print('Bounding box: ${table.boundingBox}');
}
}
}
```

View File

@@ -0,0 +1,92 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// An immutable record pairing one piece of content with its embedding
/// vector and free-form metadata.
class VectorRecord {
  /// Creates a record; every field is required.
  const VectorRecord({
    required this.id,
    required this.embedding,
    required this.content,
    required this.metadata,
  });

  /// Identifier of this record.
  final String id;

  /// Embedding values for [content].
  final List<double> embedding;

  /// The text this record represents.
  final String content;

  /// Additional key/value data attached to the record.
  final Map<String, Object?> metadata;
}
/// Prints one "Storing …" line for every record in [records] that has a
/// non-empty embedding; records with an empty embedding are skipped.
///
/// Nothing is persisted here: the function only prints.
void storeInVectorDatabase(List<VectorRecord> records) {
  final storable = records.where((entry) => entry.embedding.isNotEmpty);
  for (final entry in storable) {
    final chars = entry.content.length;
    final dims = entry.embedding.length;
    print(
      'Storing ${entry.id}: $chars chars, '
      '$dims dims',
    );
  }
}
/// Extracts [documentPath] with chunking and embedding configured, converts
/// every returned chunk into a [VectorRecord], passes the records to
/// [storeInVectorDatabase], and returns them.
///
/// Record ids have the form `<documentId>_chunk_<index>`. A chunk without an
/// embedding yields a record whose embedding list is empty.
Future<List<VectorRecord>> extractAndVectorize(
  String documentPath,
  String documentId,
) async {
  final config = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
    chunking: ChunkingConfig(
      maxCharacters: 512,
      overlap: 50,
      trim: true,
      chunkerType: ChunkerType.text,
      embedding: EmbeddingConfig(
        model: EmbeddingModelType.preset(name: 'balanced'),
        normalize: true,
        batchSize: 32,
        showDownloadProgress: false,
      ),
      sizing: ChunkSizing.characters(),
      prependHeadingContext: false,
    ),
  );
  final extraction = await KreuzbergBridge.extractFile(
    documentPath,
    null,
    config,
  );
  // `chunks` is nullable; a missing list produces no records.
  final chunks = extraction.chunks ?? const <Chunk>[];
  final records = <VectorRecord>[
    for (var i = 0; i < chunks.length; i++)
      VectorRecord(
        id: '${documentId}_chunk_$i',
        content: chunks[i].content,
        embedding: chunks[i].embedding?.toList() ?? const <double>[],
        metadata: {
          'document_id': documentId,
          'chunk_index': i,
          'content_length': chunks[i].content.length,
        },
      ),
  ];
  storeInVectorDatabase(records);
  return records;
}
/// Runs [extractAndVectorize] for `document.pdf` under the id `doc-1`.
Future<void> main() async {
  const documentPath = 'document.pdf';
  const documentId = 'doc-1';
  await extractAndVectorize(documentPath, documentId);
}
```

View File

@@ -0,0 +1,32 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Prints the registered OCR backend names, then extracts `scanned.pdf` with
/// the OCR backend named `cloud` and prints the content.
Future<void> main() async {
  // Cloud OCR backends are registered in the Rust core; Dart only selects a
  // registered backend by name. `KreuzbergBridge.listOcrBackends()` reports
  // the names available at runtime.
  final available = await KreuzbergBridge.listOcrBackends();
  print('Available OCR backends: $available');
  const ocr = OcrConfig(
    enabled: true,
    backend: 'cloud',
    language: 'en',
    autoRotate: false,
  );
  final config = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    ocr: ocr,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );
  final extraction = await KreuzbergBridge.extractFile(
    'scanned.pdf',
    null,
    config,
  );
  print(extraction.content);
}
```

View File

@@ -0,0 +1,31 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `document.pdf` with image extraction configured and prints the
/// number of images in the result.
Future<void> main() async {
  const images = ImageExtractionConfig(
    extractImages: true,
    targetDpi: 300,
    maxImageDimension: 4096,
    injectPlaceholders: false,
    autoAdjustDpi: true,
    minDpi: 150,
    maxDpi: 600,
    classify: false,
  );
  final config = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    images: images,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );
  final extraction = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    config,
  );
  // `images` is nullable; a missing list counts as zero images.
  final extracted = extraction.images ?? const [];
  final count = extracted.length;
  print('Extracted images: $count');
}
```

View File

@@ -0,0 +1,32 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `document.pdf` with image extraction, placeholders,
/// classification, and a per-page image limit configured, then prints the
/// number of images in the result.
Future<void> main() async {
  const images = ImageExtractionConfig(
    extractImages: true,
    targetDpi: 300,
    maxImageDimension: 4096,
    injectPlaceholders: true,
    autoAdjustDpi: true,
    minDpi: 150,
    maxDpi: 600,
    maxImagesPerPage: 20,
    classify: true,
  );
  final config = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    images: images,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );
  final extraction = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    config,
  );
  // `images` is nullable; a missing list counts as zero images.
  final extracted = extraction.images ?? const [];
  final count = extracted.length;
  print('Preprocessed images: $count');
}
```

View File

@@ -0,0 +1,26 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `document.pdf` with the OCR backend named `easyocr` and prints
/// the content.
Future<void> main() async {
  const ocr = OcrConfig(
    enabled: true,
    backend: 'easyocr',
    language: 'en',
    autoRotate: false,
  );
  final config = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    ocr: ocr,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );
  final extraction = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    config,
  );
  print(extraction.content);
}
```

View File

@@ -0,0 +1,38 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `scanned.pdf` with the OCR backend named `paddleocr` and OCR
/// element output enabled, then prints text, recognition confidence, level,
/// and page number for every returned OCR element.
Future<void> main() async {
  const elementConfig = OcrElementConfig(
    includeElements: true,
    minLevel: OcrElementLevel.word,
    minConfidence: 0.0,
    buildHierarchy: false,
  );
  const ocr = OcrConfig(
    enabled: true,
    backend: 'paddleocr',
    language: 'en',
    autoRotate: false,
    elementConfig: elementConfig,
  );
  final config = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    ocr: ocr,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );
  final extraction = await KreuzbergBridge.extractFile(
    'scanned.pdf',
    null,
    config,
  );
  // `ocrElements` is nullable; a missing list prints nothing.
  final ocrElements = extraction.ocrElements ?? const <OcrElement>[];
  for (final item in ocrElements) {
    print('Text: ${item.text}');
    // Recognition confidence is shown with two decimal places.
    final recognition = item.confidence.recognition.toStringAsFixed(2);
    print('Confidence: $recognition');
    print('Level: ${item.level}');
    print('Page: ${item.pageNumber}');
  }
}
```

View File

@@ -0,0 +1,26 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `scanned.pdf` with the OCR backend named `tesseract` and
/// language `eng`, then prints the content.
Future<void> main() async {
  const ocr = OcrConfig(
    enabled: true,
    backend: 'tesseract',
    language: 'eng',
    autoRotate: false,
  );
  final config = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    ocr: ocr,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );
  final extraction = await KreuzbergBridge.extractFile(
    'scanned.pdf',
    null,
    config,
  );
  print(extraction.content);
}
```

View File

@@ -0,0 +1,26 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `document.pdf` with `forceOcr: true` and the OCR backend named
/// `tesseract`, then prints the content.
Future<void> main() async {
  const ocr = OcrConfig(
    enabled: true,
    backend: 'tesseract',
    language: 'eng',
    autoRotate: false,
  );
  final config = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: true,
    disableOcr: false,
    ocr: ocr,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );
  final extraction = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    config,
  );
  print(extraction.content);
}
```

View File

@@ -0,0 +1,26 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `multilingual.pdf` with the OCR backend named `tesseract` and
/// the language string `eng+deu+fra`, then prints the content.
Future<void> main() async {
  const ocr = OcrConfig(
    enabled: true,
    backend: 'tesseract',
    language: 'eng+deu+fra',
    autoRotate: false,
  );
  final config = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    ocr: ocr,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );
  final extraction = await KreuzbergBridge.extractFile(
    'multilingual.pdf',
    null,
    config,
  );
  print(extraction.content);
}
```

View File

@@ -0,0 +1,26 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `document.pdf` with the OCR backend named `paddleocr` and prints
/// the content.
Future<void> main() async {
  const ocr = OcrConfig(
    enabled: true,
    backend: 'paddleocr',
    language: 'en',
    autoRotate: false,
  );
  final config = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    ocr: ocr,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );
  final extraction = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    config,
  );
  print(extraction.content);
}
```

View File

@@ -0,0 +1,16 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Clears the OCR backend, post-processor, and validator registries one
/// after another, then prints a confirmation.
Future<void> main() async {
  // Bulk-clear entry points exist in the Dart binding for OCR backends,
  // post-processors, and validators only. Clearing document extractors is
  // not surfaced through flutter_rust_bridge; the kreuzberg core registers
  // the built-in extractors automatically when the library initializes.
  await KreuzbergBridge.clearOcrBackends();
  await KreuzbergBridge.clearPostProcessors();
  await KreuzbergBridge.clearValidators();
  const confirmation = 'OCR backends, post-processors, and validators cleared';
  print(confirmation);
}
```

View File

@@ -0,0 +1,15 @@
<!-- snippet:skip reason="Dart cannot construct the opaque BoxFn closure types required by createEmbeddingBackendDartImpl — flutter_rust_bridge generates them as RustOpaqueInterface with no Dart-side wrapper. Custom embedding backends must be written in Rust." -->
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Documentation-only entrypoint: the body is deliberately empty and only
/// explains why a custom embedding backend cannot be registered from Dart.
Future<void> main() async {
// A Dart implementation of the `EmbeddingBackend` trait cannot be plugged
// into the global registry. `Kreuzberg.registerEmbeddingBackend(impl)`
// exists, but its `createEmbeddingBackendDartImpl` factory takes opaque
// `BoxFn*` closure values whose constructors are not surfaced through
// flutter_rust_bridge.
//
// Implement the backend in Rust as `Plugin + EmbeddingBackend` and register
// it via `register_embedding_backend` in a Rust shim crate.
}
```

View File

@@ -0,0 +1,15 @@
<!-- snippet:skip reason="DocumentExtractor trait has no createDocumentExtractorDartImpl factory; custom extractors must be implemented in Rust." -->
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Documentation-only entrypoint: the body is deliberately empty and only
/// explains why custom document extractors cannot be registered from Dart.
Future<void> main() async {
// Custom document extractors cannot be registered from Dart. While
// registerDocumentExtractor exists in the KreuzbergBridge API, there is
// no createDocumentExtractorDartImpl factory to construct a Dart-based
// extractor implementation.
//
// Built-in extractors are registered automatically when the library
// initializes. Custom extractors must be written in Rust and linked into
// a Rust shim crate before the Dart host process loads the dynamic library.
}
```

View File

@@ -0,0 +1,17 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Prints the names registered in four plugin registries, in this order:
/// document extractors, post-processors, OCR backends, validators.
Future<void> main() async {
  // Each list is fetched and printed before the next registry is queried.
  print(
    'Registered extractors: ${await KreuzbergBridge.listDocumentExtractors()}',
  );
  print(
    'Registered processors: ${await KreuzbergBridge.listPostProcessors()}',
  );
  print(
    'Registered OCR backends: ${await KreuzbergBridge.listOcrBackends()}',
  );
  print(
    'Registered validators: ${await KreuzbergBridge.listValidators()}',
  );
}
```

View File

@@ -0,0 +1,16 @@
<!-- snippet:skip reason="Dart cannot construct the opaque BoxFn closure types required by createValidatorDartImpl — flutter_rust_bridge generates them as RustOpaqueInterface with no Dart-side wrapper. Custom validators must be written in Rust." -->
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Documentation-only entrypoint: the body is deliberately empty and only
/// explains why a minimum-content-length validator cannot be registered
/// from Dart.
Future<void> main() async {
// A Dart implementation of the `Validator` trait that asserts a minimum
// content length cannot be plugged into the global validator registry.
// `Kreuzberg.registerValidator(impl)` exists, but its
// `createValidatorDartImpl` factory takes opaque `BoxFn*` closure
// arguments whose constructors are not surfaced through
// flutter_rust_bridge.
//
// Implement the validator in Rust and register it via `register_validator`
// in a Rust shim crate.
}
```

View File

@@ -0,0 +1,15 @@
<!-- snippet:skip reason="DocumentExtractor trait has no createDocumentExtractorDartImpl factory; custom extractors must be implemented in Rust." -->
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Documentation-only entrypoint: the body is deliberately empty and only
/// explains why a PDF metadata extractor cannot be implemented in Dart.
Future<void> main() async {
// Custom document extractors cannot be implemented in Dart. Creating a
// PDF metadata extractor would require implementing the DocumentExtractor
// trait, but flutter_rust_bridge does not generate the
// createDocumentExtractorDartImpl factory function.
//
// Implement the PDF metadata extractor in Rust and register it via a
// Rust shim crate that links kreuzberg before the Dart host loads the
// dynamic library.
}
```

View File

@@ -0,0 +1,17 @@
<!-- snippet:skip reason="Dart cannot construct the opaque BoxFn closure types required by createPostProcessorDartImpl — flutter_rust_bridge generates them as RustOpaqueInterface with no Dart-side wrapper. The ProcessingStage enum is also not surfaced. Custom post-processors must be written in Rust." -->
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Documentation-only entrypoint: the body is deliberately empty and only
/// explains why a MIME-gated post-processor cannot be registered from Dart.
Future<void> main() async {
// A Dart implementation of the `PostProcessor` trait that gates on PDF
// MIME type cannot be plugged into the global registry.
// `Kreuzberg.registerPostProcessor(impl)` exists, but its
// `createPostProcessorDartImpl` factory takes opaque `BoxFn*` closure
// values plus a `BoxFnDartFnFutureProcessingStage` whose constructors are
// not surfaced through flutter_rust_bridge. The `ProcessingStage` enum is
// not exported to Dart either.
//
// Implement the post-processor in Rust as `Plugin + PostProcessor` and
// register it via `register_post_processor` in a Rust shim crate.
}
```

View File

@@ -0,0 +1,15 @@
<!-- snippet:skip reason="DocumentExtractor trait has no createDocumentExtractorDartImpl factory in the generated Dart binding; custom extractors must be written and registered in Rust." -->
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Documentation-only entrypoint: the body is deliberately empty and only
/// explains why custom document extractors cannot be implemented in Dart.
Future<void> main() async {
// Custom document extractors cannot be implemented in Dart. While the
// traits.dart file includes the DocumentExtractor abstract class,
// flutter_rust_bridge does not generate a createDocumentExtractorDartImpl
// factory function, so there is no way to bridge Dart closures into the
// extractor registry.
//
// Implement custom extractors in Rust and register them via a Rust shim
// crate that links kreuzberg before the Dart host loads the dynamic library.
}
```

View File

@@ -0,0 +1,15 @@
<!-- snippet:skip reason="Plugin trait lifecycle methods (initialize, shutdown) are not exposed in Dart; logging must be implemented in Rust." -->
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Documentation-only entrypoint: the body is deliberately empty and only
/// explains why plugin lifecycle logging hooks are unavailable in Dart.
Future<void> main() async {
// Plugin lifecycle logging hooks are not available in Dart. The Plugin
// trait methods (initialize, shutdown) that enable structured logging are
// only exposed in Rust. Dart plugins (OcrBackend, PostProcessor, Validator,
// EmbeddingBackend) cannot implement Plugin methods directly.
//
// For logging, implement plugins in Rust using the tracing or log crate,
// then register them via a Rust shim crate before the Dart host loads the
// dynamic library.
}
```

View File

@@ -0,0 +1,16 @@
<!-- snippet:skip reason="Testing Dart plugins via package:test is not practical because test closure capture varies by test framework; test via integration after registration or via Rust unit tests." -->
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Documentation-only entrypoint: the body is deliberately empty and only
/// lists the recommended ways to test plugins used from Dart.
Future<void> main() async {
// Plugin testing with Dart is different from Rust. Dart plugins cannot be
// unit-tested in isolation because the registration mechanism uses closures
// captured in the plugin factory, and test framework async contexts vary.
//
// Recommended approaches:
// 1. Test core plugin logic directly in unit tests with mock data
// 2. Write integration tests that register the plugin and exercise it via
// KreuzbergBridge.extractFile or other extraction methods
// 3. For complex plugins, implement in Rust and test with #[tokio::test]
}
```

View File

@@ -0,0 +1,16 @@
<!-- snippet:skip reason="Dart cannot construct the opaque BoxFn closure types required by createValidatorDartImpl — flutter_rust_bridge generates them as RustOpaqueInterface with no Dart-side wrapper. Custom validators must be written in Rust." -->
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Documentation-only entrypoint: the body is deliberately empty and only
/// explains why a MIME-gated validator cannot be registered from Dart.
Future<void> main() async {
// A Dart implementation of the `Validator` trait that gates on MIME type
// cannot be plugged into the global validator registry.
// `Kreuzberg.registerValidator(impl)` exists, but its
// `createValidatorDartImpl` factory requires opaque `BoxFn*` closure
// values whose constructors are not surfaced through
// flutter_rust_bridge.
//
// Implement the validator in Rust and register it via `register_validator`
// in a Rust shim crate.
}
```

View File

@@ -0,0 +1,18 @@
<!-- snippet:skip reason="Dart cannot construct the opaque BoxFn closure types required by createValidatorDartImpl — flutter_rust_bridge generates them as RustOpaqueInterface with no Dart-side wrapper. Custom validators must be written in Rust." -->
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Documentation-only entrypoint: the body is deliberately empty and only
/// explains why a quality-score validator cannot be registered from Dart.
Future<void> main() async {
// A Dart implementation of the `Validator` trait that inspects
// `metadata.additional["quality_score"]` cannot be plugged into the global
// validator registry. The Dart binding exposes
// `Kreuzberg.registerValidator(impl)` and the `createValidatorDartImpl`
// factory, but every closure parameter (`validate`, `shouldValidate`,
// `priority`) is typed as an opaque `BoxFn*` whose constructor is not
// surfaced through flutter_rust_bridge.
//
// Implement the validator in Rust as `Plugin + Validator` and register it
// via `register_validator` in a Rust shim crate that links kreuzberg
// before the Dart host process loads the dynamic library.
}
```

View File

@@ -0,0 +1,16 @@
<!-- snippet:skip reason="Dart cannot construct the opaque BoxFn closure types required by createPostProcessorDartImpl — flutter_rust_bridge generates them as RustOpaqueInterface with no Dart-side wrapper. The closure-captured state pattern is therefore unreachable. Custom plugins must be written in Rust." -->
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Documentation-only entrypoint: the body is deliberately empty and only
/// explains why a stateful post-processor cannot be registered from Dart.
Future<void> main() async {
// A stateful Dart `PostProcessor` that captures mutable counters in its
// closure cannot be plugged into the global registry.
// `Kreuzberg.registerPostProcessor(impl)` exists, but the
// `createPostProcessorDartImpl` factory takes opaque `BoxFn*` closure
// values whose constructors are not surfaced through flutter_rust_bridge,
// so the closure-capture pattern is unreachable from Dart.
//
// Implement stateful plugins in Rust using `Mutex`/`AtomicU64` for
// interior mutability, then register them in a Rust shim crate.
}
```

View File

@@ -0,0 +1,23 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Walks the renderer registry through list → unregister `plain` → list →
/// clear → list, printing the registered names after each step.
Future<void> main() async {
  // Constructing custom plugins (createXxxDartImpl) is unreachable from Dart
  // because the flutter_rust_bridge binding types the closures as opaque
  // BoxFn values, so this lifecycle runs against the *built-in* renderer
  // registry (markdown / html / djot / plain).
  final initial = await KreuzbergBridge.listRenderers();
  print('Renderers before unregister: $initial');

  // Remove one renderer by name.
  await KreuzbergBridge.unregisterRenderer('plain');
  final afterUnregister = await KreuzbergBridge.listRenderers();
  print('Renderers after unregister: $afterUnregister');

  // Remove everything that is left, including the remaining built-ins.
  await KreuzbergBridge.clearRenderers();
  final afterClear = await KreuzbergBridge.listRenderers();
  print('Renderers after clear: $afterClear');
}
```

View File

@@ -0,0 +1,16 @@
<!-- snippet:skip reason="Dart cannot construct the opaque BoxFn closure types required by createPostProcessorDartImpl — flutter_rust_bridge generates them as RustOpaqueInterface with no Dart-side wrapper. The ProcessingStage enum is also not surfaced. Custom post-processors must be written in Rust." -->
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
Future<void> main() async {
  // Intentionally empty: this snippet only documents a limitation of the Dart
  // binding and performs no work at runtime.
  //
  // A Dart implementation of the `PostProcessor` trait that counts words in
  // the extracted content cannot be plugged into the global registry.
  // `Kreuzberg.registerPostProcessor(impl)` exists, but its
  // `createPostProcessorDartImpl` factory takes opaque `BoxFn*` closure
  // values plus a `BoxFnDartFnFutureProcessingStage` whose constructors are
  // not surfaced through flutter_rust_bridge.
  // NOTE(review): the runnable snippets call the API through
  // `KreuzbergBridge`; confirm that `Kreuzberg` is the right receiver here.
  //
  // Implement the post-processor in Rust as `Plugin + PostProcessor` and
  // register it via `register_post_processor` in a Rust shim crate.
}
```

View File

@@ -0,0 +1,32 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `document.pdf` with the markdown chunker configured and prints
/// the number of chunks followed by the index and length of each one.
Future<void> main() async {
  // Chunking settings, kept separate so the extraction config stays readable.
  const chunking = ChunkingConfig(
    maxCharacters: 1000,
    overlap: 200,
    trim: true,
    chunkerType: ChunkerType.markdown,
    sizing: ChunkSizing.characters(),
    prependHeadingContext: true,
  );
  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    chunking: chunking,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  final extraction = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    extractionConfig,
  );

  // `chunks` is nullable on the result; treat a missing list as empty.
  final chunkList = extraction.chunks ?? const [];
  print('Chunks: ${chunkList.length}');
  for (final piece in chunkList) {
    final position = piece.metadata.chunkIndex;
    final size = piece.content.length;
    print('Chunk $position: $size chars');
  }
}
```

View File

@@ -0,0 +1,50 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `research_paper.pdf` with chunking and an embedding config, then
/// prints a short report for every chunk.
///
/// For each chunk the report shows its 1-based position out of the total,
/// its `byteStart`-`byteEnd` range, a preview of the content, and the
/// embedding length when an embedding is present.
Future<void> main() async {
  final config = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    chunking: const ChunkingConfig(
      maxCharacters: 500,
      overlap: 50,
      trim: true,
      chunkerType: ChunkerType.text,
      sizing: ChunkSizing.characters(),
      prependHeadingContext: false,
      embedding: EmbeddingConfig(
        model: EmbeddingModelType.preset(name: 'balanced'),
        normalize: true,
        batchSize: 32,
        showDownloadProgress: false,
      ),
    ),
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );
  final result =
      await KreuzbergBridge.extractFile('research_paper.pdf', null, config);

  // `chunks` is nullable on the result; treat a missing list as empty.
  final chunks = result.chunks ?? const [];
  for (final chunk in chunks) {
    final index = chunk.metadata.chunkIndex;
    final total = chunk.metadata.totalChunks;
    final start = chunk.metadata.byteStart;
    final end = chunk.metadata.byteEnd;

    // Append the ellipsis only when the content was actually cut, so a chunk
    // of 100 characters or fewer is not shown as if text were missing.
    final content = chunk.content;
    final preview =
        content.length > 100 ? '${content.substring(0, 100)}...' : content;

    print('Chunk ${index + 1}/$total');
    print('Position: $start-$end');
    print('Content: $preview');

    // The embedding is nullable; report its length only when it exists.
    final embedding = chunk.embedding;
    if (embedding != null) {
      print('Embedding: ${embedding.length} dimensions');
    }
  }
}
```

View File

@@ -0,0 +1,35 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `document.pdf` with chunking and an embedding config, then prints
/// how many of the returned chunks actually carry an embedding.
Future<void> main() async {
  final config = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    chunking: const ChunkingConfig(
      maxCharacters: 1024,
      overlap: 100,
      trim: true,
      chunkerType: ChunkerType.text,
      sizing: ChunkSizing.characters(),
      prependHeadingContext: false,
      embedding: EmbeddingConfig(
        model: EmbeddingModelType.preset(name: 'balanced'),
        normalize: true,
        batchSize: 32,
        showDownloadProgress: false,
      ),
    ),
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );
  final result =
      await KreuzbergBridge.extractFile('document.pdf', null, config);

  // `chunks` is nullable on the result; treat a missing list as empty.
  final chunks = result.chunks ?? const [];

  // A chunk's embedding is nullable, so count only the chunks that have one;
  // printing `chunks.length` would overstate the number the label promises.
  final embeddedCount =
      chunks.where((chunk) => chunk.embedding != null).length;
  print('Chunks with embeddings: $embeddedCount');
}
```

View File

@@ -0,0 +1,33 @@
```dart title="Dart"
import 'package:flutter_rust_bridge/flutter_rust_bridge.dart' show Int64List;
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `research_paper.pdf` with a YAKE keyword config and prints each
/// extracted keyword together with its score.
Future<void> main() async {
  // Keyword settings, built first so the extraction config stays readable.
  final keywordConfig = KeywordConfig(
    algorithm: KeywordAlgorithm.yake,
    maxKeywords: 10,
    minScore: 0.3,
    ngramRange: Int64List.fromList(<int>[1, 3]),
  );
  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    keywords: keywordConfig,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  final extraction = await KreuzbergBridge.extractFile(
    'research_paper.pdf',
    null,
    extractionConfig,
  );

  // `extractedKeywords` is nullable; there is nothing to print without it.
  final found = extraction.extractedKeywords;
  if (found == null) {
    return;
  }
  for (final entry in found) {
    print('${entry.text} (score: ${entry.score})');
  }
}
```

View File

@@ -0,0 +1,30 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `scanned_document.pdf` and reports the quality score (flagging
/// scores below 0.5) followed by every processing warning.
Future<void> main() async {
  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );
  final extraction = await KreuzbergBridge.extractFile(
    'scanned_document.pdf',
    null,
    extractionConfig,
  );

  // The score is nullable; print a line only when one is available.
  final quality = extraction.qualityScore;
  if (quality != null) {
    final formatted = quality.toStringAsFixed(2);
    print(
      quality < 0.5
          ? 'Warning: Low quality extraction ($formatted)'
          : 'Quality score: $formatted',
    );
  }

  for (final warning in extraction.processingWarnings) {
    print('Warning: $warning');
  }
}
```

View File

@@ -0,0 +1,18 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Embeds two sample strings and prints how many vectors came back and the
/// length of the first one.
Future<void> main() async {
  const embeddingConfig = EmbeddingConfig(
    model: EmbeddingModelType.preset(name: 'balanced'),
    normalize: true,
    batchSize: 32,
    showDownloadProgress: false,
  );
  final inputs = <String>['Hello, world!', 'Kreuzberg is fast'];

  final vectors = await KreuzbergBridge.embedTexts(inputs, embeddingConfig);
  final firstVector = vectors.first;

  print('Vectors: ${vectors.length}');
  print('Dimensions: ${firstVector.length}');
}
```

View File

@@ -0,0 +1,24 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `document.pdf` with token reduction configured in `moderate`
/// mode and prints the length of the resulting content.
Future<void> main() async {
  // Token-reduction settings, kept apart from the main extraction config.
  const reduction = TokenReductionOptions(
    mode: 'moderate',
    preserveImportantWords: true,
  );
  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    tokenReduction: reduction,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  final extraction = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    extractionConfig,
  );
  final reducedLength = extraction.content.length;
  print('Reduced content length: $reducedLength');
}
```

View File

@@ -0,0 +1,24 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `verbose_document.pdf` with token reduction configured in
/// `moderate` mode and prints the length of the resulting content.
Future<void> main() async {
  // Token-reduction settings, kept apart from the main extraction config.
  const reduction = TokenReductionOptions(
    mode: 'moderate',
    preserveImportantWords: true,
  );
  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    tokenReduction: reduction,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  final extraction = await KreuzbergBridge.extractFile(
    'verbose_document.pdf',
    null,
    extractionConfig,
  );
  final lengthAfterReduction = extraction.content.length;
  print('Content length after reduction: $lengthAfterReduction');
}
```

View File

@@ -0,0 +1,77 @@
```dart title="Dart"
import 'dart:typed_data';
import 'package:kreuzberg/kreuzberg.dart';
/// Groups a piece of text with its embedding vector and string metadata under
/// a single identifier.
class VectorRecord {
  /// Creates a record; all four fields must be supplied.
  VectorRecord({
    required this.id,
    required this.content,
    required this.embedding,
    required this.metadata,
  });

  /// Identifier of this record.
  final String id;

  /// Text the embedding belongs to.
  final String content;

  /// Embedding vector for [content].
  final Float64List embedding;

  /// Free-form string key/value pairs attached to the record.
  final Map<String, String> metadata;
}
/// Extracts [documentPath] with chunking and an embedding config, and returns
/// one [VectorRecord] for every chunk that carries an embedding.
///
/// Record ids have the form `<documentId>_chunk_<index>`, where the index is
/// the chunk's position in the extracted chunk list. Chunks whose embedding
/// is `null` are left out, so the ids may have gaps. Each record's metadata
/// holds the document id, the chunk index and the content length as strings.
Future<List<VectorRecord>> extractAndVectorize(
  String documentPath,
  String documentId,
) async {
  // Chunking and embedding settings, kept apart from the extraction config.
  const chunking = ChunkingConfig(
    maxCharacters: 512,
    overlap: 50,
    trim: true,
    chunkerType: ChunkerType.text,
    sizing: ChunkSizing.characters(),
    prependHeadingContext: false,
    embedding: EmbeddingConfig(
      model: EmbeddingModelType.preset(name: 'balanced'),
      normalize: true,
      batchSize: 32,
      showDownloadProgress: false,
    ),
  );
  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    chunking: chunking,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  final extraction = await KreuzbergBridge.extractFile(
    documentPath,
    null,
    extractionConfig,
  );

  // `chunks` is nullable on the result; treat a missing list as empty.
  final chunkList = extraction.chunks ?? const [];
  final output = <VectorRecord>[];
  for (var position = 0; position < chunkList.length; position += 1) {
    final piece = chunkList[position];
    final vector = piece.embedding;
    // Only chunks that actually have an embedding become records.
    if (vector != null) {
      final text = piece.content;
      output.add(
        VectorRecord(
          id: '${documentId}_chunk_$position',
          content: text,
          embedding: vector,
          metadata: <String, String>{
            'document_id': documentId,
            'chunk_index': '$position',
            'content_length': '${text.length}',
          },
        ),
      );
    }
  }
  return output;
}
/// Vectorizes `document.pdf` under the id `doc-001` and prints how many
/// records were produced.
Future<void> main() async {
  final vectorRecords = await extractAndVectorize('document.pdf', 'doc-001');
  final recordCount = vectorRecords.length;
  print('Vector records: $recordCount');
}
```