Nomad changes
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s

This commit is contained in:
Henrik Jess Nielsen
2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions

View File

@@ -0,0 +1,65 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
Future<void> main() async {
final config = ExtractionConfig(
useCache: true,
enableQualityProcessing: true,
// OCR: Tesseract on English text
forceOcr: false,
disableOcr: false,
ocr: const OcrConfig(
enabled: true,
backend: 'tesseract',
language: 'eng',
autoRotate: false,
),
// Chunking: ~800-character markdown chunks with 100-char overlap
chunking: const ChunkingConfig(
maxCharacters: 800,
overlap: 100,
trim: true,
chunkerType: ChunkerType.markdown,
sizing: ChunkSizing.characters(),
prependHeadingContext: true,
),
// Image extraction
images: const ImageExtractionConfig(
extractImages: true,
targetDpi: 150,
maxImageDimension: 4096,
injectPlaceholders: false,
autoAdjustDpi: true,
minDpi: 72,
maxDpi: 300,
classify: false,
),
// Output: markdown with full document structure
resultFormat: ResultFormat.unified,
outputFormat: OutputFormat.markdown(),
includeDocumentStructure: true,
maxArchiveDepth: 3,
useLayoutForMarkdown: false,
);
final result = await KreuzbergBridge.extractFile('report.pdf', null, config);
print('Content (${result.content.length} chars):');
final preview = result.content.substring(
0,
result.content.length < 200 ? result.content.length : 200,
);
print(preview);
if (result.chunks != null) {
print('\nChunks: ${result.chunks!.length}');
}
print('Tables: ${result.tables.length}');
if (result.detectedLanguages != null) {
print('Languages: ${result.detectedLanguages}');
}
if (result.extractionMethod != null) {
print('Extraction method: ${result.extractionMethod}');
}
}
```