Nomad changes
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s

This commit is contained in:
Henrik Jess Nielsen
2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions

View File

@@ -0,0 +1,64 @@
```dart title="Dart"
import 'package:flutter_rust_bridge/flutter_rust_bridge.dart' show Int64List;
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `document.pdf` with OCR, chunking (including an embedding
/// config), language detection, keyword extraction, token reduction and
/// post-processing all configured explicitly, then prints a short summary.
Future<void> main() async {
  // Each feature's sub-config is declared on its own so the top-level
  // ExtractionConfig below reads as a flat list of switches.
  const ocrConfig = OcrConfig(
    enabled: true,
    backend: 'tesseract',
    language: 'eng',
    autoRotate: false,
  );
  const chunkingConfig = ChunkingConfig(
    maxCharacters: 1000,
    overlap: 200,
    trim: true,
    chunkerType: ChunkerType.text,
    sizing: ChunkSizing.characters(),
    prependHeadingContext: false,
    embedding: EmbeddingConfig(
      model: EmbeddingModelType.preset(name: 'balanced'),
      normalize: true,
      batchSize: 32,
      showDownloadProgress: false,
    ),
  );
  const languageConfig = LanguageDetectionConfig(
    enabled: true,
    minConfidence: 0.8,
    detectMultiple: false,
  );
  // Not const: the n-gram range is built at runtime by Int64List.fromList.
  final keywordConfig = KeywordConfig(
    algorithm: KeywordAlgorithm.yake,
    maxKeywords: 10,
    minScore: 0.1,
    ngramRange: Int64List.fromList(<int>[1, 3]),
    language: 'en',
  );
  const tokenReductionConfig = TokenReductionOptions(
    mode: 'moderate',
    preserveImportantWords: true,
  );
  const postprocessorConfig = PostProcessorConfig(enabled: true);

  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    ocr: ocrConfig,
    chunking: chunkingConfig,
    languageDetection: languageConfig,
    keywords: keywordConfig,
    tokenReduction: tokenReductionConfig,
    postprocessor: postprocessorConfig,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    useLayoutForMarkdown: false,
    maxArchiveDepth: 3,
  );

  final extraction = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    extractionConfig,
  );

  print('Content: ${extraction.content}');

  // Only report languages when the result actually carries them.
  final languages = extraction.detectedLanguages;
  if (languages != null) {
    print('Languages: $languages');
  }

  // A missing chunk list is reported as zero chunks.
  final chunkList = extraction.chunks ?? const [];
  print('Chunks: ${chunkList.length}');
}
```

View File

@@ -0,0 +1,32 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `document.pdf` with character-based chunking configured and
/// prints the chunk count followed by the content length of every chunk.
Future<void> main() async {
  // 1000-character chunks with a 200-character overlap, as configured below.
  const chunkingConfig = ChunkingConfig(
    maxCharacters: 1000,
    overlap: 200,
    trim: true,
    chunkerType: ChunkerType.text,
    sizing: ChunkSizing.characters(),
    prependHeadingContext: false,
  );

  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    chunking: chunkingConfig,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  final extraction = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    extractionConfig,
  );

  // A missing chunk list is treated as empty, so the loop is simply skipped.
  final chunkList = extraction.chunks ?? const [];
  print('Chunks: ${chunkList.length}');
  for (final piece in chunkList) {
    print('Length: ${piece.content.length}');
  }
}
```

View File

@@ -0,0 +1,20 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `document.pdf` with a baseline [ExtractionConfig] (no optional
/// sub-configs) and prints the extracted content.
Future<void> main() async {
  const sourcePath = 'document.pdf';

  final extractionConfig = ExtractionConfig(
    // Caching / quality switches.
    useCache: true,
    enableQualityProcessing: true,
    // OCR is neither forced nor disabled.
    forceOcr: false,
    disableOcr: false,
    // Output shape.
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    useLayoutForMarkdown: false,
    maxArchiveDepth: 3,
  );

  final extraction = await KreuzbergBridge.extractFile(
    sourcePath,
    null,
    extractionConfig,
  );
  print(extraction.content);
}
```

View File

@@ -0,0 +1,22 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `document.pdf` with an in-code [ExtractionConfig] and prints the
/// extracted content.
Future<void> main() async {
  // Config-file discovery is not exposed by the Dart bindings, so the
  // configuration is assembled here and handed to
  // KreuzbergBridge.extractFile explicitly.
  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    useLayoutForMarkdown: false,
    maxArchiveDepth: 3,
  );

  final extraction = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    extractionConfig,
  );
  print(extraction.content);
}

View File

@@ -0,0 +1,27 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `scanned.pdf` with OCR enabled on the `'tesseract'` backend and
/// prints the content length and the number of tables in the result.
Future<void> main() async {
  const ocrConfig = OcrConfig(
    enabled: true,
    backend: 'tesseract',
    language: 'eng',
    autoRotate: false,
  );

  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    ocr: ocrConfig,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  final extraction = await KreuzbergBridge.extractFile(
    'scanned.pdf',
    null,
    extractionConfig,
  );

  final contentLength = extraction.content.length;
  final tableCount = extraction.tables.length;
  print('Content length: $contentLength');
  print('Tables detected: $tableCount');
}
```

View File

@@ -0,0 +1,56 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `document.pdf` with a fully spelled-out Tesseract OCR
/// configuration plus character-based chunking, then prints the content
/// length.
Future<void> main() async {
  // Every Tesseract option is listed explicitly rather than relying on
  // defaults.
  // NOTE(review): psm / oem presumably map to Tesseract's page-segmentation
  // and OCR-engine modes — confirm against the binding docs.
  const tesseract = TesseractConfig(
    language: 'eng+deu',
    psm: 6,
    outputFormat: 'text',
    oem: 3,
    minConfidence: 0.0,
    enableTableDetection: false,
    tableMinConfidence: 0.5,
    tableColumnThreshold: 20,
    tableRowThresholdRatio: 0.5,
    useCache: true,
    classifyUsePreAdaptedTemplates: false,
    languageModelNgramOn: false,
    tesseditDontBlkrejGoodWds: false,
    tesseditDontRowrejGoodWds: false,
    tesseditEnableDictCorrection: false,
    tesseditCharWhitelist: '',
    tesseditCharBlacklist: '',
    tesseditUsePrimaryParamsModel: false,
    textordSpaceSizeIsVariable: false,
    thresholdingMethod: false,
  );
  const ocrConfig = OcrConfig(
    enabled: true,
    backend: 'tesseract',
    language: 'eng+deu',
    autoRotate: false,
    tesseractConfig: tesseract,
  );
  const chunkingConfig = ChunkingConfig(
    maxCharacters: 1000,
    overlap: 200,
    trim: true,
    chunkerType: ChunkerType.text,
    sizing: ChunkSizing.characters(),
    prependHeadingContext: false,
  );

  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    ocr: ocrConfig,
    chunking: chunkingConfig,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  final extraction = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    extractionConfig,
  );
  print('Content length: ${extraction.content.length}');
}
```

View File

@@ -0,0 +1,23 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `document.pdf` with `includeDocumentStructure` switched on and,
/// when the result carries a document, prints how many nodes it has.
Future<void> main() async {
  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    // The one setting that differs from the baseline configuration.
    includeDocumentStructure: true,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  final extraction = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    extractionConfig,
  );

  // Nothing to report when no document structure came back.
  final structure = extraction.document;
  if (structure == null) {
    return;
  }
  print('Document nodes: ${structure.nodes.length}');
}
```

View File

@@ -0,0 +1,29 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `document.pdf` using the element-based result format and prints,
/// for each element, its type and up to the first 100 characters of its text.
Future<void> main() async {
  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    resultFormat: ResultFormat.elementBased,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  final extraction = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    extractionConfig,
  );

  // A missing element list is treated as empty.
  final elementList = extraction.elements ?? const [];
  for (final item in elementList) {
    print('Type: ${item.elementType}');

    // Clamp the preview so substring never runs past the end of short texts.
    final previewEnd = item.text.length < 100 ? item.text.length : 100;
    final preview = item.text.substring(0, previewEnd);
    print('Text: $preview');
    print('---');
  }
}
```

View File

@@ -0,0 +1,35 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `document.pdf` with chunking that carries an embedding config
/// (preset `'balanced'`) and prints the number of chunks produced.
Future<void> main() async {
  const embeddingConfig = EmbeddingConfig(
    model: EmbeddingModelType.preset(name: 'balanced'),
    normalize: true,
    batchSize: 16,
    showDownloadProgress: true,
  );
  const chunkingConfig = ChunkingConfig(
    maxCharacters: 1000,
    overlap: 200,
    trim: true,
    chunkerType: ChunkerType.text,
    sizing: ChunkSizing.characters(),
    prependHeadingContext: false,
    embedding: embeddingConfig,
  );

  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    chunking: chunkingConfig,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  final extraction = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    extractionConfig,
  );

  // A missing chunk list is reported as zero chunks.
  final chunkList = extraction.chunks ?? const [];
  print('Chunks with embeddings: ${chunkList.length}');
}
```

View File

@@ -0,0 +1,25 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `document.pdf` with HTML as the output format, styled through an
/// [HtmlOutputConfig], and prints the resulting content.
Future<void> main() async {
  const htmlConfig = HtmlOutputConfig(
    theme: HtmlTheme.gitHub,
    classPrefix: 'kb-',
    embedCss: true,
  );

  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    htmlOutput: htmlConfig,
    resultFormat: ResultFormat.unified,
    // HTML output is selected here; htmlOutput above supplies its options.
    outputFormat: OutputFormat.html(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  final extraction = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    extractionConfig,
  );
  print(extraction.content);
}
```

View File

@@ -0,0 +1,29 @@
```dart title="Dart"
import 'package:flutter_rust_bridge/flutter_rust_bridge.dart' show Int64List;
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `document.pdf` with keyword extraction configured (YAKE, at most
/// 10 keywords) and prints the extracted keywords.
Future<void> main() async {
  // Not const: the n-gram range is built at runtime by Int64List.fromList.
  final keywordConfig = KeywordConfig(
    algorithm: KeywordAlgorithm.yake,
    maxKeywords: 10,
    minScore: 0.1,
    ngramRange: Int64List.fromList(<int>[1, 3]),
    language: 'en',
  );

  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    keywords: keywordConfig,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    useLayoutForMarkdown: false,
    maxArchiveDepth: 3,
  );

  final extraction = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    extractionConfig,
  );

  final foundKeywords = extraction.extractedKeywords;
  print('Keywords: $foundKeywords');
}
```

View File

@@ -0,0 +1,25 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `document.pdf` with language detection enabled (multiple
/// languages allowed) and prints whatever languages the result reports.
Future<void> main() async {
  const languageConfig = LanguageDetectionConfig(
    enabled: true,
    minConfidence: 0.8,
    detectMultiple: true,
  );

  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    languageDetection: languageConfig,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  final extraction = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    extractionConfig,
  );

  final languages = extraction.detectedLanguages;
  print('Detected languages: $languages');
}
```

View File

@@ -0,0 +1,31 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `document.pdf` with image extraction configured and prints how
/// many images the result contains.
Future<void> main() async {
  const imageConfig = ImageExtractionConfig(
    extractImages: true,
    // DPI settings: target 300, auto-adjust on, bounds 150 and 600.
    targetDpi: 300,
    autoAdjustDpi: true,
    minDpi: 150,
    maxDpi: 600,
    maxImageDimension: 4096,
    injectPlaceholders: false,
    classify: false,
  );

  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    images: imageConfig,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  final extraction = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    extractionConfig,
  );

  // A missing image list is reported as zero images.
  final imageList = extraction.images ?? const [];
  print('Extracted images: ${imageList.length}');
}
```

View File

@@ -0,0 +1,32 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `encrypted.pdf`, supplying a password list through [PdfConfig],
/// and prints the title found in the result metadata.
Future<void> main() async {
  const hierarchyConfig = HierarchyConfig(
    enabled: true,
    kClusters: 4,
    includeBbox: false,
  );
  const pdfConfig = PdfConfig(
    extractImages: true,
    // Passwords supplied for the encrypted input file.
    passwords: <String>['password123'],
    extractMetadata: true,
    extractAnnotations: false,
    allowSingleColumnTables: false,
    hierarchy: hierarchyConfig,
  );

  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    pdfOptions: pdfConfig,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  final extraction = await KreuzbergBridge.extractFile(
    'encrypted.pdf',
    null,
    extractionConfig,
  );
  print('Title: ${extraction.metadata.title}');
}
```

View File

@@ -0,0 +1,33 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `document.pdf` with PDF hierarchy detection enabled and prints
/// how many of the returned pages carry a non-null hierarchy.
Future<void> main() async {
  const hierarchyConfig = HierarchyConfig(
    enabled: true,
    kClusters: 5,
    includeBbox: true,
    ocrCoverageThreshold: 0.8,
  );
  const pdfConfig = PdfConfig(
    extractImages: false,
    extractMetadata: true,
    extractAnnotations: false,
    allowSingleColumnTables: false,
    hierarchy: hierarchyConfig,
  );

  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    pdfOptions: pdfConfig,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  final extraction = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    extractionConfig,
  );

  // Count pages that have hierarchy data; a missing page list counts as none.
  final pageList = extraction.pages ?? const [];
  var pagesWithHierarchy = 0;
  for (final page in pageList) {
    if (page.hierarchy != null) {
      pagesWithHierarchy++;
    }
  }
  print('Pages with hierarchy: $pagesWithHierarchy');
}
```

View File

@@ -0,0 +1,27 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `document.pdf` with post-processing enabled for an explicit list
/// of processors and prints the resulting content.
Future<void> main() async {
  // Processor names passed to PostProcessorConfig.enabledProcessors.
  const processorNames = <String>[
    'whitespace_normalizer',
    'unicode_normalizer',
  ];
  const postprocessorConfig = PostProcessorConfig(
    enabled: true,
    enabledProcessors: processorNames,
  );

  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    postprocessor: postprocessorConfig,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  final extraction = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    extractionConfig,
  );
  print('Processed content: ${extraction.content}');
}
```

View File

@@ -0,0 +1,21 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `document.pdf` with quality processing enabled and prints the
/// result's quality score and its number of processing warnings.
Future<void> main() async {
  final extractionConfig = ExtractionConfig(
    useCache: true,
    // Quality processing is switched on for this run.
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    useLayoutForMarkdown: false,
    maxArchiveDepth: 3,
  );

  final extraction = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    extractionConfig,
  );

  final score = extraction.qualityScore;
  final warningCount = extraction.processingWarnings.length;
  print('Quality score: $score');
  print('Warnings: $warningCount');
}
```

View File

@@ -0,0 +1,48 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `scanned.pdf` through the `'tesseract'` OCR backend with every
/// Tesseract option spelled out, then prints the recognised text.
Future<void> main() async {
  // All Tesseract options are listed explicitly rather than relying on
  // defaults; the language string matches the one on OcrConfig below.
  const tesseract = TesseractConfig(
    language: 'eng+deu',
    psm: 6,
    outputFormat: 'text',
    oem: 3,
    minConfidence: 0.0,
    enableTableDetection: false,
    tableMinConfidence: 0.5,
    tableColumnThreshold: 20,
    tableRowThresholdRatio: 0.5,
    useCache: true,
    classifyUsePreAdaptedTemplates: false,
    languageModelNgramOn: false,
    tesseditDontBlkrejGoodWds: false,
    tesseditDontRowrejGoodWds: false,
    tesseditEnableDictCorrection: false,
    tesseditCharWhitelist: '',
    tesseditCharBlacklist: '',
    tesseditUsePrimaryParamsModel: false,
    textordSpaceSizeIsVariable: false,
    thresholdingMethod: false,
  );
  const ocrConfig = OcrConfig(
    enabled: true,
    backend: 'tesseract',
    language: 'eng+deu',
    autoRotate: false,
    tesseractConfig: tesseract,
  );

  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    ocr: ocrConfig,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  final extraction = await KreuzbergBridge.extractFile(
    'scanned.pdf',
    null,
    extractionConfig,
  );
  print('OCR text: ${extraction.content}');
}
```

View File

@@ -0,0 +1,24 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `document.pdf` with token reduction configured in `'moderate'`
/// mode and prints the resulting content.
Future<void> main() async {
  const tokenReductionConfig = TokenReductionOptions(
    mode: 'moderate',
    preserveImportantWords: true,
  );

  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    tokenReduction: tokenReductionConfig,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  final extraction = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    extractionConfig,
  );
  print('Reduced content: ${extraction.content}');
}
```