Nomad changes
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s

This commit is contained in:
Henrik Jess Nielsen
2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions

View File

@@ -0,0 +1,32 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `document.pdf` with Markdown chunking enabled and prints a
/// one-line summary for every chunk in the result.
Future<void> main() async {
  // Chunking settings kept separate so the extraction config below stays
  // easy to scan.
  const chunkingConfig = ChunkingConfig(
    maxCharacters: 1000,
    overlap: 200,
    trim: true,
    chunkerType: ChunkerType.markdown,
    sizing: ChunkSizing.characters(),
    prependHeadingContext: true,
  );

  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    chunking: chunkingConfig,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  final extraction = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    extractionConfig,
  );

  // `chunks` is nullable; a missing list is treated as empty.
  final pieces = extraction.chunks ?? const [];
  print('Chunks: ${pieces.length}');
  for (var i = 0; i < pieces.length; i++) {
    final piece = pieces[i];
    print('Chunk ${piece.metadata.chunkIndex}: ${piece.content.length} chars');
  }
}
```

View File

@@ -0,0 +1,50 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `research_paper.pdf` with text chunking and per-chunk embeddings,
/// then prints each chunk's position, a short content preview, and the size
/// of its embedding when one is present.
Future<void> main() async {
  // Maximum number of characters of chunk content shown in the preview line.
  const maxPreviewLength = 100;

  final config = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    chunking: const ChunkingConfig(
      maxCharacters: 500,
      overlap: 50,
      trim: true,
      chunkerType: ChunkerType.text,
      sizing: ChunkSizing.characters(),
      prependHeadingContext: false,
      embedding: EmbeddingConfig(
        model: EmbeddingModelType.preset(name: 'balanced'),
        normalize: true,
        batchSize: 32,
        showDownloadProgress: false,
      ),
    ),
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  final result = await KreuzbergBridge.extractFile(
    'research_paper.pdf',
    null,
    config,
  );

  // `chunks` is nullable; a missing list is treated as empty.
  final chunks = result.chunks ?? const [];
  for (final chunk in chunks) {
    final index = chunk.metadata.chunkIndex;
    final total = chunk.metadata.totalChunks;
    final start = chunk.metadata.byteStart;
    final end = chunk.metadata.byteEnd;

    // Append the ellipsis only when the content was actually cut, so a chunk
    // that fits within the preview is not displayed as if it were truncated.
    final content = chunk.content;
    final preview = content.length > maxPreviewLength
        ? '${content.substring(0, maxPreviewLength)}...'
        : content;

    // The index is printed as `index + 1` next to the total chunk count.
    print('Chunk ${index + 1}/$total');
    print('Position: $start-$end');
    print('Content: $preview');

    // The embedding is optional on a chunk; report its size only if present.
    final embedding = chunk.embedding;
    if (embedding != null) {
      print('Embedding: ${embedding.length} dimensions');
    }
  }
}
```

View File

@@ -0,0 +1,35 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `document.pdf` with text chunking and an embedding configuration
/// attached to the chunker, then prints how many chunks were returned.
Future<void> main() async {
  // Embedding settings applied to the chunks produced by the chunker.
  const embeddingConfig = EmbeddingConfig(
    model: EmbeddingModelType.preset(name: 'balanced'),
    normalize: true,
    batchSize: 32,
    showDownloadProgress: false,
  );

  const chunkingConfig = ChunkingConfig(
    maxCharacters: 1024,
    overlap: 100,
    trim: true,
    chunkerType: ChunkerType.text,
    sizing: ChunkSizing.characters(),
    prependHeadingContext: false,
    embedding: embeddingConfig,
  );

  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    chunking: chunkingConfig,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  final extraction = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    extractionConfig,
  );

  // `chunks` is nullable; a missing list is treated as empty.
  final embeddedChunks = extraction.chunks ?? const [];
  print('Chunks with embeddings: ${embeddedChunks.length}');
}
```

View File

@@ -0,0 +1,33 @@
```dart title="Dart"
import 'package:flutter_rust_bridge/flutter_rust_bridge.dart' show Int64List;
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `research_paper.pdf` with keyword extraction configured and
/// prints every returned keyword together with its score.
Future<void> main() async {
  // Built up front because the typed list cannot be part of a const
  // expression.
  final ngramRange = Int64List.fromList(<int>[1, 3]);

  final keywordConfig = KeywordConfig(
    algorithm: KeywordAlgorithm.yake,
    maxKeywords: 10,
    minScore: 0.3,
    ngramRange: ngramRange,
  );

  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    keywords: keywordConfig,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    useLayoutForMarkdown: false,
    maxArchiveDepth: 3,
  );

  final extraction = await KreuzbergBridge.extractFile(
    'research_paper.pdf',
    null,
    extractionConfig,
  );

  // `extractedKeywords` is nullable; when it is null nothing is printed.
  for (final entry in extraction.extractedKeywords ?? const []) {
    print('${entry.text} (score: ${entry.score})');
  }
}
```

View File

@@ -0,0 +1,30 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `scanned_document.pdf` with quality processing enabled, reports
/// the quality score when one is available, and prints every processing
/// warning attached to the result.
Future<void> main() async {
  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  final extraction = await KreuzbergBridge.extractFile(
    'scanned_document.pdf',
    null,
    extractionConfig,
  );

  // The score is optional; copy it to a local so the null check promotes it.
  final quality = extraction.qualityScore;
  if (quality != null) {
    final formatted = quality.toStringAsFixed(2);
    // Scores below 0.5 are reported as a warning instead of a plain score.
    final report = quality < 0.5
        ? 'Warning: Low quality extraction ($formatted)'
        : 'Quality score: $formatted';
    print(report);
  }

  for (final issue in extraction.processingWarnings) {
    print('Warning: $issue');
  }
}
```

View File

@@ -0,0 +1,18 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Embeds two sample strings with the `balanced` preset and prints the
/// number of vectors returned and the length of the first vector.
Future<void> main() async {
  const model = EmbeddingModelType.preset(name: 'balanced');
  const embeddingConfig = EmbeddingConfig(
    model: model,
    normalize: true,
    batchSize: 32,
    showDownloadProgress: false,
  );

  final inputs = <String>['Hello, world!', 'Kreuzberg is fast'];
  final vectors = await KreuzbergBridge.embedTexts(inputs, embeddingConfig);

  print('Vectors: ${vectors.length}');
  // The dimension count is read from the first returned vector.
  final firstVector = vectors.first;
  print('Dimensions: ${firstVector.length}');
}
```

View File

@@ -0,0 +1,24 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `document.pdf` with token reduction in `moderate` mode and
/// prints the length of the resulting content.
Future<void> main() async {
  const tokenReduction = TokenReductionOptions(
    mode: 'moderate',
    preserveImportantWords: true,
  );

  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    tokenReduction: tokenReduction,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  final extraction = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    extractionConfig,
  );

  final reducedLength = extraction.content.length;
  print('Reduced content length: $reducedLength');
}
```

View File

@@ -0,0 +1,24 @@
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
/// Extracts `verbose_document.pdf` with token reduction in `moderate` mode
/// and prints the content length after reduction.
Future<void> main() async {
  const reductionOptions = TokenReductionOptions(
    mode: 'moderate',
    preserveImportantWords: true,
  );

  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    tokenReduction: reductionOptions,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  final extraction = await KreuzbergBridge.extractFile(
    'verbose_document.pdf',
    null,
    extractionConfig,
  );

  final lengthAfterReduction = extraction.content.length;
  print('Content length after reduction: $lengthAfterReduction');
}
```

View File

@@ -0,0 +1,77 @@
```dart title="Dart"
import 'dart:typed_data';
import 'package:kreuzberg/kreuzberg.dart';
/// A single entry prepared for a vector store: one chunk of text, its
/// embedding, and string metadata describing where it came from.
class VectorRecord {
  /// Creates a record; every field is required.
  VectorRecord({
    required this.id,
    required this.content,
    required this.embedding,
    required this.metadata,
  });

  /// Identifier of this record.
  final String id;

  /// Text content of the chunk this record represents.
  final String content;

  /// Embedding vector associated with [content].
  final Float64List embedding;

  /// Additional string key/value attributes stored with the record.
  final Map<String, String> metadata;
}
/// Extracts the file at [documentPath] with text chunking and embeddings
/// enabled and converts every chunk that carries an embedding into a
/// [VectorRecord].
///
/// Record ids have the form `<documentId>_chunk_<position>`, where the
/// position is the chunk's index in the returned chunk list. Chunks without
/// an embedding are skipped but still consume a position.
Future<List<VectorRecord>> extractAndVectorize(
  String documentPath,
  String documentId,
) async {
  const embeddingConfig = EmbeddingConfig(
    model: EmbeddingModelType.preset(name: 'balanced'),
    normalize: true,
    batchSize: 32,
    showDownloadProgress: false,
  );

  const chunkingConfig = ChunkingConfig(
    maxCharacters: 512,
    overlap: 50,
    trim: true,
    chunkerType: ChunkerType.text,
    sizing: ChunkSizing.characters(),
    prependHeadingContext: false,
    embedding: embeddingConfig,
  );

  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    chunking: chunkingConfig,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  final extraction = await KreuzbergBridge.extractFile(
    documentPath,
    null,
    extractionConfig,
  );

  final vectorRecords = <VectorRecord>[];
  // `chunks` is nullable; a missing list is treated as empty.
  final pieces = extraction.chunks ?? const [];

  var nextPosition = 0;
  for (final piece in pieces) {
    // Advance for every chunk, including skipped ones, so ids match the
    // chunk's position in the list.
    final position = nextPosition++;
    final vector = piece.embedding;
    if (vector != null) {
      final text = piece.content;
      vectorRecords.add(
        VectorRecord(
          id: '${documentId}_chunk_$position',
          content: text,
          embedding: vector,
          metadata: <String, String>{
            'document_id': documentId,
            'chunk_index': position.toString(),
            'content_length': text.length.toString(),
          },
        ),
      );
    }
  }
  return vectorRecords;
}
/// Runs [extractAndVectorize] on `document.pdf` under the id `doc-001` and
/// prints how many vector records were produced.
Future<void> main() async {
  final vectorRecords = await extractAndVectorize('document.pdf', 'doc-001');
  final recordCount = vectorRecords.length;
  print('Vector records: $recordCount');
}
```