Nomad changes
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s

This commit is contained in:
Henrik Jess Nielsen
2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions

View File

@@ -0,0 +1,77 @@
```dart title="Dart"
import 'dart:typed_data';
import 'package:kreuzberg/kreuzberg.dart';
/// A single chunk of text together with its embedding vector and metadata.
///
/// All fields are required and immutable once the record is constructed.
class VectorRecord {
  /// Identifier of this record.
  final String id;

  /// The text content of the chunk.
  final String content;

  /// The embedding vector associated with [content].
  final Float64List embedding;

  /// String-keyed, string-valued metadata attached to the record.
  final Map<String, String> metadata;

  /// Creates a record from its four required parts.
  VectorRecord({
    required this.id,
    required this.content,
    required this.embedding,
    required this.metadata,
  });
}
/// Extracts the document at [documentPath] and turns its embedded chunks into
/// [VectorRecord]s.
///
/// The extraction is configured with chunking (`maxCharacters: 512`,
/// `overlap: 50`) and an embedding configuration using the `'balanced'`
/// preset. Every returned chunk that carries a non-null embedding becomes one
/// record; chunks without an embedding are skipped.
///
/// Record ids have the form `<documentId>_chunk_<index>`, where the index is
/// the chunk's position in the extraction result. Because skipped chunks
/// still consume an index, the ids may contain gaps.
///
/// Returns an empty list when the result has no chunks.
Future<List<VectorRecord>> extractAndVectorize(
  String documentPath,
  String documentId,
) async {
  // Chunking settings, including the embedding configuration for each chunk.
  const chunking = ChunkingConfig(
    maxCharacters: 512,
    overlap: 50,
    trim: true,
    chunkerType: ChunkerType.text,
    sizing: ChunkSizing.characters(),
    prependHeadingContext: false,
    embedding: EmbeddingConfig(
      model: EmbeddingModelType.preset(name: 'balanced'),
      normalize: true,
      batchSize: 32,
      showDownloadProgress: false,
    ),
  );

  final options = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    chunking: chunking,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  // NOTE(review): the second positional argument is left null; presumably an
  // optional MIME-type override — confirm against the bridge API.
  final extraction =
      await KreuzbergBridge.extractFile(documentPath, null, options);

  // A missing chunk list is treated the same as an empty one.
  final pieces = extraction.chunks ?? const [];
  final vectors = <VectorRecord>[];

  for (var i = 0; i < pieces.length; i++) {
    final piece = pieces[i];
    final vector = piece.embedding;

    // Only chunks that actually carry an embedding produce a record.
    if (vector != null) {
      vectors.add(
        VectorRecord(
          id: '${documentId}_chunk_$i',
          content: piece.content,
          embedding: vector,
          metadata: <String, String>{
            'document_id': documentId,
            'chunk_index': '$i',
            'content_length': '${piece.content.length}',
          },
        ),
      );
    }
  }

  return vectors;
}
/// Runs the example on `document.pdf` with the document id `doc-001` and
/// prints the number of vector records produced.
Future<void> main() async {
  final vectors = await extractAndVectorize('document.pdf', 'doc-001');
  print('Vector records: ${vectors.length}');
}
```