This commit is contained in:
44
docs/snippets/dart/advanced/chunk_page_mapping.md
Normal file
44
docs/snippets/dart/advanced/chunk_page_mapping.md
Normal file
@@ -0,0 +1,44 @@
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
/// Extracts `document.pdf` with chunking and page extraction enabled, then
/// prints a short preview and the page range of every chunk that reports one.
Future<void> main() async {
  // Maximum number of UTF-16 code units of chunk content shown per line.
  const previewLength = 50;

  final config = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    chunking: const ChunkingConfig(
      maxCharacters: 500,
      overlap: 50,
      trim: true,
      chunkerType: ChunkerType.text,
      sizing: ChunkSizing.characters(),
      prependHeadingContext: false,
    ),
    // NOTE(review): extractPages is presumably what populates the
    // firstPage/lastPage chunk metadata read below — confirm.
    pages: const PageConfig(
      extractPages: true,
      insertPageMarkers: false,
      markerFormat: '',
    ),
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  final result = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    config,
  );
  final chunks = result.chunks ?? const [];
  for (final chunk in chunks) {
    final first = chunk.metadata.firstPage;
    final last = chunk.metadata.lastPage;
    // Chunks without page information are skipped.
    if (first != null && last != null) {
      final content = chunk.content;
      // Only mark the preview with an ellipsis when text was actually cut.
      final preview = content.length > previewLength
          ? '${content.substring(0, previewLength)}...'
          : content;
      final pageRange = first == last ? 'Page $first' : 'Pages $first-$last';
      print('Chunk: $preview ($pageRange)');
    }
  }
}
|
||||
```
|
||||
29
docs/snippets/dart/advanced/chunking_config.md
Normal file
29
docs/snippets/dart/advanced/chunking_config.md
Normal file
@@ -0,0 +1,29 @@
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
/// Extracts `document.pdf` using the markdown chunker and prints how many
/// chunks were produced.
Future<void> main() async {
  // Chunking settings are built first so the top-level config stays flat.
  const chunking = ChunkingConfig(
    maxCharacters: 1000,
    overlap: 200,
    trim: true,
    chunkerType: ChunkerType.markdown,
    sizing: ChunkSizing.characters(),
    prependHeadingContext: true,
  );

  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    chunking: chunking,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  final extraction = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    extractionConfig,
  );
  // A missing chunk list is reported as zero chunks.
  final chunkCount = extraction.chunks?.length ?? 0;
  print('Chunks: $chunkCount');
}
|
||||
```
|
||||
50
docs/snippets/dart/advanced/chunking_rag.md
Normal file
50
docs/snippets/dart/advanced/chunking_rag.md
Normal file
@@ -0,0 +1,50 @@
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
/// Extracts `research_paper.pdf` with chunking and embeddings configured,
/// then prints position, a content preview and the embedding size per chunk.
Future<void> main() async {
  // Maximum number of UTF-16 code units of chunk content shown per chunk.
  const previewLength = 100;

  final config = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    chunking: const ChunkingConfig(
      maxCharacters: 500,
      overlap: 50,
      trim: true,
      chunkerType: ChunkerType.text,
      sizing: ChunkSizing.characters(),
      prependHeadingContext: false,
      embedding: EmbeddingConfig(
        model: EmbeddingModelType.preset(name: 'balanced'),
        normalize: true,
        batchSize: 32,
        showDownloadProgress: false,
      ),
    ),
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  final result = await KreuzbergBridge.extractFile(
    'research_paper.pdf',
    null,
    config,
  );
  final chunks = result.chunks ?? const [];
  for (final chunk in chunks) {
    final index = chunk.metadata.chunkIndex;
    final total = chunk.metadata.totalChunks;
    final start = chunk.metadata.byteStart;
    final end = chunk.metadata.byteEnd;
    final content = chunk.content;
    // Only mark the preview with an ellipsis when text was actually cut.
    final preview = content.length > previewLength
        ? '${content.substring(0, previewLength)}...'
        : content;
    // chunkIndex is shown one-based for readability.
    print('Chunk ${index + 1}/$total');
    print('Position: $start-$end');
    print('Content: $preview');
    // The embedding is nullable, so its size is only printed when present.
    final embedding = chunk.embedding;
    if (embedding != null) {
      print('Embedding: ${embedding.length} dimensions');
    }
  }
}
|
||||
```
|
||||
35
docs/snippets/dart/advanced/embedding_with_chunking.md
Normal file
35
docs/snippets/dart/advanced/embedding_with_chunking.md
Normal file
@@ -0,0 +1,35 @@
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
/// Extracts `document.pdf` with an embedding config attached to the chunking
/// config and prints the number of chunks returned.
Future<void> main() async {
  // Embedding settings nested inside the chunking configuration below.
  const embedding = EmbeddingConfig(
    model: EmbeddingModelType.preset(name: 'balanced'),
    normalize: true,
    batchSize: 32,
    showDownloadProgress: false,
  );
  const chunking = ChunkingConfig(
    maxCharacters: 1024,
    overlap: 100,
    trim: true,
    chunkerType: ChunkerType.text,
    sizing: ChunkSizing.characters(),
    prependHeadingContext: false,
    embedding: embedding,
  );

  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    chunking: chunking,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  final extraction = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    extractionConfig,
  );
  // A missing chunk list is reported as zero chunks.
  final chunkCount = extraction.chunks?.length ?? 0;
  print('Chunks with embeddings: $chunkCount');
}
|
||||
```
|
||||
29
docs/snippets/dart/advanced/keyword_extraction_config.md
Normal file
29
docs/snippets/dart/advanced/keyword_extraction_config.md
Normal file
@@ -0,0 +1,29 @@
|
||||
```dart title="Dart"
|
||||
import 'package:flutter_rust_bridge/flutter_rust_bridge.dart' show Int64List;
|
||||
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
/// Extracts `document.pdf` with a keyword config and prints the extracted
/// keywords as returned on the result.
Future<void> main() async {
  // Not const: ngramRange is built at runtime from a list.
  final keywordConfig = KeywordConfig(
    algorithm: KeywordAlgorithm.yake,
    maxKeywords: 10,
    minScore: 0.3,
    ngramRange: Int64List.fromList(<int>[1, 3]),
    language: 'en',
  );

  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    keywords: keywordConfig,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    useLayoutForMarkdown: false,
    maxArchiveDepth: 3,
  );

  final extraction = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    extractionConfig,
  );
  print('Keywords: ${extraction.extractedKeywords}');
}
|
||||
```
|
||||
33
docs/snippets/dart/advanced/keyword_extraction_example.md
Normal file
33
docs/snippets/dart/advanced/keyword_extraction_example.md
Normal file
@@ -0,0 +1,33 @@
|
||||
```dart title="Dart"
|
||||
import 'package:flutter_rust_bridge/flutter_rust_bridge.dart' show Int64List;
|
||||
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
/// Extracts `research_paper.pdf` with a keyword config and prints each
/// extracted keyword together with its score.
Future<void> main() async {
  // Not const: ngramRange is built at runtime from a list.
  final keywordConfig = KeywordConfig(
    algorithm: KeywordAlgorithm.yake,
    maxKeywords: 10,
    minScore: 0.3,
    ngramRange: Int64List.fromList(<int>[1, 3]),
  );

  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    keywords: keywordConfig,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    useLayoutForMarkdown: false,
    maxArchiveDepth: 3,
  );

  final extraction = await KreuzbergBridge.extractFile(
    'research_paper.pdf',
    null,
    extractionConfig,
  );
  // A null keyword list is treated as empty, so nothing is printed for it.
  final extracted = extraction.extractedKeywords ?? const [];
  for (final entry in extracted) {
    print('${entry.text} (score: ${entry.score})');
  }
}
|
||||
```
|
||||
25
docs/snippets/dart/advanced/language_detection_config.md
Normal file
25
docs/snippets/dart/advanced/language_detection_config.md
Normal file
@@ -0,0 +1,25 @@
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
/// Extracts `document.pdf` with language detection configured and prints the
/// detected languages from the result.
Future<void> main() async {
  // Language detection settings, with detectMultiple turned off.
  const languageDetection = LanguageDetectionConfig(
    enabled: true,
    minConfidence: 0.8,
    detectMultiple: false,
  );

  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    languageDetection: languageDetection,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  final extraction = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    extractionConfig,
  );
  print('Detected languages: ${extraction.detectedLanguages}');
}
|
||||
```
|
||||
@@ -0,0 +1,25 @@
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
/// Extracts `multilingual_document.pdf` with multi-language detection turned
/// on and prints the detected languages from the result.
Future<void> main() async {
  // Language detection settings, with detectMultiple turned on.
  const languageDetection = LanguageDetectionConfig(
    enabled: true,
    minConfidence: 0.8,
    detectMultiple: true,
  );

  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    languageDetection: languageDetection,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  final extraction = await KreuzbergBridge.extractFile(
    'multilingual_document.pdf',
    null,
    extractionConfig,
  );
  print('Detected languages: ${extraction.detectedLanguages}');
}
|
||||
```
|
||||
20
docs/snippets/dart/advanced/quality_processing_config.md
Normal file
20
docs/snippets/dart/advanced/quality_processing_config.md
Normal file
@@ -0,0 +1,20 @@
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
/// Extracts `document.pdf` with quality processing enabled and prints the
/// quality score reported on the result.
Future<void> main() async {
  final extractionConfig = ExtractionConfig(
    useCache: true,
    // The setting this snippet demonstrates.
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  final extraction = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    extractionConfig,
  );
  print('Quality score: ${extraction.qualityScore}');
}
|
||||
```
|
||||
30
docs/snippets/dart/advanced/quality_processing_example.md
Normal file
30
docs/snippets/dart/advanced/quality_processing_example.md
Normal file
@@ -0,0 +1,30 @@
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
/// Extracts `scanned_document.pdf` with quality processing enabled, reports
/// the quality score (flagging scores below 0.5) and prints every processing
/// warning.
Future<void> main() async {
  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  final extraction = await KreuzbergBridge.extractFile(
    'scanned_document.pdf',
    null,
    extractionConfig,
  );

  // The score is nullable; nothing is printed for it when absent.
  final quality = extraction.qualityScore;
  if (quality != null) {
    final formatted = quality.toStringAsFixed(2);
    final line = quality < 0.5
        ? 'Warning: Low quality extraction ($formatted)'
        : 'Quality score: $formatted';
    print(line);
  }

  for (final note in extraction.processingWarnings) {
    print('Warning: $note');
  }
}
|
||||
```
|
||||
24
docs/snippets/dart/advanced/token_reduction_config.md
Normal file
24
docs/snippets/dart/advanced/token_reduction_config.md
Normal file
@@ -0,0 +1,24 @@
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
/// Extracts `document.pdf` with token reduction configured and prints the
/// length of the resulting content.
Future<void> main() async {
  // Token reduction settings passed to the extraction config below.
  const tokenReduction = TokenReductionOptions(
    mode: 'moderate',
    preserveImportantWords: true,
  );

  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    tokenReduction: tokenReduction,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  final extraction = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    extractionConfig,
  );
  print('Reduced content length: ${extraction.content.length}');
}
|
||||
```
|
||||
24
docs/snippets/dart/advanced/token_reduction_example.md
Normal file
24
docs/snippets/dart/advanced/token_reduction_example.md
Normal file
@@ -0,0 +1,24 @@
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
/// Extracts `verbose_document.pdf` with token reduction configured and prints
/// the length of the resulting content.
Future<void> main() async {
  // Token reduction settings passed to the extraction config below.
  const tokenReduction = TokenReductionOptions(
    mode: 'moderate',
    preserveImportantWords: true,
  );

  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    tokenReduction: tokenReduction,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  final extraction = await KreuzbergBridge.extractFile(
    'verbose_document.pdf',
    null,
    extractionConfig,
  );
  print('Content length after reduction: ${extraction.content.length}');
}
|
||||
```
|
||||
77
docs/snippets/dart/advanced/vector_database_integration.md
Normal file
77
docs/snippets/dart/advanced/vector_database_integration.md
Normal file
@@ -0,0 +1,77 @@
|
||||
```dart title="Dart"
|
||||
import 'dart:typed_data';
|
||||
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
/// Pairs a piece of text with its embedding vector and string metadata.
class VectorRecord {
  /// Creates a record; all four fields must be supplied.
  VectorRecord({
    required this.id,
    required this.content,
    required this.embedding,
    required this.metadata,
  });

  /// Identifier of this record.
  final String id;

  /// The text this record was built from.
  final String content;

  /// The embedding vector associated with [content].
  final Float64List embedding;

  /// Additional string key/value pairs describing the record.
  final Map<String, String> metadata;
}
|
||||
|
||||
/// Extracts [documentPath] with chunking and embeddings configured and
/// returns one [VectorRecord] per chunk that carries an embedding.
///
/// Record ids have the form `<documentId>_chunk_<index>`, where the index is
/// the chunk's position in the returned chunk list. Chunks without an
/// embedding are left out.
Future<List<VectorRecord>> extractAndVectorize(
  String documentPath,
  String documentId,
) async {
  const embeddingConfig = EmbeddingConfig(
    model: EmbeddingModelType.preset(name: 'balanced'),
    normalize: true,
    batchSize: 32,
    showDownloadProgress: false,
  );
  const chunkingConfig = ChunkingConfig(
    maxCharacters: 512,
    overlap: 50,
    trim: true,
    chunkerType: ChunkerType.text,
    sizing: ChunkSizing.characters(),
    prependHeadingContext: false,
    embedding: embeddingConfig,
  );

  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    chunking: chunkingConfig,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  final extraction = await KreuzbergBridge.extractFile(
    documentPath,
    null,
    extractionConfig,
  );

  final vectorRecords = <VectorRecord>[];
  final allChunks = extraction.chunks ?? const [];
  for (var position = 0; position < allChunks.length; position++) {
    final current = allChunks[position];
    final vector = current.embedding;
    // Only chunks that actually carry an embedding become records.
    if (vector != null) {
      vectorRecords.add(
        VectorRecord(
          id: '${documentId}_chunk_$position',
          content: current.content,
          embedding: vector,
          metadata: <String, String>{
            'document_id': documentId,
            'chunk_index': position.toString(),
            'content_length': current.content.length.toString(),
          },
        ),
      );
    }
  }
  return vectorRecords;
}
|
||||
|
||||
/// Builds vector records for `document.pdf` under the id `doc-001` and
/// prints how many were produced.
Future<void> main() async {
  final vectorRecords = await extractAndVectorize('document.pdf', 'doc-001');
  final recordCount = vectorRecords.length;
  print('Vector records: $recordCount');
}
|
||||
```
|
||||
28
docs/snippets/dart/api/batch_extract_bytes_sync.md
Normal file
28
docs/snippets/dart/api/batch_extract_bytes_sync.md
Normal file
@@ -0,0 +1,28 @@
|
||||
```dart title="Dart"
|
||||
import 'dart:convert';
|
||||
import 'dart:typed_data';
|
||||
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
/// Extracts two in-memory documents in one batch call and prints the MIME
/// type and content length of every result.
Future<void> main() async {
  final plainText = Uint8List.fromList(utf8.encode('Hello, world!'));
  final htmlText = Uint8List.fromList(utf8.encode('<html>test</html>'));

  final batch = <BatchBytesItem>[
    BatchBytesItem(content: plainText, mimeType: 'text/plain'),
    // This item carries its own FileExtractionConfig.
    BatchBytesItem(
      content: htmlText,
      mimeType: 'text/html',
      config: const FileExtractionConfig(forceOcr: true),
    ),
  ];

  // Despite the "Sync" name, flutter_rust_bridge hands back a Future here,
  // so the call is awaited.
  final extractions = await KreuzbergBridge.batchExtractBytesSync(batch);

  print('Processed ${extractions.length} items');
  for (final extraction in extractions) {
    print('${extraction.mimeType}: ${extraction.content.length} chars');
  }
}
|
||||
```
|
||||
21
docs/snippets/dart/api/batch_extract_files_sync.md
Normal file
21
docs/snippets/dart/api/batch_extract_files_sync.md
Normal file
@@ -0,0 +1,21 @@
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
/// Extracts two files in one batch call and prints the MIME type and content
/// length of every result.
Future<void> main() async {
  final items = <BatchFileItem>[
    const BatchFileItem(path: 'doc1.pdf'),
    // This item carries its own FileExtractionConfig. Both constructors are
    // const, so the item is built as a compile-time constant like its
    // sibling above.
    const BatchFileItem(
      path: 'scan.pdf',
      config: FileExtractionConfig(forceOcr: true),
    ),
  ];

  // Sync semantics — flutter_rust_bridge still returns a Future from Dart.
  final results = await KreuzbergBridge.batchExtractFilesSync(items);

  print('Processed ${results.length} files');
  for (final result in results) {
    print('${result.mimeType}: ${result.content.length} chars');
  }
}
|
||||
```
|
||||
43
docs/snippets/dart/api/client_chunk_text.md
Normal file
43
docs/snippets/dart/api/client_chunk_text.md
Normal file
@@ -0,0 +1,43 @@
|
||||
```dart title="Dart"
|
||||
import 'dart:convert';
|
||||
import 'dart:io';
|
||||
|
||||
import 'package:http/http.dart' as http;
|
||||
|
||||
/// Uploads `document.pdf` to `http://localhost:8000/extract` with a JSON
/// `chunking` form field and prints the size of each returned chunk.
///
/// Throws an [HttpException] when the server answers with a status code of
/// 400 or above.
Future<void> main() async {
  final payload = await File('document.pdf').readAsBytes();

  // NOTE(review): the multipart field name and the response shape are taken
  // as written here — confirm against the server's /extract contract.
  final upload = http.MultipartRequest(
    'POST',
    Uri.parse('http://localhost:8000/extract'),
  );
  upload.files.add(
    http.MultipartFile.fromBytes(
      'file',
      payload,
      filename: 'document.pdf',
    ),
  );
  upload.fields['chunking'] = jsonEncode({
    'max_characters': 800,
    'overlap': 100,
  });

  final reply = await http.Response.fromStream(await upload.send());
  if (reply.statusCode >= 400) {
    throw HttpException(
      'Server returned ${reply.statusCode}: ${reply.body}',
    );
  }

  final decoded = jsonDecode(reply.body) as Map<String, dynamic>;
  final chunkList = decoded['chunks'] as List<dynamic>?;
  // Nothing to report when the response carries no chunk list.
  if (chunkList == null) {
    return;
  }
  print('${chunkList.length} chunks');
  for (final entry in chunkList) {
    final text = (entry as Map<String, dynamic>)['content'] as String? ?? '';
    print(' ${text.length} chars');
  }
}
|
||||
```
|
||||
31
docs/snippets/dart/api/client_extract_single_file.md
Normal file
31
docs/snippets/dart/api/client_extract_single_file.md
Normal file
@@ -0,0 +1,31 @@
|
||||
```dart title="Dart"
|
||||
import 'dart:convert';
|
||||
import 'dart:io';
|
||||
|
||||
import 'package:http/http.dart' as http;
|
||||
|
||||
/// Uploads `document.pdf` to `http://localhost:8000/extract` and prints the
/// `content` field of the JSON response.
///
/// Throws an [HttpException] when the server answers with a status code of
/// 400 or above.
Future<void> main() async {
  final payload = await File('document.pdf').readAsBytes();

  // NOTE(review): the multipart field name and the response shape are taken
  // as written here — confirm against the server's /extract contract.
  final upload = http.MultipartRequest(
    'POST',
    Uri.parse('http://localhost:8000/extract'),
  );
  upload.files.add(
    http.MultipartFile.fromBytes(
      'file',
      payload,
      filename: 'document.pdf',
    ),
  );

  final reply = await http.Response.fromStream(await upload.send());
  if (reply.statusCode >= 400) {
    throw HttpException(
      'Server returned ${reply.statusCode}: ${reply.body}',
    );
  }

  final decoded = jsonDecode(reply.body) as Map<String, dynamic>;
  // A missing content field prints as an empty line.
  print(decoded['content'] ?? '');
}
|
||||
```
|
||||
65
docs/snippets/dart/api/combining_all_features.md
Normal file
65
docs/snippets/dart/api/combining_all_features.md
Normal file
@@ -0,0 +1,65 @@
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
/// Extracts `report.pdf` with OCR, chunking and image extraction configured
/// together, then prints a content preview and a summary of the result.
Future<void> main() async {
  // Maximum number of UTF-16 code units of content shown in the preview.
  const previewLength = 200;

  final config = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    // OCR: Tesseract on English text
    forceOcr: false,
    disableOcr: false,
    ocr: const OcrConfig(
      enabled: true,
      backend: 'tesseract',
      language: 'eng',
      autoRotate: false,
    ),
    // Chunking: ~800-character markdown chunks with 100-char overlap
    chunking: const ChunkingConfig(
      maxCharacters: 800,
      overlap: 100,
      trim: true,
      chunkerType: ChunkerType.markdown,
      sizing: ChunkSizing.characters(),
      prependHeadingContext: true,
    ),
    // Image extraction
    images: const ImageExtractionConfig(
      extractImages: true,
      targetDpi: 150,
      maxImageDimension: 4096,
      injectPlaceholders: false,
      autoAdjustDpi: true,
      minDpi: 72,
      maxDpi: 300,
      classify: false,
    ),
    // Output: markdown with full document structure
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.markdown(),
    includeDocumentStructure: true,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  final result = await KreuzbergBridge.extractFile('report.pdf', null, config);

  final content = result.content;
  print('Content (${content.length} chars):');
  // Same preview form as the other snippets: cut only when too long.
  final preview = content.length > previewLength
      ? content.substring(0, previewLength)
      : content;
  print(preview);

  // Nullable fields are copied to locals so they promote to non-null
  // inside the checks, avoiding the `!` null assertion.
  final chunks = result.chunks;
  if (chunks != null) {
    print('\nChunks: ${chunks.length}');
  }
  print('Tables: ${result.tables.length}');
  final languages = result.detectedLanguages;
  if (languages != null) {
    print('Languages: $languages');
  }
  final method = result.extractionMethod;
  if (method != null) {
    print('Extraction method: $method');
  }
}
|
||||
```
|
||||
15
docs/snippets/dart/api/error_handling.md
Normal file
15
docs/snippets/dart/api/error_handling.md
Normal file
@@ -0,0 +1,15 @@
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
/// Extracts `document.pdf` and prints its content, or prints the failure
/// when the extraction throws an [Exception].
Future<void> main() async {
  try {
    final extraction = await KreuzbergBridge.extractFile(
      'document.pdf',
      null,
    );
    print(extraction.content);
  } on Exception catch (error) {
    // Per the binding's behaviour noted in the original snippet, every
    // KreuzbergError variant (Io / UnsupportedFormat / Parsing /
    // MissingDependency, ...) arrives as a Dart exception whose message
    // keeps the original context.
    print('Extraction failed: $error');
  }
}
|
||||
```
|
||||
37
docs/snippets/dart/api/error_handling_extract.md
Normal file
37
docs/snippets/dart/api/error_handling_extract.md
Normal file
@@ -0,0 +1,37 @@
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
/// Extracts `document.pdf` as markdown and prints a summary, or prints a
/// message chosen by matching on the text of the thrown [Exception].
Future<void> main() async {
  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.markdown(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  try {
    final extraction = await KreuzbergBridge.extractFile(
      'document.pdf',
      null,
      extractionConfig,
    );
    print('Extracted ${extraction.content.length} chars');
    print('MIME: ${extraction.mimeType}');
    if (extraction.detectedLanguages != null) {
      print('Languages: ${extraction.detectedLanguages}');
    }
  } on Exception catch (error) {
    // The error kind is recognised by substring, first match wins.
    final description = error.toString();
    if (description.contains('UnsupportedFormat')) {
      print('Unsupported format: $description');
      return;
    }
    if (description.contains('MissingDependency')) {
      print('Install the required dependency: $description');
      return;
    }
    if (description.contains('Parsing')) {
      print('Corrupt or invalid document: $description');
      return;
    }
    print('Extraction failed: $description');
  }
}
|
||||
```
|
||||
14
docs/snippets/dart/api/extract_bytes_async.md
Normal file
14
docs/snippets/dart/api/extract_bytes_async.md
Normal file
@@ -0,0 +1,14 @@
|
||||
```dart title="Dart"
|
||||
import 'dart:io';
|
||||
import 'dart:typed_data';
|
||||
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
/// Reads `document.pdf` into memory, extracts it as `application/pdf` and
/// prints the content and MIME type.
Future<void> main() async {
  final pdfBytes = await File('document.pdf').readAsBytes();
  final extraction = await KreuzbergBridge.extractBytes(
    pdfBytes,
    'application/pdf',
  );

  print(extraction.content);
  print('MIME type: ${extraction.mimeType}');
}
|
||||
```
|
||||
16
docs/snippets/dart/api/extract_bytes_sync.md
Normal file
16
docs/snippets/dart/api/extract_bytes_sync.md
Normal file
@@ -0,0 +1,16 @@
|
||||
```dart title="Dart"
|
||||
import 'dart:io';
|
||||
import 'dart:typed_data';
|
||||
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
/// Reads `document.pdf` into memory, extracts it through the `*Sync` entry
/// point and prints the content and MIME type.
Future<void> main() async {
  final pdfBytes = await File('document.pdf').readAsBytes();
  // Despite the "Sync" name, flutter_rust_bridge surfaces every call as a
  // Future, so this entry point is awaited like the async one.
  final extraction = await KreuzbergBridge.extractBytesSync(
    pdfBytes,
    'application/pdf',
  );

  print(extraction.content);
  print('MIME type: ${extraction.mimeType}');
}
|
||||
```
|
||||
11
docs/snippets/dart/api/extract_file_async.md
Normal file
11
docs/snippets/dart/api/extract_file_async.md
Normal file
@@ -0,0 +1,11 @@
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
/// Extracts `document.pdf` and prints its content, MIME type and table
/// count.
Future<void> main() async {
  final extraction = await KreuzbergBridge.extractFile('document.pdf', null);
  final tableCount = extraction.tables.length;

  print(extraction.content);
  print('MIME type: ${extraction.mimeType}');
  print('Tables: $tableCount');
}
|
||||
```
|
||||
13
docs/snippets/dart/api/extract_file_sync.md
Normal file
13
docs/snippets/dart/api/extract_file_sync.md
Normal file
@@ -0,0 +1,13 @@
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
/// Extracts `document.pdf` through the `*Sync` entry point and prints its
/// content, MIME type and table count.
Future<void> main() async {
  // Despite the "Sync" name, flutter_rust_bridge surfaces every call as a
  // Future, so this entry point is awaited like the async one.
  final extraction = await KreuzbergBridge.extractFileSync(
    'document.pdf',
    null,
  );
  final tableCount = extraction.tables.length;

  print(extraction.content);
  print('MIME type: ${extraction.mimeType}');
  print('Tables: $tableCount');
}
|
||||
```
|
||||
64
docs/snippets/dart/config/advanced_config.md
Normal file
64
docs/snippets/dart/config/advanced_config.md
Normal file
@@ -0,0 +1,64 @@
|
||||
```dart title="Dart"
|
||||
import 'package:flutter_rust_bridge/flutter_rust_bridge.dart' show Int64List;
|
||||
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
/// Extracts `document.pdf` with OCR, chunking with embeddings, language
/// detection, keywords, token reduction and post-processing all configured,
/// then prints the content, detected languages and chunk count.
Future<void> main() async {
  // Each feature's settings are built separately, then combined below.
  const ocr = OcrConfig(
    enabled: true,
    backend: 'tesseract',
    language: 'eng',
    autoRotate: false,
  );
  const chunking = ChunkingConfig(
    maxCharacters: 1000,
    overlap: 200,
    trim: true,
    chunkerType: ChunkerType.text,
    sizing: ChunkSizing.characters(),
    prependHeadingContext: false,
    embedding: EmbeddingConfig(
      model: EmbeddingModelType.preset(name: 'balanced'),
      normalize: true,
      batchSize: 32,
      showDownloadProgress: false,
    ),
  );
  const languageDetection = LanguageDetectionConfig(
    enabled: true,
    minConfidence: 0.8,
    detectMultiple: false,
  );
  // Not const: ngramRange is built at runtime from a list.
  final keywordConfig = KeywordConfig(
    algorithm: KeywordAlgorithm.yake,
    maxKeywords: 10,
    minScore: 0.1,
    ngramRange: Int64List.fromList(<int>[1, 3]),
    language: 'en',
  );
  const tokenReduction = TokenReductionOptions(
    mode: 'moderate',
    preserveImportantWords: true,
  );
  const postprocessor = PostProcessorConfig(enabled: true);

  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    ocr: ocr,
    chunking: chunking,
    languageDetection: languageDetection,
    keywords: keywordConfig,
    tokenReduction: tokenReduction,
    postprocessor: postprocessor,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    useLayoutForMarkdown: false,
    maxArchiveDepth: 3,
  );

  final extraction = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    extractionConfig,
  );
  print('Content: ${extraction.content}');
  if (extraction.detectedLanguages != null) {
    print('Languages: ${extraction.detectedLanguages}');
  }
  // A missing chunk list is reported as zero chunks.
  final chunkCount = extraction.chunks?.length ?? 0;
  print('Chunks: $chunkCount');
}
|
||||
```
|
||||
32
docs/snippets/dart/config/chunking_config.md
Normal file
32
docs/snippets/dart/config/chunking_config.md
Normal file
@@ -0,0 +1,32 @@
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
/// Extracts `document.pdf` using the text chunker and prints the chunk
/// count followed by the content length of every chunk.
Future<void> main() async {
  // Chunking settings are built first so the top-level config stays flat.
  const chunking = ChunkingConfig(
    maxCharacters: 1000,
    overlap: 200,
    trim: true,
    chunkerType: ChunkerType.text,
    sizing: ChunkSizing.characters(),
    prependHeadingContext: false,
  );

  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    chunking: chunking,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  final extraction = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    extractionConfig,
  );
  // A missing chunk list is treated as empty.
  final pieces = extraction.chunks ?? const [];
  print('Chunks: ${pieces.length}');
  for (final piece in pieces) {
    print('Length: ${piece.content.length}');
  }
}
|
||||
```
|
||||
20
docs/snippets/dart/config/config_basic.md
Normal file
20
docs/snippets/dart/config/config_basic.md
Normal file
@@ -0,0 +1,20 @@
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
/// Extracts `document.pdf` with a plain-text, no-extras configuration and
/// prints the extracted content.
Future<void> main() async {
  final extractionConfig = ExtractionConfig(
    // Caching and quality processing.
    useCache: true,
    enableQualityProcessing: true,
    // OCR switches, both off.
    forceOcr: false,
    disableOcr: false,
    // Output shape.
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  final extraction = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    extractionConfig,
  );
  print(extraction.content);
}
|
||||
```
|
||||
22
docs/snippets/dart/config/config_discover.md
Normal file
22
docs/snippets/dart/config/config_discover.md
Normal file
@@ -0,0 +1,22 @@
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
/// Extracts `document.pdf` with a config built in code and prints the
/// extracted content.
Future<void> main() async {
  // The Dart bindings offer no config-file discovery, so the config is
  // constructed here and handed to KreuzbergBridge.extractFile explicitly.
  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  final extraction = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    extractionConfig,
  );
  print(extraction.content);
}
|
||||
```
|
||||
27
docs/snippets/dart/config/config_ocr.md
Normal file
27
docs/snippets/dart/config/config_ocr.md
Normal file
@@ -0,0 +1,27 @@
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
/// Extracts `scanned.pdf` with the Tesseract OCR backend configured for
/// English, then prints the content length and the number of tables.
Future<void> main() async {
  const ocr = OcrConfig(
    enabled: true,
    backend: 'tesseract',
    language: 'eng',
    autoRotate: false,
  );

  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    ocr: ocr,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  final extraction = await KreuzbergBridge.extractFile(
    'scanned.pdf',
    null,
    extractionConfig,
  );

  final contentLength = extraction.content.length;
  final tableCount = extraction.tables.length;
  print('Content length: $contentLength');
  print('Tables detected: $tableCount');
}
|
||||
```
|
||||
56
docs/snippets/dart/config/config_programmatic.md
Normal file
56
docs/snippets/dart/config/config_programmatic.md
Normal file
@@ -0,0 +1,56 @@
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
/// Builds a complete configuration in code (Tesseract OCR plus chunking),
/// extracts `document.pdf`, and prints the length of the extracted content.
Future<void> main() async {
  // The same language string is passed to both the OCR and Tesseract configs.
  const languages = 'eng+deu';

  const tesseract = TesseractConfig(
    language: languages,
    psm: 6,
    outputFormat: 'text',
    oem: 3,
    minConfidence: 0.0,
    enableTableDetection: false,
    tableMinConfidence: 0.5,
    tableColumnThreshold: 20,
    tableRowThresholdRatio: 0.5,
    useCache: true,
    classifyUsePreAdaptedTemplates: false,
    languageModelNgramOn: false,
    tesseditDontBlkrejGoodWds: false,
    tesseditDontRowrejGoodWds: false,
    tesseditEnableDictCorrection: false,
    tesseditCharWhitelist: '',
    tesseditCharBlacklist: '',
    tesseditUsePrimaryParamsModel: false,
    textordSpaceSizeIsVariable: false,
    thresholdingMethod: false,
  );

  const ocr = OcrConfig(
    enabled: true,
    backend: 'tesseract',
    language: languages,
    autoRotate: false,
    tesseractConfig: tesseract,
  );

  const chunking = ChunkingConfig(
    maxCharacters: 1000,
    overlap: 200,
    trim: true,
    chunkerType: ChunkerType.text,
    sizing: ChunkSizing.characters(),
    prependHeadingContext: false,
  );

  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    ocr: ocr,
    chunking: chunking,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  final extraction = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    extractionConfig,
  );
  final contentLength = extraction.content.length;
  print('Content length: $contentLength');
}
|
||||
```
|
||||
23
docs/snippets/dart/config/document_structure_config.md
Normal file
23
docs/snippets/dart/config/document_structure_config.md
Normal file
@@ -0,0 +1,23 @@
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
/// Extracts `document.pdf` with `includeDocumentStructure` switched on and
/// prints the number of document nodes when a document is returned.
Future<void> main() async {
  final extractionConfig = ExtractionConfig(
    // Processing switches.
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    maxArchiveDepth: 3,
    // Shape of the returned result.
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: true,
    useLayoutForMarkdown: false,
  );

  final extraction = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    extractionConfig,
  );

  // `document` is nullable; nothing is printed when it is absent.
  final nodes = extraction.document?.nodes;
  if (nodes != null) {
    print('Document nodes: ${nodes.length}');
  }
}
|
||||
```
|
||||
29
docs/snippets/dart/config/element_based_output.md
Normal file
29
docs/snippets/dart/config/element_based_output.md
Normal file
@@ -0,0 +1,29 @@
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
/// Extracts `document.pdf` in element-based result format and prints each
/// element's type followed by at most the first 100 characters of its text.
Future<void> main() async {
  final extractionConfig = ExtractionConfig(
    // Processing switches.
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    maxArchiveDepth: 3,
    // Shape of the returned result.
    resultFormat: ResultFormat.elementBased,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    useLayoutForMarkdown: false,
  );

  final extraction = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    extractionConfig,
  );

  // `elements` is nullable, so fall back to an empty list.
  final items = extraction.elements ?? const [];
  for (final item in items) {
    final text = item.text;
    // Short texts are printed whole; longer ones are cut at 100 characters.
    final preview = text.length > 100 ? text.substring(0, 100) : text;

    print('Type: ${item.elementType}');
    print('Text: $preview');
    print('---');
  }
}
|
||||
```
|
||||
35
docs/snippets/dart/config/embedding_config.md
Normal file
35
docs/snippets/dart/config/embedding_config.md
Normal file
@@ -0,0 +1,35 @@
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
/// Extracts `document.pdf` with chunking and a preset embedding model
/// configured, then prints how many chunks were returned.
Future<void> main() async {
  const embedding = EmbeddingConfig(
    model: EmbeddingModelType.preset(name: 'balanced'),
    normalize: true,
    batchSize: 16,
    showDownloadProgress: true,
  );

  const chunking = ChunkingConfig(
    maxCharacters: 1000,
    overlap: 200,
    trim: true,
    chunkerType: ChunkerType.text,
    sizing: ChunkSizing.characters(),
    prependHeadingContext: false,
    embedding: embedding,
  );

  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    chunking: chunking,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  final extraction = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    extractionConfig,
  );

  // `chunks` is nullable, so fall back to an empty list.
  final embedded = extraction.chunks ?? const [];
  print('Chunks with embeddings: ${embedded.length}');
}
|
||||
```
|
||||
25
docs/snippets/dart/config/html_output.md
Normal file
25
docs/snippets/dart/config/html_output.md
Normal file
@@ -0,0 +1,25 @@
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
/// Extracts `document.pdf` as HTML using the GitHub theme, a `kb-` class
/// prefix and embedded CSS, then prints the resulting content.
Future<void> main() async {
  const htmlOutput = HtmlOutputConfig(
    theme: HtmlTheme.gitHub,
    classPrefix: 'kb-',
    embedCss: true,
  );

  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    htmlOutput: htmlOutput,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.html(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  final extraction = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    extractionConfig,
  );
  print(extraction.content);
}
|
||||
```
|
||||
29
docs/snippets/dart/config/keyword_extraction_config.md
Normal file
29
docs/snippets/dart/config/keyword_extraction_config.md
Normal file
@@ -0,0 +1,29 @@
|
||||
```dart title="Dart"
|
||||
import 'package:flutter_rust_bridge/flutter_rust_bridge.dart' show Int64List;
|
||||
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
/// Extracts `document.pdf` with YAKE keyword extraction configured and prints
/// the extracted keywords.
Future<void> main() async {
  // Not const: the n-gram range is built at runtime via `Int64List.fromList`.
  final keywords = KeywordConfig(
    algorithm: KeywordAlgorithm.yake,
    maxKeywords: 10,
    minScore: 0.1,
    ngramRange: Int64List.fromList(<int>[1, 3]),
    language: 'en',
  );

  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    keywords: keywords,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    useLayoutForMarkdown: false,
    maxArchiveDepth: 3,
  );

  final extraction = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    extractionConfig,
  );
  print('Keywords: ${extraction.extractedKeywords}');
}
|
||||
```
|
||||
25
docs/snippets/dart/config/language_detection_config.md
Normal file
25
docs/snippets/dart/config/language_detection_config.md
Normal file
@@ -0,0 +1,25 @@
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
/// Extracts `document.pdf` with language detection enabled (minimum
/// confidence 0.8, multiple languages allowed) and prints what was detected.
Future<void> main() async {
  const languageDetection = LanguageDetectionConfig(
    enabled: true,
    minConfidence: 0.8,
    detectMultiple: true,
  );

  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    languageDetection: languageDetection,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  final extraction = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    extractionConfig,
  );
  print('Detected languages: ${extraction.detectedLanguages}');
}
|
||||
```
|
||||
31
docs/snippets/dart/config/ocr_dpi_config.md
Normal file
31
docs/snippets/dart/config/ocr_dpi_config.md
Normal file
@@ -0,0 +1,31 @@
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
/// Extracts `document.pdf` with image extraction and DPI settings configured,
/// then prints how many images were extracted.
Future<void> main() async {
  const images = ImageExtractionConfig(
    extractImages: true,
    targetDpi: 300,
    maxImageDimension: 4096,
    injectPlaceholders: false,
    autoAdjustDpi: true,
    minDpi: 150,
    maxDpi: 600,
    classify: false,
  );

  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    images: images,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  final extraction = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    extractionConfig,
  );

  // `images` is nullable, so fall back to an empty list.
  final extracted = extraction.images ?? const [];
  print('Extracted images: ${extracted.length}');
}
|
||||
```
|
||||
32
docs/snippets/dart/config/pdf_config.md
Normal file
32
docs/snippets/dart/config/pdf_config.md
Normal file
@@ -0,0 +1,32 @@
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
/// Extracts `encrypted.pdf` with PDF-specific options (a password list and
/// hierarchy detection) and prints the title from the result metadata.
Future<void> main() async {
  const hierarchy = HierarchyConfig(
    enabled: true,
    kClusters: 4,
    includeBbox: false,
  );

  const pdfOptions = PdfConfig(
    extractImages: true,
    passwords: <String>['password123'],
    extractMetadata: true,
    extractAnnotations: false,
    allowSingleColumnTables: false,
    hierarchy: hierarchy,
  );

  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    pdfOptions: pdfOptions,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  final extraction = await KreuzbergBridge.extractFile(
    'encrypted.pdf',
    null,
    extractionConfig,
  );
  print('Title: ${extraction.metadata.title}');
}
|
||||
```
|
||||
33
docs/snippets/dart/config/pdf_hierarchy_config.md
Normal file
33
docs/snippets/dart/config/pdf_hierarchy_config.md
Normal file
@@ -0,0 +1,33 @@
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
/// Extracts `document.pdf` with PDF hierarchy detection configured and prints
/// how many returned pages carry a non-null `hierarchy`.
Future<void> main() async {
  const hierarchy = HierarchyConfig(
    enabled: true,
    kClusters: 5,
    includeBbox: true,
    ocrCoverageThreshold: 0.8,
  );

  const pdfOptions = PdfConfig(
    extractImages: false,
    extractMetadata: true,
    extractAnnotations: false,
    allowSingleColumnTables: false,
    hierarchy: hierarchy,
  );

  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    pdfOptions: pdfOptions,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  final extraction = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    extractionConfig,
  );

  // `pages` is nullable, so fall back to an empty list.
  final pageList = extraction.pages ?? const [];
  var withHierarchy = 0;
  for (final page in pageList) {
    if (page.hierarchy != null) {
      withHierarchy++;
    }
  }
  print('Pages with hierarchy: $withHierarchy');
}
|
||||
```
|
||||
27
docs/snippets/dart/config/postprocessor_config.md
Normal file
27
docs/snippets/dart/config/postprocessor_config.md
Normal file
@@ -0,0 +1,27 @@
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
/// Extracts `document.pdf` with two named post-processors enabled and prints
/// the resulting content.
Future<void> main() async {
  const postprocessor = PostProcessorConfig(
    enabled: true,
    enabledProcessors: <String>[
      'whitespace_normalizer',
      'unicode_normalizer',
    ],
  );

  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    postprocessor: postprocessor,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  final extraction = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    extractionConfig,
  );
  print('Processed content: ${extraction.content}');
}
|
||||
```
|
||||
21
docs/snippets/dart/config/quality_processing_config.md
Normal file
21
docs/snippets/dart/config/quality_processing_config.md
Normal file
@@ -0,0 +1,21 @@
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
/// Extracts `document.pdf` with quality processing enabled and prints the
/// quality score plus the number of processing warnings.
Future<void> main() async {
  final extractionConfig = ExtractionConfig(
    // Processing switches.
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    maxArchiveDepth: 3,
    // Shape of the returned result.
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    useLayoutForMarkdown: false,
  );

  final extraction = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    extractionConfig,
  );

  final warningCount = extraction.processingWarnings.length;
  print('Quality score: ${extraction.qualityScore}');
  print('Warnings: $warningCount');
}
|
||||
```
|
||||
48
docs/snippets/dart/config/tesseract_config.md
Normal file
48
docs/snippets/dart/config/tesseract_config.md
Normal file
@@ -0,0 +1,48 @@
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
/// Extracts `scanned.pdf` with a fully spelled-out Tesseract configuration
/// and prints the recognised text.
Future<void> main() async {
  // The same language string is passed to both the OCR and Tesseract configs.
  const languages = 'eng+deu';

  const tesseract = TesseractConfig(
    language: languages,
    psm: 6,
    outputFormat: 'text',
    oem: 3,
    minConfidence: 0.0,
    enableTableDetection: false,
    tableMinConfidence: 0.5,
    tableColumnThreshold: 20,
    tableRowThresholdRatio: 0.5,
    useCache: true,
    classifyUsePreAdaptedTemplates: false,
    languageModelNgramOn: false,
    tesseditDontBlkrejGoodWds: false,
    tesseditDontRowrejGoodWds: false,
    tesseditEnableDictCorrection: false,
    tesseditCharWhitelist: '',
    tesseditCharBlacklist: '',
    tesseditUsePrimaryParamsModel: false,
    textordSpaceSizeIsVariable: false,
    thresholdingMethod: false,
  );

  const ocr = OcrConfig(
    enabled: true,
    backend: 'tesseract',
    language: languages,
    autoRotate: false,
    tesseractConfig: tesseract,
  );

  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    ocr: ocr,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  final extraction = await KreuzbergBridge.extractFile(
    'scanned.pdf',
    null,
    extractionConfig,
  );
  print('OCR text: ${extraction.content}');
}
|
||||
```
|
||||
24
docs/snippets/dart/config/token_reduction_config.md
Normal file
24
docs/snippets/dart/config/token_reduction_config.md
Normal file
@@ -0,0 +1,24 @@
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
/// Extracts `document.pdf` with token reduction in `moderate` mode and prints
/// the resulting content.
Future<void> main() async {
  const tokenReduction = TokenReductionOptions(
    mode: 'moderate',
    preserveImportantWords: true,
  );

  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    tokenReduction: tokenReduction,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  final extraction = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    extractionConfig,
  );
  print('Reduced content: ${extraction.content}');
}
|
||||
```
|
||||
12
docs/snippets/dart/getting-started/basic_usage.md
Normal file
12
docs/snippets/dart/getting-started/basic_usage.md
Normal file
@@ -0,0 +1,12 @@
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
/// Extracts `document.pdf` without passing a config and prints the content
/// and the MIME type.
Future<void> main() async {
  // No config argument is passed, so the default ExtractionConfig applies.
  // flutter_rust_bridge surfaces every call as a Future, which is why even
  // entrypoints that do not look asynchronous have to be awaited.
  final extraction = await KreuzbergBridge.extractFile('document.pdf', null);

  final mimeType = extraction.mimeType;
  print(extraction.content);
  print('MIME type: $mimeType');
}
|
||||
```
|
||||
11
docs/snippets/dart/getting-started/extract_file.md
Normal file
11
docs/snippets/dart/getting-started/extract_file.md
Normal file
@@ -0,0 +1,11 @@
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
/// Extracts `document.pdf` and prints its content, MIME type and table count.
Future<void> main() async {
  final extraction = await KreuzbergBridge.extractFile('document.pdf', null);

  final mimeType = extraction.mimeType;
  final tableCount = extraction.tables.length;

  print(extraction.content);
  print('MIME type: $mimeType');
  print('Tables: $tableCount');
}
|
||||
```
|
||||
28
docs/snippets/dart/getting-started/extract_with_ocr.md
Normal file
28
docs/snippets/dart/getting-started/extract_with_ocr.md
Normal file
@@ -0,0 +1,28 @@
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
/// Extracts `scanned.pdf` with OCR forced on, using the Tesseract backend for
/// English, and prints the content and the detected languages.
Future<void> main() async {
  // Every argument is a literal, so the config can be a compile-time constant.
  const ocr = OcrConfig(
    enabled: true,
    backend: 'tesseract',
    language: 'eng',
    autoRotate: false,
  );

  final config = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: true,
    disableOcr: false,
    ocr: ocr,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  final result = await KreuzbergBridge.extractFile('scanned.pdf', null, config);
  print(result.content);
  print('Detected languages: ${result.detectedLanguages}');
}
|
||||
```
|
||||
9
docs/snippets/dart/getting-started/hello_world.md
Normal file
9
docs/snippets/dart/getting-started/hello_world.md
Normal file
@@ -0,0 +1,9 @@
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
/// Prints a greeting, then extracts `document.pdf` and prints its content.
Future<void> main() async {
  const greeting = 'Hello from kreuzberg!';
  print(greeting);

  final extraction = await KreuzbergBridge.extractFile('document.pdf', null);
  final text = extraction.content;
  print(text);
}
|
||||
```
|
||||
7
docs/snippets/dart/getting-started/install_verify.md
Normal file
7
docs/snippets/dart/getting-started/install_verify.md
Normal file
@@ -0,0 +1,7 @@
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
/// Prints a confirmation message; no extraction call is made.
Future<void> main() async {
  const message = 'kreuzberg loaded successfully';
  print(message);
}
|
||||
```
|
||||
20
docs/snippets/dart/getting-started/read_content.md
Normal file
20
docs/snippets/dart/getting-started/read_content.md
Normal file
@@ -0,0 +1,20 @@
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
/// Extracts `document.pdf` and prints its content, the Markdown rendering of
/// every table, and the text of every chunk.
Future<void> main() async {
  final result = await KreuzbergBridge.extractFile('document.pdf', null);

  print(result.content);

  // Print a concrete field rather than the object itself: interpolating the
  // whole object relies on its `toString`, which presumably is the default
  // `Instance of '...'` for these generated classes (TODO confirm).
  for (final table in result.tables) {
    print('Table: ${table.markdown}');
  }

  // `chunks` is nullable, so fall back to an empty list.
  final chunks = result.chunks ?? const [];
  for (final chunk in chunks) {
    print('Chunk: ${chunk.content}');
  }
}
|
||||
```
|
||||
47
docs/snippets/dart/llm/structured_extraction.md
Normal file
47
docs/snippets/dart/llm/structured_extraction.md
Normal file
@@ -0,0 +1,47 @@
|
||||
<!-- snippet:syntax-only -->
|
||||
|
||||
```dart title="Dart"
|
||||
import 'dart:convert';
|
||||
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
/// Extracts `paper.pdf` with LLM-backed structured extraction driven by a
/// JSON schema, and prints the structured output when one is returned.
Future<void> main() async {
  // Schema fragment shared by every plain-string property.
  const stringType = <String, Object?>{'type': 'string'};

  // Key order is kept as-is because `jsonEncode` serialises in insertion order.
  final schema = jsonEncode(<String, Object?>{
    'type': 'object',
    'properties': <String, Object?>{
      'title': stringType,
      'authors': <String, Object?>{
        'type': 'array',
        'items': stringType,
      },
      'date': stringType,
    },
    'required': <String>['title', 'authors', 'date'],
    'additionalProperties': false,
  });

  final structuredExtraction = StructuredExtractionConfig(
    schema: schema,
    schemaName: 'paper_metadata',
    strict: true,
    llm: const LlmConfig(model: 'openai/gpt-4o-mini'),
  );

  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    structuredExtraction: structuredExtraction,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  final extraction = await KreuzbergBridge.extractFile(
    'paper.pdf',
    null,
    extractionConfig,
  );

  // `structuredOutput` is nullable; nothing is printed when it is absent.
  final output = extraction.structuredOutput;
  if (output == null) {
    return;
  }
  print(output);
}
|
||||
```
|
||||
33
docs/snippets/dart/mcp/mcp_custom_client.md
Normal file
33
docs/snippets/dart/mcp/mcp_custom_client.md
Normal file
@@ -0,0 +1,33 @@
|
||||
<!-- snippet:syntax-only -->
|
||||
|
||||
```dart title="Dart"
|
||||
import 'dart:convert';
|
||||
import 'dart:io';
|
||||
|
||||
/// Spawns `kreuzberg mcp` and calls its `extract_file` tool over stdio.
///
/// MCP messages are JSON-RPC 2.0, written here one JSON object per line.
/// Every message carries `jsonrpc: '2.0'`, a request additionally carries an
/// `id` (without one it is a notification and gets no reply), and the
/// `initialize` handshake has to complete before any tool is called.
Future<void> main() async {
  final process = await Process.start('kreuzberg', <String>['mcp']);

  const initializeId = 1;
  const toolCallId = 2;

  // Serialises one JSON-RPC message onto the server's stdin.
  void send(Map<String, Object?> message) {
    process.stdin.writeln(
      jsonEncode(<String, Object?>{'jsonrpc': '2.0', ...message}),
    );
  }

  send(<String, Object?>{
    'id': initializeId,
    'method': 'initialize',
    'params': <String, Object?>{
      'protocolVersion': '2024-11-05',
      'capabilities': <String, Object?>{},
      'clientInfo': <String, Object?>{
        'name': 'dart-example',
        'version': '1.0.0',
      },
    },
  });
  await process.stdin.flush();

  final lines = process.stdout
      .transform(utf8.decoder)
      .transform(const LineSplitter());

  // stdin stays open until the tool reply arrives: closing it earlier lets
  // the server shut down before it has answered.
  await for (final line in lines) {
    final message = jsonDecode(line);
    if (message is! Map<String, Object?>) {
      continue;
    }

    final id = message['id'];
    if (id == initializeId) {
      // Handshake acknowledged: confirm it, then issue the tool call.
      send(<String, Object?>{'method': 'notifications/initialized'});
      send(<String, Object?>{
        'id': toolCallId,
        'method': 'tools/call',
        'params': <String, Object?>{
          'name': 'extract_file',
          'arguments': <String, Object?>{
            'path': 'document.pdf',
            'async': true,
          },
        },
      });
      await process.stdin.flush();
    } else if (id == toolCallId) {
      print(line);
      break;
    }
  }

  await process.stdin.close();
  await process.exitCode;
}
|
||||
```
|
||||
15
docs/snippets/dart/mcp/mcp_server_start.md
Normal file
15
docs/snippets/dart/mcp/mcp_server_start.md
Normal file
@@ -0,0 +1,15 @@
|
||||
<!-- snippet:syntax-only -->
|
||||
|
||||
```dart title="Dart"
|
||||
import 'dart:io';
|
||||
|
||||
/// Runs `kreuzberg mcp` with inherited stdio and exits with the child's exit
/// code once it terminates.
Future<void> main() async {
  const executable = 'kreuzberg';
  final arguments = <String>['mcp'];

  final server = await Process.start(
    executable,
    arguments,
    mode: ProcessStartMode.inheritStdio,
  );

  // Propagate the server's exit status to whoever launched this program.
  exit(await server.exitCode);
}
|
||||
```
|
||||
32
docs/snippets/dart/metadata/language_detection.md
Normal file
32
docs/snippets/dart/metadata/language_detection.md
Normal file
@@ -0,0 +1,32 @@
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
/// Extracts `document.pdf` with single-language detection (minimum confidence
/// 0.5) and prints the first detected language, if any.
Future<void> main() async {
  // Every argument is a literal, so the config can be a compile-time constant.
  const languageDetection = LanguageDetectionConfig(
    enabled: true,
    minConfidence: 0.5,
    detectMultiple: false,
  );

  final config = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    languageDetection: languageDetection,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  final result = await KreuzbergBridge.extractFile('document.pdf', null, config);

  // `detectedLanguages` is nullable and may also be empty.
  final detected = result.detectedLanguages;
  if (detected != null && detected.isNotEmpty) {
    print('Primary language: ${detected.first}');
  } else {
    print('No language detected');
  }
}
|
||||
```
|
||||
@@ -0,0 +1,36 @@
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
/// Extracts `multilingual.pdf` with multi-language detection (minimum
/// confidence 0.3) and prints every detected language.
Future<void> main() async {
  // Every argument is a literal, so the config can be a compile-time constant.
  const languageDetection = LanguageDetectionConfig(
    enabled: true,
    minConfidence: 0.3,
    detectMultiple: true,
  );

  final config = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    languageDetection: languageDetection,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  final result =
      await KreuzbergBridge.extractFile('multilingual.pdf', null, config);

  // `detectedLanguages` is nullable and may also be empty.
  final detected = result.detectedLanguages;
  if (detected == null || detected.isEmpty) {
    print('No languages detected');
    return;
  }

  print('Detected ${detected.length} language(s):');
  for (final language in detected) {
    print('  - $language');
  }
}
|
||||
```
|
||||
38
docs/snippets/dart/metadata/metadata.md
Normal file
38
docs/snippets/dart/metadata/metadata.md
Normal file
@@ -0,0 +1,38 @@
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
/// Extracts `document.pdf` and prints each metadata field that is set,
/// followed by every entry of the `additional` map.
Future<void> main() async {
  final result = await KreuzbergBridge.extractFile('document.pdf', null);
  final metadata = result.metadata;

  // Each nullable field is copied into a local before the null check. A check
  // on a public field does not promote it, which is what forced the `!`
  // assertions; a local does promote.
  final title = metadata.title;
  if (title != null) {
    print('Title: $title');
  }

  final subject = metadata.subject;
  if (subject != null) {
    print('Subject: $subject');
  }

  final authors = metadata.authors;
  if (authors != null) {
    print('Authors: ${authors.join(', ')}');
  }

  final keywords = metadata.keywords;
  if (keywords != null) {
    print('Keywords: ${keywords.join(', ')}');
  }

  final language = metadata.language;
  if (language != null) {
    print('Language: $language');
  }

  final createdAt = metadata.createdAt;
  if (createdAt != null) {
    print('Created: $createdAt');
  }

  final modifiedAt = metadata.modifiedAt;
  if (modifiedAt != null) {
    print('Modified: $modifiedAt');
  }

  final durationMs = metadata.extractionDurationMs;
  if (durationMs != null) {
    print('Extraction took: $durationMs ms');
  }

  for (final entry in metadata.additional.entries) {
    print('Additional[${entry.key}]: ${entry.value}');
  }
}
|
||||
```
|
||||
31
docs/snippets/dart/metadata/page_boundaries.md
Normal file
31
docs/snippets/dart/metadata/page_boundaries.md
Normal file
@@ -0,0 +1,31 @@
|
||||
```dart title="Dart"
|
||||
import 'dart:convert';

import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
/// Extracts `document.pdf` and prints the byte range and a short preview of
/// the first three pages, using the page boundaries from the metadata.
Future<void> main() async {
  final result = await KreuzbergBridge.extractFile('document.pdf', null);

  final pages = result.metadata.pages;
  if (pages == null) {
    print('No page structure available');
    return;
  }

  final boundaries = pages.boundaries;
  if (boundaries == null || boundaries.isEmpty) {
    print('No page boundaries available');
    return;
  }

  // `byteStart` / `byteEnd` are byte offsets, whereas `String.substring`
  // indexes UTF-16 code units. The two only agree for pure-ASCII text, so the
  // slice is taken on the encoded bytes instead.
  // NOTE(review): assumes the offsets refer to the UTF-8 encoding of
  // `result.content` - confirm against the binding documentation.
  final contentBytes = utf8.encode(result.content);

  for (final boundary in boundaries.take(3)) {
    final start = boundary.byteStart.toInt();
    final end = boundary.byteEnd.toInt();

    // `allowMalformed` keeps the preview printable even if an offset does not
    // fall on a character boundary.
    final pageText = utf8.decode(
      contentBytes.sublist(start, end),
      allowMalformed: true,
    );
    final previewEnd = pageText.length < 100 ? pageText.length : 100;

    print('Page ${boundary.pageNumber}:');
    print('  Byte range: $start-$end');
    print('  Preview: ${pageText.substring(0, previewEnd)}...');
  }
}
|
||||
```
|
||||
38
docs/snippets/dart/metadata/page_tracking_basic.md
Normal file
38
docs/snippets/dart/metadata/page_tracking_basic.md
Normal file
@@ -0,0 +1,38 @@
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
/// Extracts `document.pdf` with per-page extraction enabled and prints, for
/// each page, its content length and its table and image counts.
Future<void> main() async {
  // Every argument is a literal, so the config can be a compile-time constant.
  const pageConfig = PageConfig(
    extractPages: true,
    insertPageMarkers: false,
    markerFormat: '<!-- page {page} -->',
  );

  final config = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    pages: pageConfig,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  final result = await KreuzbergBridge.extractFile('document.pdf', null, config);

  // `pages` is nullable; report and stop when no per-page data came back.
  final pages = result.pages;
  if (pages == null) {
    print('No per-page content available');
    return;
  }

  for (final page in pages) {
    print('Page ${page.pageNumber}:');
    print('  Content: ${page.content.length} chars');
    print('  Tables: ${page.tables.length}');
    print('  Images: ${page.images.length}');
  }
}
|
||||
```
|
||||
20
docs/snippets/dart/metadata/tables.md
Normal file
20
docs/snippets/dart/metadata/tables.md
Normal file
@@ -0,0 +1,20 @@
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
/// Extracts `document.pdf` and, for every table, prints its page number, row
/// count, Markdown rendering, each row of cells, and any bounding box.
Future<void> main() async {
  final extraction = await KreuzbergBridge.extractFile('document.pdf', null);

  for (final table in extraction.tables) {
    final rows = table.cells;
    print('Table on page ${table.pageNumber} with ${rows.length} rows');
    print(table.markdown);

    // One line per row of cells.
    rows.forEach(print);

    // The bounding box is nullable and only printed when present.
    final box = table.boundingBox;
    if (box != null) {
      print('Bounding box: $box');
    }
  }
}
|
||||
```
|
||||
92
docs/snippets/dart/metadata/vector_database_integration.md
Normal file
92
docs/snippets/dart/metadata/vector_database_integration.md
Normal file
@@ -0,0 +1,92 @@
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
/// Immutable value holding one item to store: an identifier, its embedding
/// vector, the text content, and a metadata map.
class VectorRecord {
  /// Creates a record; all four fields are required.
  const VectorRecord({
    required this.id,
    required this.embedding,
    required this.content,
    required this.metadata,
  });

  /// Identifier of this record.
  final String id;

  /// Embedding vector; may be empty.
  final List<double> embedding;

  /// Text content of this record.
  final String content;

  /// Key/value metadata attached to this record.
  final Map<String, Object?> metadata;
}
|
||||
|
||||
/// Stand-in for a vector database write: prints a one-line summary for each
/// record that has an embedding and skips records whose embedding is empty.
void storeInVectorDatabase(List<VectorRecord> records) {
  final embedded = records.where((r) => r.embedding.isNotEmpty);
  for (final r in embedded) {
    final chars = r.content.length;
    final dims = r.embedding.length;
    print('Storing ${r.id}: $chars chars, $dims dims');
  }
}
|
||||
|
||||
/// Extracts [documentPath] with text chunking and a per-chunk embedding
/// config, wraps every chunk in a [VectorRecord] whose id is
/// `<documentId>_chunk_<index>`, passes the records to
/// [storeInVectorDatabase], and returns them.
///
/// A chunk without an embedding still produces a record, with an empty
/// embedding list.
Future<List<VectorRecord>> extractAndVectorize(
  String documentPath,
  String documentId,
) async {
  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    chunking: ChunkingConfig(
      maxCharacters: 512,
      overlap: 50,
      trim: true,
      chunkerType: ChunkerType.text,
      embedding: EmbeddingConfig(
        model: EmbeddingModelType.preset(name: 'balanced'),
        normalize: true,
        batchSize: 32,
        showDownloadProgress: false,
      ),
      sizing: ChunkSizing.characters(),
      prependHeadingContext: false,
    ),
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  final extraction = await KreuzbergBridge.extractFile(
    documentPath,
    null,
    extractionConfig,
  );
  final chunks = extraction.chunks ?? const <Chunk>[];

  // One record per chunk, in chunk order; the position doubles as the id
  // suffix and the `chunk_index` metadata value.
  final records = <VectorRecord>[
    for (var i = 0; i < chunks.length; i++)
      VectorRecord(
        id: '${documentId}_chunk_$i',
        content: chunks[i].content,
        embedding: chunks[i].embedding?.toList() ?? const <double>[],
        metadata: {
          'document_id': documentId,
          'chunk_index': i,
          'content_length': chunks[i].content.length,
        },
      ),
  ];

  storeInVectorDatabase(records);
  return records;
}
|
||||
|
||||
/// Runs [extractAndVectorize] on `document.pdf` with the document id `doc-1`.
Future<void> main() async {
  // The returned records are not used here.
  await extractAndVectorize('document.pdf', 'doc-1');
}
|
||||
```
|
||||
32
docs/snippets/dart/ocr/cloud_ocr_backend.md
Normal file
32
docs/snippets/dart/ocr/cloud_ocr_backend.md
Normal file
@@ -0,0 +1,32 @@
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
/// Prints the OCR backends reported at runtime, then extracts `scanned.pdf`
/// with the backend named `cloud` and prints the extracted content.
Future<void> main() async {
  // Cloud OCR backends are registered in the Rust core; Dart only selects a
  // registered backend by name. `KreuzbergBridge.listOcrBackends()` reports
  // the names available at runtime.
  final available = await KreuzbergBridge.listOcrBackends();
  print('Available OCR backends: $available');

  const ocr = OcrConfig(
    enabled: true,
    backend: 'cloud',
    language: 'en',
    autoRotate: false,
  );
  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    ocr: ocr,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  final extraction = await KreuzbergBridge.extractFile(
    'scanned.pdf',
    null,
    extractionConfig,
  );
  print(extraction.content);
}
|
||||
```
|
||||
31
docs/snippets/dart/ocr/image_extraction.md
Normal file
31
docs/snippets/dart/ocr/image_extraction.md
Normal file
@@ -0,0 +1,31 @@
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
/// Extracts `document.pdf` with image extraction switched on and prints how
/// many images the result contains (0 when the result has no image list).
Future<void> main() async {
  const imageConfig = ImageExtractionConfig(
    extractImages: true,
    targetDpi: 300,
    maxImageDimension: 4096,
    injectPlaceholders: false,
    autoAdjustDpi: true,
    minDpi: 150,
    maxDpi: 600,
    classify: false,
  );
  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    images: imageConfig,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  final extraction = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    extractionConfig,
  );
  final imageCount = extraction.images?.length ?? 0;
  print('Extracted images: $imageCount');
}
|
||||
```
|
||||
32
docs/snippets/dart/ocr/image_preprocessing.md
Normal file
32
docs/snippets/dart/ocr/image_preprocessing.md
Normal file
@@ -0,0 +1,32 @@
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
/// Extracts `document.pdf` with image extraction, placeholder injection,
/// classification and a per-page image limit configured, then prints how many
/// images the result contains (0 when the result has no image list).
Future<void> main() async {
  const imageConfig = ImageExtractionConfig(
    extractImages: true,
    targetDpi: 300,
    maxImageDimension: 4096,
    injectPlaceholders: true,
    autoAdjustDpi: true,
    minDpi: 150,
    maxDpi: 600,
    maxImagesPerPage: 20,
    classify: true,
  );
  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    images: imageConfig,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  final extraction = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    extractionConfig,
  );
  final imageCount = extraction.images?.length ?? 0;
  print('Preprocessed images: $imageCount');
}
|
||||
```
|
||||
26
docs/snippets/dart/ocr/ocr_easyocr.md
Normal file
26
docs/snippets/dart/ocr/ocr_easyocr.md
Normal file
@@ -0,0 +1,26 @@
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
/// Extracts `document.pdf` with OCR enabled on the `easyocr` backend
/// (language `en`) and prints the extracted content.
Future<void> main() async {
  const ocr = OcrConfig(
    enabled: true,
    backend: 'easyocr',
    language: 'en',
    autoRotate: false,
  );
  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    ocr: ocr,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  final extraction = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    extractionConfig,
  );
  print(extraction.content);
}
|
||||
```
|
||||
38
docs/snippets/dart/ocr/ocr_elements.md
Normal file
38
docs/snippets/dart/ocr/ocr_elements.md
Normal file
@@ -0,0 +1,38 @@
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
/// Extracts `scanned.pdf` with the `paddleocr` backend and OCR element output
/// requested, then prints text, recognition confidence, level and page number
/// for every returned element.
Future<void> main() async {
  const elementConfig = OcrElementConfig(
    includeElements: true,
    minLevel: OcrElementLevel.word,
    minConfidence: 0.0,
    buildHierarchy: false,
  );
  const ocr = OcrConfig(
    enabled: true,
    backend: 'paddleocr',
    language: 'en',
    autoRotate: false,
    elementConfig: elementConfig,
  );
  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    ocr: ocr,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  final extraction = await KreuzbergBridge.extractFile(
    'scanned.pdf',
    null,
    extractionConfig,
  );

  // A result without OCR elements is treated as an empty list.
  final ocrElements = extraction.ocrElements ?? const <OcrElement>[];
  for (final item in ocrElements) {
    final recognition = item.confidence.recognition.toStringAsFixed(2);
    print('Text: ${item.text}');
    print('Confidence: $recognition');
    print('Level: ${item.level}');
    print('Page: ${item.pageNumber}');
  }
}
|
||||
```
|
||||
26
docs/snippets/dart/ocr/ocr_extraction.md
Normal file
26
docs/snippets/dart/ocr/ocr_extraction.md
Normal file
@@ -0,0 +1,26 @@
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
/// Extracts `scanned.pdf` with OCR enabled on the `tesseract` backend
/// (language `eng`) and prints the extracted content.
Future<void> main() async {
  const ocr = OcrConfig(
    enabled: true,
    backend: 'tesseract',
    language: 'eng',
    autoRotate: false,
  );
  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    ocr: ocr,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  final extraction = await KreuzbergBridge.extractFile(
    'scanned.pdf',
    null,
    extractionConfig,
  );
  print(extraction.content);
}
|
||||
```
|
||||
26
docs/snippets/dart/ocr/ocr_force_all_pages.md
Normal file
26
docs/snippets/dart/ocr/ocr_force_all_pages.md
Normal file
@@ -0,0 +1,26 @@
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
/// Extracts `document.pdf` with `forceOcr: true` and the `tesseract` backend
/// (language `eng`), then prints the extracted content.
Future<void> main() async {
  const ocr = OcrConfig(
    enabled: true,
    backend: 'tesseract',
    language: 'eng',
    autoRotate: false,
  );
  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    // The one setting that differs from the plain Tesseract snippet.
    forceOcr: true,
    disableOcr: false,
    ocr: ocr,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  final extraction = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    extractionConfig,
  );
  print(extraction.content);
}
|
||||
```
|
||||
26
docs/snippets/dart/ocr/ocr_multi_language.md
Normal file
26
docs/snippets/dart/ocr/ocr_multi_language.md
Normal file
@@ -0,0 +1,26 @@
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
/// Extracts `multilingual.pdf` with the `tesseract` backend and the language
/// string `eng+deu+fra`, then prints the extracted content.
Future<void> main() async {
  const ocr = OcrConfig(
    enabled: true,
    backend: 'tesseract',
    // Several language codes joined with `+` in a single string.
    language: 'eng+deu+fra',
    autoRotate: false,
  );
  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    ocr: ocr,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  final extraction = await KreuzbergBridge.extractFile(
    'multilingual.pdf',
    null,
    extractionConfig,
  );
  print(extraction.content);
}
|
||||
```
|
||||
26
docs/snippets/dart/ocr/ocr_paddleocr.md
Normal file
26
docs/snippets/dart/ocr/ocr_paddleocr.md
Normal file
@@ -0,0 +1,26 @@
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
/// Extracts `document.pdf` with OCR enabled on the `paddleocr` backend
/// (language `en`) and prints the extracted content.
Future<void> main() async {
  const ocr = OcrConfig(
    enabled: true,
    backend: 'paddleocr',
    language: 'en',
    autoRotate: false,
  );
  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    ocr: ocr,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  final extraction = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    extractionConfig,
  );
  print(extraction.content);
}
|
||||
```
|
||||
16
docs/snippets/dart/plugins/clear_plugins.md
Normal file
16
docs/snippets/dart/plugins/clear_plugins.md
Normal file
@@ -0,0 +1,16 @@
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
/// Clears the OCR backend, post-processor and validator registries, one
/// after another, then prints a confirmation line.
Future<void> main() async {
  // The Dart binding exposes bulk-clear entry points for OCR backends,
  // post-processors, and validators only. Clearing document extractors is
  // not surfaced through flutter_rust_bridge; the built-in extractors are
  // registered automatically by the kreuzberg core when the library
  // initializes.
  await KreuzbergBridge.clearOcrBackends();
  await KreuzbergBridge.clearPostProcessors();
  await KreuzbergBridge.clearValidators();

  print('OCR backends, post-processors, and validators cleared');
}
|
||||
```
|
||||
15
docs/snippets/dart/plugins/embedding_backend.md
Normal file
15
docs/snippets/dart/plugins/embedding_backend.md
Normal file
@@ -0,0 +1,15 @@
|
||||
<!-- snippet:skip reason="Dart cannot construct the opaque BoxFn closure types required by createEmbeddingBackendDartImpl — flutter_rust_bridge generates them as RustOpaqueInterface with no Dart-side wrapper. Custom embedding backends must be written in Rust." -->
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
Future<void> main() async {
  // Intentionally empty: a Dart implementation of the `EmbeddingBackend`
  // trait cannot be plugged into the global registry.
  //
  // `Kreuzberg.registerEmbeddingBackend(impl)` exists, but the
  // `createEmbeddingBackendDartImpl` factory that would build `impl` takes
  // opaque `BoxFn*` closure values. flutter_rust_bridge generates those as
  // `RustOpaqueInterface` types with no Dart-side wrapper, so their
  // constructors are not reachable from Dart.
  //
  // Implement the backend in Rust as `Plugin + EmbeddingBackend` and register
  // it via `register_embedding_backend` in a Rust shim crate.
}
|
||||
```
|
||||
15
docs/snippets/dart/plugins/extractor_registration.md
Normal file
15
docs/snippets/dart/plugins/extractor_registration.md
Normal file
@@ -0,0 +1,15 @@
|
||||
<!-- snippet:skip reason="DocumentExtractor trait has no createDocumentExtractorDartImpl factory; custom extractors must be implemented in Rust." -->
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
Future<void> main() async {
  // Intentionally empty: custom document extractors cannot be registered
  // from Dart. `registerDocumentExtractor` exists in the `KreuzbergBridge`
  // API, but there is no `createDocumentExtractorDartImpl` factory with
  // which to construct a Dart-based extractor implementation to pass to it.
  //
  // Built-in extractors are registered automatically when the library
  // initializes. Custom extractors must be written in Rust and linked into
  // a Rust shim crate before the Dart host process loads the dynamic library.
}
|
||||
```
|
||||
17
docs/snippets/dart/plugins/list_plugins.md
Normal file
17
docs/snippets/dart/plugins/list_plugins.md
Normal file
@@ -0,0 +1,17 @@
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
/// Queries the four plugin registries in turn (document extractors,
/// post-processors, OCR backends, validators) and prints what each returns.
Future<void> main() async {
  final extractorNames = await KreuzbergBridge.listDocumentExtractors();
  print('Registered extractors: $extractorNames');

  final processorNames = await KreuzbergBridge.listPostProcessors();
  print('Registered processors: $processorNames');

  final ocrBackendNames = await KreuzbergBridge.listOcrBackends();
  print('Registered OCR backends: $ocrBackendNames');

  final validatorNames = await KreuzbergBridge.listValidators();
  print('Registered validators: $validatorNames');
}
|
||||
```
|
||||
16
docs/snippets/dart/plugins/min_length_validator.md
Normal file
16
docs/snippets/dart/plugins/min_length_validator.md
Normal file
@@ -0,0 +1,16 @@
|
||||
<!-- snippet:skip reason="Dart cannot construct the opaque BoxFn closure types required by createValidatorDartImpl — flutter_rust_bridge generates them as RustOpaqueInterface with no Dart-side wrapper. Custom validators must be written in Rust." -->
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
Future<void> main() async {
  // Intentionally empty: a Dart implementation of the `Validator` trait that
  // asserts a minimum content length cannot be plugged into the global
  // validator registry.
  //
  // `Kreuzberg.registerValidator(impl)` exists, but the
  // `createValidatorDartImpl` factory that would build `impl` takes opaque
  // `BoxFn*` closure arguments. flutter_rust_bridge generates those as
  // `RustOpaqueInterface` types with no Dart-side wrapper, so their
  // constructors are not reachable from Dart.
  //
  // Implement the validator in Rust and register it via `register_validator`
  // in a Rust shim crate.
}
|
||||
```
|
||||
15
docs/snippets/dart/plugins/pdf_metadata_extractor.md
Normal file
15
docs/snippets/dart/plugins/pdf_metadata_extractor.md
Normal file
@@ -0,0 +1,15 @@
|
||||
<!-- snippet:skip reason="DocumentExtractor trait has no createDocumentExtractorDartImpl factory; custom extractors must be implemented in Rust." -->
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
Future<void> main() async {
  // Intentionally empty: custom document extractors cannot be implemented in
  // Dart. A PDF metadata extractor would have to implement the
  // `DocumentExtractor` trait, but flutter_rust_bridge does not generate the
  // `createDocumentExtractorDartImpl` factory function needed to do so.
  //
  // Implement the PDF metadata extractor in Rust and register it via a
  // Rust shim crate that links kreuzberg before the Dart host loads the
  // dynamic library.
}
|
||||
```
|
||||
17
docs/snippets/dart/plugins/pdf_only_processor.md
Normal file
17
docs/snippets/dart/plugins/pdf_only_processor.md
Normal file
@@ -0,0 +1,17 @@
|
||||
<!-- snippet:skip reason="Dart cannot construct the opaque BoxFn closure types required by createPostProcessorDartImpl — flutter_rust_bridge generates them as RustOpaqueInterface with no Dart-side wrapper. The ProcessingStage enum is also not surfaced. Custom post-processors must be written in Rust." -->
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
Future<void> main() async {
  // Intentionally empty: a Dart implementation of the `PostProcessor` trait
  // that gates on the PDF MIME type cannot be plugged into the global
  // registry.
  //
  // `Kreuzberg.registerPostProcessor(impl)` exists, but the
  // `createPostProcessorDartImpl` factory that would build `impl` takes
  // opaque `BoxFn*` closure values plus a `BoxFnDartFnFutureProcessingStage`.
  // flutter_rust_bridge generates those as `RustOpaqueInterface` types with
  // no Dart-side wrapper, so their constructors are not reachable from Dart.
  // The `ProcessingStage` enum is not exported to Dart either.
  //
  // Implement the post-processor in Rust as `Plugin + PostProcessor` and
  // register it via `register_post_processor` in a Rust shim crate.
}
|
||||
```
|
||||
15
docs/snippets/dart/plugins/plugin_extractor.md
Normal file
15
docs/snippets/dart/plugins/plugin_extractor.md
Normal file
@@ -0,0 +1,15 @@
|
||||
<!-- snippet:skip reason="DocumentExtractor trait has no createDocumentExtractorDartImpl factory in the generated Dart binding; custom extractors must be written and registered in Rust." -->
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
Future<void> main() async {
  // Intentionally empty: custom document extractors cannot be implemented in
  // Dart. `traits.dart` does include the `DocumentExtractor` abstract class,
  // but flutter_rust_bridge does not generate a
  // `createDocumentExtractorDartImpl` factory function, so there is no way to
  // bridge Dart closures into the extractor registry.
  //
  // Implement custom extractors in Rust and register them via a Rust shim
  // crate that links kreuzberg before the Dart host loads the dynamic library.
}
|
||||
```
|
||||
15
docs/snippets/dart/plugins/plugin_logging.md
Normal file
15
docs/snippets/dart/plugins/plugin_logging.md
Normal file
@@ -0,0 +1,15 @@
|
||||
<!-- snippet:skip reason="Plugin trait lifecycle methods (initialize, shutdown) are not exposed in Dart; logging must be implemented in Rust." -->
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
Future<void> main() async {
  // Intentionally empty: plugin lifecycle logging hooks are not available in
  // Dart. The `Plugin` trait methods (`initialize`, `shutdown`) that enable
  // structured logging are only exposed in Rust, so Dart plugins
  // (`OcrBackend`, `PostProcessor`, `Validator`, `EmbeddingBackend`) cannot
  // implement them directly.
  //
  // For logging, implement plugins in Rust using the `tracing` or `log`
  // crate, then register them via a Rust shim crate before the Dart host
  // loads the dynamic library.
}
|
||||
```
|
||||
16
docs/snippets/dart/plugins/plugin_testing.md
Normal file
16
docs/snippets/dart/plugins/plugin_testing.md
Normal file
@@ -0,0 +1,16 @@
|
||||
<!-- snippet:skip reason="Testing Dart plugins via package:test is not practical because test closure capture varies by test framework; test via integration after registration or via Rust unit tests." -->
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
Future<void> main() async {
  // Testing plugins from Dart differs from testing them in Rust. A registered
  // Dart plugin cannot be unit-tested in isolation, because the registration
  // mechanism uses closures captured in the plugin factory and async
  // contexts vary between test frameworks.
  //
  // Recommended approaches:
  // 1. Keep the core plugin logic separate from the registration glue and
  //    unit-test that logic directly with mock data.
  // 2. Write integration tests that register the plugin and exercise it via
  //    `KreuzbergBridge.extractFile` or other extraction methods.
  // 3. For complex plugins, implement them in Rust and test with
  //    `#[tokio::test]`.
}
|
||||
```
|
||||
16
docs/snippets/dart/plugins/plugin_validator.md
Normal file
16
docs/snippets/dart/plugins/plugin_validator.md
Normal file
@@ -0,0 +1,16 @@
|
||||
<!-- snippet:skip reason="Dart cannot construct the opaque BoxFn closure types required by createValidatorDartImpl — flutter_rust_bridge generates them as RustOpaqueInterface with no Dart-side wrapper. Custom validators must be written in Rust." -->
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
Future<void> main() async {
  // Intentionally empty: a Dart implementation of the `Validator` trait that
  // gates on MIME type cannot be plugged into the global validator registry.
  //
  // `Kreuzberg.registerValidator(impl)` exists, but the
  // `createValidatorDartImpl` factory that would build `impl` requires opaque
  // `BoxFn*` closure values. flutter_rust_bridge generates those as
  // `RustOpaqueInterface` types with no Dart-side wrapper, so their
  // constructors are not reachable from Dart.
  //
  // Implement the validator in Rust and register it via `register_validator`
  // in a Rust shim crate.
}
|
||||
```
|
||||
18
docs/snippets/dart/plugins/quality_score_validator.md
Normal file
18
docs/snippets/dart/plugins/quality_score_validator.md
Normal file
@@ -0,0 +1,18 @@
|
||||
<!-- snippet:skip reason="Dart cannot construct the opaque BoxFn closure types required by createValidatorDartImpl — flutter_rust_bridge generates them as RustOpaqueInterface with no Dart-side wrapper. Custom validators must be written in Rust." -->
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
Future<void> main() async {
  // Intentionally empty: a Dart implementation of the `Validator` trait that
  // inspects `metadata.additional["quality_score"]` cannot be plugged into
  // the global validator registry.
  //
  // The Dart binding exposes `Kreuzberg.registerValidator(impl)` and the
  // `createValidatorDartImpl` factory, but every closure parameter of that
  // factory (`validate`, `shouldValidate`, `priority`) is typed as an opaque
  // `BoxFn*` whose constructor is not surfaced through flutter_rust_bridge.
  //
  // Implement the validator in Rust as `Plugin + Validator` and register it
  // via `register_validator` in a Rust shim crate that links kreuzberg
  // before the Dart host process loads the dynamic library.
}
|
||||
```
|
||||
16
docs/snippets/dart/plugins/stateful_plugin.md
Normal file
16
docs/snippets/dart/plugins/stateful_plugin.md
Normal file
@@ -0,0 +1,16 @@
|
||||
<!-- snippet:skip reason="Dart cannot construct the opaque BoxFn closure types required by createPostProcessorDartImpl — flutter_rust_bridge generates them as RustOpaqueInterface with no Dart-side wrapper. The closure-captured state pattern is therefore unreachable. Custom plugins must be written in Rust." -->
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
Future<void> main() async {
  // Intentionally empty: a stateful Dart `PostProcessor` that captures
  // mutable counters in its closure cannot be plugged into the global
  // registry.
  //
  // `Kreuzberg.registerPostProcessor(impl)` exists, but the
  // `createPostProcessorDartImpl` factory that would build `impl` takes
  // opaque `BoxFn*` closure values whose constructors are not surfaced
  // through flutter_rust_bridge, so the closure-capture pattern is
  // unreachable from Dart.
  //
  // Implement stateful plugins in Rust using `Mutex`/`AtomicU64` for
  // interior mutability, then register them in a Rust shim crate.
}
|
||||
```
|
||||
23
docs/snippets/dart/plugins/unregister_plugins.md
Normal file
23
docs/snippets/dart/plugins/unregister_plugins.md
Normal file
@@ -0,0 +1,23 @@
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
/// Walks the renderer registry through list → unregister one → clear all,
/// printing the registered renderers after each step.
Future<void> main() async {
  // Custom-plugin construction (createXxxDartImpl) is unreachable from Dart
  // because of the opaque BoxFn closure types in the flutter_rust_bridge
  // binding, so the lifecycle is exercised against the *built-in* renderer
  // registry (markdown / html / djot / plain) instead.

  // Fetches the current renderer list and prints it under the given stage.
  Future<void> report(String stage) async {
    final names = await KreuzbergBridge.listRenderers();
    print('Renderers $stage: $names');
  }

  await report('before unregister');

  // Unregister a single renderer by name.
  await KreuzbergBridge.unregisterRenderer('plain');
  await report('after unregister');

  // Bulk-clear all renderers (including the remaining built-ins).
  await KreuzbergBridge.clearRenderers();
  await report('after clear');
}
|
||||
```
|
||||
16
docs/snippets/dart/plugins/word_count_processor.md
Normal file
16
docs/snippets/dart/plugins/word_count_processor.md
Normal file
@@ -0,0 +1,16 @@
|
||||
<!-- snippet:skip reason="Dart cannot construct the opaque BoxFn closure types required by createPostProcessorDartImpl — flutter_rust_bridge generates them as RustOpaqueInterface with no Dart-side wrapper. The ProcessingStage enum is also not surfaced. Custom post-processors must be written in Rust." -->
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
Future<void> main() async {
  // Intentionally empty: a Dart implementation of the `PostProcessor` trait
  // that counts words in the extracted content cannot be plugged into the
  // global registry.
  //
  // `Kreuzberg.registerPostProcessor(impl)` exists, but the
  // `createPostProcessorDartImpl` factory that would build `impl` takes
  // opaque `BoxFn*` closure values plus a `BoxFnDartFnFutureProcessingStage`.
  // flutter_rust_bridge generates those as `RustOpaqueInterface` types with
  // no Dart-side wrapper, so their constructors are not reachable from Dart.
  //
  // Implement the post-processor in Rust as `Plugin + PostProcessor` and
  // register it via `register_post_processor` in a Rust shim crate.
}
|
||||
```
|
||||
32
docs/snippets/dart/utils/chunking.md
Normal file
32
docs/snippets/dart/utils/chunking.md
Normal file
@@ -0,0 +1,32 @@
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
/// Extracts `document.pdf` with the markdown chunker configured, then prints
/// the chunk count followed by each chunk's index and content length.
Future<void> main() async {
  const chunking = ChunkingConfig(
    maxCharacters: 1000,
    overlap: 200,
    trim: true,
    chunkerType: ChunkerType.markdown,
    sizing: ChunkSizing.characters(),
    prependHeadingContext: true,
  );
  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    chunking: chunking,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  final extraction = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    extractionConfig,
  );

  // A result without chunks is treated as an empty list.
  final pieces = extraction.chunks ?? const [];
  print('Chunks: ${pieces.length}');
  for (final piece in pieces) {
    final position = piece.metadata.chunkIndex;
    print('Chunk $position: ${piece.content.length} chars');
  }
}
|
||||
```
|
||||
50
docs/snippets/dart/utils/chunking_rag.md
Normal file
50
docs/snippets/dart/utils/chunking_rag.md
Normal file
@@ -0,0 +1,50 @@
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
/// Extracts `research_paper.pdf` with text chunking and a per-chunk embedding
/// config, then prints for every chunk its 1-based position, byte range, a
/// content preview, and the embedding size when an embedding is present.
Future<void> main() async {
  final config = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    chunking: const ChunkingConfig(
      maxCharacters: 500,
      overlap: 50,
      trim: true,
      chunkerType: ChunkerType.text,
      sizing: ChunkSizing.characters(),
      prependHeadingContext: false,
      embedding: EmbeddingConfig(
        model: EmbeddingModelType.preset(name: 'balanced'),
        normalize: true,
        batchSize: 32,
        showDownloadProgress: false,
      ),
    ),
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  final result = await KreuzbergBridge.extractFile(
    'research_paper.pdf',
    null,
    config,
  );

  // A result without chunks is treated as an empty list.
  final chunks = result.chunks ?? const [];
  for (final chunk in chunks) {
    final index = chunk.metadata.chunkIndex;
    final total = chunk.metadata.totalChunks;
    final start = chunk.metadata.byteStart;
    final end = chunk.metadata.byteEnd;

    // Only mark the preview with an ellipsis when the content was actually
    // cut; short chunks are printed in full, without a misleading "...".
    final content = chunk.content;
    final preview =
        content.length > 100 ? '${content.substring(0, 100)}...' : content;

    // `chunkIndex` is printed 1-based so it lines up with `totalChunks`.
    print('Chunk ${index + 1}/$total');
    print('Position: $start-$end');
    print('Content: $preview');

    final embedding = chunk.embedding;
    if (embedding != null) {
      print('Embedding: ${embedding.length} dimensions');
    }
  }
}
|
||||
```
|
||||
35
docs/snippets/dart/utils/embedding_with_chunking.md
Normal file
35
docs/snippets/dart/utils/embedding_with_chunking.md
Normal file
@@ -0,0 +1,35 @@
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
/// Extracts `document.pdf` with text chunking and a per-chunk embedding
/// config, then prints how many chunks the result contains (0 when the
/// result has no chunk list).
Future<void> main() async {
  const embedding = EmbeddingConfig(
    model: EmbeddingModelType.preset(name: 'balanced'),
    normalize: true,
    batchSize: 32,
    showDownloadProgress: false,
  );
  const chunking = ChunkingConfig(
    maxCharacters: 1024,
    overlap: 100,
    trim: true,
    chunkerType: ChunkerType.text,
    sizing: ChunkSizing.characters(),
    prependHeadingContext: false,
    embedding: embedding,
  );
  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    chunking: chunking,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  final extraction = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    extractionConfig,
  );
  final chunkCount = extraction.chunks?.length ?? 0;
  print('Chunks with embeddings: $chunkCount');
}
|
||||
```
|
||||
33
docs/snippets/dart/utils/keyword_extraction_example.md
Normal file
33
docs/snippets/dart/utils/keyword_extraction_example.md
Normal file
@@ -0,0 +1,33 @@
|
||||
```dart title="Dart"
|
||||
import 'package:flutter_rust_bridge/flutter_rust_bridge.dart' show Int64List;
|
||||
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
/// Runs keyword extraction on `research_paper.pdf` and prints each keyword
/// with its score.
Future<void> main() async {
  // Keyword settings: YAKE, at most 10 keywords, minimum score 0.3, and an
  // n-gram range given as the two-element list [1, 3].
  final keywordConfig = KeywordConfig(
    algorithm: KeywordAlgorithm.yake,
    maxKeywords: 10,
    minScore: 0.3,
    ngramRange: Int64List.fromList(<int>[1, 3]),
  );

  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    keywords: keywordConfig,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  final result = await KreuzbergBridge.extractFile(
    'research_paper.pdf',
    null,
    extractionConfig,
  );

  final keywords = result.extractedKeywords;
  if (keywords == null) {
    // Nothing to print when the result carries no keyword list.
    return;
  }
  for (final keyword in keywords) {
    print('${keyword.text} (score: ${keyword.score})');
  }
}
|
||||
```
|
||||
30
docs/snippets/dart/utils/quality_processing_example.md
Normal file
30
docs/snippets/dart/utils/quality_processing_example.md
Normal file
@@ -0,0 +1,30 @@
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
/// Extracts `scanned_document.pdf` with quality processing enabled, prints
/// the quality score when one is present, then prints every processing
/// warning.
Future<void> main() async {
  final extractionConfig = ExtractionConfig(
    enableQualityProcessing: true,
    useCache: true,
    forceOcr: false,
    disableOcr: false,
    outputFormat: OutputFormat.plain(),
    resultFormat: ResultFormat.unified,
    includeDocumentStructure: false,
    useLayoutForMarkdown: false,
    maxArchiveDepth: 3,
  );

  final result = await KreuzbergBridge.extractFile(
    'scanned_document.pdf',
    null,
    extractionConfig,
  );

  final score = result.qualityScore;
  if (score != null) {
    // Scores below 0.5 are reported as a low-quality warning; anything else
    // is printed as a plain score. Both use two decimal places.
    final message = score < 0.5
        ? 'Warning: Low quality extraction (${score.toStringAsFixed(2)})'
        : 'Quality score: ${score.toStringAsFixed(2)}';
    print(message);
  }

  final warnings = result.processingWarnings;
  for (final warning in warnings) {
    print('Warning: $warning');
  }
}
|
||||
```
|
||||
18
docs/snippets/dart/utils/standalone_embed.md
Normal file
18
docs/snippets/dart/utils/standalone_embed.md
Normal file
@@ -0,0 +1,18 @@
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
/// Embeds two sample strings directly, with no document extraction, and
/// prints the number of vectors returned and the length of the first one.
Future<void> main() async {
  final texts = <String>['Hello, world!', 'Kreuzberg is fast'];

  const embeddingConfig = EmbeddingConfig(
    model: EmbeddingModelType.preset(name: 'balanced'),
    batchSize: 32,
    normalize: true,
    showDownloadProgress: false,
  );

  final embeddings = await KreuzbergBridge.embedTexts(
    texts,
    embeddingConfig,
  );

  print('Vectors: ${embeddings.length}');
  // `first` is safe here because `texts` above is a non-empty literal.
  // NOTE(review): assumes one vector is returned per input text — confirm.
  print('Dimensions: ${embeddings.first.length}');
}
|
||||
```
|
||||
24
docs/snippets/dart/utils/token_reduction.md
Normal file
24
docs/snippets/dart/utils/token_reduction.md
Normal file
@@ -0,0 +1,24 @@
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
/// Extracts `document.pdf` with token reduction in `'moderate'` mode and
/// prints the length of the resulting content.
Future<void> main() async {
  const reduction = TokenReductionOptions(
    mode: 'moderate',
    preserveImportantWords: true,
  );

  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    tokenReduction: reduction,
    outputFormat: OutputFormat.plain(),
    resultFormat: ResultFormat.unified,
    includeDocumentStructure: false,
    useLayoutForMarkdown: false,
    maxArchiveDepth: 3,
  );

  final result = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    extractionConfig,
  );
  print('Reduced content length: ${result.content.length}');
}
|
||||
```
|
||||
24
docs/snippets/dart/utils/token_reduction_example.md
Normal file
24
docs/snippets/dart/utils/token_reduction_example.md
Normal file
@@ -0,0 +1,24 @@
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
/// Extracts `verbose_document.pdf` with token reduction in `'moderate'` mode
/// and prints the content length after reduction.
Future<void> main() async {
  const reduction = TokenReductionOptions(
    preserveImportantWords: true,
    mode: 'moderate',
  );

  final extractionConfig = ExtractionConfig(
    tokenReduction: reduction,
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  final result = await KreuzbergBridge.extractFile(
    'verbose_document.pdf',
    null,
    extractionConfig,
  );
  print('Content length after reduction: ${result.content.length}');
}
|
||||
```
|
||||
77
docs/snippets/dart/utils/vector_database_integration.md
Normal file
77
docs/snippets/dart/utils/vector_database_integration.md
Normal file
@@ -0,0 +1,77 @@
|
||||
```dart title="Dart"
|
||||
import 'dart:typed_data';
|
||||
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
/// Bundles an id, text content, an embedding vector, and string metadata
/// into a single record.
class VectorRecord {
  /// Identifier of this record.
  final String id;

  /// Text the record was built from.
  final String content;

  /// Embedding vector stored with [content].
  final Float64List embedding;

  /// String key/value pairs attached to the record.
  final Map<String, String> metadata;

  /// Creates a record; every field must be supplied.
  VectorRecord({
    required this.id,
    required this.content,
    required this.embedding,
    required this.metadata,
  });
}
|
||||
|
||||
/// Extracts [documentPath] with chunking and embeddings enabled and returns
/// one [VectorRecord] for every chunk that has an embedding.
///
/// Record ids have the form `<documentId>_chunk_<index>`, where the index is
/// the chunk's position in the extraction result. Chunks without an
/// embedding are skipped, so the indices in the ids may have gaps.
Future<List<VectorRecord>> extractAndVectorize(
  String documentPath,
  String documentId,
) async {
  const embeddingConfig = EmbeddingConfig(
    model: EmbeddingModelType.preset(name: 'balanced'),
    normalize: true,
    batchSize: 32,
    showDownloadProgress: false,
  );

  const chunkingConfig = ChunkingConfig(
    maxCharacters: 512,
    overlap: 50,
    trim: true,
    chunkerType: ChunkerType.text,
    sizing: ChunkSizing.characters(),
    prependHeadingContext: false,
    embedding: embeddingConfig,
  );

  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    chunking: chunkingConfig,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  final result = await KreuzbergBridge.extractFile(
    documentPath,
    null,
    extractionConfig,
  );

  // `chunks` is nullable on the result; treat a missing list as empty.
  final chunks = result.chunks ?? const [];
  final records = <VectorRecord>[];

  for (var index = 0; index < chunks.length; index++) {
    final chunk = chunks[index];
    final embedding = chunk.embedding;
    // Only chunks that carry an embedding become records.
    if (embedding != null) {
      final text = chunk.content;
      final metadata = <String, String>{
        'document_id': documentId,
        'chunk_index': index.toString(),
        'content_length': text.length.toString(),
      };
      records.add(
        VectorRecord(
          id: '${documentId}_chunk_$index',
          content: text,
          embedding: embedding,
          metadata: metadata,
        ),
      );
    }
  }

  return records;
}
|
||||
|
||||
/// Vectorizes `document.pdf` under the id `doc-001` and prints how many
/// records were produced.
Future<void> main() async {
  const documentPath = 'document.pdf';
  const documentId = 'doc-001';

  final records = await extractAndVectorize(documentPath, documentId);
  print('Vector records: ${records.length}');
}
|
||||
```
|
||||
Reference in New Issue
Block a user