Files
fil/docs/snippets/dart/utils/chunking_rag.md
Henrik Jess Nielsen b4c07d3693
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s
Nomad changes
2026-06-01 23:40:55 +02:00

1.5 KiB

import 'package:kreuzberg/kreuzberg.dart';

/// Extracts `research_paper.pdf` with chunking and per-chunk embeddings
/// configured, then prints a short summary of every returned chunk.
///
/// For each chunk the summary contains its position in the sequence, the
/// `byteStart`/`byteEnd` values from its metadata, a preview of its content
/// and, when an embedding is present, the embedding's length.
Future<void> main() async {
  final config = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    // Chunk size is capped at 500 with an overlap of 50; presumably both are
    // measured in characters, given `ChunkSizing.characters()` - confirm
    // against the package documentation.
    chunking: const ChunkingConfig(
      maxCharacters: 500,
      overlap: 50,
      trim: true,
      chunkerType: ChunkerType.text,
      sizing: ChunkSizing.characters(),
      prependHeadingContext: false,
      embedding: EmbeddingConfig(
        model: EmbeddingModelType.preset(name: 'balanced'),
        normalize: true,
        batchSize: 32,
        showDownloadProgress: false,
      ),
    ),
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  final result = await KreuzbergBridge.extractFile(
    'research_paper.pdf',
    null,
    config,
  );

  // `chunks` is nullable; fall back to an empty list so the loop is a no-op.
  final chunks = result.chunks ?? const [];
  for (final chunk in chunks) {
    final metadata = chunk.metadata;
    // `chunkIndex` is presumably zero-based (hence the + 1) - confirm.
    print('Chunk ${metadata.chunkIndex + 1}/${metadata.totalChunks}');
    print('Position: ${metadata.byteStart}-${metadata.byteEnd}');
    print('Content: ${_preview(chunk.content)}');

    // Copy to a local so the null check promotes the type.
    final embedding = chunk.embedding;
    if (embedding != null) {
      print('Embedding: ${embedding.length} dimensions');
    }
  }
}

/// Returns a display preview of [content].
///
/// Content of at most [maxLength] UTF-16 code units is returned unchanged.
/// Longer content is cut to at most [maxLength] code units and suffixed with
/// `...`, so the ellipsis only appears when text was actually dropped.
String _preview(String content, {int maxLength = 100}) {
  if (content.length <= maxLength) return content;

  var end = maxLength;
  // Step back one unit if the cut would fall between the two halves of a
  // surrogate pair, which would leave a lone high surrogate in the output.
  if (end > 0) {
    final lastUnit = content.codeUnitAt(end - 1);
    if (lastUnit >= 0xD800 && lastUnit <= 0xDBFF) end--;
  }
  return '${content.substring(0, end)}...';
}