Files
fil/docs/snippets/dart/api/combining_all_features.md
Henrik Jess Nielsen b4c07d3693
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s
Nomad changes
2026-06-01 23:40:55 +02:00

1.8 KiB

import 'package:kreuzberg/kreuzberg.dart';

Future<void> main() async {
  final config = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    // OCR: Tesseract on English text
    forceOcr: false,
    disableOcr: false,
    ocr: const OcrConfig(
      enabled: true,
      backend: 'tesseract',
      language: 'eng',
      autoRotate: false,
    ),
    // Chunking: ~800-character markdown chunks with 100-char overlap
    chunking: const ChunkingConfig(
      maxCharacters: 800,
      overlap: 100,
      trim: true,
      chunkerType: ChunkerType.markdown,
      sizing: ChunkSizing.characters(),
      prependHeadingContext: true,
    ),
    // Image extraction
    images: const ImageExtractionConfig(
      extractImages: true,
      targetDpi: 150,
      maxImageDimension: 4096,
      injectPlaceholders: false,
      autoAdjustDpi: true,
      minDpi: 72,
      maxDpi: 300,
      classify: false,
    ),
    // Output: markdown with full document structure
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.markdown(),
    includeDocumentStructure: true,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  final result = await KreuzbergBridge.extractFile('report.pdf', null, config);

  print('Content (${result.content.length} chars):');
  final preview = result.content.substring(
    0,
    result.content.length < 200 ? result.content.length : 200,
  );
  print(preview);

  if (result.chunks != null) {
    print('\nChunks: ${result.chunks!.length}');
  }
  print('Tables: ${result.tables.length}');
  if (result.detectedLanguages != null) {
    print('Languages: ${result.detectedLanguages}');
  }
  if (result.extractionMethod != null) {
    print('Extraction method: ${result.extractionMethod}');
  }
}