Files
fil/docs/snippets/dart/config/config_programmatic.md
Henrik Jess Nielsen b4c07d3693
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s
Nomad changes
2026-06-01 23:40:55 +02:00

1.6 KiB

import 'package:kreuzberg/kreuzberg.dart';

/// Builds an [ExtractionConfig] entirely in code and runs one file extraction
/// with it.
///
/// The Tesseract, OCR and chunking sections are declared as compile-time
/// constants first and then assembled into the top-level configuration, with
/// every option passed explicitly. After extracting `document.pdf`, the length
/// of the returned content is printed.
Future<void> main() async {
  // The same language string is passed to both the OCR section and the nested
  // Tesseract section.
  const ocrLanguages = 'eng+deu';

  // Tesseract-specific options. The numeric `psm` / `oem` values are forwarded
  // as-is; see the Tesseract documentation for what each value selects.
  const tesseract = TesseractConfig(
    language: ocrLanguages,
    psm: 6,
    outputFormat: 'text',
    oem: 3,
    minConfidence: 0.0,
    enableTableDetection: false,
    tableMinConfidence: 0.5,
    tableColumnThreshold: 20,
    tableRowThresholdRatio: 0.5,
    useCache: true,
    classifyUsePreAdaptedTemplates: false,
    languageModelNgramOn: false,
    tesseditDontBlkrejGoodWds: false,
    tesseditDontRowrejGoodWds: false,
    tesseditEnableDictCorrection: false,
    tesseditCharWhitelist: '',
    tesseditCharBlacklist: '',
    tesseditUsePrimaryParamsModel: false,
    textordSpaceSizeIsVariable: false,
    thresholdingMethod: false,
  );

  // OCR section: selects the backend by name and embeds the Tesseract options.
  const ocr = OcrConfig(
    enabled: true,
    backend: 'tesseract',
    language: ocrLanguages,
    autoRotate: false,
    tesseractConfig: tesseract,
  );

  // Chunking section: character-based sizing with overlapping chunks.
  const chunking = ChunkingConfig(
    maxCharacters: 1000,
    overlap: 200,
    trim: true,
    chunkerType: ChunkerType.text,
    sizing: ChunkSizing.characters(),
    prependHeadingContext: false,
  );

  // Top-level configuration assembled from the sections above.
  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    ocr: ocr,
    chunking: chunking,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  // The second positional argument is passed as `null`, exactly as before.
  final extraction = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    extractionConfig,
  );

  final contentLength = extraction.content.length;
  print('Content length: $contentLength');
}