Files
fil/docs/snippets/dart/config/tesseract_config.md

49 lines
1.4 KiB
Markdown
Raw Normal View History

2026-06-01 23:40:55 +02:00
```dart title="Dart"
import 'package:kreuzberg/kreuzberg.dart';
Future<void> main() async {
final config = ExtractionConfig(
useCache: true,
enableQualityProcessing: true,
forceOcr: false,
disableOcr: false,
ocr: const OcrConfig(
enabled: true,
backend: 'tesseract',
language: 'eng+deu',
autoRotate: false,
tesseractConfig: TesseractConfig(
language: 'eng+deu',
psm: 6,
outputFormat: 'text',
oem: 3,
minConfidence: 0.0,
enableTableDetection: false,
tableMinConfidence: 0.5,
tableColumnThreshold: 20,
tableRowThresholdRatio: 0.5,
useCache: true,
classifyUsePreAdaptedTemplates: false,
languageModelNgramOn: false,
tesseditDontBlkrejGoodWds: false,
tesseditDontRowrejGoodWds: false,
tesseditEnableDictCorrection: false,
tesseditCharWhitelist: '',
tesseditCharBlacklist: '',
tesseditUsePrimaryParamsModel: false,
textordSpaceSizeIsVariable: false,
thresholdingMethod: false,
),
),
resultFormat: ResultFormat.unified,
outputFormat: OutputFormat.plain(),
includeDocumentStructure: false,
maxArchiveDepth: 3,
useLayoutForMarkdown: false,
);
final result = await KreuzbergBridge.extractFile('scanned.pdf', null, config);
print('OCR text: ${result.content}');
}
```