This commit is contained in:
56
docs/snippets/dart/config/config_programmatic.md
Normal file
56
docs/snippets/dart/config/config_programmatic.md
Normal file
@@ -0,0 +1,56 @@
|
||||
```dart title="Dart"
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
|
||||
/// Builds an [ExtractionConfig] entirely in code, runs an extraction on
/// `document.pdf` with it, and prints the length of the extracted content.
Future<void> main() async {
  // Tesseract-specific settings, hoisted into a compile-time constant so the
  // OCR section below stays short.
  const tesseract = TesseractConfig(
    language: 'eng+deu',
    psm: 6,
    outputFormat: 'text',
    oem: 3,
    minConfidence: 0.0,
    enableTableDetection: false,
    tableMinConfidence: 0.5,
    tableColumnThreshold: 20,
    tableRowThresholdRatio: 0.5,
    useCache: true,
    classifyUsePreAdaptedTemplates: false,
    languageModelNgramOn: false,
    tesseditDontBlkrejGoodWds: false,
    tesseditDontRowrejGoodWds: false,
    tesseditEnableDictCorrection: false,
    tesseditCharWhitelist: '',
    tesseditCharBlacklist: '',
    tesseditUsePrimaryParamsModel: false,
    textordSpaceSizeIsVariable: false,
    thresholdingMethod: false,
  );

  // OCR section: selects the 'tesseract' backend and attaches the settings
  // declared above.
  const ocr = OcrConfig(
    enabled: true,
    backend: 'tesseract',
    language: 'eng+deu',
    autoRotate: false,
    tesseractConfig: tesseract,
  );

  // Chunking section.
  const chunking = ChunkingConfig(
    maxCharacters: 1000,
    overlap: 200,
    trim: true,
    chunkerType: ChunkerType.text,
    sizing: ChunkSizing.characters(),
    prependHeadingContext: false,
  );

  // Top-level configuration assembled from the sections above plus the
  // remaining scalar options.
  final extractionConfig = ExtractionConfig(
    useCache: true,
    enableQualityProcessing: true,
    forceOcr: false,
    disableOcr: false,
    ocr: ocr,
    chunking: chunking,
    resultFormat: ResultFormat.unified,
    outputFormat: OutputFormat.plain(),
    includeDocumentStructure: false,
    maxArchiveDepth: 3,
    useLayoutForMarkdown: false,
  );

  // NOTE(review): the second positional argument is passed as null;
  // presumably an optional MIME type hint — confirm against the
  // KreuzbergBridge API.
  final extraction = await KreuzbergBridge.extractFile(
    'document.pdf',
    null,
    extractionConfig,
  );

  print('Content length: ${extraction.content.length}');
}
|
||||
```
|
||||
Reference in New Issue
Block a user