Files
fil/docs/snippets/java/config/tesseract_config.md

19 lines
573 B
Markdown
Raw Permalink Normal View History

2026-06-01 23:40:55 +02:00
```java title="Java"
import dev.kreuzberg.ExtractionConfig;
import dev.kreuzberg.OcrConfig;
import dev.kreuzberg.TesseractConfig;
// Whitelist string: ASCII upper- and lower-case letters, digits, a space,
// and the punctuation . , ! ?
String allowedChars =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789 .,!?";

// Three builders are nested here: the Tesseract options are passed to the
// OCR options, which are in turn passed to the top-level extraction options.
ExtractionConfig config = ExtractionConfig.builder()
    .ocr(OcrConfig.builder()
        // Three language codes joined with '+'.
        .language("eng+fra+deu")
        .tesseractConfig(TesseractConfig.builder()
            // NOTE(review): psm/oem look like Tesseract's page-segmentation
            // and engine-mode numbers; confirm the meaning of 6 and 1
            // against the Tesseract documentation.
            .psm(6)
            .oem(1)
            .minConfidence(0.8)
            .tesseditCharWhitelist(allowedChars)
            .enableTableDetection(true)
            .build())
        .build())
    .build();
```