19 lines
573 B
Markdown
19 lines
573 B
Markdown
```java title="Java"
import dev.kreuzberg.ExtractionConfig;
import dev.kreuzberg.OcrConfig;
import dev.kreuzberg.TesseractConfig;

ExtractionConfig config = ExtractionConfig.builder()
    .ocr(OcrConfig.builder()
        .language("eng+fra+deu")
        .tesseractConfig(TesseractConfig.builder()
            .psm(6)
            .oem(1)
            .minConfidence(0.8)
            .tesseditCharWhitelist("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789 .,!?")
            .enableTableDetection(true)
            .build())
        .build())
    .build();
```