This commit is contained in:
18
docs/snippets/java/config/tesseract_config.md
Normal file
18
docs/snippets/java/config/tesseract_config.md
Normal file
@@ -0,0 +1,18 @@
|
||||
```java title="Java"
|
||||
import dev.kreuzberg.ExtractionConfig;
|
||||
import dev.kreuzberg.OcrConfig;
|
||||
import dev.kreuzberg.TesseractConfig;
|
||||
|
||||
ExtractionConfig config = ExtractionConfig.builder()
|
||||
.ocr(OcrConfig.builder()
|
||||
.language("eng+fra+deu")
|
||||
.tesseractConfig(TesseractConfig.builder()
|
||||
.psm(6)
|
||||
.oem(1)
|
||||
.minConfidence(0.8)
|
||||
.tesseditCharWhitelist("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789 .,!?")
|
||||
.enableTableDetection(true)
|
||||
.build())
|
||||
.build())
|
||||
.build();
|
||||
```
|
||||
Reference in New Issue
Block a user