Files
fil/docs/snippets/java/config/tesseract_config.md
Henrik Jess Nielsen b4c07d3693
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s
Nomad changes
2026-06-01 23:40:55 +02:00

573 B

import dev.kreuzberg.ExtractionConfig;
import dev.kreuzberg.OcrConfig;
import dev.kreuzberg.TesseractConfig;

// Characters passed to tesseditCharWhitelist: ASCII letters, digits, space,
// and a handful of punctuation marks.
String allowedChars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789 .,!?";

// Tesseract-specific options, built separately so the final configuration
// reads top-down instead of as three levels of nested builders.
// NOTE(review): psm/oem are numeric mode selectors; see the Tesseract
// documentation for what 6 and 1 select — confirm they suit your documents.
TesseractConfig tesseract = TesseractConfig.builder()
    .psm(6)
    .oem(1)
    .minConfidence(0.8)
    .tesseditCharWhitelist(allowedChars)
    .enableTableDetection(true)
    .build();

// OCR options: the language string plus the Tesseract options built above.
OcrConfig ocr = OcrConfig.builder()
    .language("eng+fra+deu")
    .tesseractConfig(tesseract)
    .build();

// Top-level extraction configuration carrying the OCR options.
ExtractionConfig config = ExtractionConfig.builder()
    .ocr(ocr)
    .build();