// 1.0 KiB
// 1.0 KiB
import dev.kreuzberg.Kreuzberg;
import dev.kreuzberg.ExtractionResult;
import dev.kreuzberg.ChunkingConfig;
import dev.kreuzberg.ExtractionConfig;
import dev.kreuzberg.OcrConfig;
import dev.kreuzberg.TesseractConfig;
/**
 * Example that assembles an {@link ExtractionConfig} entirely in code and uses it
 * for a single file extraction.
 */
public final class ProgrammaticConfigExample {
    /**
     * Builds the OCR, chunking and top-level extraction settings, extracts
     * {@code document.pdf} with them, and prints the length of the extracted content.
     *
     * @param args command-line arguments; not read by this example
     * @throws Exception any exception raised by the configuration or extraction
     *     calls is propagated to the caller unchanged
     */
    public static void main(String[] args) throws Exception {
        // Tesseract-specific options: psm is set to 6.
        TesseractConfig tesseractSettings = TesseractConfig.builder()
            .psm(6)
            .build();

        // OCR settings: "tesseract" backend with the language string "eng+deu".
        OcrConfig ocrSettings = OcrConfig.builder()
            .backend("tesseract")
            .language("eng+deu")
            .tesseractConfig(tesseractSettings)
            .build();

        // Chunking settings: maxChars 1000, maxOverlap 200.
        ChunkingConfig chunkingSettings = ChunkingConfig.builder()
            .maxChars(1000)
            .maxOverlap(200)
            .build();

        // Top-level configuration: the pieces above, with caching and quality
        // processing both switched on.
        ExtractionConfig extractionSettings = ExtractionConfig.builder()
            .ocr(ocrSettings)
            .chunking(chunkingSettings)
            .useCache(true)
            .enableQualityProcessing(true)
            .build();

        ExtractionResult extracted = Kreuzberg.extractFile("document.pdf", extractionSettings);

        System.out.printf("Content length: %d%n", extracted.getContent().length());
    }
}