Files
fil/docs/snippets/java/config/config_programmatic.md

32 lines
1.0 KiB
Markdown
Raw Normal View History

2026-06-01 23:40:55 +02:00
```java title="Java"
import dev.kreuzberg.Kreuzberg;
import dev.kreuzberg.ExtractionResult;
import dev.kreuzberg.ChunkingConfig;
import dev.kreuzberg.ExtractionConfig;
import dev.kreuzberg.OcrConfig;
import dev.kreuzberg.TesseractConfig;
/**
 * Example: assembling an {@link ExtractionConfig} in code (rather than loading it
 * from a file) and running a single extraction with it.
 */
public final class ProgrammaticConfigExample {
    /**
     * Builds the OCR and chunking settings, combines them into one extraction
     * configuration, extracts {@code document.pdf} with it, and prints the length
     * of the extracted content to standard output.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if any of the calls made here fails
     */
    public static void main(String[] args) throws Exception {
        // Tesseract-specific options; nested inside the OCR settings below.
        TesseractConfig tesseractOptions = TesseractConfig.builder()
                .psm(6)
                .build();

        // OCR settings: "tesseract" backend with the "eng+deu" language string.
        OcrConfig ocrSettings = OcrConfig.builder()
                .backend("tesseract")
                .language("eng+deu")
                .tesseractConfig(tesseractOptions)
                .build();

        // Chunking settings: up to 1000 characters per chunk, up to 200 of overlap.
        ChunkingConfig chunkingSettings = ChunkingConfig.builder()
                .maxChars(1000)
                .maxOverlap(200)
                .build();

        // Top-level configuration tying the pieces together, with caching and
        // quality processing switched on.
        ExtractionConfig config = ExtractionConfig.builder()
                .ocr(ocrSettings)
                .chunking(chunkingSettings)
                .useCache(true)
                .enableQualityProcessing(true)
                .build();

        ExtractionResult result = Kreuzberg.extractFile("document.pdf", config);
        System.out.printf("Content length: %d%n", result.getContent().length());
    }
}
```