Files
fil/docs/snippets/java/config/config_programmatic.md
Henrik Jess Nielsen b4c07d3693
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s
Nomad changes
2026-06-01 23:40:55 +02:00

1.0 KiB

import dev.kreuzberg.Kreuzberg;
import dev.kreuzberg.ExtractionResult;
import dev.kreuzberg.ChunkingConfig;
import dev.kreuzberg.ExtractionConfig;
import dev.kreuzberg.OcrConfig;
import dev.kreuzberg.TesseractConfig;

/**
 * Example that assembles an {@code ExtractionConfig} entirely in code, without
 * a configuration file, and uses it to extract a single document.
 */
public final class ProgrammaticConfigExample {
    /**
     * Builds the OCR, chunking, and top-level extraction settings, extracts
     * {@code document.pdf} with them, and prints the length of the extracted
     * content to standard output.
     *
     * @param args command-line arguments; not used
     * @throws Exception if any call made here throws; nothing is caught locally
     */
    public static void main(String[] args) throws Exception {
        // NOTE(review): psm is presumably Tesseract's page segmentation mode;
        // confirm the meaning of 6 against the binding's documentation.
        TesseractConfig tesseractSettings = TesseractConfig.builder()
            .psm(6)
            .build();

        // OCR settings: backend name, language string, and the Tesseract-specific options above.
        OcrConfig ocrSettings = OcrConfig.builder()
            .backend("tesseract")
            .language("eng+deu")
            .tesseractConfig(tesseractSettings)
            .build();

        // Chunking settings: maxChars and maxOverlap as configured for this example.
        ChunkingConfig chunkingSettings = ChunkingConfig.builder()
            .maxChars(1000)
            .maxOverlap(200)
            .build();

        // Top-level configuration combining the pieces, with caching and
        // quality processing both switched on.
        ExtractionConfig extractionConfig = ExtractionConfig.builder()
            .ocr(ocrSettings)
            .chunking(chunkingSettings)
            .useCache(true)
            .enableQualityProcessing(true)
            .build();

        ExtractionResult extracted = Kreuzberg.extractFile("document.pdf", extractionConfig);
        System.out.printf("Content length: %d%n", extracted.getContent().length());
    }
}