533 B
533 B
import dev.kreuzberg.Kreuzberg;
import dev.kreuzberg.ExtractionResult;
import dev.kreuzberg.ExtractionConfig;
import dev.kreuzberg.OcrConfig;
import dev.kreuzberg.ImagePreprocessingConfig;
// OCR settings: pick the backend by its string identifier.
OcrConfig ocrSettings = OcrConfig.builder()
    .backend("tesseract")
    .build();

// Image preprocessing settings: request a target DPI of 300.
ImagePreprocessingConfig preprocessingSettings = ImagePreprocessingConfig.builder()
    .targetDpi(300)
    .build();

// Combine both pieces into the configuration handed to the extractor.
ExtractionConfig config = ExtractionConfig.builder()
    .ocr(ocrSettings)
    .imagePreprocessing(preprocessingSettings)
    .build();

// Run extraction on the input file using the configuration assembled above.
ExtractionResult result = Kreuzberg.extractFile("scanned.pdf", config);