19 lines
533 B
Markdown
19 lines
533 B
Markdown
```java title="Java"
|
|
import dev.kreuzberg.Kreuzberg;
|
|
import dev.kreuzberg.ExtractionResult;
|
|
import dev.kreuzberg.ExtractionConfig;
|
|
import dev.kreuzberg.OcrConfig;
|
|
import dev.kreuzberg.ImagePreprocessingConfig;
|
|
|
|
ExtractionConfig config = ExtractionConfig.builder()
|
|
.ocr(OcrConfig.builder()
|
|
.backend("tesseract")
|
|
.build())
|
|
.imagePreprocessing(ImagePreprocessingConfig.builder()
|
|
.targetDpi(300)
|
|
.build())
|
|
.build();
|
|
|
|
ExtractionResult result = Kreuzberg.extractFile("scanned.pdf", config);
|
|
```
|