Files
fil/docs/snippets/java/ocr/ocr_elements.md
Henrik Jess Nielsen b4c07d3693
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s
Nomad changes
2026-06-01 23:40:55 +02:00

1.4 KiB

import dev.kreuzberg.Kreuzberg;
import dev.kreuzberg.ExtractionResult;
import dev.kreuzberg.KreuzbergException;
import dev.kreuzberg.ExtractionConfig;
import dev.kreuzberg.OcrConfig;
import dev.kreuzberg.types.OcrElement;
import java.io.IOException;

/**
 * Example program: runs OCR extraction on {@code scanned.pdf} and prints the
 * details of every OCR element found in the result.
 */
public class Main {
    /**
     * Builds an extraction configuration with the {@code paddle-ocr} backend and
     * language {@code en}, extracts {@code scanned.pdf}, and writes each returned
     * OCR element to standard output. An {@link IOException} or
     * {@link KreuzbergException} is reported on standard error.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        try {
            // OCR settings are assembled first, then attached to the extraction config.
            OcrConfig ocrSettings = OcrConfig.builder()
                .backend("paddle-ocr")
                .language("en")
                .build();
            ExtractionConfig extractionSettings = ExtractionConfig.builder()
                .ocr(ocrSettings)
                .build();

            ExtractionResult extracted = Kreuzberg.extractFile("scanned.pdf", extractionSettings);

            // Nothing to print when the result carries no OCR element collection.
            if (extracted.getOcrElements() == null) {
                return;
            }

            for (OcrElement item : extracted.getOcrElements()) {
                printElement(item);
            }
        } catch (IOException | KreuzbergException failure) {
            System.err.println("Extraction failed: " + failure.getMessage());
        }
    }

    /**
     * Prints one OCR element: its text, recognition confidence, geometry, and,
     * when a rotation is present, the rotation angle. A blank line follows each
     * element.
     *
     * @param item the OCR element to print
     */
    private static void printElement(OcrElement item) {
        System.out.printf("Text: %s%n", item.getText());
        System.out.printf("Confidence: %.2f%n", item.getConfidence().getRecognition());
        System.out.printf("Geometry: %s%n", item.getGeometry());
        // Rotation is optional on an element; the angle line is emitted only when it is set.
        if (item.getRotation() != null) {
            System.out.printf("Rotation: %.1f°%n", item.getRotation().getAngle());
        }
        System.out.println();
    }
}