Files
fil/docs/snippets/java/config/ElementBasedOutput.md
Henrik Jess Nielsen b4c07d3693
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s
Nomad changes
2026-06-01 23:40:55 +02:00

1.8 KiB

import io.kreuzberg.Kreuzberg;
import io.kreuzberg.ExtractionConfig;
import io.kreuzberg.ExtractionResult;
import io.kreuzberg.Element;
import io.kreuzberg.OutputFormat;

public class ElementBasedOutput {
    public static void main(String[] args) {
        // Configure element-based output
        ExtractionConfig config = new ExtractionConfig();
        config.setOutputFormat(OutputFormat.ELEMENT_BASED);

        // Extract document
        ExtractionResult result = Kreuzberg.extractFileSync("document.pdf", config);

        // Access elements
        for (Element element : result.getElements()) {
            System.out.println("Type: " + element.getElementType());

            String text = element.getText();
            if (text.length() > 100) {
                text = text.substring(0, 100);
            }
            System.out.println("Text: " + text);

            if (element.getMetadata().getPageNumber() != null) {
                System.out.println("Page: " + element.getMetadata().getPageNumber());
            }

            if (element.getMetadata().getCoordinates() != null) {
                var coords = element.getMetadata().getCoordinates();
                System.out.printf("Coords: (%f, %f) - (%f, %f)%n",
                    coords.getLeft(), coords.getTop(),
                    coords.getRight(), coords.getBottom());
            }

            System.out.println("---");
        }

        // Filter by element type
        result.getElements().stream()
            .filter(e -> "title".equals(e.getElementType()))
            .forEach(title -> {
                String level = (String) title.getMetadata()
                    .getAdditional()
                    .getOrDefault("level", "unknown");
                System.out.printf("[%s] %s%n", level, title.getText());
            });
    }
}