Files
fil/docs/snippets/java/metadata/PageBoundaries.md

26 lines
868 B
Markdown
Raw Normal View History

2026-06-01 23:40:55 +02:00
import dev.kreuzberg.*;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;

// Extract a document and print a short preview of its first pages, using the
// per-page byte boundaries reported in the extraction metadata.
var result = Kreuzberg.extractFileSync("document.pdf");
var pages = result.metadata().pages();

// Page metadata and boundaries are both null-checked: either may be absent.
if (pages != null && pages.boundaries() != null) {
    var boundaries = pages.boundaries();

    // The boundaries are applied to the UTF-8 encoding of the content, so the
    // text is sliced as bytes and decoded again rather than sliced as chars.
    var contentBytes = result.content().getBytes(StandardCharsets.UTF_8);

    // Show at most the first three pages; shorter documents must not throw.
    var shown = boundaries.subList(0, Math.min(3, boundaries.size()));

    for (var boundary : shown) {
        var pageBytes = Arrays.copyOfRange(
            contentBytes,
            boundary.byteStart(),
            boundary.byteEnd()
        );
        var pageText = new String(pageBytes, StandardCharsets.UTF_8);

        // Clamp the preview so pages shorter than 100 characters do not throw.
        var preview = pageText.substring(0, Math.min(100, pageText.length()));

        System.out.println("Page " + boundary.pageNumber() + ":");
        System.out.println(" Byte range: " + boundary.byteStart() +
            "-" + boundary.byteEnd());
        System.out.println(" Preview: " + preview + "...");
    }
}