This commit is contained in:
25
docs/snippets/java/metadata/PageBoundaries.md
Normal file
25
docs/snippets/java/metadata/PageBoundaries.md
Normal file
@@ -0,0 +1,25 @@
|
||||
import dev.kreuzberg.*;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
|
||||
|
||||
// Extract the document; the result exposes the text content and its metadata.
var result = Kreuzberg.extractFileSync("document.pdf");

// Page information may be absent, so both levels are checked before use.
var pages = result.metadata().pages();
if (pages != null && pages.boundaries() != null) {
    var boundaries = pages.boundaries();

    // The boundary offsets are applied to the UTF-8 encoding of the content,
    // so the slicing below works on bytes rather than on the String itself.
    var contentBytes = result.content().getBytes(StandardCharsets.UTF_8);

    // Show at most the first three pages. Clamping the upper bound keeps
    // subList from throwing when the document has fewer than three pages.
    var shownCount = Math.min(3, boundaries.size());
    for (var boundary : boundaries.subList(0, shownCount)) {
        var pageBytes = Arrays.copyOfRange(
            contentBytes,
            boundary.byteStart(),
            boundary.byteEnd()
        );
        var pageText = new String(pageBytes, StandardCharsets.UTF_8);

        // Clamp the preview length so a page shorter than 100 characters
        // does not trigger StringIndexOutOfBoundsException.
        var preview = pageText.substring(0, Math.min(100, pageText.length()));

        System.out.println("Page " + boundary.pageNumber() + ":");
        System.out.println(" Byte range: " + boundary.byteStart() +
            "-" + boundary.byteEnd());
        System.out.println(" Preview: " + preview + "...");
    }
}
|
||||
Reference in New Issue
Block a user