This commit is contained in:
27
docs/snippets/java/advanced/ChunkPageMapping.md
Normal file
27
docs/snippets/java/advanced/ChunkPageMapping.md
Normal file
@@ -0,0 +1,27 @@
|
||||
import dev.kreuzberg.*;
|
||||
|
||||
var config = ExtractionConfig.builder()
|
||||
.chunking(ChunkingConfig.builder()
|
||||
.chunkSize(500)
|
||||
.overlap(50)
|
||||
.build())
|
||||
.pages(PageConfig.builder()
|
||||
.extractPages(true)
|
||||
.build())
|
||||
.build();
|
||||
|
||||
// Extract the document synchronously with the configuration built above.
var result = Kreuzberg.extractFileSync("document.pdf", config);

// Only report when the result actually carries a chunk list.
if (result.chunks() != null) {
    for (var chunk : result.chunks()) {
        var meta = chunk.metadata();

        // Chunks without a first-page value are skipped entirely.
        if (meta.firstPage() != null) {
            // A chunk confined to one page prints "Page N"; otherwise "Pages A-B".
            var pageRange = meta.firstPage().equals(meta.lastPage())
                ? "Page " + meta.firstPage()
                : "Pages " + meta.firstPage() + "-" + meta.lastPage();

            // Clamp the preview length: substring(0, 50) would throw
            // StringIndexOutOfBoundsException for chunks shorter than 50 chars.
            var text = chunk.text();
            var preview = text.substring(0, Math.min(50, text.length()));

            System.out.println("Chunk: " + preview + "... (" + pageRange + ")");
        }
    }
}
|
||||
Reference in New Issue
Block a user