This commit is contained in:
35
docs/snippets/java/advanced/chunking_rag.md
Normal file
35
docs/snippets/java/advanced/chunking_rag.md
Normal file
@@ -0,0 +1,35 @@
|
||||
```java title="Java"
|
||||
import dev.kreuzberg.Kreuzberg;
|
||||
import dev.kreuzberg.ExtractionResult;
|
||||
import dev.kreuzberg.ExtractionConfig;
|
||||
import dev.kreuzberg.ChunkingConfig;
|
||||
import dev.kreuzberg.EmbeddingConfig;
|
||||
import dev.kreuzberg.EmbeddingModelType;
|
||||
import java.util.List;
|
||||
|
||||
ExtractionConfig config = ExtractionConfig.builder()
|
||||
.chunking(ChunkingConfig.builder()
|
||||
.maxChars(500)
|
||||
.maxOverlap(50)
|
||||
.embedding(EmbeddingConfig.builder()
|
||||
.model(EmbeddingModelType.preset("all-mpnet-base-v2"))
|
||||
.normalize(true)
|
||||
.batchSize(16)
|
||||
.build())
|
||||
.build())
|
||||
.build();
|
||||
|
||||
try {
|
||||
ExtractionResult result = Kreuzberg.extractFile("research_paper.pdf", config);
|
||||
|
||||
List<Object> chunks = result.getChunks() != null ? result.getChunks() : List.of();
|
||||
System.out.println("Found " + chunks.size() + " chunks for RAG pipeline");
|
||||
|
||||
for (int i = 0; i < Math.min(3, chunks.size()); i++) {
|
||||
Object chunk = chunks.get(i);
|
||||
System.out.println("Chunk " + i + ": " + chunk.toString().substring(0, Math.min(80, chunk.toString().length())) + "...");
|
||||
}
|
||||
} catch (Exception ex) {
|
||||
System.err.println("RAG extraction failed: " + ex.getMessage());
|
||||
}
|
||||
```
|
||||
Reference in New Issue
Block a user