This commit is contained in:
35
docs/snippets/kotlin/advanced/chunking_rag.md
Normal file
35
docs/snippets/kotlin/advanced/chunking_rag.md
Normal file
@@ -0,0 +1,35 @@
|
||||
```kotlin title="Kotlin"
|
||||
import dev.kreuzberg.*
|
||||
import java.nio.file.Paths
|
||||
import java.util.Optional
|
||||
|
||||
fun main() {
|
||||
val embedding = EmbeddingConfig.builder()
|
||||
.withModel(EmbeddingModelType.Preset("balanced"))
|
||||
.withNormalize(true)
|
||||
.build()
|
||||
|
||||
val chunking = ChunkingConfig.builder()
|
||||
.withMaxCharacters(500L)
|
||||
.withOverlap(50L)
|
||||
.withEmbedding(Optional.of(embedding))
|
||||
.build()
|
||||
|
||||
val config = ExtractionConfig.builder()
|
||||
.withChunking(Optional.of(chunking))
|
||||
.build()
|
||||
|
||||
val result = Kreuzberg.extractFileSync(Paths.get("research_paper.pdf"), null, config)
|
||||
for (chunk in result.chunks().orEmpty()) {
|
||||
val metadata = chunk.metadata()
|
||||
println("Chunk ${metadata.chunkIndex() + 1}/${metadata.totalChunks()}")
|
||||
println("Position: ${metadata.byteStart()}-${metadata.byteEnd()}")
|
||||
val text = chunk.content()
|
||||
val preview = text.take(100)
|
||||
println("Content: $preview...")
|
||||
chunk.embedding()?.let { embedding ->
|
||||
println("Embedding: ${embedding.size} dimensions")
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
Reference in New Issue
Block a user