Files
fil/docs/snippets/kotlin/utils/chunking_rag.md

36 lines
1.1 KiB
Markdown
Raw Normal View History

2026-06-01 23:40:55 +02:00
```kotlin title="Kotlin"
import dev.kreuzberg.*
import java.nio.file.Paths
import java.util.Optional
/**
 * Extracts `research_paper.pdf` with chunking and per-chunk embeddings enabled,
 * then prints a short summary line set for every chunk in the result.
 */
fun main() {
    // Embedding settings: the "balanced" preset model with normalization switched on.
    val embeddingConfig = EmbeddingConfig.builder()
        .withModel(EmbeddingModelType.Preset("balanced"))
        .withNormalize(true)
        .build()

    // Chunking settings: max 500 characters per chunk, overlap of 50
    // (presumably also characters; confirm against the library docs),
    // with the embedding configuration attached.
    val chunkingConfig = ChunkingConfig.builder()
        .withMaxCharacters(500L)
        .withOverlap(50L)
        .withEmbedding(Optional.of(embeddingConfig))
        .build()

    val extractionConfig = ExtractionConfig.builder()
        .withChunking(Optional.of(chunkingConfig))
        .build()

    // The second argument is deliberately left null here.
    val sourcePath = Paths.get("research_paper.pdf")
    val extraction = Kreuzberg.extractFileSync(sourcePath, null, extractionConfig)

    // A null chunk collection is treated the same as an empty one.
    extraction.chunks().orEmpty().forEach { piece ->
        val info = piece.metadata()
        // chunkIndex() is shifted by one for display.
        println("Chunk ${info.chunkIndex() + 1}/${info.totalChunks()}")
        println("Position: ${info.byteStart()}-${info.byteEnd()}")

        // Only the first 100 characters are shown; the "..." suffix is always appended.
        println("Content: ${piece.content().take(100)}...")

        // The embedding may be absent; report its size only when present.
        val vector = piece.embedding()
        if (vector != null) {
            println("Embedding: ${vector.size} dimensions")
        }
    }
}
```