Files
fil/docs/snippets/kotlin/utils/chunking_rag.md
Henrik Jess Nielsen b4c07d3693
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s
Nomad changes
2026-06-01 23:40:55 +02:00

1.1 KiB

import dev.kreuzberg.*
import java.nio.file.Paths
import java.util.Optional

fun main() {
    val embedding = EmbeddingConfig.builder()
        .withModel(EmbeddingModelType.Preset("balanced"))
        .withNormalize(true)
        .build()

    val chunking = ChunkingConfig.builder()
        .withMaxCharacters(500L)
        .withOverlap(50L)
        .withEmbedding(Optional.of(embedding))
        .build()

    val config = ExtractionConfig.builder()
        .withChunking(Optional.of(chunking))
        .build()

    val result = Kreuzberg.extractFileSync(Paths.get("research_paper.pdf"), null, config)
    for (chunk in result.chunks().orEmpty()) {
        val metadata = chunk.metadata()
        println("Chunk ${metadata.chunkIndex() + 1}/${metadata.totalChunks()}")
        println("Position: ${metadata.byteStart()}-${metadata.byteEnd()}")
        val text = chunk.content()
        val preview = text.take(100)
        println("Content: $preview...")
        chunk.embedding()?.let { vector ->
            println("Embedding: ${vector.size} dimensions")
        }
    }
}