Files
fil/docs/snippets/kotlin/config/chunking_config.md
Henrik Jess Nielsen b4c07d3693
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s
Nomad changes
2026-06-01 23:40:55 +02:00

2.3 KiB

import dev.kreuzberg.*
import java.nio.file.Paths
import java.util.Optional

fun main() {
    val chunking = ChunkingConfig.builder()
        .withMaxCharacters(1000L)
        .withOverlap(200L)
        .build()

    val config = ExtractionConfig.builder()
        .withChunking(Optional.of(chunking))
        .build()

    val result = Kreuzberg.extractFileSync(Paths.get("document.pdf"), null, config)
    val chunks = result.chunks().orEmpty()
    println("Chunks: ${chunks.size}")
    for (chunk in chunks) {
        println("Length: ${chunk.content().length}")
    }
}
import dev.kreuzberg.*
import java.nio.file.Paths
import java.util.Optional

fun main() {
    val sizing = ChunkSizing.Tokenizer("Xenova/gpt-4o", Optional.empty())

    val chunking = ChunkingConfig.builder()
        .withMaxCharacters(500L)
        .withOverlap(50L)
        .withChunkerType(ChunkerType.Markdown)
        .withSizing(sizing)
        .build()

    val config = ExtractionConfig.builder()
        .withChunking(Optional.of(chunking))
        .build()

    val result = Kreuzberg.extractFileSync(Paths.get("document.md"), null, config)
    for (chunk in result.chunks().orEmpty()) {
        chunk.metadata()?.headingContext()?.let { ctx ->
            for (heading in ctx.headings()) {
                println("Heading L${heading.level()}: ${heading.text()}")
            }
        }
        val text = chunk.content()
        println("Content: ${text.take(100)}...")
    }
}
import dev.kreuzberg.*
import java.nio.file.Paths
import java.util.Optional

fun main() {
    val chunking = ChunkingConfig.builder()
        .withMaxCharacters(500L)
        .withOverlap(50L)
        .withChunkerType(ChunkerType.Markdown)
        .withPrependHeadingContext(true)
        .build()

    val config = ExtractionConfig.builder()
        .withChunking(Optional.of(chunking))
        .build()

    val result = Kreuzberg.extractFileSync(Paths.get("document.md"), null, config)
    for (chunk in result.chunks().orEmpty()) {
        // Each chunk's content is prefixed with its heading breadcrumb
        val text = chunk.content()
        println("Content: ${text.take(100)}...")
    }
}