Files
fil/docs/snippets/kotlin/api/combining_all_features.md
Henrik Jess Nielsen b4c07d3693
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s
Nomad changes
2026-06-01 23:40:55 +02:00

1.3 KiB

import dev.kreuzberg.*
import java.nio.file.Paths
import java.util.Optional

/**
 * Builds OCR, chunking, and image-extraction settings, combines them into a
 * single [ExtractionConfig], runs a synchronous extraction of `report.pdf`,
 * and prints a short summary of the result to standard output.
 */
fun main() {
    // OCR settings: "tesseract" backend, "eng" language.
    val ocrSettings = OcrConfig.builder()
        .withBackend("tesseract")
        .withLanguage("eng")
        .build()

    // Chunking settings: Markdown chunker, max 800 characters with an overlap of 100,
    // and heading context prepended.
    val chunkSettings = ChunkingConfig.builder()
        .withMaxCharacters(800L)
        .withOverlap(100L)
        .withChunkerType(ChunkerType.MARKDOWN)
        .withPrependHeadingContext(true)
        .build()

    // Image settings: image extraction switched on.
    val imageSettings = ImageExtractionConfig.builder()
        .withExtractImages(true)
        .build()

    // Top-level configuration; the optional sub-configs are handed over wrapped in Optional.
    val extractionConfig = ExtractionConfig.builder()
        .withOcr(Optional.of(ocrSettings))
        .withForceOcr(false)
        .withChunking(Optional.of(chunkSettings))
        .withOutputFormat(OutputFormat.MARKDOWN)
        .withIncludeDocumentStructure(true)
        .withImages(Optional.of(imageSettings))
        .withUseCache(true)
        .withEnableQualityProcessing(true)
        .build()

    // The second argument is passed as null.
    // NOTE(review): presumably an optional MIME-type hint — confirm against the API.
    val inputPath = Paths.get("report.pdf")
    val extraction = Kreuzberg.extractFileSync(inputPath, null, extractionConfig)

    // Report the total length, then only the first 200 characters of the content.
    val text = extraction.content()
    val charCount = text.length
    println("Content ($charCount chars):")
    val preview = text.take(200)
    println(preview)

    // The chunk line is printed only when a chunk list is present.
    val chunkList = extraction.chunks()
    if (chunkList != null) {
        println("\nChunks: ${chunkList.size}")
    }

    // The table line is always printed; a missing table list counts as 0.
    val tableCount = extraction.tables()?.size ?: 0
    println("Tables: $tableCount")

    // Language and extraction-method lines are printed only when the values are present.
    val languages = extraction.detectedLanguages()
    if (languages != null) {
        println("Languages: $languages")
    }

    val method = extraction.extractionMethod()
    if (method != null) {
        println("Extraction method: $method")
    }
}