Nomad changes
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s

This commit is contained in:
Henrik Jess Nielsen
2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions

View File

@@ -0,0 +1,45 @@
```kotlin title="Kotlin"
import dev.kreuzberg.*
import java.nio.file.Paths
import java.util.Optional
fun main() {
val ocr = OcrConfig.builder()
.withBackend("tesseract")
.withLanguage("eng")
.build()
val chunking = ChunkingConfig.builder()
.withMaxCharacters(800L)
.withOverlap(100L)
.withChunkerType(ChunkerType.MARKDOWN)
.withPrependHeadingContext(true)
.build()
val images = ImageExtractionConfig.builder()
.withExtractImages(true)
.build()
val config = ExtractionConfig.builder()
.withOcr(Optional.of(ocr))
.withForceOcr(false)
.withChunking(Optional.of(chunking))
.withOutputFormat(OutputFormat.MARKDOWN)
.withIncludeDocumentStructure(true)
.withImages(Optional.of(images))
.withUseCache(true)
.withEnableQualityProcessing(true)
.build()
val result = Kreuzberg.extractFileSync(Paths.get("report.pdf"), null, config)
val content = result.content()
println("Content (${content.length} chars):")
println(content.take(200))
result.chunks()?.let { println("\nChunks: ${it.size}") }
println("Tables: ${result.tables()?.size ?: 0}")
result.detectedLanguages()?.let { println("Languages: $it") }
result.extractionMethod()?.let { println("Extraction method: $it") }
}
```