Nomad changes
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s

This commit is contained in:
Henrik Jess Nielsen
2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions

View File

@@ -0,0 +1,32 @@
```kotlin title="Kotlin"
import dev.kreuzberg.*
import java.nio.file.Paths
import java.util.Optional
/**
 * Extracts `document.pdf` with chunking and page extraction configured, then
 * prints a short preview of each chunk together with the page(s) reported in
 * its metadata.
 *
 * Chunks whose metadata has no first or last page are skipped silently.
 */
fun main() {
    // Chunker settings: maxCharacters = 500, overlap = 50.
    val chunkingConfig = ChunkingConfig.builder()
        .withMaxCharacters(500L)
        .withOverlap(50L)
        .build()
    val pageConfig = PageConfig.builder()
        .withExtractPages(true)
        .build()
    val extractionConfig = ExtractionConfig.builder()
        .withChunking(Optional.of(chunkingConfig))
        .withPages(Optional.of(pageConfig))
        .build()

    // The second argument is passed as null, as in every example of this API here.
    val extraction = Kreuzberg.extractFileSync(Paths.get("document.pdf"), null, extractionConfig)

    for (piece in extraction.chunks().orEmpty()) {
        val info = piece.metadata()
        val firstPage = info.firstPage()
        val lastPage = info.lastPage()
        // Without both page bounds there is no range to report.
        if (firstPage == null || lastPage == null) continue

        // Single-page chunks read "Page N"; multi-page chunks read "Pages A-B".
        val pageLabel = if (firstPage == lastPage) "Page $firstPage" else "Pages $firstPage-$lastPage"
        val snippet = piece.content().take(50)
        println("Chunk: $snippet... ($pageLabel)")
    }
}
```

View File

@@ -0,0 +1,60 @@
```kotlin title="Kotlin"
import dev.kreuzberg.*
import java.nio.file.Paths
import java.util.Optional
/**
 * Extracts `document.pdf` with chunking configured (maxCharacters = 1000,
 * overlap = 200) and prints how many chunks the result contains.
 */
fun main() {
    val chunkingConfig = ChunkingConfig.builder()
        .withMaxCharacters(1000L)
        .withOverlap(200L)
        .build()
    val extractionConfig = ExtractionConfig.builder()
        .withChunking(Optional.of(chunkingConfig))
        .build()

    val extraction = Kreuzberg.extractFileSync(Paths.get("document.pdf"), null, extractionConfig)

    // chunks() may be null; report zero in that case.
    val chunkCount = extraction.chunks()?.size ?: 0
    println("Chunks: $chunkCount")
}
```
```kotlin title="Kotlin - Semantic"
import dev.kreuzberg.*
import java.nio.file.Paths
import java.util.Optional
/**
 * Extracts `document.pdf` using a chunking config whose chunker type is
 * `ChunkerType.Semantic`, then prints the number of chunks returned.
 */
fun main() {
    val semanticChunking = ChunkingConfig.builder()
        .withChunkerType(ChunkerType.Semantic)
        .build()
    val extractionConfig = ExtractionConfig.builder()
        .withChunking(Optional.of(semanticChunking))
        .build()

    val extraction = Kreuzberg.extractFileSync(Paths.get("document.pdf"), null, extractionConfig)

    // chunks() may be null; report zero in that case.
    val chunkCount = extraction.chunks()?.size ?: 0
    println("Chunks: $chunkCount")
}
```
```kotlin title="Kotlin - Prepend Heading Context"
import dev.kreuzberg.*
import java.nio.file.Paths
import java.util.Optional
/**
 * Extracts `document.md` with the `ChunkerType.Markdown` chunker and
 * `withPrependHeadingContext(true)`, then prints the number of chunks returned.
 */
fun main() {
    // maxCharacters = 500, overlap = 50, Markdown chunker, heading context prepended.
    val markdownChunking = ChunkingConfig.builder()
        .withMaxCharacters(500L)
        .withOverlap(50L)
        .withChunkerType(ChunkerType.Markdown)
        .withPrependHeadingContext(true)
        .build()
    val extractionConfig = ExtractionConfig.builder()
        .withChunking(Optional.of(markdownChunking))
        .build()

    val extraction = Kreuzberg.extractFileSync(Paths.get("document.md"), null, extractionConfig)

    // chunks() may be null; report zero in that case.
    val chunkCount = extraction.chunks()?.size ?: 0
    println("Chunks: $chunkCount")
}
```

View File

@@ -0,0 +1,35 @@
```kotlin title="Kotlin"
import dev.kreuzberg.*
import java.nio.file.Paths
import java.util.Optional
/**
 * Extracts `research_paper.pdf` with chunking and an embedding config attached,
 * then prints per-chunk details: position within the chunk list, byte range,
 * a 100-character content preview and, when present, the embedding size.
 */
fun main() {
    val embeddingConfig = EmbeddingConfig.builder()
        .withModel(EmbeddingModelType.Preset("balanced"))
        .withNormalize(true)
        .build()
    val chunkingConfig = ChunkingConfig.builder()
        .withMaxCharacters(500L)
        .withOverlap(50L)
        .withEmbedding(Optional.of(embeddingConfig))
        .build()
    val extractionConfig = ExtractionConfig.builder()
        .withChunking(Optional.of(chunkingConfig))
        .build()

    val extraction = Kreuzberg.extractFileSync(Paths.get("research_paper.pdf"), null, extractionConfig)

    for (piece in extraction.chunks().orEmpty()) {
        val info = piece.metadata()
        // chunkIndex() is shifted by one for display next to totalChunks().
        println("Chunk ${info.chunkIndex() + 1}/${info.totalChunks()}")
        println("Position: ${info.byteStart()}-${info.byteEnd()}")

        val snippet = piece.content().take(100)
        println("Content: $snippet...")

        // The embedding is optional; only report it when the chunk carries one.
        val vector = piece.embedding()
        if (vector != null) {
            println("Embedding: ${vector.size} dimensions")
        }
    }
}
```

View File

@@ -0,0 +1,27 @@
```kotlin title="Kotlin"
import dev.kreuzberg.*
import java.nio.file.Paths
import java.util.Optional
/**
 * Extracts `document.pdf` with chunking plus an embedding config (preset
 * "balanced", normalize on, batch size 32, download progress off) and prints
 * how many chunks the result contains.
 */
fun main() {
    val embeddingConfig = EmbeddingConfig.builder()
        .withModel(EmbeddingModelType.Preset("balanced"))
        .withNormalize(true)
        .withBatchSize(32L)
        .withShowDownloadProgress(false)
        .build()
    // The embedding config is attached to the chunker, not to the top-level config.
    val chunkingConfig = ChunkingConfig.builder()
        .withMaxCharacters(1024L)
        .withOverlap(100L)
        .withEmbedding(Optional.of(embeddingConfig))
        .build()
    val extractionConfig = ExtractionConfig.builder()
        .withChunking(Optional.of(chunkingConfig))
        .build()

    val extraction = Kreuzberg.extractFileSync(Paths.get("document.pdf"), null, extractionConfig)

    // chunks() may be null; report zero in that case.
    val chunkCount = extraction.chunks()?.size ?: 0
    println("Chunks with embeddings: $chunkCount")
}
```

View File

@@ -0,0 +1,22 @@
```kotlin title="Kotlin"
import dev.kreuzberg.*
import java.nio.file.Paths
import java.util.Optional
/**
 * Extracts `document.pdf` with a fully specified keyword config (YAKE
 * algorithm, max 10 keywords, min score 0.3, n-gram range [1, 3], language
 * "en") and prints whatever `extractedKeywords()` returns.
 */
fun main() {
    val keywordConfig = KeywordConfig.builder()
        .withAlgorithm(KeywordAlgorithm.Yake)
        .withMaxKeywords(10L)
        .withMinScore(0.3f)
        .withNgramRange(listOf(1L, 3L))
        .withLanguage(Optional.of("en"))
        .build()
    val extractionConfig = ExtractionConfig.builder()
        .withKeywords(Optional.of(keywordConfig))
        .build()

    val extraction = Kreuzberg.extractFileSync(Paths.get("document.pdf"), null, extractionConfig)

    // Printed as-is, so a null value shows up as "null".
    val extracted = extraction.extractedKeywords()
    println("Keywords: $extracted")
}
```

View File

@@ -0,0 +1,22 @@
```kotlin title="Kotlin"
import dev.kreuzberg.*
import java.nio.file.Paths
import java.util.Optional
/**
 * Extracts `research_paper.pdf` with keyword extraction configured (YAKE
 * algorithm, max 10 keywords, min score 0.3) and prints the extracted
 * keywords when the result has any.
 */
fun main() {
    val keywordConfig = KeywordConfig.builder()
        .withAlgorithm(KeywordAlgorithm.Yake)
        .withMaxKeywords(10L)
        .withMinScore(0.3f)
        .build()
    val extractionConfig = ExtractionConfig.builder()
        .withKeywords(Optional.of(keywordConfig))
        .build()

    val extraction = Kreuzberg.extractFileSync(Paths.get("research_paper.pdf"), null, extractionConfig)

    // Nothing is printed when extractedKeywords() is null.
    val extracted = extraction.extractedKeywords()
    if (extracted != null) {
        println("Keywords: $extracted")
    }
}
```

View File

@@ -0,0 +1,20 @@
```kotlin title="Kotlin"
import dev.kreuzberg.*
import java.nio.file.Paths
import java.util.Optional
/**
 * Extracts `document.pdf` with language detection enabled (min confidence
 * 0.8, `withDetectMultiple(false)`) and prints `detectedLanguages()`.
 */
fun main() {
    val detectionConfig = LanguageDetectionConfig.builder()
        .withEnabled(true)
        .withMinConfidence(0.8)
        .withDetectMultiple(false)
        .build()
    val extractionConfig = ExtractionConfig.builder()
        .withLanguageDetection(Optional.of(detectionConfig))
        .build()

    val extraction = Kreuzberg.extractFileSync(Paths.get("document.pdf"), null, extractionConfig)

    // Printed as-is, so a null value shows up as "null".
    val languages = extraction.detectedLanguages()
    println("Detected languages: $languages")
}
```

View File

@@ -0,0 +1,20 @@
```kotlin title="Kotlin"
import dev.kreuzberg.*
import java.nio.file.Paths
import java.util.Optional
/**
 * Extracts `multilingual_document.pdf` with language detection enabled (min
 * confidence 0.8, `withDetectMultiple(true)`) and prints `detectedLanguages()`.
 */
fun main() {
    val detectionConfig = LanguageDetectionConfig.builder()
        .withEnabled(true)
        .withMinConfidence(0.8)
        .withDetectMultiple(true)
        .build()
    val extractionConfig = ExtractionConfig.builder()
        .withLanguageDetection(Optional.of(detectionConfig))
        .build()

    val extraction = Kreuzberg.extractFileSync(
        Paths.get("multilingual_document.pdf"),
        null,
        extractionConfig,
    )

    // Printed as-is, so a null value shows up as "null".
    val languages = extraction.detectedLanguages()
    println("Detected languages: $languages")
}
```

View File

@@ -0,0 +1,14 @@
```kotlin title="Kotlin"
import dev.kreuzberg.*
import java.nio.file.Paths
import java.util.Optional
/**
 * Extracts `document.pdf` with `withEnableQualityProcessing(true)` and prints
 * the value returned by `qualityScore()`.
 */
fun main() {
    val extractionConfig = ExtractionConfig.builder()
        .withEnableQualityProcessing(true)
        .build()

    val extraction = Kreuzberg.extractFileSync(Paths.get("document.pdf"), null, extractionConfig)

    // Printed as-is, so a null value shows up as "null".
    val score = extraction.qualityScore()
    println("Quality score: $score")
}
```

View File

@@ -0,0 +1,22 @@
```kotlin title="Kotlin"
import dev.kreuzberg.*
import java.nio.file.Paths
import java.util.Optional
/**
 * Extracts `scanned_document.pdf` with quality processing enabled and reports
 * the quality score, printing a warning instead when the score is below 0.5.
 *
 * Nothing is printed when the result carries no quality score.
 */
fun main() {
    val extractionConfig = ExtractionConfig.builder()
        .withEnableQualityProcessing(true)
        .build()

    val extraction = Kreuzberg.extractFileSync(Paths.get("scanned_document.pdf"), null, extractionConfig)

    // No score available: nothing to report.
    val score = extraction.qualityScore() ?: return

    // Both messages render the score with two decimals; only the wording differs.
    val template = if (score < 0.5) {
        "Warning: Low quality extraction (%.2f)"
    } else {
        "Quality score: %.2f"
    }
    println(template.format(score))
}
```

View File

@@ -0,0 +1,19 @@
```kotlin title="Kotlin"
import dev.kreuzberg.*
import java.nio.file.Paths
import java.util.Optional
/**
 * Extracts `document.pdf` with token reduction configured (mode "moderate",
 * `withPreserveImportantWords(true)`) and prints the resulting content.
 */
fun main() {
    val reductionOptions = TokenReductionOptions.builder()
        .withMode("moderate")
        .withPreserveImportantWords(true)
        .build()
    val extractionConfig = ExtractionConfig.builder()
        .withTokenReduction(Optional.of(reductionOptions))
        .build()

    val extraction = Kreuzberg.extractFileSync(Paths.get("document.pdf"), null, extractionConfig)

    val reducedText = extraction.content()
    println("Reduced content: $reducedText")
}
```

View File

@@ -0,0 +1,19 @@
```kotlin title="Kotlin"
import dev.kreuzberg.*
import java.nio.file.Paths
import java.util.Optional
/**
 * Extracts `verbose_document.pdf` with token reduction configured (mode
 * "moderate", `withPreserveImportantWords(true)`) and prints the length of the
 * resulting content.
 */
fun main() {
    val reductionOptions = TokenReductionOptions.builder()
        .withMode("moderate")
        .withPreserveImportantWords(true)
        .build()
    val extractionConfig = ExtractionConfig.builder()
        .withTokenReduction(Optional.of(reductionOptions))
        .build()

    val extraction = Kreuzberg.extractFileSync(Paths.get("verbose_document.pdf"), null, extractionConfig)

    val reducedLength = extraction.content().length
    println("Reduced content length: $reducedLength")
}
```

View File

@@ -0,0 +1,52 @@
```kotlin title="Kotlin"
import dev.kreuzberg.*
import java.nio.file.Paths
import java.util.Optional
/**
 * One entry ready to be stored in a vector index: a piece of text, its
 * embedding vector and free-form string metadata.
 *
 * @property id identifier of this record.
 * @property content text the embedding belongs to.
 * @property embedding embedding vector for [content].
 * @property metadata additional string key/value pairs describing the record.
 */
data class VectorRecord(
    val id: String,
    val content: String,
    val embedding: List<Float>,
    val metadata: Map<String, String>,
)
/**
 * Extracts a document with chunking and embeddings configured and turns every
 * chunk that carries an embedding into a [VectorRecord].
 *
 * @param documentPath path of the document handed to `Kreuzberg.extractFileSync`.
 * @param documentId identifier used as the prefix of each record id and stored
 *   under the `document_id` metadata key.
 * @return one record per chunk with a non-null embedding, in chunk order.
 *   Chunks without an embedding are skipped, but indices still refer to the
 *   chunk's position in the full chunk list.
 */
fun extractAndVectorize(documentPath: String, documentId: String): List<VectorRecord> {
    val embeddingConfig = EmbeddingConfig.builder()
        .withModel(EmbeddingModelType.Preset("balanced"))
        .withNormalize(true)
        .withBatchSize(32L)
        .build()
    val chunkingConfig = ChunkingConfig.builder()
        .withMaxCharacters(512L)
        .withOverlap(50L)
        .withEmbedding(Optional.of(embeddingConfig))
        .build()
    val extractionConfig = ExtractionConfig.builder()
        .withChunking(Optional.of(chunkingConfig))
        .build()

    val extraction = Kreuzberg.extractFileSync(Paths.get(documentPath), null, extractionConfig)

    // mapIndexedNotNull keeps the original chunk position even when earlier
    // chunks are dropped for lacking an embedding.
    return extraction.chunks().orEmpty().mapIndexedNotNull { position, piece ->
        piece.embedding()?.let { vector ->
            val text = piece.content()
            VectorRecord(
                id = "${documentId}_chunk_$position",
                content = text,
                embedding = vector,
                metadata = mapOf(
                    "document_id" to documentId,
                    "chunk_index" to position.toString(),
                    "content_length" to text.length.toString(),
                ),
            )
        }
    }
}
```