Nomad changes
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s

This commit is contained in:
Henrik Jess Nielsen
2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions

View File

@@ -0,0 +1,19 @@
```kotlin title="Kotlin"
import dev.kreuzberg.*
import java.nio.file.Paths
import java.util.Optional
/**
 * Extracts `document.pdf` with chunking enabled and prints the number of chunks produced.
 */
fun main() {
    // Chunking settings: max characters 1500, overlap 200.
    val chunkingConfig = ChunkingConfig.builder()
        .withMaxCharacters(1500L)
        .withOverlap(200L)
        .build()

    val extractionConfig = ExtractionConfig.builder()
        .withChunking(Optional.of(chunkingConfig))
        .build()

    val sourcePath = Paths.get("document.pdf")
    val extraction = Kreuzberg.extractFileSync(sourcePath, null, extractionConfig)

    // chunks() is treated as possibly null; a missing list is reported as zero.
    val chunkCount = extraction.chunks()?.size ?: 0
    println("Chunks: $chunkCount")
}
```

View File

@@ -0,0 +1,35 @@
```kotlin title="Kotlin"
import dev.kreuzberg.*
import java.nio.file.Paths
import java.util.Optional
/**
 * Extracts `research_paper.pdf` with chunking and an embedding configuration attached,
 * then prints, for every chunk, its position, a content preview and (when present)
 * the size of its embedding.
 */
fun main() {
    val embeddingConfig = EmbeddingConfig.builder()
        .withModel(EmbeddingModelType.Preset("balanced"))
        .withNormalize(true)
        .build()

    val chunkingConfig = ChunkingConfig.builder()
        .withMaxCharacters(500L)
        .withOverlap(50L)
        .withEmbedding(Optional.of(embeddingConfig))
        .build()

    val extractionConfig = ExtractionConfig.builder()
        .withChunking(Optional.of(chunkingConfig))
        .build()

    val extraction = Kreuzberg.extractFileSync(Paths.get("research_paper.pdf"), null, extractionConfig)

    // A null chunk list is handled as an empty one.
    extraction.chunks().orEmpty().forEach { chunk ->
        val info = chunk.metadata()
        // Adds 1 to chunkIndex() for display (presumably the index is zero-based — confirm).
        println("Chunk ${info.chunkIndex() + 1}/${info.totalChunks()}")
        println("Position: ${info.byteStart()}-${info.byteEnd()}")

        // Only the first 100 characters of the chunk text are shown.
        val preview = chunk.content().take(100)
        println("Content: $preview...")

        val vector = chunk.embedding()
        if (vector != null) {
            println("Embedding: ${vector.size} dimensions")
        }
    }
}
```

View File

@@ -0,0 +1,27 @@
```kotlin title="Kotlin"
import dev.kreuzberg.*
import java.nio.file.Paths
import java.util.Optional
/**
 * Extracts `document.pdf` with chunking plus an embedding configuration
 * ("balanced" preset, normalization on, batch size 32, download progress off)
 * and prints how many chunks were returned.
 */
fun main() {
    val embeddingConfig = EmbeddingConfig.builder()
        .withModel(EmbeddingModelType.Preset("balanced"))
        .withNormalize(true)
        .withBatchSize(32L)
        .withShowDownloadProgress(false)
        .build()

    val chunkingConfig = ChunkingConfig.builder()
        .withMaxCharacters(1024L)
        .withOverlap(100L)
        .withEmbedding(Optional.of(embeddingConfig))
        .build()

    val extractionConfig = ExtractionConfig.builder()
        .withChunking(Optional.of(chunkingConfig))
        .build()

    val sourcePath = Paths.get("document.pdf")
    val extraction = Kreuzberg.extractFileSync(sourcePath, null, extractionConfig)

    // A null chunk list is reported as zero.
    val chunkCount = extraction.chunks()?.size ?: 0
    println("Chunks with embeddings: $chunkCount")
}
```

View File

@@ -0,0 +1,22 @@
```kotlin title="Kotlin"
import dev.kreuzberg.*
import java.nio.file.Paths
import java.util.Optional
/**
 * Extracts `research_paper.pdf` with keyword extraction configured
 * (YAKE algorithm, at most 10 keywords, minimum score 0.3) and prints the
 * extracted keywords when the result carries any.
 */
fun main() {
    val keywordConfig = KeywordConfig.builder()
        .withAlgorithm(KeywordAlgorithm.Yake)
        .withMaxKeywords(10L)
        .withMinScore(0.3f)
        .build()

    val extractionConfig = ExtractionConfig.builder()
        .withKeywords(Optional.of(keywordConfig))
        .build()

    val extraction = Kreuzberg.extractFileSync(Paths.get("research_paper.pdf"), null, extractionConfig)

    // Nothing is printed when no keywords object is returned.
    val extractedKeywords = extraction.extractedKeywords()
    if (extractedKeywords != null) {
        println("Keywords: $extractedKeywords")
    }
}
```

View File

@@ -0,0 +1,22 @@
```kotlin title="Kotlin"
import dev.kreuzberg.*
import java.nio.file.Paths
import java.util.Optional
/**
 * Extracts `scanned_document.pdf` with quality processing enabled and prints the
 * quality score, flagging scores below 0.5 as low quality.
 */
fun main() {
    val extractionConfig = ExtractionConfig.builder()
        .withEnableQualityProcessing(true)
        .build()

    val extraction = Kreuzberg.extractFileSync(Paths.get("scanned_document.pdf"), null, extractionConfig)

    // No score available: nothing to report.
    val qualityScore = extraction.qualityScore() ?: return

    // Pick the message template first, then format the score into it with two decimals.
    val template = if (qualityScore < 0.5) {
        "Warning: Low quality extraction (%.2f)"
    } else {
        "Quality score: %.2f"
    }
    println(template.format(qualityScore))
}
```

View File

@@ -0,0 +1,17 @@
```kotlin title="Kotlin"
import dev.kreuzberg.*
import java.util.Optional
/**
 * Embeds two sample strings with the "balanced" preset (normalization on) and prints
 * how many embeddings came back and the size of the first one.
 */
fun main() {
    val embeddingConfig = EmbeddingConfig.builder()
        .withModel(EmbeddingModelType.Preset("balanced"))
        .withNormalize(true)
        .build()

    val inputs = listOf("Hello, world!", "Kreuzberg is fast")
    val vectors = Kreuzberg.embedTexts(inputs, embeddingConfig)

    println("Texts embedded: ${vectors.size}")
    // Reads the first embedding; this relies on the result being non-empty.
    println("Dimensions: ${vectors[0].size}")
}
```

View File

@@ -0,0 +1,19 @@
```kotlin title="Kotlin"
import dev.kreuzberg.*
import java.nio.file.Paths
import java.util.Optional
fun main() {
val tokenReduction = TokenReductionOptions.builder()
.withMode("moderate")
.withPreserveImportantWords(true)
.build()
val config = ExtractionConfig.builder()
.withTokenReduction(Optional.of(tokenReduction))
.build()
val result = Kreuzberg.extractFileSync(Paths.get("document.pdf"), null, config)
println(result.content())
}
```

View File

@@ -0,0 +1,19 @@
```kotlin title="Kotlin"
import dev.kreuzberg.*
import java.nio.file.Paths
import java.util.Optional
fun main() {
val tokenReduction = TokenReductionOptions.builder()
.withMode("moderate")
.withPreserveImportantWords(true)
.build()
val config = ExtractionConfig.builder()
.withTokenReduction(Optional.of(tokenReduction))
.build()
val result = Kreuzberg.extractFileSync(Paths.get("verbose_document.pdf"), null, config)
println("Reduced content length: ${result.content().length}")
}
```

View File

@@ -0,0 +1,52 @@
```kotlin title="Kotlin"
import dev.kreuzberg.*
import java.nio.file.Paths
import java.util.Optional
/**
 * Immutable record pairing a piece of text with its embedding vector and
 * string-valued metadata.
 *
 * @property id identifier of the record.
 * @property content text the embedding belongs to.
 * @property embedding embedding vector as a list of floats.
 * @property metadata additional string key/value attributes.
 */
data class VectorRecord(
    val id: String,
    val content: String,
    val embedding: List<Float>,
    val metadata: Map<String, String>,
)
/**
 * Extracts the document at [documentPath] with chunking and an embedding configuration
 * attached, and turns every chunk that carries an embedding into a [VectorRecord].
 *
 * @param documentPath path of the document to extract.
 * @param documentId identifier used to build record ids and stored under the
 *   `document_id` metadata key.
 * @return one record per chunk whose embedding is non-null, in chunk order;
 *   chunks without an embedding are skipped.
 */
fun extractAndVectorize(documentPath: String, documentId: String): List<VectorRecord> {
    val embeddingConfig = EmbeddingConfig.builder()
        .withModel(EmbeddingModelType.Preset("balanced"))
        .withNormalize(true)
        .withBatchSize(32L)
        .build()

    val chunkingConfig = ChunkingConfig.builder()
        .withMaxCharacters(512L)
        .withOverlap(50L)
        .withEmbedding(Optional.of(embeddingConfig))
        .build()

    val extractionConfig = ExtractionConfig.builder()
        .withChunking(Optional.of(chunkingConfig))
        .build()

    val extraction = Kreuzberg.extractFileSync(Paths.get(documentPath), null, extractionConfig)

    // A null chunk list is handled as empty. The position in the list (not any
    // chunk-level metadata) is what ends up in the record id and `chunk_index`.
    return extraction.chunks().orEmpty().mapIndexedNotNull { position, chunk ->
        chunk.embedding()?.let { vector ->
            val attributes = mapOf(
                "document_id" to documentId,
                "chunk_index" to position.toString(),
                "content_length" to chunk.content().length.toString(),
            )
            VectorRecord(
                id = "${documentId}_chunk_$position",
                content = chunk.content(),
                embedding = vector,
                metadata = attributes,
            )
        }
    }
}
```