Nomad changes
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s

This commit is contained in:
Henrik Jess Nielsen
2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions

View File

@@ -0,0 +1,16 @@
```kotlin title="Kotlin"
import dev.kreuzberg.*
/**
 * Extracts a plain-text and a Markdown payload from memory in a single batch
 * call and prints the character count of each result.
 */
fun main() {
    val defaults = ExtractionConfig.builder().build()
    val plainText = BatchBytesItem("Hello, world!".toByteArray(), "text/plain", null)
    val markdown = BatchBytesItem("# Heading\n\nParagraph text.".toByteArray(), "text/markdown", null)
    val extracted = Kreuzberg.batchExtractBytesSync(listOf(plainText, markdown), defaults)
    // Results are reported by their position in the returned collection.
    for ((position, document) in extracted.withIndex()) {
        println("Item $position: ${document.content().length} chars")
    }
}
```

View File

@@ -0,0 +1,18 @@
```kotlin title="Kotlin"
import dev.kreuzberg.*
import java.nio.file.Paths
/**
 * Extracts three files in a single batch call and prints the character count
 * of each result.
 */
fun main() {
    val defaults = ExtractionConfig.builder().build()
    // Each file is submitted with a null second argument, as in every item here.
    val batch = listOf("doc1.pdf", "doc2.docx", "report.pdf")
        .map { fileName -> BatchFileItem(Paths.get(fileName), null) }
    val extracted = Kreuzberg.batchExtractFilesSync(batch, defaults)
    for ((position, document) in extracted.withIndex()) {
        println("File $position: ${document.content().length} chars")
    }
}
```

View File

@@ -0,0 +1,31 @@
```kotlin title="Kotlin"
import java.net.URI
import java.net.http.HttpClient
import java.net.http.HttpRequest
import java.net.http.HttpResponse
/**
 * Posts a JSON chunking request to `http://localhost:8000/chunk` and prints
 * the HTTP status code followed by the response body.
 */
fun main() {
    val http = HttpClient.newHttpClient()
    // The payload lines share one indent so trimIndent() yields the flat JSON text.
    val payload = """
        {
        "text": "Your long text here...",
        "chunker_type": "text",
        "config": {
        "max_characters": 1000,
        "overlap": 50,
        "trim": true
        }
        }
    """.trimIndent()
    val endpoint = URI.create("http://localhost:8000/chunk")
    val chunkRequest = HttpRequest.newBuilder()
        .uri(endpoint)
        .header("Content-Type", "application/json")
        .POST(HttpRequest.BodyPublishers.ofString(payload))
        .build()
    val reply = http.send(chunkRequest, HttpResponse.BodyHandlers.ofString())
    val status = reply.statusCode()
    println("Status: $status")
    println(reply.body())
}
```

View File

@@ -0,0 +1,38 @@
```kotlin title="Kotlin"
import java.net.URI
import java.net.http.HttpClient
import java.net.http.HttpRequest
import java.net.http.HttpResponse
import java.nio.file.Files
import java.nio.file.Paths
/**
 * Uploads `document.pdf` to `http://localhost:8000/extract` as a single-part
 * `multipart/form-data` request and prints the response body.
 */
fun main() {
    val http = HttpClient.newHttpClient()
    val source = Paths.get("document.pdf")
    val fileBytes = Files.readAllBytes(source)
    val fileName = source.fileName.toString()
    // The boundary is made unique per run by appending the current time in milliseconds.
    val boundary = "----KreuzbergBoundary${System.currentTimeMillis()}"
    val lineBreak = "\r\n"

    // Part header: boundary line, disposition, content type, then a blank line.
    val partHeader = buildString {
        append("--$boundary$lineBreak")
        append("Content-Disposition: form-data; name=\"file\"; filename=\"$fileName\"$lineBreak")
        append("Content-Type: application/pdf$lineBreak$lineBreak")
    }.toByteArray()
    // Closing delimiter that ends the multipart body.
    val closing = "$lineBreak--$boundary--$lineBreak".toByteArray()
    val multipartBody = partHeader + fileBytes + closing

    val upload = HttpRequest.newBuilder()
        .uri(URI.create("http://localhost:8000/extract"))
        .header("Content-Type", "multipart/form-data; boundary=$boundary")
        .POST(HttpRequest.BodyPublishers.ofByteArray(multipartBody))
        .build()
    val reply = http.send(upload, HttpResponse.BodyHandlers.ofString())
    println(reply.body())
}
```

View File

@@ -0,0 +1,45 @@
```kotlin title="Kotlin"
import dev.kreuzberg.*
import java.nio.file.Paths
import java.util.Optional
/**
 * Builds an extraction configuration with OCR, chunking and image settings,
 * extracts `report.pdf`, and prints a summary of the result.
 */
fun main() {
    val ocrSettings = OcrConfig.builder()
        .withBackend("tesseract")
        .withLanguage("eng")
        .build()
    val chunkSettings = ChunkingConfig.builder()
        .withMaxCharacters(800L)
        .withOverlap(100L)
        .withChunkerType(ChunkerType.MARKDOWN)
        .withPrependHeadingContext(true)
        .build()
    val imageSettings = ImageExtractionConfig.builder()
        .withExtractImages(true)
        .build()

    // The nested configs are handed to the top-level builder wrapped in Optional.
    val extractionConfig = ExtractionConfig.builder()
        .withOcr(Optional.of(ocrSettings))
        .withForceOcr(false)
        .withChunking(Optional.of(chunkSettings))
        .withOutputFormat(OutputFormat.MARKDOWN)
        .withIncludeDocumentStructure(true)
        .withImages(Optional.of(imageSettings))
        .withUseCache(true)
        .withEnableQualityProcessing(true)
        .build()

    val extraction = Kreuzberg.extractFileSync(Paths.get("report.pdf"), null, extractionConfig)
    val text = extraction.content()
    println("Content (${text.length} chars):")
    // Only the first 200 characters are shown as a preview.
    println(text.take(200))

    // Fields that come back null are skipped, except tables, which report 0.
    val chunks = extraction.chunks()
    if (chunks != null) {
        println("\nChunks: ${chunks.size}")
    }
    val tableCount = extraction.tables()?.size ?: 0
    println("Tables: $tableCount")
    val languages = extraction.detectedLanguages()
    if (languages != null) {
        println("Languages: $languages")
    }
    val method = extraction.extractionMethod()
    if (method != null) {
        println("Extraction method: $method")
    }
}
```

View File

@@ -0,0 +1,17 @@
```kotlin title="Kotlin"
import dev.kreuzberg.*
import java.nio.file.Paths
/**
 * Extracts `document.pdf` and prints its content; failures are written to
 * stderr, with the error code included for [KreuzbergRsException].
 */
fun main() {
    val defaults = ExtractionConfig.builder().build()
    val stderr = System.err
    try {
        val text = Kreuzberg.extractFileSync(Paths.get("document.pdf"), null, defaults).content()
        println(text)
    } catch (failure: KreuzbergRsException) {
        // Library-specific failures are matched first so their code can be reported.
        stderr.println("Extraction failed: ${failure.message}")
        stderr.println("Error code: ${failure.code}")
    } catch (failure: Exception) {
        stderr.println("Unexpected error: ${failure.message}")
    }
}
```

View File

@@ -0,0 +1,28 @@
```kotlin title="Kotlin"
import dev.kreuzberg.*
import java.nio.file.Files
import java.nio.file.Paths
/**
 * Extracts the content of an in-memory document using a default-built
 * [ExtractionConfig].
 *
 * @param bytes raw document bytes passed to the extractor
 * @param mimeType MIME type passed alongside the bytes
 * @return the `content()` of the extraction result
 */
fun extractText(bytes: ByteArray, mimeType: String): String =
    ExtractionConfig.builder().build()
        .let { defaults -> Kreuzberg.extractBytesSync(bytes, mimeType, defaults) }
        .content()
/**
 * Reads `document.pdf`, extracts its text via [extractText], and prints the
 * number of extracted characters.
 *
 * A failed read is reported on stderr and ends the program. Extraction
 * failures are also written to stderr, with the error code included for
 * [KreuzbergRsException].
 */
fun main() {
    val source = Paths.get("document.pdf")
    val bytes = try {
        Files.readAllBytes(source)
    } catch (e: Exception) {
        // Do not fall back to an empty array: extracting from zero bytes would
        // hide the real cause (missing or unreadable file) behind an unrelated
        // extraction result or error.
        System.err.println("Failed to read $source: ${e.message}")
        return
    }
    try {
        val text = extractText(bytes, "application/pdf")
        println("Extracted ${text.length} chars")
    } catch (e: KreuzbergRsException) {
        // Library-specific failures are matched first so their code can be reported.
        System.err.println("Extraction error (code=${e.code}): ${e.message}")
    } catch (e: Exception) {
        System.err.println("Unexpected error: ${e.message}")
    }
}
```

View File

@@ -0,0 +1,16 @@
```kotlin title="Kotlin"
import dev.kreuzberg.*
import dev.kreuzberg.kt.Kreuzberg
import kotlinx.coroutines.runBlocking
import java.nio.file.Files
import java.nio.file.Paths
/**
 * Reads `document.pdf` into memory, extracts it inside `runBlocking`, and
 * prints the content and the table count.
 */
fun main(): Unit = runBlocking {
    val pdfBytes = Files.readAllBytes(Paths.get("document.pdf"))
    val defaults = ExtractionConfig.builder().build()
    val extraction = Kreuzberg.extractBytes(pdfBytes, "application/pdf", defaults)
    println(extraction.content())
    // A null table list is reported as zero tables.
    val tableCount = extraction.tables()?.size ?: 0
    println("Tables: $tableCount")
}
```

View File

@@ -0,0 +1,14 @@
```kotlin title="Kotlin"
import dev.kreuzberg.*
import java.nio.file.Files
import java.nio.file.Paths
/**
 * Reads `document.pdf` into memory, extracts it synchronously, and prints the
 * content and the table count.
 */
fun main() {
    val pdfBytes = Files.readAllBytes(Paths.get("document.pdf"))
    val defaults = ExtractionConfig.builder().build()
    val extraction = Kreuzberg.extractBytesSync(pdfBytes, "application/pdf", defaults)
    println(extraction.content())
    // A null table list is reported as zero tables.
    val tableCount = extraction.tables()?.size ?: 0
    println("Tables: $tableCount")
}
```

View File

@@ -0,0 +1,15 @@
```kotlin title="Kotlin"
import dev.kreuzberg.*
import dev.kreuzberg.kt.Kreuzberg
import kotlinx.coroutines.runBlocking
import java.nio.file.Paths
/**
 * Extracts `document.pdf` from disk inside `runBlocking` and prints the
 * content, the MIME type, and the table count.
 */
fun main(): Unit = runBlocking {
    val defaults = ExtractionConfig.builder().build()
    val source = Paths.get("document.pdf")
    val extraction = Kreuzberg.extractFile(source, null, defaults)
    println(extraction.content())
    val mime = extraction.mimeType()
    println("MIME type: $mime")
    // A null table list is reported as zero tables.
    val tableCount = extraction.tables()?.size ?: 0
    println("Tables: $tableCount")
}
```

View File

@@ -0,0 +1,13 @@
```kotlin title="Kotlin"
import dev.kreuzberg.*
import java.nio.file.Paths
/**
 * Extracts `document.pdf` from disk synchronously and prints the content, the
 * MIME type, and the table count.
 */
fun main() {
    val defaults = ExtractionConfig.builder().build()
    val source = Paths.get("document.pdf")
    val extraction = Kreuzberg.extractFileSync(source, null, defaults)
    println(extraction.content())
    val mime = extraction.mimeType()
    println("MIME type: $mime")
    // A null table list is reported as zero tables.
    val tableCount = extraction.tables()?.size ?: 0
    println("Tables: $tableCount")
}
```