Nomad changes
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s

This commit is contained in:
Henrik Jess Nielsen
2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions

View File

@@ -0,0 +1,45 @@
```kotlin title="Kotlin"
import dev.kreuzberg.*
import java.nio.file.Path
/**
 * Sample custom OCR backend registered under the name "cloud-ocr".
 *
 * The remote call is a stub: [callCloudApi] always returns a fixed string, and
 * [apiKey] is stored but not read anywhere in this sample.
 *
 * @property apiKey presumably the credential for the cloud service; unused by the stub.
 * @property supportedLangs language codes reported by [supported_languages] and
 *   checked by [supports_language].
 */
class CloudOcrBackend(
    private val apiKey: String,
    private val supportedLangs: List<String>,
) : IOcrBackend {

    /** Identifier of this backend. */
    override fun name(): String {
        return "cloud-ocr"
    }

    /** Version string of this backend. */
    override fun version(): String {
        return "1.0.0"
    }

    /**
     * Sends the image bytes and the configured language to [callCloudApi] and wraps
     * the returned text in a plain-text [ExtractionResult] with empty metadata.
     */
    override fun process_image(image_bytes: ByteArray, config: OcrConfig): ExtractionResult {
        val recognizedText = callCloudApi(image_bytes, config.language())
        return ExtractionResult.builder()
            .withContent(recognizedText)
            .withMimeType("text/plain")
            .withMetadata(Metadata.builder().build())
            .build()
    }

    /** Reads the whole file at [path] into memory and delegates to [process_image]. */
    override fun process_image_file(path: Path, config: OcrConfig): ExtractionResult {
        val fileBytes = java.nio.file.Files.readAllBytes(path)
        return process_image(fileBytes, config)
    }

    /** Returns `true` when [lang] is one of the languages passed to the constructor. */
    override fun supports_language(lang: String): Boolean {
        return lang in supportedLangs
    }

    /** This backend reports itself as a custom backend. */
    override fun backend_type(): OcrBackendType {
        return OcrBackendType.Custom
    }

    /** Returns the language list passed to the constructor. */
    override fun supported_languages(): List<String> {
        return supportedLangs
    }

    /** Table detection is not offered by this backend. */
    override fun supports_table_detection(): Boolean {
        return false
    }

    /** Whole-document processing is not offered by this backend. */
    override fun supports_document_processing(): Boolean {
        return false
    }

    /**
     * Always fails, matching [supports_document_processing] returning `false`.
     *
     * @throws UnsupportedOperationException on every call.
     */
    override fun process_document(_path: Path, _config: OcrConfig): ExtractionResult =
        throw UnsupportedOperationException("document processing not supported")

    /** Placeholder for the real cloud request; ignores its arguments and returns fixed text. */
    private fun callCloudApi(image: ByteArray, language: String): String = "Extracted text"
}
```

View File

@@ -0,0 +1,18 @@
```kotlin title="Kotlin"
import dev.kreuzberg.*
import java.nio.file.Paths
import java.util.Optional
/**
 * Extracts `document.pdf` with image extraction switched on and prints how many
 * images the result contains.
 */
fun main() {
    // Only the extract-images flag is set on the image configuration.
    val imageSettings = ImageExtractionConfig.builder()
        .withExtractImages(true)
        .build()
    val extractionConfig = ExtractionConfig.builder()
        .withImages(Optional.of(imageSettings))
        .build()

    // The second argument is passed as null (presumably an optional MIME type hint; confirm against the API).
    val pdfPath = Paths.get("document.pdf")
    val extraction = Kreuzberg.extractFileSync(pdfPath, null, extractionConfig)

    // A null image list is reported as a count of 0.
    val imageCount = extraction.images()?.size ?: 0
    println("Extracted images: $imageCount")
}
```

View File

@@ -0,0 +1,20 @@
```kotlin title="Kotlin"
import dev.kreuzberg.*
import java.nio.file.Paths
import java.util.Optional
/**
 * Extracts `document.pdf` with image extraction enabled, a target DPI of 300 and a
 * maximum image dimension of 4096, then prints how many images the result contains.
 */
fun main() {
    val imageSettings = ImageExtractionConfig.builder()
        .withExtractImages(true)
        .withTargetDpi(300)
        .withMaxImageDimension(4096)
        .build()
    val extractionConfig = ExtractionConfig.builder()
        .withImages(Optional.of(imageSettings))
        .build()

    // The second argument is passed as null (presumably an optional MIME type hint; confirm against the API).
    val pdfPath = Paths.get("document.pdf")
    val extraction = Kreuzberg.extractFileSync(pdfPath, null, extractionConfig)

    // A null image list is reported as a count of 0.
    val imageCount = extraction.images()?.size ?: 0
    println("Extracted images: $imageCount")
}
```

View File

@@ -0,0 +1,19 @@
```kotlin title="Kotlin"
import dev.kreuzberg.*
import java.nio.file.Paths
import java.util.Optional
/**
 * Extracts `document.pdf` with the OCR backend set to "easyocr" and the language
 * set to "en", then prints the extracted content.
 */
fun main() {
    val ocrSettings = OcrConfig.builder()
        .withBackend("easyocr")
        .withLanguage("en")
        .build()
    val extractionConfig = ExtractionConfig.builder()
        .withOcr(Optional.of(ocrSettings))
        .build()

    // The second argument is passed as null (presumably an optional MIME type hint; confirm against the API).
    val pdfPath = Paths.get("document.pdf")
    val extraction = Kreuzberg.extractFileSync(pdfPath, null, extractionConfig)

    println("Extracted text: ${extraction.content()}")
}
```

View File

@@ -0,0 +1,31 @@
```kotlin title="Kotlin"
import dev.kreuzberg.*
import java.nio.file.Paths
import java.util.Optional
/**
 * Extracts `scanned.pdf` with the "paddleocr" backend and element output enabled,
 * then prints text, recognition confidence, geometry and (when present) rotation
 * for each OCR element, followed by a blank line.
 */
fun main() {
    val elementSettings = OcrElementConfig.builder()
        .withIncludeElements(true)
        .build()
    val ocrSettings = OcrConfig.builder()
        .withBackend("paddleocr")
        .withLanguage("en")
        .withElementConfig(Optional.of(elementSettings))
        .build()
    val extractionConfig = ExtractionConfig.builder()
        .withOcr(Optional.of(ocrSettings))
        .build()

    // The second argument is passed as null (presumably an optional MIME type hint; confirm against the API).
    val extraction = Kreuzberg.extractFileSync(Paths.get("scanned.pdf"), null, extractionConfig)

    // Nothing to print when the result carries no element list.
    val elements = extraction.ocrElements() ?: return
    for (element in elements) {
        println("Text: ${element.text()}")
        println("Confidence: ${element.confidence().recognition()}")
        println("Geometry: ${element.geometry()}")

        // Rotation is only printed for elements that report one.
        val rotation = element.rotation()
        if (rotation != null) {
            println("Rotation: $rotation")
        }
        println()
    }
}
```

View File

@@ -0,0 +1,19 @@
```kotlin title="Kotlin"
import dev.kreuzberg.*
import java.nio.file.Paths
import java.util.Optional
/**
 * Extracts `scanned.pdf` with the OCR backend set to "tesseract" and the language
 * set to "eng", then prints the extracted content.
 */
fun main() {
    val ocrSettings = OcrConfig.builder()
        .withBackend("tesseract")
        .withLanguage("eng")
        .build()
    val extractionConfig = ExtractionConfig.builder()
        .withOcr(Optional.of(ocrSettings))
        .build()

    // The second argument is passed as null (presumably an optional MIME type hint; confirm against the API).
    val scanPath = Paths.get("scanned.pdf")
    val extraction = Kreuzberg.extractFileSync(scanPath, null, extractionConfig)

    println(extraction.content())
}
```

View File

@@ -0,0 +1,20 @@
```kotlin title="Kotlin"
import dev.kreuzberg.*
import java.nio.file.Paths
import java.util.Optional
/**
 * Extracts `document.pdf` with the "tesseract" OCR backend (language "eng") and the
 * force-OCR flag set to `true`, then prints the extracted content.
 */
fun main() {
    val ocrSettings = OcrConfig.builder()
        .withBackend("tesseract")
        .withLanguage("eng")
        .build()
    // The force-OCR flag is set on the extraction config, not on the OCR config.
    val extractionConfig = ExtractionConfig.builder()
        .withOcr(Optional.of(ocrSettings))
        .withForceOcr(true)
        .build()

    // The second argument is passed as null (presumably an optional MIME type hint; confirm against the API).
    val pdfPath = Paths.get("document.pdf")
    val extraction = Kreuzberg.extractFileSync(pdfPath, null, extractionConfig)

    println(extraction.content())
}
```

View File

@@ -0,0 +1,19 @@
```kotlin title="Kotlin"
import dev.kreuzberg.*
import java.nio.file.Paths
import java.util.Optional
/**
 * Extracts `multilingual.pdf` with the "tesseract" OCR backend and the language
 * string "eng+deu", then prints the extracted content.
 */
fun main() {
    val ocrSettings = OcrConfig.builder()
        .withBackend("tesseract")
        .withLanguage("eng+deu")
        .build()
    val extractionConfig = ExtractionConfig.builder()
        .withOcr(Optional.of(ocrSettings))
        .build()

    // The second argument is passed as null (presumably an optional MIME type hint; confirm against the API).
    val pdfPath = Paths.get("multilingual.pdf")
    val extraction = Kreuzberg.extractFileSync(pdfPath, null, extractionConfig)

    println(extraction.content())
}
```

View File

@@ -0,0 +1,19 @@
```kotlin title="Kotlin"
import dev.kreuzberg.*
import java.nio.file.Paths
import java.util.Optional
/**
 * Extracts `document.pdf` with the OCR backend set to "paddleocr" and the language
 * set to "en", then prints the extracted content.
 */
fun main() {
    val ocrSettings = OcrConfig.builder()
        .withBackend("paddleocr")
        .withLanguage("en")
        .build()
    val extractionConfig = ExtractionConfig.builder()
        .withOcr(Optional.of(ocrSettings))
        .build()

    // The second argument is passed as null (presumably an optional MIME type hint; confirm against the API).
    val pdfPath = Paths.get("document.pdf")
    val extraction = Kreuzberg.extractFileSync(pdfPath, null, extractionConfig)

    println("Extracted text: ${extraction.content()}")
}
```