This commit is contained in:
45
docs/snippets/kotlin/ocr/cloud_ocr_backend.md
Normal file
45
docs/snippets/kotlin/ocr/cloud_ocr_backend.md
Normal file
@@ -0,0 +1,45 @@
|
||||
```kotlin title="Kotlin"
|
||||
import dev.kreuzberg.*
|
||||
import java.nio.file.Path
|
||||
|
||||
/**
 * Example [IOcrBackend] implementation registered under the name "cloud-ocr".
 *
 * The recognition step in this snippet is a stub: [callCloudApi] returns a fixed
 * string and does not contact any service.
 *
 * @property apiKey credential supplied by the caller; not read anywhere in this
 *   snippet (presumably intended for the real cloud request — confirm).
 * @property supportedLangs language codes this backend reports as supported.
 */
class CloudOcrBackend(
    private val apiKey: String,
    private val supportedLangs: List<String>,
) : IOcrBackend {

    /** Identifier reported for this backend. */
    override fun name(): String = "cloud-ocr"

    /** Version string reported for this backend. */
    override fun version(): String = "1.0.0"

    /**
     * Recognizes text in [image_bytes] using the language taken from [config] and
     * wraps it in a plain-text [ExtractionResult] with default-built metadata.
     */
    override fun process_image(image_bytes: ByteArray, config: OcrConfig): ExtractionResult {
        val recognized = callCloudApi(image_bytes, config.language())
        val resultBuilder = ExtractionResult.builder()
            .withContent(recognized)
            .withMimeType("text/plain")
        return resultBuilder
            .withMetadata(Metadata.builder().build())
            .build()
    }

    /** Reads the whole file at [path] into memory and delegates to [process_image]. */
    override fun process_image_file(path: Path, config: OcrConfig): ExtractionResult =
        process_image(java.nio.file.Files.readAllBytes(path), config)

    /** Returns `true` when [lang] is one of the codes passed to the constructor. */
    override fun supports_language(lang: String): Boolean = lang in supportedLangs

    /** This backend identifies itself as a custom backend. */
    override fun backend_type(): OcrBackendType = OcrBackendType.Custom

    /** Returns the language list passed to the constructor, unchanged. */
    override fun supported_languages(): List<String> = supportedLangs

    /** Table detection is not offered by this backend. */
    override fun supports_table_detection(): Boolean = false

    /** Whole-document processing is not offered by this backend. */
    override fun supports_document_processing(): Boolean = false

    /**
     * Always fails, matching [supports_document_processing] returning `false`.
     *
     * @throws UnsupportedOperationException on every call.
     */
    override fun process_document(_path: Path, _config: OcrConfig): ExtractionResult =
        throw UnsupportedOperationException("document processing not supported")

    /**
     * Stand-in for the remote OCR request. Both arguments are currently ignored
     * and a fixed string is returned.
     */
    private fun callCloudApi(image: ByteArray, language: String): String = "Extracted text"
}
|
||||
```
|
||||
18
docs/snippets/kotlin/ocr/image_extraction.md
Normal file
18
docs/snippets/kotlin/ocr/image_extraction.md
Normal file
@@ -0,0 +1,18 @@
|
||||
```kotlin title="Kotlin"
|
||||
import dev.kreuzberg.*
|
||||
import java.nio.file.Paths
|
||||
import java.util.Optional
|
||||
|
||||
fun main() {
    // Image-extraction settings with extraction switched on.
    val imageSettings = ImageExtractionConfig.builder()
        .withExtractImages(true)
        .build()

    // Top-level extraction config carrying the image settings.
    val extractionConfig = ExtractionConfig.builder()
        .withImages(Optional.of(imageSettings))
        .build()

    val sourcePath = Paths.get("document.pdf")
    val extraction = Kreuzberg.extractFileSync(sourcePath, null, extractionConfig)

    // Report 0 when the result exposes no image collection.
    val imageCount = extraction.images()?.size ?: 0
    println("Extracted images: $imageCount")
}
|
||||
```
|
||||
20
docs/snippets/kotlin/ocr/image_preprocessing.md
Normal file
20
docs/snippets/kotlin/ocr/image_preprocessing.md
Normal file
@@ -0,0 +1,20 @@
|
||||
```kotlin title="Kotlin"
|
||||
import dev.kreuzberg.*
|
||||
import java.nio.file.Paths
|
||||
import java.util.Optional
|
||||
|
||||
fun main() {
    // Image-extraction settings: extraction on, target DPI 300, max dimension 4096.
    val imageSettings = ImageExtractionConfig.builder()
        .withExtractImages(true)
        .withTargetDpi(300)
        .withMaxImageDimension(4096)
        .build()

    // Top-level extraction config carrying the image settings.
    val extractionConfig = ExtractionConfig.builder()
        .withImages(Optional.of(imageSettings))
        .build()

    val sourcePath = Paths.get("document.pdf")
    val extraction = Kreuzberg.extractFileSync(sourcePath, null, extractionConfig)

    // Report 0 when the result exposes no image collection.
    val imageCount = extraction.images()?.size ?: 0
    println("Extracted images: $imageCount")
}
|
||||
```
|
||||
19
docs/snippets/kotlin/ocr/ocr_easyocr.md
Normal file
19
docs/snippets/kotlin/ocr/ocr_easyocr.md
Normal file
@@ -0,0 +1,19 @@
|
||||
```kotlin title="Kotlin"
|
||||
import dev.kreuzberg.*
|
||||
import java.nio.file.Paths
|
||||
import java.util.Optional
|
||||
|
||||
fun main() {
    // OCR settings selecting the "easyocr" backend with language "en".
    val ocrSettings = OcrConfig.builder()
        .withBackend("easyocr")
        .withLanguage("en")
        .build()

    // Top-level extraction config carrying the OCR settings.
    val extractionConfig = ExtractionConfig.builder()
        .withOcr(Optional.of(ocrSettings))
        .build()

    val sourcePath = Paths.get("document.pdf")
    val extraction = Kreuzberg.extractFileSync(sourcePath, null, extractionConfig)

    val extractedText = extraction.content()
    println("Extracted text: $extractedText")
}
|
||||
```
|
||||
31
docs/snippets/kotlin/ocr/ocr_elements.md
Normal file
31
docs/snippets/kotlin/ocr/ocr_elements.md
Normal file
@@ -0,0 +1,31 @@
|
||||
```kotlin title="Kotlin"
|
||||
import dev.kreuzberg.*
|
||||
import java.nio.file.Paths
|
||||
import java.util.Optional
|
||||
|
||||
fun main() {
    // Ask the OCR step to include per-element output.
    val elementSettings = OcrElementConfig.builder()
        .withIncludeElements(true)
        .build()

    // OCR settings selecting the "paddleocr" backend with language "en".
    val ocrSettings = OcrConfig.builder()
        .withBackend("paddleocr")
        .withLanguage("en")
        .withElementConfig(Optional.of(elementSettings))
        .build()

    // Top-level extraction config carrying the OCR settings.
    val extractionConfig = ExtractionConfig.builder()
        .withOcr(Optional.of(ocrSettings))
        .build()

    val sourcePath = Paths.get("scanned.pdf")
    val extraction = Kreuzberg.extractFileSync(sourcePath, null, extractionConfig)

    // Print each OCR element, if any were returned, followed by a blank line.
    extraction.ocrElements()?.forEach { ocrElement ->
        println("Text: ${ocrElement.text()}")
        println("Confidence: ${ocrElement.confidence().recognition()}")
        println("Geometry: ${ocrElement.geometry()}")

        // Rotation is optional; print it only when present.
        val rotation = ocrElement.rotation()
        if (rotation != null) {
            println("Rotation: $rotation")
        }
        println()
    }
}
|
||||
```
|
||||
19
docs/snippets/kotlin/ocr/ocr_extraction.md
Normal file
19
docs/snippets/kotlin/ocr/ocr_extraction.md
Normal file
@@ -0,0 +1,19 @@
|
||||
```kotlin title="Kotlin"
|
||||
import dev.kreuzberg.*
|
||||
import java.nio.file.Paths
|
||||
import java.util.Optional
|
||||
|
||||
fun main() {
    // OCR settings selecting the "tesseract" backend with language "eng".
    val ocrSettings = OcrConfig.builder()
        .withBackend("tesseract")
        .withLanguage("eng")
        .build()

    // Top-level extraction config carrying the OCR settings.
    val extractionConfig = ExtractionConfig.builder()
        .withOcr(Optional.of(ocrSettings))
        .build()

    val sourcePath = Paths.get("scanned.pdf")
    val extraction = Kreuzberg.extractFileSync(sourcePath, null, extractionConfig)

    val extractedText = extraction.content()
    println(extractedText)
}
|
||||
```
|
||||
20
docs/snippets/kotlin/ocr/ocr_force_all_pages.md
Normal file
20
docs/snippets/kotlin/ocr/ocr_force_all_pages.md
Normal file
@@ -0,0 +1,20 @@
|
||||
```kotlin title="Kotlin"
|
||||
import dev.kreuzberg.*
|
||||
import java.nio.file.Paths
|
||||
import java.util.Optional
|
||||
|
||||
fun main() {
    // OCR settings selecting the "tesseract" backend with language "eng".
    val ocrSettings = OcrConfig.builder()
        .withBackend("tesseract")
        .withLanguage("eng")
        .build()

    // Extraction config with the force-OCR flag enabled.
    // NOTE(review): presumably this runs OCR even on pages that already have text — confirm.
    val extractionConfig = ExtractionConfig.builder()
        .withOcr(Optional.of(ocrSettings))
        .withForceOcr(true)
        .build()

    val sourcePath = Paths.get("document.pdf")
    val extraction = Kreuzberg.extractFileSync(sourcePath, null, extractionConfig)

    val extractedText = extraction.content()
    println(extractedText)
}
|
||||
```
|
||||
19
docs/snippets/kotlin/ocr/ocr_multi_language.md
Normal file
19
docs/snippets/kotlin/ocr/ocr_multi_language.md
Normal file
@@ -0,0 +1,19 @@
|
||||
```kotlin title="Kotlin"
|
||||
import dev.kreuzberg.*
|
||||
import java.nio.file.Paths
|
||||
import java.util.Optional
|
||||
|
||||
fun main() {
    // OCR settings selecting the "tesseract" backend with language string "eng+deu".
    // NOTE(review): presumably "+" joins several language codes — confirm.
    val ocrSettings = OcrConfig.builder()
        .withBackend("tesseract")
        .withLanguage("eng+deu")
        .build()

    // Top-level extraction config carrying the OCR settings.
    val extractionConfig = ExtractionConfig.builder()
        .withOcr(Optional.of(ocrSettings))
        .build()

    val sourcePath = Paths.get("multilingual.pdf")
    val extraction = Kreuzberg.extractFileSync(sourcePath, null, extractionConfig)

    val extractedText = extraction.content()
    println(extractedText)
}
|
||||
```
|
||||
19
docs/snippets/kotlin/ocr/ocr_paddleocr.md
Normal file
19
docs/snippets/kotlin/ocr/ocr_paddleocr.md
Normal file
@@ -0,0 +1,19 @@
|
||||
```kotlin title="Kotlin"
|
||||
import dev.kreuzberg.*
|
||||
import java.nio.file.Paths
|
||||
import java.util.Optional
|
||||
|
||||
fun main() {
    // OCR settings selecting the "paddleocr" backend with language "en".
    val ocrSettings = OcrConfig.builder()
        .withBackend("paddleocr")
        .withLanguage("en")
        .build()

    // Top-level extraction config carrying the OCR settings.
    val extractionConfig = ExtractionConfig.builder()
        .withOcr(Optional.of(ocrSettings))
        .build()

    val sourcePath = Paths.get("document.pdf")
    val extraction = Kreuzberg.extractFileSync(sourcePath, null, extractionConfig)

    val extractedText = extraction.content()
    println("Extracted text: $extractedText")
}
|
||||
```
|
||||
Reference in New Issue
Block a user