This commit is contained in:
16
docs/snippets/kotlin/api/batch_extract_bytes_sync.md
Normal file
16
docs/snippets/kotlin/api/batch_extract_bytes_sync.md
Normal file
@@ -0,0 +1,16 @@
|
||||
```kotlin title="Kotlin"
|
||||
import dev.kreuzberg.*
|
||||
|
||||
/**
 * Extracts two in-memory documents with one synchronous batch call and
 * prints how many characters of content were extracted from each item.
 */
fun main() {
    val defaults = ExtractionConfig.builder().build()

    // Each item pairs raw bytes with its MIME type; the third argument is left null.
    val inputs = listOf(
        BatchBytesItem("Hello, world!".toByteArray(), "text/plain", null),
        BatchBytesItem("# Heading\n\nParagraph text.".toByteArray(), "text/markdown", null),
    )

    val extracted = Kreuzberg.batchExtractBytesSync(inputs, defaults)

    // Results are reported by their position in the returned collection.
    for ((position, outcome) in extracted.withIndex()) {
        println("Item $position: ${outcome.content().length} chars")
    }
}
|
||||
```
|
||||
18
docs/snippets/kotlin/api/batch_extract_files_sync.md
Normal file
18
docs/snippets/kotlin/api/batch_extract_files_sync.md
Normal file
@@ -0,0 +1,18 @@
|
||||
```kotlin title="Kotlin"
|
||||
import dev.kreuzberg.*
|
||||
import java.nio.file.Paths
|
||||
|
||||
/**
 * Extracts three files with one synchronous batch call and prints how many
 * characters of content were extracted from each file.
 */
fun main() {
    val defaults = ExtractionConfig.builder().build()

    // One batch item per path; the second constructor argument is left null.
    val inputs = listOf(
        BatchFileItem(Paths.get("doc1.pdf"), null),
        BatchFileItem(Paths.get("doc2.docx"), null),
        BatchFileItem(Paths.get("report.pdf"), null),
    )

    val extracted = Kreuzberg.batchExtractFilesSync(inputs, defaults)

    // Results are reported by their position in the returned collection.
    for ((position, outcome) in extracted.withIndex()) {
        println("File $position: ${outcome.content().length} chars")
    }
}
|
||||
```
|
||||
31
docs/snippets/kotlin/api/client_chunk_text.md
Normal file
31
docs/snippets/kotlin/api/client_chunk_text.md
Normal file
@@ -0,0 +1,31 @@
|
||||
```kotlin title="Kotlin"
|
||||
import java.net.URI
|
||||
import java.net.http.HttpClient
|
||||
import java.net.http.HttpRequest
|
||||
import java.net.http.HttpResponse
|
||||
|
||||
/**
 * Sends a JSON chunking request to the `/chunk` endpoint on localhost:8000
 * and prints the HTTP status code followed by the raw response body.
 */
fun main() {
    val http = HttpClient.newHttpClient()

    // Request payload: the text to split plus the chunker settings.
    val payload = """
        {
          "text": "Your long text here...",
          "chunker_type": "text",
          "config": {
            "max_characters": 1000,
            "overlap": 50,
            "trim": true
          }
        }
    """.trimIndent()

    val endpoint = URI.create("http://localhost:8000/chunk")
    val chunkRequest = HttpRequest.newBuilder()
        .uri(endpoint)
        .header("Content-Type", "application/json")
        .POST(HttpRequest.BodyPublishers.ofString(payload))
        .build()

    // Blocking send; the response body is read as a String.
    val reply = http.send(chunkRequest, HttpResponse.BodyHandlers.ofString())
    println("Status: ${reply.statusCode()}")
    println(reply.body())
}
|
||||
```
|
||||
38
docs/snippets/kotlin/api/client_extract_single_file.md
Normal file
38
docs/snippets/kotlin/api/client_extract_single_file.md
Normal file
@@ -0,0 +1,38 @@
|
||||
```kotlin title="Kotlin"
|
||||
import java.net.URI
|
||||
import java.net.http.HttpClient
|
||||
import java.net.http.HttpRequest
|
||||
import java.net.http.HttpResponse
|
||||
import java.nio.file.Files
|
||||
import java.nio.file.Paths
|
||||
|
||||
/**
 * Uploads `document.pdf` to the `/extract` endpoint on localhost:8000 as a
 * hand-built `multipart/form-data` request and prints the response body.
 */
fun main() {
    val http = HttpClient.newHttpClient()
    val document = Paths.get("document.pdf")
    val fileBytes = Files.readAllBytes(document)
    val fileName = document.fileName.toString()

    // The boundary is made unique per run by appending the current time in milliseconds.
    val boundary = "----KreuzbergBoundary${System.currentTimeMillis()}"
    val crlf = "\r\n"

    // Part headers for the single "file" field, terminated by an empty line.
    val preamble = buildString {
        append("--$boundary$crlf")
        append("Content-Disposition: form-data; name=\"file\"; filename=\"$fileName\"$crlf")
        append("Content-Type: application/pdf$crlf$crlf")
    }.toByteArray()

    // Closing delimiter that ends the multipart body.
    val epilogue = "$crlf--$boundary--$crlf".toByteArray()

    // Full request body: part headers, file content, closing delimiter.
    val body = preamble + fileBytes + epilogue

    val uploadRequest = HttpRequest.newBuilder()
        .uri(URI.create("http://localhost:8000/extract"))
        .header("Content-Type", "multipart/form-data; boundary=$boundary")
        .POST(HttpRequest.BodyPublishers.ofByteArray(body))
        .build()

    val reply = http.send(uploadRequest, HttpResponse.BodyHandlers.ofString())
    println(reply.body())
}
|
||||
```
|
||||
45
docs/snippets/kotlin/api/combining_all_features.md
Normal file
45
docs/snippets/kotlin/api/combining_all_features.md
Normal file
@@ -0,0 +1,45 @@
|
||||
```kotlin title="Kotlin"
|
||||
import dev.kreuzberg.*
|
||||
import java.nio.file.Paths
|
||||
import java.util.Optional
|
||||
|
||||
/**
 * Builds an extraction configuration that combines OCR, chunking, and image
 * extraction settings, runs it against `report.pdf`, and prints a summary of
 * the result: a content preview plus chunk, table, language, and method info.
 */
fun main() {
    val ocrSettings = OcrConfig.builder()
        .withBackend("tesseract")
        .withLanguage("eng")
        .build()

    val chunkSettings = ChunkingConfig.builder()
        .withMaxCharacters(800L)
        .withOverlap(100L)
        .withChunkerType(ChunkerType.MARKDOWN)
        .withPrependHeadingContext(true)
        .build()

    val imageSettings = ImageExtractionConfig.builder()
        .withExtractImages(true)
        .build()

    // Sub-configurations are passed to the top-level builder wrapped in Optional.
    val config = ExtractionConfig.builder()
        .withOcr(Optional.of(ocrSettings))
        .withForceOcr(false)
        .withChunking(Optional.of(chunkSettings))
        .withOutputFormat(OutputFormat.MARKDOWN)
        .withIncludeDocumentStructure(true)
        .withImages(Optional.of(imageSettings))
        .withUseCache(true)
        .withEnableQualityProcessing(true)
        .build()

    val extraction = Kreuzberg.extractFileSync(Paths.get("report.pdf"), null, config)

    // Print the total length, then only the first 200 characters as a preview.
    val text = extraction.content()
    println("Content (${text.length} chars):")
    println(text.take(200))

    // The remaining accessors are treated as nullable; absent values are skipped
    // (tables fall back to a count of 0).
    val chunks = extraction.chunks()
    if (chunks != null) {
        println("\nChunks: ${chunks.size}")
    }
    val tableCount = extraction.tables()?.size ?: 0
    println("Tables: $tableCount")
    extraction.detectedLanguages()?.also { languages -> println("Languages: $languages") }
    extraction.extractionMethod()?.also { method -> println("Extraction method: $method") }
}
|
||||
```
|
||||
17
docs/snippets/kotlin/api/error_handling.md
Normal file
17
docs/snippets/kotlin/api/error_handling.md
Normal file
@@ -0,0 +1,17 @@
|
||||
```kotlin title="Kotlin"
|
||||
import dev.kreuzberg.*
|
||||
import java.nio.file.Paths
|
||||
|
||||
/**
 * Extracts `document.pdf` with a default configuration and prints its content.
 * Library failures are reported to stderr with their message and error code;
 * any other exception is reported to stderr as unexpected.
 */
fun main() {
    val defaults = ExtractionConfig.builder().build()
    val document = Paths.get("document.pdf")

    try {
        val extraction = Kreuzberg.extractFileSync(document, null, defaults)
        println(extraction.content())
    } catch (failure: KreuzbergRsException) {
        // The library exception is caught first so its error code can be shown.
        System.err.println("Extraction failed: ${failure.message}")
        System.err.println("Error code: ${failure.code}")
    } catch (unexpected: Exception) {
        System.err.println("Unexpected error: ${unexpected.message}")
    }
}
|
||||
```
|
||||
28
docs/snippets/kotlin/api/error_handling_extract.md
Normal file
28
docs/snippets/kotlin/api/error_handling_extract.md
Normal file
@@ -0,0 +1,28 @@
|
||||
```kotlin title="Kotlin"
|
||||
import dev.kreuzberg.*
|
||||
import java.nio.file.Files
|
||||
import java.nio.file.Paths
|
||||
|
||||
/**
 * Extracts the textual content of an in-memory document using a default
 * extraction configuration.
 *
 * @param bytes raw document bytes
 * @param mimeType MIME type passed along with the bytes to the extractor
 * @return the extracted content
 */
fun extractText(bytes: ByteArray, mimeType: String): String =
    Kreuzberg.extractBytesSync(bytes, mimeType, ExtractionConfig.builder().build()).content()
|
||||
|
||||
/**
 * Reads `document.pdf` from disk, extracts its text via [extractText], and
 * prints the number of extracted characters.
 *
 * A failure to read the file is reported to stderr and ends the program;
 * extraction is not attempted on a placeholder empty buffer, because that
 * would hide the real cause behind an unrelated extraction error.
 * Library failures are reported with their error code; any other exception
 * is reported as unexpected.
 */
fun main() {
    val source = Paths.get("document.pdf")

    val bytes = try {
        Files.readAllBytes(source)
    } catch (e: Exception) {
        // Surface the read failure instead of silently continuing with zero bytes.
        System.err.println("Failed to read $source: ${e.message}")
        return
    }

    try {
        val text = extractText(bytes, "application/pdf")
        println("Extracted ${text.length} chars")
    } catch (e: KreuzbergRsException) {
        System.err.println("Extraction error (code=${e.code}): ${e.message}")
    } catch (e: Exception) {
        System.err.println("Unexpected error: ${e.message}")
    }
}
|
||||
```
|
||||
16
docs/snippets/kotlin/api/extract_bytes_async.md
Normal file
16
docs/snippets/kotlin/api/extract_bytes_async.md
Normal file
@@ -0,0 +1,16 @@
|
||||
```kotlin title="Kotlin"
|
||||
import dev.kreuzberg.*
|
||||
import dev.kreuzberg.kt.Kreuzberg
|
||||
import kotlinx.coroutines.runBlocking
|
||||
import java.nio.file.Files
|
||||
import java.nio.file.Paths
|
||||
|
||||
/**
 * Reads `document.pdf` into memory, extracts it through the coroutine-based
 * `Kreuzberg.extractBytes` call inside `runBlocking`, and prints the content
 * and the number of tables found (0 when the table list is absent).
 */
fun main() {
    runBlocking {
        val pdfBytes = Files.readAllBytes(Paths.get("document.pdf"))
        val defaults = ExtractionConfig.builder().build()
        val extraction = Kreuzberg.extractBytes(pdfBytes, "application/pdf", defaults)

        println(extraction.content())
        val tableCount = extraction.tables()?.size ?: 0
        println("Tables: $tableCount")
    }
}
|
||||
```
|
||||
14
docs/snippets/kotlin/api/extract_bytes_sync.md
Normal file
14
docs/snippets/kotlin/api/extract_bytes_sync.md
Normal file
@@ -0,0 +1,14 @@
|
||||
```kotlin title="Kotlin"
|
||||
import dev.kreuzberg.*
|
||||
import java.nio.file.Files
|
||||
import java.nio.file.Paths
|
||||
|
||||
/**
 * Reads `document.pdf` into memory, extracts it synchronously as
 * `application/pdf`, and prints the content and the number of tables found
 * (0 when the table list is absent).
 */
fun main() {
    val pdfBytes = Files.readAllBytes(Paths.get("document.pdf"))
    val defaults = ExtractionConfig.builder().build()
    val extraction = Kreuzberg.extractBytesSync(pdfBytes, "application/pdf", defaults)

    println(extraction.content())
    val tableCount = extraction.tables()?.size ?: 0
    println("Tables: $tableCount")
}
|
||||
```
|
||||
15
docs/snippets/kotlin/api/extract_file_async.md
Normal file
15
docs/snippets/kotlin/api/extract_file_async.md
Normal file
@@ -0,0 +1,15 @@
|
||||
```kotlin title="Kotlin"
|
||||
import dev.kreuzberg.*
|
||||
import dev.kreuzberg.kt.Kreuzberg
|
||||
import kotlinx.coroutines.runBlocking
|
||||
import java.nio.file.Paths
|
||||
|
||||
/**
 * Extracts `document.pdf` through the coroutine-based `Kreuzberg.extractFile`
 * call inside `runBlocking`, then prints the content, the reported MIME type,
 * and the number of tables found (0 when the table list is absent).
 */
fun main() {
    runBlocking {
        val defaults = ExtractionConfig.builder().build()
        val document = Paths.get("document.pdf")
        // The MIME type argument is left null here.
        val extraction = Kreuzberg.extractFile(document, null, defaults)

        println(extraction.content())
        println("MIME type: ${extraction.mimeType()}")
        val tableCount = extraction.tables()?.size ?: 0
        println("Tables: $tableCount")
    }
}
|
||||
```
|
||||
13
docs/snippets/kotlin/api/extract_file_sync.md
Normal file
13
docs/snippets/kotlin/api/extract_file_sync.md
Normal file
@@ -0,0 +1,13 @@
|
||||
```kotlin title="Kotlin"
|
||||
import dev.kreuzberg.*
|
||||
import java.nio.file.Paths
|
||||
|
||||
/**
 * Extracts `document.pdf` synchronously with a default configuration, then
 * prints the content, the reported MIME type, and the number of tables found
 * (0 when the table list is absent).
 */
fun main() {
    val defaults = ExtractionConfig.builder().build()
    val document = Paths.get("document.pdf")
    // The MIME type argument is left null here.
    val extraction = Kreuzberg.extractFileSync(document, null, defaults)

    println(extraction.content())
    println("MIME type: ${extraction.mimeType()}")
    val tableCount = extraction.tables()?.size ?: 0
    println("Tables: $tableCount")
}
|
||||
```
|
||||
Reference in New Issue
Block a user