This commit is contained in:
32
docs/snippets/kotlin/config/element_based_output.md
Normal file
32
docs/snippets/kotlin/config/element_based_output.md
Normal file
@@ -0,0 +1,32 @@
|
||||
```kotlin title="Kotlin"
|
||||
import dev.kreuzberg.*
|
||||
import java.nio.file.Paths
|
||||
import java.util.Optional
|
||||
|
||||
fun main() {
|
||||
// Configure element-based output (resultFormat controls Unified vs ElementBased)
|
||||
val config = ExtractionConfig.builder()
|
||||
.withResultFormat(ResultFormat.ElementBased)
|
||||
.build()
|
||||
|
||||
val result = Kreuzberg.extractFileSync(Paths.get("document.pdf"), null, config)
|
||||
|
||||
val elements = result.elements().orEmpty()
|
||||
for (element in elements) {
|
||||
println("Type: ${element.elementType()}")
|
||||
val text = element.text()
|
||||
println("Text: ${text.take(100)}")
|
||||
|
||||
element.metadata().pageNumber()?.let { page ->
|
||||
println("Page: $page")
|
||||
}
|
||||
println("---")
|
||||
}
|
||||
|
||||
// Filter by element type
|
||||
val titles = elements.filter { it.elementType() == ElementType.Title }
|
||||
for (title in titles) {
|
||||
println("Title: ${title.text()}")
|
||||
}
|
||||
}
|
||||
```
|
||||
Reference in New Issue
Block a user