Files
fil/docs/snippets/rust/config/element_based_output.md

49 lines
1.5 KiB
Markdown
Raw Normal View History

2026-06-01 23:40:55 +02:00
```rust title="Element-Based Output (Rust)"
use kreuzberg::{extract_file_sync, ExtractionConfig};
use kreuzberg::types::OutputFormat as ResultFormat;
/// Extracts `document.pdf` with element-based output and prints each element,
/// followed by a listing of the elements whose type is `Title`.
///
/// Returns any error reported by `extract_file_sync` via `?`.
fn main() -> kreuzberg::Result<()> {
    // Configure element-based output (result_format controls Unified vs ElementBased)
    let config = ExtractionConfig {
        result_format: ResultFormat::ElementBased,
        ..Default::default()
    };

    // Extract document
    let result = extract_file_sync("document.pdf", None, &config)?;

    // Access elements
    if let Some(elements) = result.elements {
        for element in &elements {
            println!("Type: {:?}", element.element_type);

            // Preview at most the first 100 characters. Slicing a `str` at a raw
            // byte offset panics when the offset falls inside a multi-byte UTF-8
            // character, so locate the byte index of the 101st character (if any)
            // and cut there instead.
            let preview_end = element
                .text
                .char_indices()
                .nth(100)
                .map_or(element.text.len(), |(idx, _)| idx);
            println!("Text: {}", &element.text[..preview_end]);

            if let Some(page) = element.metadata.page_number {
                println!("Page: {}", page);
            }
            if let Some(coords) = &element.metadata.coordinates {
                println!(
                    "Coords: ({}, {}) - ({}, {})",
                    coords.x0, coords.y0, coords.x1, coords.y1
                );
            }
            println!("---");
        }

        // Filter by element type
        let titles: Vec<_> = elements
            .iter()
            .filter(|e| matches!(e.element_type, kreuzberg::types::ElementType::Title))
            .collect();
        for title in titles {
            // Fall back to "unknown" when the element carries no "level" entry.
            let level = title
                .metadata
                .additional
                .get("level")
                .map(|v| v.as_ref())
                .unwrap_or("unknown");
            println!("[{}] {}", level, title.text);
        }
    }

    Ok(())
}
```