This commit is contained in:
48
docs/snippets/rust/config/element_based_output.md
Normal file
48
docs/snippets/rust/config/element_based_output.md
Normal file
@@ -0,0 +1,48 @@
|
||||
```rust title="Element-Based Output (Rust)"
|
||||
use kreuzberg::{extract_file_sync, ExtractionConfig};
|
||||
use kreuzberg::types::OutputFormat as ResultFormat;
|
||||
|
||||
fn main() -> kreuzberg::Result<()> {
|
||||
// Configure element-based output (result_format controls Unified vs ElementBased)
|
||||
let config = ExtractionConfig {
|
||||
result_format: ResultFormat::ElementBased,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
// Extract document
|
||||
let result = extract_file_sync("document.pdf", None, &config)?;
|
||||
|
||||
// Access elements
|
||||
if let Some(elements) = result.elements {
|
||||
for element in &elements {
|
||||
println!("Type: {:?}", element.element_type);
|
||||
println!("Text: {}", &element.text[..100.min(element.text.len())]);
|
||||
|
||||
if let Some(page) = element.metadata.page_number {
|
||||
println!("Page: {}", page);
|
||||
}
|
||||
|
||||
if let Some(coords) = &element.metadata.coordinates {
|
||||
println!("Coords: ({}, {}) - ({}, {})",
|
||||
coords.x0, coords.y0, coords.x1, coords.y1);
|
||||
}
|
||||
|
||||
println!("---");
|
||||
}
|
||||
|
||||
// Filter by element type
|
||||
let titles: Vec<_> = elements.iter()
|
||||
.filter(|e| matches!(e.element_type, kreuzberg::types::ElementType::Title))
|
||||
.collect();
|
||||
|
||||
for title in titles {
|
||||
let level = title.metadata.additional.get("level")
|
||||
.map(|v| v.as_ref())
|
||||
.unwrap_or("unknown");
|
||||
println!("[{}] {}", level, title.text);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
```
|
||||
Reference in New Issue
Block a user