Files
fil/docs/snippets/rust/config/element_based_output.md
Henrik Jess Nielsen b4c07d3693
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s
Nomad changes
2026-06-01 23:40:55 +02:00

1.5 KiB

use kreuzberg::{extract_file_sync, ExtractionConfig};
use kreuzberg::types::OutputFormat as ResultFormat;

/// Extracts `document.pdf` with element-based output and prints each element.
///
/// For every element this prints its type, a short text preview, and — when
/// present — the page number and bounding-box coordinates. Afterwards it
/// lists all `Title` elements together with their `level` metadata entry.
///
/// # Errors
///
/// Propagates any error returned by `extract_file_sync`.
fn main() -> kreuzberg::Result<()> {
    // Upper bound, in characters, on the text preview printed per element.
    const PREVIEW_CHARS: usize = 100;

    // Configure element-based output (result_format controls Unified vs ElementBased)
    let config = ExtractionConfig {
        result_format: ResultFormat::ElementBased,
        ..Default::default()
    };

    // Extract document
    let result = extract_file_sync("document.pdf", None, &config)?;

    // Access elements
    if let Some(elements) = result.elements {
        for element in &elements {
            println!("Type: {:?}", element.element_type);

            // Cut the preview on a character boundary. Slicing at a fixed byte
            // offset (`&text[..100]`) panics when that offset lands inside a
            // multi-byte UTF-8 character, which is common in extracted text.
            let preview_end = element
                .text
                .char_indices()
                .nth(PREVIEW_CHARS)
                .map_or(element.text.len(), |(idx, _)| idx);
            println!("Text: {}", &element.text[..preview_end]);

            if let Some(page) = element.metadata.page_number {
                println!("Page: {}", page);
            }

            if let Some(coords) = &element.metadata.coordinates {
                println!("Coords: ({}, {}) - ({}, {})",
                    coords.x0, coords.y0, coords.x1, coords.y1);
            }

            println!("---");
        }

        // Filter by element type
        let titles: Vec<_> = elements.iter()
            .filter(|e| matches!(e.element_type, kreuzberg::types::ElementType::Title))
            .collect();

        for title in titles {
            // Fall back to "unknown" when the element carries no "level" entry.
            let level = title.metadata.additional.get("level")
                .map(|v| v.as_ref())
                .unwrap_or("unknown");
            println!("[{}] {}", level, title.text);
        }
    }

    Ok(())
}