Files
fil/docs/snippets/python/config/element_based_output.md
Henrik Jess Nielsen b4c07d3693
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s
Nomad changes
2026-06-01 23:40:55 +02:00

914 B

from kreuzberg import extract_file_sync, ExtractionConfig

# Build an extraction config that asks for the "element_based" result format.
config = ExtractionConfig(
    result_format="element_based",
)

# Run the synchronous extraction on the PDF using the config above.
result = extract_file_sync(
    "document.pdf",
    config=config,
)

# Walk over every extracted element and print a short summary of each one.
for item in result.elements:
    print(f"Type: {item.element_type}")
    preview = item.text[:100]  # show at most the first 100 characters
    print(f"Text: {preview}")

    meta = item.metadata

    # The page number is printed only when it is truthy.
    page = meta.page_number
    if page:
        print(f"Page: {page}")

    # Coordinates are printed only when present, as two corner points.
    box = meta.coordinates
    if box:
        first_corner = f"({box.left}, {box.top})"
        second_corner = f"({box.right}, {box.bottom})"
        print(f"Coords: {first_corner} - {second_corner}")

    # Separator between elements.
    print("---")

# Keep only the elements whose type is "title".
titles = []
for candidate in result.elements:
    if candidate.element_type == "title":
        titles.append(candidate)

# Print each title prefixed by its "level" entry from the additional
# metadata, falling back to "unknown" when that key is absent.
for heading in titles:
    extras = heading.metadata.additional
    depth = extras.get("level", "unknown")
    print(f"[{depth}] {heading.text}")