Files
fil/docs/snippets/python/config/element_based_output.md

30 lines
914 B
Markdown
Raw Normal View History

2026-06-01 23:40:55 +02:00
```python title="Element-Based Output (Python)"
from kreuzberg import extract_file_sync, ExtractionConfig
# Configure element-based output
config = ExtractionConfig(result_format="element_based")
# Extract document
result = extract_file_sync("document.pdf", config=config)
# Access elements
for element in result.elements:
print(f"Type: {element.element_type}")
print(f"Text: {element.text[:100]}")
if element.metadata.page_number:
print(f"Page: {element.metadata.page_number}")
if element.metadata.coordinates:
coords = element.metadata.coordinates
print(f"Coords: ({coords.left}, {coords.top}) - ({coords.right}, {coords.bottom})")
print("---")
# Filter by element type
titles = [e for e in result.elements if e.element_type == "title"]
for title in titles:
level = title.metadata.additional.get("level", "unknown")
print(f"[{level}] {title.text}")
```