Files
fil/docs/snippets/python/config/document_structure_config.md
Henrik Jess Nielsen b4c07d3693
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s
Nomad changes
2026-06-01 23:40:55 +02:00

499 B

from kreuzberg import extract_file_sync, ExtractionConfig

# Request the structured document tree alongside the extraction result.
config = ExtractionConfig(include_document_structure=True)
result = extract_file_sync("document.pdf", config=config)

# Walk the tree only when the result carries a (truthy) document;
# otherwise there is nothing to iterate over.
document = result.document
nodes = document["nodes"] if document else ()

for entry in nodes:
    content = entry["content"]
    label = content["node_type"]
    # Nodes without a "text" key fall back to an empty string; the preview
    # is capped at the first 80 characters.
    preview = content.get("text", "")[:80]
    print(f"[{label}] {preview}")