This commit is contained in:
15
docs/snippets/python/config/document_structure_config.md
Normal file
15
docs/snippets/python/config/document_structure_config.md
Normal file
@@ -0,0 +1,15 @@
|
||||
```python title="Document Structure Config (Python)"
|
||||
from kreuzberg import extract_file_sync, ExtractionConfig

# Enable document structure output
config = ExtractionConfig(include_document_structure=True)

result = extract_file_sync("document.pdf", config=config)

# Access the document tree (skipped when result.document is empty or None)
if result.document:
    for node in result.document["nodes"]:
        node_type = node["content"]["node_type"]
        # Fall back to an empty string when the node content has no "text" key.
        text = node["content"].get("text", "")
        # Show at most the first 80 characters of each node's text.
        print(f"[{node_type}] {text[:80]}")
|
||||
```
|
||||
Reference in New Issue
Block a user