Files
fil/docs/snippets/python/llm/structured_extraction.md
Henrik Jess Nielsen b4c07d3693
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s
Nomad changes
2026-06-01 23:40:55 +02:00

926 B

import asyncio
from kreuzberg import extract_file, ExtractionConfig, StructuredExtractionConfig, LlmConfig

async def main() -> None:
    """Run structured extraction on ``paper.pdf`` and print the result.

    Builds a JSON-schema-style dict describing an object with ``title``,
    ``authors`` and ``date`` fields, hands it to kreuzberg together with an
    LLM configuration, and prints ``result.structured_output``.
    """
    # Requested output shape: all three fields are listed as required and
    # additional properties are disallowed.
    paper_schema = {
        "type": "object",
        "properties": {
            "title": {"type": "string"},
            "authors": {"type": "array", "items": {"type": "string"}},
            "date": {"type": "string"},
        },
        "required": ["title", "authors", "date"],
        "additionalProperties": False,
    }
    llm_settings = LlmConfig(model="openai/gpt-4o-mini")
    structured = StructuredExtractionConfig(
        schema=paper_schema,
        llm=llm_settings,
        strict=True,
    )
    config = ExtractionConfig(structured_extraction=structured)

    result = await extract_file("paper.pdf", config=config)
    # Example shape: {"title": "...", "authors": ["..."], "date": "..."}
    print(result.structured_output)

# Only start the event loop when executed as a script, so importing this
# module does not trigger the extraction as a side effect.
if __name__ == "__main__":
    asyncio.run(main())