28 lines
926 B
Markdown
28 lines
926 B
Markdown
|
|
```python title="Python"
|
||
|
|
import asyncio
|
||
|
|
from kreuzberg import extract_file, ExtractionConfig, StructuredExtractionConfig, LlmConfig
|
||
|
|
|
||
|
|
async def main() -> None:
    """Run structured extraction on ``paper.pdf`` and print the result.

    Builds an extraction config whose structured-extraction step asks the
    configured LLM for an object with ``title``, ``authors`` and ``date``
    fields, awaits the extraction, and prints ``result.structured_output``.
    """
    # Schema of the object to produce; all three fields are required and
    # no extra properties are allowed.
    paper_schema = {
        "type": "object",
        "properties": {
            "title": {"type": "string"},
            "authors": {"type": "array", "items": {"type": "string"}},
            "date": {"type": "string"},
        },
        "required": ["title", "authors", "date"],
        "additionalProperties": False,
    }
    structured = StructuredExtractionConfig(
        schema=paper_schema,
        llm=LlmConfig(model="openai/gpt-4o-mini"),
        strict=True,
    )
    config = ExtractionConfig(structured_extraction=structured)

    result = await extract_file("paper.pdf", config=config)
    # Expected shape: {"title": "...", "authors": ["..."], "date": "..."}
    print(result.structured_output)
|
||
|
|
|
||
|
|
# Script entry point: drive the async main() coroutine to completion.
asyncio.run(main())
|
||
|
|
```
|