Nomad changes
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s

This commit is contained in:
Henrik Jess Nielsen
2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions

View File

@@ -0,0 +1,27 @@
```python title="Python"
import asyncio
from kreuzberg import extract_file, ExtractionConfig, StructuredExtractionConfig, LlmConfig
async def main() -> None:
    """Run structured extraction on ``paper.pdf`` and print the result.

    Builds a JSON Schema requiring ``title``, ``authors`` and ``date``,
    wires it into a ``StructuredExtractionConfig`` together with the
    ``openai/gpt-4o-mini`` LLM, extracts the file, and prints
    ``result.structured_output``.
    """
    # JSON Schema handed to the structured-extraction step: all three
    # fields are required and no extra properties are allowed.
    paper_schema = {
        "type": "object",
        "properties": {
            "title": {"type": "string"},
            "authors": {"type": "array", "items": {"type": "string"}},
            "date": {"type": "string"},
        },
        "required": ["title", "authors", "date"],
        "additionalProperties": False,
    }

    llm_settings = LlmConfig(model="openai/gpt-4o-mini")
    structured_settings = StructuredExtractionConfig(
        schema=paper_schema,
        llm=llm_settings,
        strict=True,
    )
    config = ExtractionConfig(structured_extraction=structured_settings)

    result = await extract_file("paper.pdf", config=config)

    # Example output shape:
    # {"title": "...", "authors": ["..."], "date": "..."}
    print(result.structured_output)


asyncio.run(main())
```

View File

@@ -0,0 +1,16 @@
```python title="Python"
import asyncio
from kreuzberg import embed, EmbeddingConfig, EmbeddingModelType, LlmConfig
async def main() -> None:
    """Embed one sentence with an LLM-backed model and print its length.

    Configures ``EmbeddingConfig`` with a model built by
    ``EmbeddingModelType.llm`` from ``openai/text-embedding-3-small``
    and ``normalize=True``, embeds ``"Hello world"``, and prints the
    length of the first returned embedding.
    """
    llm_settings = LlmConfig(model="openai/text-embedding-3-small")
    embedding_model = EmbeddingModelType.llm(llm_settings)
    config = EmbeddingConfig(model=embedding_model, normalize=True)

    texts = ["Hello world"]
    embeddings = await embed(texts, config=config)

    first_vector = embeddings[0]
    print(len(first_vector))  # 1536


asyncio.run(main())
```

View File

@@ -0,0 +1,17 @@
```python title="Python"
import asyncio
from kreuzberg import extract_file, ExtractionConfig, OcrConfig, LlmConfig
async def main() -> None:
    """Extract ``scan.pdf`` with forced OCR via the ``vlm`` backend.

    Sets ``force_ocr=True`` and an ``OcrConfig`` whose backend is
    ``"vlm"`` with ``openai/gpt-4o-mini`` as ``vlm_config``, then prints
    the extracted ``result.content``.
    """
    vlm_settings = LlmConfig(model="openai/gpt-4o-mini")
    ocr_settings = OcrConfig(backend="vlm", vlm_config=vlm_settings)
    config = ExtractionConfig(force_ocr=True, ocr=ocr_settings)

    result = await extract_file("scan.pdf", config=config)
    print(result.content)


asyncio.run(main())
```