Files
fil/docs/snippets/python/metadata/vector_database_integration.md

36 lines
967 B
Markdown
Raw Permalink Normal View History

2026-06-01 23:40:55 +02:00
```python title="Python"
from kreuzberg import extract_file_sync, ExtractionConfig, ChunkingConfig, EmbeddingConfig
# Assemble the extraction settings from the inside out: the chunking
# configuration carries its own embedding configuration.
embedding_settings = EmbeddingConfig(
    normalize=True,
    batch_size=32,
    preset="balanced",
)
chunk_settings = ChunkingConfig(
    max_characters=512,
    overlap=50,
    embedding=embedding_settings,
)
config = ExtractionConfig(chunking=chunk_settings)

result = extract_file_sync("document.pdf", config=config)

# One record per chunk that has an embedding. Chunks whose embedding is None
# are skipped, but ids and chunk_index keep the chunk's position in the full
# chunk list, so numbering may have gaps.
records: list[dict] = [
    {
        "id": f"document_chunk_{position}",
        "content": piece.content,
        "embedding": piece.embedding,
        "metadata": {
            "document_id": "document.pdf",
            "chunk_index": position,
            "content_length": len(piece.content),
        },
    }
    # `or []` covers a missing/empty chunk collection without a separate guard.
    for position, piece in enumerate(result.chunks or [])
    if piece.embedding is not None
]

print(f"Prepared {len(records)} vector records")
```