docs/snippets/python/metadata/vector_database_integration.md

```python title="Python"
from kreuzberg import extract_file_sync, ExtractionConfig, ChunkingConfig, EmbeddingConfig

# Embedding options applied to every chunk produced by the chunker.
embedding_settings = EmbeddingConfig(
    normalize=True,
    batch_size=32,
    preset="balanced",
)

# Chunking options: chunk size limit, overlap, and the embedding settings above.
chunking_settings = ChunkingConfig(
    max_characters=512,
    overlap=50,
    embedding=embedding_settings,
)

config = ExtractionConfig(chunking=chunking_settings)

result = extract_file_sync("document.pdf", config=config)

# One record per chunk that carries an embedding; chunks whose embedding is
# None are skipped. `or []` covers a result with no chunks at all.
records: list[dict] = [
    {
        "id": f"document_chunk_{position}",
        "content": piece.content,
        "embedding": piece.embedding,
        "metadata": {
            "document_id": "document.pdf",
            "chunk_index": position,
            "content_length": len(piece.content),
        },
    }
    for position, piece in enumerate(result.chunks or [])
    if piece.embedding is not None
]

print(f"Prepared {len(records)} vector records")
```
Nomad changes 2026-06-01 23:40:55 +02:00

```python title="Python"
from kreuzberg import extract_file_sync, ExtractionConfig, ChunkingConfig, EmbeddingConfig

config = ExtractionConfig(
    chunking=ChunkingConfig(
        max_characters=512,
        overlap=50,
        embedding=EmbeddingConfig(
            normalize=True,
            batch_size=32,
            preset="balanced",
        ),
    ),
)

result = extract_file_sync("document.pdf", config=config)

records: list[dict] = []
if result.chunks:
    for index, chunk in enumerate(result.chunks):
        if chunk.embedding is None:
            continue
        records.append({
            "id": f"document_chunk_{index}",
            "content": chunk.content,
            "embedding": chunk.embedding,
            "metadata": {
                "document_id": "document.pdf",
                "chunk_index": index,
                "content_length": len(chunk.content),
            },
        })

print(f"Prepared {len(records)} vector records")
```