Files
fil/docs/snippets/python/advanced/chunking_rag.md
Henrik Jess Nielsen b4c07d3693
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s
Nomad changes
2026-06-01 23:40:55 +02:00

1.0 KiB

import asyncio
from kreuzberg import (
    ExtractionConfig,
    ChunkingConfig,
    EmbeddingConfig,
    EmbeddingModelType,
    extract_file,
)


async def main() -> None:
    """Extract ``research_paper.pdf`` with chunking enabled and print each chunk.

    The extraction config requests chunking with ``max_characters=500`` and
    ``overlap=50``, plus an embedding configuration that uses the
    ``"balanced"`` preset with ``normalize=True``.

    For every chunk on the result, prints its position among all chunks, its
    ``byte_start``-``byte_end`` range, the first 100 characters of its
    content, and (when an embedding is present) the embedding's length.
    Prints nothing if the result carries no chunks.
    """
    # Assemble the nested configuration innermost-first.
    embedding_settings = EmbeddingConfig(
        model=EmbeddingModelType({"type": "preset", "name": "balanced"}),
        normalize=True,
    )
    chunking_settings = ChunkingConfig(
        max_characters=500,
        overlap=50,
        embedding=embedding_settings,
    )
    extraction_settings: ExtractionConfig = ExtractionConfig(
        chunking=chunking_settings
    )

    result = await extract_file("research_paper.pdf", config=extraction_settings)

    pieces = result.chunks
    if not pieces:
        # ``chunks`` may be None or empty; either way there is nothing to show.
        return

    for piece in pieces:
        meta = piece.metadata
        # NOTE(review): the "+ 1" presumably turns a zero-based index into a
        # human-friendly ordinal -- confirm against the library docs.
        print(f"Chunk {meta.chunk_index + 1}/{meta.total_chunks}")
        print(f"Position: {meta.byte_start}-{meta.byte_end}")
        print(f"Content: {piece.content[:100]}...")

        vector = piece.embedding
        if vector is None:
            continue
        print(f"Embedding: {len(vector)} dimensions")


if __name__ == "__main__":
    # Run the example only when this file is executed as a script, so that
    # importing the module does not start an extraction as a side effect.
    asyncio.run(main())