Files
fil/docs/snippets/csharp/advanced/chunking_config.md
Henrik Jess Nielsen b4c07d3693
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s
Nomad changes
2026-06-01 23:40:55 +02:00

3.3 KiB

using Kreuzberg;

/// <summary>
/// Sample program: extracts "document.pdf" with chunking and per-chunk
/// embeddings enabled, then reports what came back.
/// </summary>
class Program
{
    /// <summary>
    /// Builds the extraction configuration, runs the extraction and prints the
    /// chunk count, each chunk's content length and, when an embedding is
    /// present, its length. A <c>KreuzbergException</c> is reported on the
    /// console instead of propagating.
    /// </summary>
    static async Task Main()
    {
        // Embedding section: preset model, normalization on, batches of 32.
        var embeddingSettings = new EmbeddingConfig
        {
            Model = EmbeddingModelType.Preset("all-minilm-l6-v2"),
            Normalize = true,
            BatchSize = 32
        };

        // Chunking section carries the size limits and the embedding section.
        var chunkingSettings = new ChunkingConfig
        {
            MaxChars = 1000,
            MaxOverlap = 200,
            Embedding = embeddingSettings
        };

        var settings = new ExtractionConfig { Chunking = chunkingSettings };

        try
        {
            var extraction = await KreuzbergLib
                .ExtractFileAsync("document.pdf", settings)
                .ConfigureAwait(false);

            Console.WriteLine($"Chunks: {extraction.Chunks.Count}");
            foreach (var piece in extraction.Chunks)
            {
                Console.WriteLine($"Content length: {piece.Content.Length}");

                // Chunks without an embedding only get the content-length line.
                if (piece.Embedding == null)
                {
                    continue;
                }

                Console.WriteLine($"Embedding dimensions: {piece.Embedding.Length}");
            }
        }
        catch (KreuzbergException failure)
        {
            Console.WriteLine($"Error: {failure.Message}");
        }
    }
}
using Kreuzberg;

/// <summary>
/// Sample program: extracts "document.md" with tokenizer-based chunk sizing
/// and lists the headings attached to each chunk.
/// </summary>
class Program
{
    /// <summary>
    /// Configures chunking (MaxChars 500, MaxOverlap 50, sizing of type
    /// "tokenizer" with model "Xenova/gpt-4o"), runs the extraction and, for
    /// every chunk that has heading context, prints each heading's level and
    /// text. A <c>KreuzbergException</c> is reported on the console instead of
    /// propagating.
    /// </summary>
    static async Task Main()
    {
        var sizing = new ChunkSizingConfig
        {
            Type = "tokenizer",
            Model = "Xenova/gpt-4o"
        };

        var chunkingSettings = new ChunkingConfig
        {
            MaxChars = 500,
            MaxOverlap = 50,
            Sizing = sizing
        };

        var settings = new ExtractionConfig { Chunking = chunkingSettings };

        try
        {
            var extraction = await KreuzbergLib
                .ExtractFileAsync("document.md", settings)
                .ConfigureAwait(false);

            foreach (var piece in extraction.Chunks)
            {
                // Either the heading context or its heading list may be absent;
                // such chunks produce no output.
                var headings = piece.HeadingContext?.Headings;
                if (headings == null)
                {
                    continue;
                }

                Console.WriteLine("Headings:");
                foreach (var entry in headings)
                {
                    Console.WriteLine($"  Level {entry.Level}: {entry.Text}");
                }
            }
        }
        catch (KreuzbergException failure)
        {
            Console.WriteLine($"Error: {failure.Message}");
        }
    }
}
using Kreuzberg;

/// <summary>
/// Sample program: extracts "document.md" with <c>PrependHeadingContext</c>
/// enabled and prints the beginning of every chunk.
/// </summary>
class Program
{
    /// <summary>
    /// Configures chunking (MaxChars 500, MaxOverlap 50, heading context
    /// prepended), runs the extraction and writes at most the first 100
    /// characters of each chunk's content. A <c>KreuzbergException</c> is
    /// reported on the console instead of propagating.
    /// </summary>
    static async Task Main()
    {
        // Upper bound on how much of each chunk is echoed to the console.
        const int previewLimit = 100;

        var chunkingSettings = new ChunkingConfig
        {
            MaxChars = 500,
            MaxOverlap = 50,
            PrependHeadingContext = true
        };

        var settings = new ExtractionConfig { Chunking = chunkingSettings };

        try
        {
            var extraction = await KreuzbergLib
                .ExtractFileAsync("document.md", settings)
                .ConfigureAwait(false);

            foreach (var piece in extraction.Chunks)
            {
                // Each chunk's content is prefixed with its heading breadcrumb,
                // so the preview starts with that breadcrumb.
                var previewLength = Math.Min(previewLimit, piece.Content.Length);
                Console.WriteLine(piece.Content[..previewLength]);
            }
        }
        catch (KreuzbergException failure)
        {
            Console.WriteLine($"Error: {failure.Message}");
        }
    }
}