Files
fil/docs/snippets/csharp/advanced/chunking_rag.md
Henrik Jess Nielsen b4c07d3693
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s
Nomad changes
2026-06-01 23:40:55 +02:00

2.4 KiB

using Kreuzberg;
using System.Collections.Generic;
using System.Linq;

class RagPipelineExample
{
    static async Task Main()
    {
        var config = new ExtractionConfig
        {
            Chunking = new ChunkingConfig
            {
                MaxChars = 500,
                MaxOverlap = 50,
                Embedding = new EmbeddingConfig
                {
                    Model = EmbeddingModelType.Preset("all-mpnet-base-v2"),
                    Normalize = true,
                    BatchSize = 16
                }
            }
        };

        try
        {
            var result = await KreuzbergLib.ExtractFileAsync(
                "research_paper.pdf",
                config
            ).ConfigureAwait(false);

            var vectorStore = await BuildVectorStoreAsync(result.Chunks)
                .ConfigureAwait(false);

            var query = "machine learning optimization";
            var relevantChunks = await SearchAsync(vectorStore, query)
                .ConfigureAwait(false);

            Console.WriteLine($"Found {relevantChunks.Count} relevant chunks");
            foreach (var chunk in relevantChunks.Take(3))
            {
                Console.WriteLine($"Content: {chunk.Content[..80]}...");
                Console.WriteLine($"Similarity: {chunk.Similarity:F3}\n");
            }
        }
        catch (KreuzbergException ex)
        {
            Console.WriteLine($"Error: {ex.Message}");
        }
    }

    static async Task<List<VectorEntry>> BuildVectorStoreAsync(
        IEnumerable<Chunk> chunks)
    {
        return await Task.Run(() =>
        {
            return chunks.Select(c => new VectorEntry
            {
                Content = c.Content,
                Embedding = c.Embedding?.ToArray() ?? Array.Empty<float>(),
                Similarity = 0f
            }).ToList();
        }).ConfigureAwait(false);
    }

    static async Task<List<VectorEntry>> SearchAsync(
        List<VectorEntry> store,
        string query)
    {
        return await Task.Run(() =>
        {
            return store
                .OrderByDescending(e => e.Similarity)
                .ToList();
        }).ConfigureAwait(false);
    }

    class VectorEntry
    {
        public string Content { get; set; } = string.Empty;
        public float[] Embedding { get; set; } = Array.Empty<float>();
        public float Similarity { get; set; }
    }
}