Files

86 lines
2.4 KiB
Markdown
Raw Permalink Normal View History

2026-06-01 23:40:55 +02:00
```csharp title="C#"
using Kreuzberg;
using System.Collections.Generic;
using System.Linq;
class RagPipelineExample
{
static async Task Main()
{
var config = new ExtractionConfig
{
Chunking = new ChunkingConfig
{
MaxChars = 500,
MaxOverlap = 50,
Embedding = new EmbeddingConfig
{
Model = EmbeddingModelType.Preset("all-mpnet-base-v2"),
Normalize = true,
BatchSize = 16
}
}
};
try
{
var result = await KreuzbergLib.ExtractFileAsync(
"research_paper.pdf",
config
).ConfigureAwait(false);
var vectorStore = await BuildVectorStoreAsync(result.Chunks)
.ConfigureAwait(false);
var query = "machine learning optimization";
var relevantChunks = await SearchAsync(vectorStore, query)
.ConfigureAwait(false);
Console.WriteLine($"Found {relevantChunks.Count} relevant chunks");
foreach (var chunk in relevantChunks.Take(3))
{
Console.WriteLine($"Content: {chunk.Content[..80]}...");
Console.WriteLine($"Similarity: {chunk.Similarity:F3}\n");
}
}
catch (KreuzbergException ex)
{
Console.WriteLine($"Error: {ex.Message}");
}
}
static async Task<List<VectorEntry>> BuildVectorStoreAsync(
IEnumerable<Chunk> chunks)
{
return await Task.Run(() =>
{
return chunks.Select(c => new VectorEntry
{
Content = c.Content,
Embedding = c.Embedding?.ToArray() ?? Array.Empty<float>(),
Similarity = 0f
}).ToList();
}).ConfigureAwait(false);
}
static async Task<List<VectorEntry>> SearchAsync(
List<VectorEntry> store,
string query)
{
return await Task.Run(() =>
{
return store
.OrderByDescending(e => e.Similarity)
.ToList();
}).ConfigureAwait(false);
}
class VectorEntry
{
public string Content { get; set; } = string.Empty;
public float[] Embedding { get; set; } = Array.Empty<float>();
public float Similarity { get; set; }
}
}
```