This commit is contained in:
32
docs/snippets/csharp/utils/chunking_rag.md
Normal file
32
docs/snippets/csharp/utils/chunking_rag.md
Normal file
@@ -0,0 +1,32 @@
|
||||
```csharp title="C#"
|
||||
using Kreuzberg;
|
||||
|
||||
// Embedding options: "balanced" preset, normalization enabled, batch size 16.
var embeddingSettings = new EmbeddingConfig
{
    Model = new EmbeddingModelType.Preset("balanced"),
    Normalize = true,
    BatchSize = 16,
};

// Chunking options: MaxCharacters = 500 with Overlap = 50, plus the
// embedding options configured above.
var chunkingSettings = new ChunkingConfig
{
    MaxCharacters = 500,
    Overlap = 50,
    Embedding = embeddingSettings,
};

// Top-level extraction config; only the chunking section is set explicitly here.
var config = new ExtractionConfig
{
    Chunking = chunkingSettings,
};
|
||||
|
||||
// Extract "research_paper.pdf" using the config built above and await the result;
// result.Chunks is consumed by the loop that follows.
// NOTE(review): the second argument is passed as null — presumably an optional
// MIME-type hint; confirm against the Kreuzberg API.
var result = await KreuzbergLib.ExtractFile("research_paper.pdf", null, config);
|
||||
|
||||
// Collect a short text preview and the embedding length for every chunk
// that carries a non-empty embedding.
var chunksWithEmbeddings = new List<(string Preview, int Dimensions)>();

// A null Chunks collection is treated as "no chunks".
foreach (var chunk in result.Chunks ?? new List<Chunk>())
{
    // Skip chunks whose embedding is missing or empty.
    if (chunk.Embedding is not { Count: > 0 } vector)
    {
        continue;
    }

    // Keep at most the first 100 characters of the chunk text as the preview.
    var snippet = chunk.Content;
    if (snippet.Length > 100)
    {
        snippet = snippet[..100];
    }

    chunksWithEmbeddings.Add((snippet, vector.Count));
}
|
||||
|
||||
// Print how many chunks were collected, i.e. how many had a non-empty embedding.
Console.WriteLine($"Chunks with embeddings: {chunksWithEmbeddings.Count}");
|
||||
```
|
||||
Reference in New Issue
Block a user