Files

33 lines
873 B
Markdown
Raw Permalink Normal View History

2026-06-01 23:40:55 +02:00
```csharp title="C#"
using Kreuzberg;
// Embedding settings: the "balanced" preset model, with Normalize enabled
// and a BatchSize of 16.
var embeddingOptions = new EmbeddingConfig
{
    Model = new EmbeddingModelType.Preset("balanced"),
    Normalize = true,
    BatchSize = 16,
};

// Chunking settings (MaxCharacters = 500, Overlap = 50) carrying the
// embedding settings above, so chunking and embedding are configured together.
var chunkingOptions = new ChunkingConfig
{
    MaxCharacters = 500,
    Overlap = 50,
    Embedding = embeddingOptions,
};

var config = new ExtractionConfig
{
    Chunking = chunkingOptions,
};

// Run the extraction on the PDF using the configuration built above.
var result = await KreuzbergLib.ExtractFile("research_paper.pdf", null, config);

// Chunks may be null on the result; fall back to an empty list so the loop
// below simply does nothing in that case.
var extractedChunks = result.Chunks ?? new List<Chunk>();

// Collect a short text preview and the embedding length for every chunk
// that actually has a non-empty embedding.
var embeddedChunks = new List<(string Preview, int Dimensions)>();
foreach (var piece in extractedChunks)
{
    // Skip chunks whose embedding is null or has no elements.
    if (piece.Embedding is not { Count: > 0 } vector)
    {
        continue;
    }

    // Keep at most the first 100 characters of the chunk text as the preview.
    var text = piece.Content;
    var snippet = text.Length <= 100 ? text : text[..100];

    embeddedChunks.Add((snippet, vector.Count));
}

Console.WriteLine($"Chunks with embeddings: {embeddedChunks.Count}");
```