This commit is contained in:
75
docs/snippets/csharp/advanced/chunking_config.cs
Normal file
75
docs/snippets/csharp/advanced/chunking_config.cs
Normal file
@@ -0,0 +1,75 @@
|
||||
using Kreuzberg;
|
||||
|
||||
/// <summary>
/// Sample program demonstrating two chunking setups for Kreuzberg extraction:
/// chunking combined with an embedding configuration, and chunking with
/// <c>PrependHeadingContext</c> enabled.
/// </summary>
class Program
{
    /// <summary>
    /// Extracts <c>document.pdf</c> using a chunking configuration
    /// (<c>MaxChars = 1000</c>, <c>MaxOverlap = 200</c>) that also carries an
    /// embedding configuration, then prints the number of chunks, the content
    /// length of every chunk and, for chunks that have an embedding, the
    /// embedding's length.
    /// </summary>
    /// <returns>A task that completes once all output has been written.</returns>
    static async Task Main()
    {
        // Assemble the configuration from the innermost object outwards.
        var embeddingSettings = new EmbeddingConfig
        {
            Model = EmbeddingModelType.Preset("all-minilm-l6-v2"),
            Normalize = true,
            BatchSize = 32
        };

        var chunkingSettings = new ChunkingConfig
        {
            MaxChars = 1000,
            MaxOverlap = 200,
            Embedding = embeddingSettings
        };

        var options = new ExtractionConfig { Chunking = chunkingSettings };

        try
        {
            var extraction = await KreuzbergLib
                .ExtractFileAsync("document.pdf", options)
                .ConfigureAwait(false);

            var pieces = extraction.Chunks;
            Console.WriteLine($"Chunks: {pieces.Count}");

            foreach (var piece in pieces)
            {
                Console.WriteLine($"Content length: {piece.Content.Length}");

                // Only chunks that carry an embedding report its size.
                if (piece.Embedding == null)
                {
                    continue;
                }

                Console.WriteLine($"Embedding dimensions: {piece.Embedding.Length}");
            }
        }
        catch (KreuzbergException error)
        {
            // Only library failures are reported; anything else propagates.
            Console.WriteLine($"Error: {error.Message}");
        }
    }

    /// <summary>
    /// Extracts <c>document.md</c> with <c>MaxChars = 500</c>,
    /// <c>MaxOverlap = 50</c> and <c>PrependHeadingContext = true</c>, then
    /// prints at most the first 100 characters of each chunk's content.
    /// </summary>
    /// <returns>A task that completes once all output has been written.</returns>
    static async Task PrependHeadingContextExample()
    {
        var chunkingSettings = new ChunkingConfig
        {
            MaxChars = 500,
            MaxOverlap = 50,
            PrependHeadingContext = true
        };

        var options = new ExtractionConfig { Chunking = chunkingSettings };

        try
        {
            var extraction = await KreuzbergLib
                .ExtractFileAsync("document.md", options)
                .ConfigureAwait(false);

            foreach (var piece in extraction.Chunks)
            {
                // Each chunk's content is prefixed with its heading breadcrumb
                var text = piece.Content;

                // Cap the preview at 100 characters without running past the end.
                var previewLength = Math.Min(100, text.Length);
                Console.WriteLine(text[..previewLength]);
            }
        }
        catch (KreuzbergException error)
        {
            // Only library failures are reported; anything else propagates.
            Console.WriteLine($"Error: {error.Message}");
        }
    }
}
|
||||
Reference in New Issue
Block a user