Files
fil/docs/snippets/csharp/advanced/streaming.cs
Henrik Jess Nielsen b4c07d3693
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s
Nomad changes
2026-06-01 23:40:55 +02:00

109 lines
2.7 KiB
C#

using Kreuzberg;
using System.IO;
class Program
{
static async Task Main()
{
try
{
var filePath = "large_document.pdf";
await ProcessLargeFileAsync(filePath);
}
catch (Exception ex)
{
Console.WriteLine($"Error: {ex.Message}");
}
}
static async Task ProcessLargeFileAsync(string filePath)
{
var config = new ExtractionConfig
{
EnableQualityProcessing = true
};
var result = await KreuzbergLib.ExtractFileAsync(filePath, config);
var contentChunks = ChunkContent(result.Content, chunkSize: 1000);
Console.WriteLine($"Processing {contentChunks.Count} chunks");
foreach (var (index, chunk) in contentChunks.Select((c, i) => (i, c)))
{
Console.WriteLine($"Chunk {index}: {chunk.Length} characters");
await ProcessChunkAsync(chunk);
}
}
static async Task ProcessChunkAsync(string chunk)
{
var wordCount = chunk.Split(
new[] { ' ', '\n', '\r' },
StringSplitOptions.RemoveEmptyEntries
).Length;
Console.WriteLine($" Words: {wordCount}");
await Task.Delay(10);
}
static List<string> ChunkContent(string content, int chunkSize)
{
var chunks = new List<string>();
for (int i = 0; i < content.Length; i += chunkSize)
{
var chunk = content.Substring(
i,
Math.Min(chunkSize, content.Length - i)
);
chunks.Add(chunk);
}
return chunks;
}
static async IAsyncEnumerable<string> StreamExtractedChunksAsync(
string filePath)
{
var result = await KreuzbergLib.ExtractFileAsync(filePath);
if (result.Chunks?.Any() == true)
{
foreach (var chunk in result.Chunks)
{
yield return chunk.Content;
await Task.Yield();
}
}
else
{
var content = result.Content;
const int chunkSize = 512;
for (int i = 0; i < content.Length; i += chunkSize)
{
var chunk = content.Substring(
i,
Math.Min(chunkSize, content.Length - i)
);
yield return chunk;
await Task.Yield();
}
}
}
static async Task StreamProcessingExample()
{
var streamEnumerator = StreamExtractedChunksAsync("document.pdf");
int index = 0;
await foreach (var chunk in streamEnumerator)
{
Console.WriteLine($"Chunk {index++}: {chunk[..50]}...");
}
}
}