Files
fil/docs/snippets/csharp/advanced/extract_from_url.cs
Henrik Jess Nielsen b4c07d3693
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s
Nomad changes
2026-06-01 23:40:55 +02:00

74 lines
2.1 KiB
C#

using Kreuzberg;
using System.Net.Http;
/// <summary>
/// Sample program that downloads PDF documents over HTTP and passes the
/// downloaded bytes to <c>KreuzbergLib.ExtractBytesAsync</c>.
/// </summary>
class Program
{
    /// <summary>
    /// Runs three scenarios against a single shared <see cref="HttpClient"/>:
    /// a plain extraction of one downloaded PDF, a second extraction of the same
    /// bytes with <c>EnableQualityProcessing</c> turned on, and a batch that
    /// downloads and extracts several PDFs via <see cref="Task.WhenAll{TResult}(System.Collections.Generic.IEnumerable{Task{TResult}})"/>.
    /// </summary>
    /// <remarks>
    /// In the batch scenario both download failures and extraction failures are
    /// reported per document and yield <c>null</c>, so one bad document does not
    /// prevent the success count from being printed.
    /// </remarks>
    /// <returns>A task that completes when all scenarios have finished.</returns>
    static async Task Main()
    {
        // Every document in this sample is a PDF; keep the MIME type in one place.
        const string pdfMimeType = "application/pdf";

        // One client is reused for every request made by this program.
        using var httpClient = new HttpClient();

        try
        {
            // Scenario 1: download a single document and extract it with default settings.
            var url = "https://example.com/document.pdf";
            var documentBytes = await httpClient.GetByteArrayAsync(url);

            var result = await KreuzbergLib.ExtractBytesAsync(
                documentBytes,
                pdfMimeType
            );
            Console.WriteLine($"Extracted from URL: {result.Content.Length} chars");

            // Scenario 2: extract the same bytes again with quality processing enabled.
            var config = new ExtractionConfig
            {
                EnableQualityProcessing = true
            };

            var result2 = await KreuzbergLib.ExtractBytesAsync(
                documentBytes,
                pdfMimeType,
                config
            );
            Console.WriteLine($"Quality score: {result2.QualityScore}");

            // Scenario 3: download and extract several documents in one batch.
            var urls = new[]
            {
                "https://example.com/doc1.pdf",
                "https://example.com/doc2.pdf",
                "https://example.com/doc3.pdf"
            };

            var downloadTasks = urls.Select(async u =>
            {
                try
                {
                    var bytes = await httpClient.GetByteArrayAsync(u);
                    return await KreuzbergLib.ExtractBytesAsync(
                        bytes,
                        pdfMimeType
                    );
                }
                catch (HttpRequestException ex)
                {
                    Console.WriteLine($"Download failed for {u}: {ex.Message}");
                    return null;
                }
                catch (KreuzbergException ex)
                {
                    // Awaiting Task.WhenAll rethrows when any task faults, which would
                    // skip the summary below. Handle extraction failures per document,
                    // the same way download failures are handled.
                    Console.WriteLine($"Extraction failed for {u}: {ex.Message}");
                    return null;
                }
            });

            var results = await Task.WhenAll(downloadTasks);

            // Failed documents are represented by null entries.
            var successCount = results.Count(r => r != null);
            Console.WriteLine($"Successfully processed {successCount} documents");
        }
        catch (HttpRequestException ex)
        {
            Console.WriteLine($"HTTP error: {ex.Message}");
        }
        catch (KreuzbergException ex)
        {
            Console.WriteLine($"Extraction error: {ex.Message}");
        }
    }
}