Files
fil/docs/snippets/csharp/advanced/custom_extractor.cs

69 lines
1.8 KiB
C#
Raw Normal View History

2026-06-01 23:40:55 +02:00
using Kreuzberg;
using System.Text.Json;
// NOTE: The IDocumentExtractor interface is not available in the C# bindings,
// so this snippet shows custom extraction as a standalone helper class instead.
/// <summary>
/// Standalone helper that turns a JSON payload into plain text by collecting
/// every string value found in the document.
/// </summary>
class CustomJsonProcessor
{
    /// <summary>
    /// Decodes <paramref name="content"/> as UTF-8, parses it as JSON and wraps the
    /// concatenated string values in an <see cref="ExtractionResult"/>.
    /// </summary>
    /// <param name="content">Raw bytes of the JSON document.</param>
    /// <param name="mimeType">MIME type copied unchanged onto the result.</param>
    /// <returns>
    /// A result carrying the extracted text, the given MIME type, a newly constructed
    /// <c>Metadata</c>, an empty table list and <c>Success</c> set to <c>true</c>.
    /// </returns>
    /// <exception cref="KreuzbergParsingException">
    /// The decoded text is not valid JSON; the message embeds the parser's message.
    /// </exception>
    /// <exception cref="System.ArgumentNullException">
    /// <paramref name="content"/> is <c>null</c> (raised by the UTF-8 decoder).
    /// </exception>
    public static ExtractionResult ProcessJson(byte[] content, string mimeType)
    {
        try
        {
            var jsonContent = System.Text.Encoding.UTF8.GetString(content);

            // JsonDocument rents pooled buffers and must be disposed to return them.
            // ExtractText builds a complete string before this scope ends, so nothing
            // that still points into the document escapes the using declaration.
            using var document = JsonDocument.Parse(jsonContent);
            var text = ExtractText(document.RootElement);

            return new ExtractionResult
            {
                Content = text,
                MimeType = mimeType,
                Metadata = new Metadata(),
                Tables = new List<Table>(),
                Success = true
            };
        }
        catch (JsonException ex)
        {
            throw new KreuzbergParsingException($"Failed to parse JSON: {ex.Message}");
        }
    }

    /// <summary>
    /// Recursively gathers the string values beneath <paramref name="element"/>.
    /// </summary>
    /// <param name="element">JSON node to walk.</param>
    /// <returns>
    /// Each string value followed by a newline, in document order. Property names and
    /// non-string values (numbers, booleans, null) contribute nothing.
    /// </returns>
    private static string ExtractText(JsonElement element)
    {
        return element.ValueKind switch
        {
            JsonValueKind.String => element.GetString() + "\n",
            JsonValueKind.Array => string.Concat(
                element.EnumerateArray().Select(ExtractText)
            ),
            JsonValueKind.Object => string.Concat(
                element.EnumerateObject()
                    .Select(p => ExtractText(p.Value))
            ),
            // Numbers, booleans, null and undefined carry no text.
            _ => ""
        };
    }
}
/// <summary>
/// Demo entry point: runs a small inline JSON document through
/// <c>CustomJsonProcessor.ProcessJson</c> and prints the outcome.
/// </summary>
class Program
{
    /// <summary>
    /// Encodes the sample JSON as UTF-8, processes it and writes either the
    /// extracted content or the Kreuzberg error message to the console.
    /// </summary>
    static void Main()
    {
        // Sample payload with one string value and one numeric value.
        const string sampleJson = "{\"name\": \"John\", \"age\": 30}";

        try
        {
            var payload = System.Text.Encoding.UTF8.GetBytes(sampleJson);
            var extraction = CustomJsonProcessor.ProcessJson(payload, "application/json");
            Console.WriteLine($"Extracted: {extraction.Content}");
        }
        catch (KreuzbergException failure)
        {
            // Only Kreuzberg errors are reported here; anything else propagates.
            Console.WriteLine($"Error: {failure.Message}");
        }
    }
}