This commit is contained in:
68
docs/snippets/csharp/advanced/custom_extractor.cs
Normal file
68
docs/snippets/csharp/advanced/custom_extractor.cs
Normal file
@@ -0,0 +1,68 @@
|
||||
using Kreuzberg;
|
||||
using System.Text.Json;
|
||||
|
||||
// NOTE: IDocumentExtractor interface is not available in C# bindings
|
||||
|
||||
/// <summary>
/// Stand-alone helper that turns a JSON payload into an <see cref="ExtractionResult"/>
/// whose content is the concatenation of every string value in the document.
/// </summary>
class CustomJsonProcessor
{
    /// <summary>
    /// Decodes <paramref name="content"/> as UTF-8, parses it as JSON and collects the
    /// string values it contains into the result's <c>Content</c>.
    /// </summary>
    /// <param name="content">Raw JSON bytes; decoded as UTF-8.</param>
    /// <param name="mimeType">MIME type copied unchanged onto the result.</param>
    /// <returns>
    /// A result with <c>Success = true</c>, a fresh <c>Metadata</c> instance and an
    /// empty <c>Tables</c> list.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="content"/> is null.</exception>
    /// <exception cref="KreuzbergParsingException">The decoded text is not valid JSON.</exception>
    public static ExtractionResult ProcessJson(byte[] content, string mimeType)
    {
        // Fail up front with the caller-facing parameter name instead of the
        // "bytes" name that Encoding.GetString would report.
        if (content is null)
        {
            throw new ArgumentNullException(nameof(content));
        }

        try
        {
            var jsonContent = System.Text.Encoding.UTF8.GetString(content);

            // JsonDocument is IDisposable; dispose it once the text has been
            // fully materialized so its buffers are released promptly.
            using var document = JsonDocument.Parse(jsonContent);

            var text = ExtractText(document.RootElement);

            return new ExtractionResult
            {
                Content = text,
                MimeType = mimeType,
                Metadata = new Metadata(),
                Tables = new List<Table>(),
                Success = true
            };
        }
        catch (JsonException ex)
        {
            // NOTE(review): if KreuzbergParsingException offers a (message, innerException)
            // overload, pass `ex` along to keep the original stack trace — confirm.
            throw new KreuzbergParsingException($"Failed to parse JSON: {ex.Message}");
        }
    }

    /// <summary>
    /// Recursively gathers string values from <paramref name="element"/>.
    /// </summary>
    /// <remarks>
    /// Each string value is emitted followed by a newline. Arrays and objects
    /// contribute the concatenated text of their children in enumeration order;
    /// object property names are not included. Every other value kind yields
    /// an empty string.
    /// </remarks>
    /// <param name="element">The JSON element to walk.</param>
    /// <returns>The newline-terminated string values found under the element.</returns>
    private static string ExtractText(JsonElement element)
    {
        return element.ValueKind switch
        {
            JsonValueKind.String => element.GetString() + "\n",
            JsonValueKind.Array => string.Concat(
                element.EnumerateArray().Select(ExtractText)
            ),
            JsonValueKind.Object => string.Concat(
                element.EnumerateObject()
                    .Select(p => ExtractText(p.Value))
            ),
            _ => ""
        };
    }
}
|
||||
|
||||
/// <summary>
/// Console entry point that runs <c>CustomJsonProcessor.ProcessJson</c> on a
/// small inline JSON sample and prints the outcome.
/// </summary>
class Program
{
    /// <summary>
    /// Encodes the sample JSON as UTF-8, processes it and writes the extracted
    /// content to the console. A <c>KreuzbergException</c> is caught and its
    /// message is printed instead.
    /// </summary>
    static void Main()
    {
        const string sampleJson = @"{""name"": ""John"", ""age"": 30}";

        try
        {
            byte[] payload = System.Text.Encoding.UTF8.GetBytes(sampleJson);
            ExtractionResult extraction =
                CustomJsonProcessor.ProcessJson(payload, "application/json");

            Console.WriteLine($"Extracted: {extraction.Content}");
        }
        catch (KreuzbergException failure)
        {
            Console.WriteLine($"Error: {failure.Message}");
        }
    }
}
|
||||
Reference in New Issue
Block a user