This commit is contained in:
71
packages/csharp/src/Kreuzberg/AccelerationConfig.cs
generated
Normal file
71
packages/csharp/src/Kreuzberg/AccelerationConfig.cs
generated
Normal file
@@ -0,0 +1,71 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// Hardware acceleration settings for ONNX Runtime models.
///
/// Selects the execution provider (CPU, CoreML, CUDA, TensorRT) used for
/// inference in layout detection and embedding generation.
/// </summary>
public sealed record AccelerationConfig
{
    /// <summary>Options used by <see cref="FromJson"/> when reading JSON.</summary>
    private static readonly JsonSerializerOptions JsonOptions =
        BuildOptions(JsonIgnoreCondition.WhenWritingDefault);

    /// <summary>
    /// Options for serializing config/input objects to FFI. Nulls are stripped
    /// (nullable C# fields default to null and would override required Rust fields
    /// with non-deserialisable nulls) while explicit false/0 values are preserved so
    /// caller intent is kept. Not referenced by any member visible in this record.
    /// </summary>
    private static readonly JsonSerializerOptions JsonSerializationOptions =
        BuildOptions(JsonIgnoreCondition.WhenWritingNull);

    /// <summary>
    /// Execution provider to use for ONNX inference. Serialized as <c>provider</c>.
    /// </summary>
    [JsonPropertyName("provider")]
    public ExecutionProviderType? Provider { get; init; }

    /// <summary>
    /// GPU device ID (for CUDA/TensorRT). Ignored for CPU/CoreML/Auto.
    /// Serialized as <c>device_id</c>.
    /// </summary>
    [JsonPropertyName("device_id")]
    public uint DeviceId { get; init; }

    /// <summary>
    /// Parse a <see cref="AccelerationConfig"/> from a JSON string.
    /// </summary>
    /// <param name="json">JSON text to deserialize.</param>
    /// <returns>The deserialized configuration; never null.</returns>
    /// <exception cref="KreuzbergException">
    /// When the JSON cannot be deserialised or the deserializer yields null. Any
    /// other exception raised while deserializing is attached as the inner exception.
    /// </exception>
    public static AccelerationConfig FromJson(string json)
    {
        AccelerationConfig? parsed;
        try
        {
            parsed = JsonSerializer.Deserialize<AccelerationConfig>(json, JsonOptions);
        }
        catch (Exception cause) when (cause is not KreuzbergException)
        {
            // A KreuzbergException raised during deserialization propagates untouched.
            throw new KreuzbergException($"Failed to parse AccelerationConfig from JSON: {cause.Message}", cause);
        }

        if (parsed is null)
        {
            throw new KreuzbergException("Failed to parse AccelerationConfig from JSON: deserializer returned null");
        }

        return parsed;
    }

    /// <summary>
    /// Creates serializer options with the given ignore condition and a
    /// snake_case-lower string enum converter.
    /// </summary>
    private static JsonSerializerOptions BuildOptions(JsonIgnoreCondition ignoreCondition)
    {
        var built = new JsonSerializerOptions { DefaultIgnoreCondition = ignoreCondition };
        built.Converters.Add(new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower));
        return built;
    }
}
|
||||
205
packages/csharp/src/Kreuzberg/AnnotationKind.cs
generated
Normal file
205
packages/csharp/src/Kreuzberg/AnnotationKind.cs
generated
Normal file
@@ -0,0 +1,205 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// Types of inline text annotations, expressed as nested record variants.
/// </summary>
/// <remarks>
/// JSON reading and writing is delegated to <see cref="AnnotationKindJsonConverter"/>.
/// </remarks>
[JsonConverter(typeof(AnnotationKindJsonConverter))]
public abstract record AnnotationKind
{
    /// <summary>Bold variant; carries no fields.</summary>
    public sealed record Bold() : AnnotationKind;

    /// <summary>Italic variant; carries no fields.</summary>
    public sealed record Italic() : AnnotationKind;

    /// <summary>Underline variant; carries no fields.</summary>
    public sealed record Underline() : AnnotationKind;

    /// <summary>Strikethrough variant; carries no fields.</summary>
    public sealed record Strikethrough() : AnnotationKind;

    /// <summary>Code variant; carries no fields.</summary>
    public sealed record Code() : AnnotationKind;

    /// <summary>Subscript variant; carries no fields.</summary>
    public sealed record Subscript() : AnnotationKind;

    /// <summary>Superscript variant; carries no fields.</summary>
    public sealed record Superscript() : AnnotationKind;

    /// <summary>Link variant.</summary>
    /// <param name="Url">Serialized as <c>url</c>.</param>
    /// <param name="Title">Optional; serialized as <c>title</c>.</param>
    public sealed record Link([property: JsonPropertyName("url")] string Url, [property: JsonPropertyName("title")] string? Title) : AnnotationKind;

    /// <summary>
    /// Highlighted text (PDF highlights, HTML <c>&lt;mark&gt;</c>).
    /// </summary>
    public sealed record Highlight() : AnnotationKind;

    /// <summary>
    /// Text color (CSS-compatible value, e.g. "#ff0000", "red").
    /// </summary>
    /// <param name="Value">Serialized as <c>value</c>.</param>
    public sealed record Color([property: JsonPropertyName("value")] string Value) : AnnotationKind;

    /// <summary>
    /// Font size with units (e.g. "12pt", "1.2em", "16px").
    /// </summary>
    /// <param name="Value">Serialized as <c>value</c>.</param>
    public sealed record FontSize([property: JsonPropertyName("value")] string Value) : AnnotationKind;

    /// <summary>
    /// Extensible annotation for format-specific styling.
    /// </summary>
    /// <param name="Name">Serialized as <c>name</c>.</param>
    /// <param name="Value">Optional; serialized as <c>value</c>.</param>
    public sealed record Custom([property: JsonPropertyName("name")] string Name, [property: JsonPropertyName("value")] string? Value) : AnnotationKind;
}
|
||||
|
||||
/// <summary>
/// Custom converter for the <see cref="AnnotationKind"/> union with flattened variant fields.
/// </summary>
/// <remarks>
/// Handles JSON objects that carry a discriminator field (<c>annotation_type</c>) and the
/// variant-specific fields at the same level. System.Text.Json's [JsonPolymorphic] cannot
/// handle this layout, so reading and writing are done manually here.
/// </remarks>
public sealed class AnnotationKindJsonConverter : JsonConverter<AnnotationKind>
{
    /// <summary>Name of the JSON property that selects the variant.</summary>
    private const string DiscriminatorName = "annotation_type";

    /// <summary>
    /// Reads one JSON object and materializes the variant named by its
    /// <c>annotation_type</c> property.
    /// </summary>
    /// <param name="reader">Reader positioned on the start of the object.</param>
    /// <param name="typeToConvert">Unused; the variant is chosen by the discriminator.</param>
    /// <param name="options">Options forwarded when deserializing variants that have fields.</param>
    /// <returns>The matching <see cref="AnnotationKind"/> variant.</returns>
    /// <exception cref="JsonException">
    /// The token is not an object, the discriminator is missing, null, not a string, or
    /// unknown, or a variant with fields deserializes to null.
    /// </exception>
    public override AnnotationKind Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options)
    {
        if (reader.TokenType != JsonTokenType.StartObject)
        {
            throw new JsonException($"Expected JSON object, got {reader.TokenType}");
        }

        using var doc = JsonDocument.ParseValue(ref reader);
        var root = doc.RootElement;

        if (!root.TryGetProperty(DiscriminatorName, out var tagElement))
        {
            throw new JsonException("Missing discriminator field: annotation_type");
        }

        // Reject numbers, booleans, objects and arrays up front so the failure is a
        // JsonException with a precise message rather than an error from GetString().
        if (tagElement.ValueKind is not (JsonValueKind.String or JsonValueKind.Null))
        {
            throw new JsonException($"Discriminator field annotation_type must be a string, got {tagElement.ValueKind}");
        }

        var tagValue = tagElement.GetString()
            ?? throw new JsonException("Discriminator field is null");

        // Payload-free variants are constructed directly; any extra properties on the
        // object are ignored. Variants with fields are deserialized from the object
        // with the discriminator removed.
        return tagValue switch
        {
            "bold" => new AnnotationKind.Bold(),
            "italic" => new AnnotationKind.Italic(),
            "underline" => new AnnotationKind.Underline(),
            "strikethrough" => new AnnotationKind.Strikethrough(),
            "code" => new AnnotationKind.Code(),
            "subscript" => new AnnotationKind.Subscript(),
            "superscript" => new AnnotationKind.Superscript(),
            "link" => DeserializeVariant<AnnotationKind.Link>(root, options),
            "highlight" => new AnnotationKind.Highlight(),
            "color" => DeserializeVariant<AnnotationKind.Color>(root, options),
            "font_size" => DeserializeVariant<AnnotationKind.FontSize>(root, options),
            "custom" => DeserializeVariant<AnnotationKind.Custom>(root, options),
            _ => throw new JsonException($"Unknown AnnotationKind discriminator: {tagValue}"),
        };
    }

    /// <summary>
    /// Writes the discriminator followed by the variant's own fields, all at the same
    /// level of a single JSON object.
    /// </summary>
    /// <remarks>
    /// For example <c>new AnnotationKind.Color("red")</c> is emitted as
    /// <c>{"annotation_type":"color","value":"red"}</c>, not nested under a wrapper
    /// property. The generator's original note says the Rust (serde) consumer rejects
    /// payloads that lack the discriminator.
    /// </remarks>
    /// <param name="writer">Destination writer.</param>
    /// <param name="value">Variant to serialize.</param>
    /// <param name="options">Options forwarded when serializing the variant's fields.</param>
    /// <exception cref="JsonException">The runtime type is not a known variant.</exception>
    public override void Write(Utf8JsonWriter writer, AnnotationKind value, JsonSerializerOptions options)
    {
        // Resolve the tag before writing anything so an unknown variant fails without
        // leaving a half-written object behind.
        (string tag, bool hasFields) = value switch
        {
            AnnotationKind.Bold => ("bold", false),
            AnnotationKind.Italic => ("italic", false),
            AnnotationKind.Underline => ("underline", false),
            AnnotationKind.Strikethrough => ("strikethrough", false),
            AnnotationKind.Code => ("code", false),
            AnnotationKind.Subscript => ("subscript", false),
            AnnotationKind.Superscript => ("superscript", false),
            AnnotationKind.Link => ("link", true),
            AnnotationKind.Highlight => ("highlight", false),
            AnnotationKind.Color => ("color", true),
            AnnotationKind.FontSize => ("font_size", true),
            AnnotationKind.Custom => ("custom", true),
            _ => throw new JsonException($"Unknown AnnotationKind variant: {value.GetType().Name}"),
        };

        writer.WriteStartObject();
        writer.WriteString("annotation_type", tag);

        if (hasFields)
        {
            // Serialize using the concrete variant type, then copy its properties
            // into the object already opened above.
            using var payload = JsonSerializer.SerializeToDocument(value, value.GetType(), options);
            if (payload.RootElement.ValueKind == JsonValueKind.Object)
            {
                foreach (var field in payload.RootElement.EnumerateObject())
                {
                    writer.WritePropertyName(field.Name);
                    field.Value.WriteTo(writer);
                }
            }
        }

        writer.WriteEndObject();
    }

    /// <summary>
    /// Deserializes a variant that has fields from <paramref name="root"/> after
    /// dropping the discriminator property.
    /// </summary>
    /// <exception cref="JsonException">The deserializer returned null.</exception>
    private static TVariant DeserializeVariant<TVariant>(JsonElement root, JsonSerializerOptions options)
        where TVariant : AnnotationKind
    {
        using var buffer = new MemoryStream();
        using var writer = new Utf8JsonWriter(buffer);

        writer.WriteStartObject();
        foreach (var prop in root.EnumerateObject())
        {
            if (prop.Name != DiscriminatorName)
            {
                writer.WritePropertyName(prop.Name);
                prop.Value.WriteTo(writer);
            }
        }
        writer.WriteEndObject();
        writer.Flush();

        return JsonSerializer.Deserialize<TVariant>(buffer.ToArray(), options)
            ?? throw new JsonException("Failed to deserialize variant");
    }
}
|
||||
77
packages/csharp/src/Kreuzberg/ArchiveEntry.cs
generated
Normal file
77
packages/csharp/src/Kreuzberg/ArchiveEntry.cs
generated
Normal file
@@ -0,0 +1,77 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// A single file extracted from an archive.
///
/// When archives (ZIP, TAR, 7Z, GZIP) are extracted with recursive extraction
/// enabled, each processable file produces its own full <see cref="ExtractionResult"/>.
/// </summary>
public sealed record ArchiveEntry
{
    /// <summary>Options used by <see cref="FromJson"/> when reading JSON.</summary>
    private static readonly JsonSerializerOptions JsonOptions =
        BuildOptions(JsonIgnoreCondition.WhenWritingDefault);

    /// <summary>
    /// Options for serializing config/input objects to FFI. Nulls are stripped
    /// (nullable C# fields default to null and would override required Rust fields
    /// with non-deserialisable nulls) while explicit false/0 values are preserved so
    /// caller intent is kept. Not referenced by any member visible in this record.
    /// </summary>
    private static readonly JsonSerializerOptions JsonSerializationOptions =
        BuildOptions(JsonIgnoreCondition.WhenWritingNull);

    /// <summary>
    /// Archive-relative file path (e.g. "folder/document.pdf"). Serialized as <c>path</c>.
    /// </summary>
    [JsonPropertyName("path")]
    public required string Path { get; init; }

    /// <summary>
    /// Detected MIME type of the file. Serialized as <c>mime_type</c>.
    /// </summary>
    [JsonPropertyName("mime_type")]
    public required string MimeType { get; init; }

    /// <summary>
    /// Full extraction result for this file. Serialized as <c>result</c>.
    /// </summary>
    [JsonPropertyName("result")]
    public required ExtractionResult Result { get; init; }

    /// <summary>
    /// Parse a <see cref="ArchiveEntry"/> from a JSON string.
    /// </summary>
    /// <param name="json">JSON text to deserialize.</param>
    /// <returns>The deserialized entry; never null.</returns>
    /// <exception cref="KreuzbergException">
    /// When the JSON cannot be deserialised or the deserializer yields null. Any
    /// other exception raised while deserializing is attached as the inner exception.
    /// </exception>
    public static ArchiveEntry FromJson(string json)
    {
        ArchiveEntry? parsed;
        try
        {
            parsed = JsonSerializer.Deserialize<ArchiveEntry>(json, JsonOptions);
        }
        catch (Exception cause) when (cause is not KreuzbergException)
        {
            // A KreuzbergException raised during deserialization propagates untouched.
            throw new KreuzbergException($"Failed to parse ArchiveEntry from JSON: {cause.Message}", cause);
        }

        if (parsed is null)
        {
            throw new KreuzbergException("Failed to parse ArchiveEntry from JSON: deserializer returned null");
        }

        return parsed;
    }

    /// <summary>
    /// Creates serializer options with the given ignore condition and a
    /// snake_case-lower string enum converter.
    /// </summary>
    private static JsonSerializerOptions BuildOptions(JsonIgnoreCondition ignoreCondition)
    {
        var built = new JsonSerializerOptions { DefaultIgnoreCondition = ignoreCondition };
        built.Converters.Add(new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower));
        return built;
    }
}
|
||||
88
packages/csharp/src/Kreuzberg/ArchiveMetadata.cs
generated
Normal file
88
packages/csharp/src/Kreuzberg/ArchiveMetadata.cs
generated
Normal file
@@ -0,0 +1,88 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// Archive (ZIP/TAR/7Z) metadata.
///
/// Extracted from compressed archive files containing file lists and size information.
/// </summary>
public sealed record ArchiveMetadata
{
    /// <summary>Options used by <see cref="FromJson"/> when reading JSON.</summary>
    private static readonly JsonSerializerOptions JsonOptions =
        BuildOptions(JsonIgnoreCondition.WhenWritingDefault);

    /// <summary>
    /// Options for serializing config/input objects to FFI. Nulls are stripped
    /// (nullable C# fields default to null and would override required Rust fields
    /// with non-deserialisable nulls) while explicit false/0 values are preserved so
    /// caller intent is kept. Not referenced by any member visible in this record.
    /// </summary>
    private static readonly JsonSerializerOptions JsonSerializationOptions =
        BuildOptions(JsonIgnoreCondition.WhenWritingNull);

    /// <summary>
    /// Archive format ("ZIP", "TAR", "7Z", etc.). Defaults to the empty string.
    /// </summary>
    [JsonPropertyName("format")]
    public string Format { get; init; } = string.Empty;

    /// <summary>
    /// Total number of files in the archive.
    /// </summary>
    [JsonPropertyName("file_count")]
    public uint FileCount { get; init; }

    /// <summary>
    /// List of file paths within the archive. Defaults to an empty list.
    /// </summary>
    [JsonPropertyName("file_list")]
    public List<string> FileList { get; init; } = new();

    /// <summary>
    /// Total uncompressed size in bytes.
    /// </summary>
    [JsonPropertyName("total_size")]
    public ulong TotalSize { get; init; }

    /// <summary>
    /// Compressed size in bytes (if available); null otherwise.
    /// </summary>
    [JsonPropertyName("compressed_size")]
    public ulong? CompressedSize { get; init; }

    /// <summary>
    /// Parse a <see cref="ArchiveMetadata"/> from a JSON string.
    /// </summary>
    /// <param name="json">JSON text to deserialize.</param>
    /// <returns>The deserialized metadata; never null.</returns>
    /// <exception cref="KreuzbergException">
    /// When the JSON cannot be deserialised or the deserializer yields null. Any
    /// other exception raised while deserializing is attached as the inner exception.
    /// </exception>
    public static ArchiveMetadata FromJson(string json)
    {
        ArchiveMetadata? parsed;
        try
        {
            parsed = JsonSerializer.Deserialize<ArchiveMetadata>(json, JsonOptions);
        }
        catch (Exception cause) when (cause is not KreuzbergException)
        {
            // A KreuzbergException raised during deserialization propagates untouched.
            throw new KreuzbergException($"Failed to parse ArchiveMetadata from JSON: {cause.Message}", cause);
        }

        if (parsed is null)
        {
            throw new KreuzbergException("Failed to parse ArchiveMetadata from JSON: deserializer returned null");
        }

        return parsed;
    }

    /// <summary>
    /// Creates serializer options with the given ignore condition and a
    /// snake_case-lower string enum converter.
    /// </summary>
    private static JsonSerializerOptions BuildOptions(JsonIgnoreCondition ignoreCondition)
    {
        var built = new JsonSerializerOptions { DefaultIgnoreCondition = ignoreCondition };
        built.Converters.Add(new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower));
        return built;
    }
}
|
||||
68
packages/csharp/src/Kreuzberg/BBox.cs
generated
Normal file
68
packages/csharp/src/Kreuzberg/BBox.cs
generated
Normal file
@@ -0,0 +1,68 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// Bounding box in original image coordinates: (x1, y1) is the top-left corner and
/// (x2, y2) is the bottom-right corner.
/// </summary>
public sealed record BBox
{
    /// <summary>Options used by <see cref="FromJson"/> when reading JSON.</summary>
    private static readonly JsonSerializerOptions JsonOptions =
        BuildOptions(JsonIgnoreCondition.WhenWritingDefault);

    /// <summary>
    /// Options for serializing config/input objects to FFI. Nulls are stripped
    /// (nullable C# fields default to null and would override required Rust fields
    /// with non-deserialisable nulls) while explicit false/0 values are preserved so
    /// caller intent is kept. Not referenced by any member visible in this record.
    /// </summary>
    private static readonly JsonSerializerOptions JsonSerializationOptions =
        BuildOptions(JsonIgnoreCondition.WhenWritingNull);

    /// <summary>X coordinate of the top-left corner. Serialized as <c>x1</c>.</summary>
    [JsonPropertyName("x1")]
    public float X1 { get; init; }

    /// <summary>Y coordinate of the top-left corner. Serialized as <c>y1</c>.</summary>
    [JsonPropertyName("y1")]
    public float Y1 { get; init; }

    /// <summary>X coordinate of the bottom-right corner. Serialized as <c>x2</c>.</summary>
    [JsonPropertyName("x2")]
    public float X2 { get; init; }

    /// <summary>Y coordinate of the bottom-right corner. Serialized as <c>y2</c>.</summary>
    [JsonPropertyName("y2")]
    public float Y2 { get; init; }

    /// <summary>
    /// Parse a <see cref="BBox"/> from a JSON string.
    /// </summary>
    /// <param name="json">JSON text to deserialize.</param>
    /// <returns>The deserialized bounding box; never null.</returns>
    /// <exception cref="KreuzbergException">
    /// When the JSON cannot be deserialised or the deserializer yields null. Any
    /// other exception raised while deserializing is attached as the inner exception.
    /// </exception>
    public static BBox FromJson(string json)
    {
        BBox? parsed;
        try
        {
            parsed = JsonSerializer.Deserialize<BBox>(json, JsonOptions);
        }
        catch (Exception cause) when (cause is not KreuzbergException)
        {
            // A KreuzbergException raised during deserialization propagates untouched.
            throw new KreuzbergException($"Failed to parse BBox from JSON: {cause.Message}", cause);
        }

        if (parsed is null)
        {
            throw new KreuzbergException("Failed to parse BBox from JSON: deserializer returned null");
        }

        return parsed;
    }

    /// <summary>
    /// Creates serializer options with the given ignore condition and a
    /// snake_case-lower string enum converter.
    /// </summary>
    private static JsonSerializerOptions BuildOptions(JsonIgnoreCondition ignoreCondition)
    {
        var built = new JsonSerializerOptions { DefaultIgnoreCondition = ignoreCondition };
        built.Converters.Add(new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower));
        return built;
    }
}
|
||||
78
packages/csharp/src/Kreuzberg/BatchBytesItem.cs
generated
Normal file
78
packages/csharp/src/Kreuzberg/BatchBytesItem.cs
generated
Normal file
@@ -0,0 +1,78 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// Batch item for byte array extraction.
///
/// Used with <c>batch_extract_bytes</c> and <c>batch_extract_bytes_sync</c>
/// to represent a single item in a batch extraction job.
/// </summary>
public sealed record BatchBytesItem
{
    /// <summary>Options used by <see cref="FromJson"/> when reading JSON.</summary>
    private static readonly JsonSerializerOptions JsonOptions =
        BuildOptions(JsonIgnoreCondition.WhenWritingDefault);

    /// <summary>
    /// Options for serializing config/input objects to FFI. Nulls are stripped
    /// (nullable C# fields default to null and would override required Rust fields
    /// with non-deserialisable nulls) while explicit false/0 values are preserved so
    /// caller intent is kept. Not referenced by any member visible in this record.
    /// </summary>
    private static readonly JsonSerializerOptions JsonSerializationOptions =
        BuildOptions(JsonIgnoreCondition.WhenWritingNull);

    /// <summary>
    /// The content bytes to extract from. Defaults to an empty array.
    /// </summary>
    /// <remarks>
    /// (De)serialized through <c>ByteArrayToIntArrayConverter</c>; judging by its name it
    /// presumably maps the bytes to a JSON array of integers — confirm against that converter.
    /// </remarks>
    [JsonPropertyName("content"), JsonConverter(typeof(ByteArrayToIntArrayConverter))]
    public byte[] Content { get; init; } = Array.Empty<byte>();

    /// <summary>
    /// MIME type of the content (e.g., "application/pdf", "text/html").
    /// </summary>
    [JsonPropertyName("mime_type")]
    public required string MimeType { get; init; }

    /// <summary>
    /// Per-item configuration overrides (null uses batch-level defaults).
    /// </summary>
    [JsonPropertyName("config")]
    public FileExtractionConfig? Config { get; init; }

    /// <summary>
    /// Parse a <see cref="BatchBytesItem"/> from a JSON string.
    /// </summary>
    /// <param name="json">JSON text to deserialize.</param>
    /// <returns>The deserialized item; never null.</returns>
    /// <exception cref="KreuzbergException">
    /// When the JSON cannot be deserialised or the deserializer yields null. Any
    /// other exception raised while deserializing is attached as the inner exception.
    /// </exception>
    public static BatchBytesItem FromJson(string json)
    {
        BatchBytesItem? parsed;
        try
        {
            parsed = JsonSerializer.Deserialize<BatchBytesItem>(json, JsonOptions);
        }
        catch (Exception cause) when (cause is not KreuzbergException)
        {
            // A KreuzbergException raised during deserialization propagates untouched.
            throw new KreuzbergException($"Failed to parse BatchBytesItem from JSON: {cause.Message}", cause);
        }

        if (parsed is null)
        {
            throw new KreuzbergException("Failed to parse BatchBytesItem from JSON: deserializer returned null");
        }

        return parsed;
    }

    /// <summary>
    /// Creates serializer options with the given ignore condition and a
    /// snake_case-lower string enum converter.
    /// </summary>
    private static JsonSerializerOptions BuildOptions(JsonIgnoreCondition ignoreCondition)
    {
        var built = new JsonSerializerOptions { DefaultIgnoreCondition = ignoreCondition };
        built.Converters.Add(new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower));
        return built;
    }
}
|
||||
71
packages/csharp/src/Kreuzberg/BatchFileItem.cs
generated
Normal file
71
packages/csharp/src/Kreuzberg/BatchFileItem.cs
generated
Normal file
@@ -0,0 +1,71 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// Batch item for file extraction.
///
/// Used with <c>batch_extract_files</c> and <c>batch_extract_files_sync</c>
/// to represent a single file in a batch extraction job.
/// </summary>
public sealed record BatchFileItem
{
    /// <summary>Options used by <see cref="FromJson"/> when reading JSON.</summary>
    private static readonly JsonSerializerOptions JsonOptions =
        BuildOptions(JsonIgnoreCondition.WhenWritingDefault);

    /// <summary>
    /// Options for serializing config/input objects to FFI. Nulls are stripped
    /// (nullable C# fields default to null and would override required Rust fields
    /// with non-deserialisable nulls) while explicit false/0 values are preserved so
    /// caller intent is kept. Not referenced by any member visible in this record.
    /// </summary>
    private static readonly JsonSerializerOptions JsonSerializationOptions =
        BuildOptions(JsonIgnoreCondition.WhenWritingNull);

    /// <summary>
    /// Path to the file to extract from. Serialized as <c>path</c>.
    /// </summary>
    [JsonPropertyName("path")]
    public required string Path { get; init; }

    /// <summary>
    /// Per-file configuration overrides (null uses batch-level defaults).
    /// </summary>
    [JsonPropertyName("config")]
    public FileExtractionConfig? Config { get; init; }

    /// <summary>
    /// Parse a <see cref="BatchFileItem"/> from a JSON string.
    /// </summary>
    /// <param name="json">JSON text to deserialize.</param>
    /// <returns>The deserialized item; never null.</returns>
    /// <exception cref="KreuzbergException">
    /// When the JSON cannot be deserialised or the deserializer yields null. Any
    /// other exception raised while deserializing is attached as the inner exception.
    /// </exception>
    public static BatchFileItem FromJson(string json)
    {
        BatchFileItem? parsed;
        try
        {
            parsed = JsonSerializer.Deserialize<BatchFileItem>(json, JsonOptions);
        }
        catch (Exception cause) when (cause is not KreuzbergException)
        {
            // A KreuzbergException raised during deserialization propagates untouched.
            throw new KreuzbergException($"Failed to parse BatchFileItem from JSON: {cause.Message}", cause);
        }

        if (parsed is null)
        {
            throw new KreuzbergException("Failed to parse BatchFileItem from JSON: deserializer returned null");
        }

        return parsed;
    }

    /// <summary>
    /// Creates serializer options with the given ignore condition and a
    /// snake_case-lower string enum converter.
    /// </summary>
    private static JsonSerializerOptions BuildOptions(JsonIgnoreCondition ignoreCondition)
    {
        var built = new JsonSerializerOptions { DefaultIgnoreCondition = ignoreCondition };
        built.Converters.Add(new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower));
        return built;
    }
}
|
||||
74
packages/csharp/src/Kreuzberg/BibtexMetadata.cs
generated
Normal file
74
packages/csharp/src/Kreuzberg/BibtexMetadata.cs
generated
Normal file
@@ -0,0 +1,74 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// BibTeX bibliography metadata.
/// </summary>
public sealed record BibtexMetadata
{
    /// <summary>Options used by <see cref="FromJson"/> when reading JSON.</summary>
    private static readonly JsonSerializerOptions JsonOptions =
        BuildOptions(JsonIgnoreCondition.WhenWritingDefault);

    /// <summary>
    /// Options for serializing config/input objects to FFI. Nulls are stripped
    /// (nullable C# fields default to null and would override required Rust fields
    /// with non-deserialisable nulls) while explicit false/0 values are preserved so
    /// caller intent is kept. Not referenced by any member visible in this record.
    /// </summary>
    private static readonly JsonSerializerOptions JsonSerializationOptions =
        BuildOptions(JsonIgnoreCondition.WhenWritingNull);

    /// <summary>
    /// Number of entries in the bibliography.
    /// </summary>
    [JsonPropertyName("entry_count")]
    public ulong EntryCount { get; init; }

    /// <summary>
    /// Serialized as <c>citation_keys</c>; defaults to an empty list.
    /// Presumably the citation keys of the bibliography entries — confirm against the schema.
    /// </summary>
    [JsonPropertyName("citation_keys")]
    public List<string> CitationKeys { get; init; } = new();

    /// <summary>
    /// Serialized as <c>authors</c>; defaults to an empty list.
    /// </summary>
    [JsonPropertyName("authors")]
    public List<string> Authors { get; init; } = new();

    /// <summary>
    /// Serialized as <c>year_range</c>; null when absent.
    /// </summary>
    [JsonPropertyName("year_range")]
    public YearRange? YearRange { get; init; }

    /// <summary>
    /// Serialized as <c>entry_types</c>; null when absent.
    /// Presumably maps an entry type name to its count — confirm against the schema.
    /// </summary>
    [JsonPropertyName("entry_types")]
    public Dictionary<string, ulong>? EntryTypes { get; init; }

    /// <summary>
    /// Parse a <see cref="BibtexMetadata"/> from a JSON string.
    /// </summary>
    /// <param name="json">JSON text to deserialize.</param>
    /// <returns>The deserialized metadata; never null.</returns>
    /// <exception cref="KreuzbergException">
    /// When the JSON cannot be deserialised or the deserializer yields null. Any
    /// other exception raised while deserializing is attached as the inner exception.
    /// </exception>
    public static BibtexMetadata FromJson(string json)
    {
        BibtexMetadata? parsed;
        try
        {
            parsed = JsonSerializer.Deserialize<BibtexMetadata>(json, JsonOptions);
        }
        catch (Exception cause) when (cause is not KreuzbergException)
        {
            // A KreuzbergException raised during deserialization propagates untouched.
            throw new KreuzbergException($"Failed to parse BibtexMetadata from JSON: {cause.Message}", cause);
        }

        if (parsed is null)
        {
            throw new KreuzbergException("Failed to parse BibtexMetadata from JSON: deserializer returned null");
        }

        return parsed;
    }

    /// <summary>
    /// Creates serializer options with the given ignore condition and a
    /// snake_case-lower string enum converter.
    /// </summary>
    private static JsonSerializerOptions BuildOptions(JsonIgnoreCondition ignoreCondition)
    {
        var built = new JsonSerializerOptions { DefaultIgnoreCondition = ignoreCondition };
        built.Converters.Add(new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower));
        return built;
    }
}
|
||||
105
packages/csharp/src/Kreuzberg/BlockType.cs
generated
Normal file
105
packages/csharp/src/Kreuzberg/BlockType.cs
generated
Normal file
@@ -0,0 +1,105 @@
|
||||
// This file is auto-generated by alef. DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
/// <summary>
/// Types of block-level elements in Djot.
/// </summary>
/// <remarks>
/// JSON conversion is handled by <c>BlockTypeJsonConverter</c>, which maps each member
/// to the snake_case string given in its attribute.
/// </remarks>
[JsonConverter(typeof(BlockTypeJsonConverter))]
public enum BlockType
{
    /// <summary>Wire value <c>"paragraph"</c>.</summary>
    [JsonPropertyName("paragraph")] Paragraph,

    /// <summary>Wire value <c>"heading"</c>.</summary>
    [JsonPropertyName("heading")] Heading,

    /// <summary>Wire value <c>"blockquote"</c>.</summary>
    [JsonPropertyName("blockquote")] Blockquote,

    /// <summary>Wire value <c>"code_block"</c>.</summary>
    [JsonPropertyName("code_block")] CodeBlock,

    /// <summary>Wire value <c>"list_item"</c>.</summary>
    [JsonPropertyName("list_item")] ListItem,

    /// <summary>Wire value <c>"ordered_list"</c>.</summary>
    [JsonPropertyName("ordered_list")] OrderedList,

    /// <summary>Wire value <c>"bullet_list"</c>.</summary>
    [JsonPropertyName("bullet_list")] BulletList,

    /// <summary>Wire value <c>"task_list"</c>.</summary>
    [JsonPropertyName("task_list")] TaskList,

    /// <summary>Wire value <c>"definition_list"</c>.</summary>
    [JsonPropertyName("definition_list")] DefinitionList,

    /// <summary>Wire value <c>"definition_term"</c>.</summary>
    [JsonPropertyName("definition_term")] DefinitionTerm,

    /// <summary>Wire value <c>"definition_description"</c>.</summary>
    [JsonPropertyName("definition_description")] DefinitionDescription,

    /// <summary>Wire value <c>"div"</c>.</summary>
    [JsonPropertyName("div")] Div,

    /// <summary>Wire value <c>"section"</c>.</summary>
    [JsonPropertyName("section")] Section,

    /// <summary>Wire value <c>"thematic_break"</c>.</summary>
    [JsonPropertyName("thematic_break")] ThematicBreak,

    /// <summary>Wire value <c>"raw_block"</c>.</summary>
    [JsonPropertyName("raw_block")] RawBlock,

    /// <summary>Wire value <c>"math_display"</c>.</summary>
    [JsonPropertyName("math_display")] MathDisplay,
}
|
||||
|
||||
|
||||
/// <summary>
/// Reads and writes <see cref="BlockType"/> as its explicit snake_case string name
/// instead of relying on a naming policy.
/// </summary>
internal sealed class BlockTypeJsonConverter : JsonConverter<BlockType>
{
    /// <summary>
    /// Maps the current JSON string token to a <see cref="BlockType"/> member.
    /// </summary>
    /// <exception cref="JsonException">The string is not one of the known wire names.</exception>
    public override BlockType Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options)
    {
        string? wireName = reader.GetString();
        switch (wireName)
        {
            case "paragraph":
                return BlockType.Paragraph;
            case "heading":
                return BlockType.Heading;
            case "blockquote":
                return BlockType.Blockquote;
            case "code_block":
                return BlockType.CodeBlock;
            case "list_item":
                return BlockType.ListItem;
            case "ordered_list":
                return BlockType.OrderedList;
            case "bullet_list":
                return BlockType.BulletList;
            case "task_list":
                return BlockType.TaskList;
            case "definition_list":
                return BlockType.DefinitionList;
            case "definition_term":
                return BlockType.DefinitionTerm;
            case "definition_description":
                return BlockType.DefinitionDescription;
            case "div":
                return BlockType.Div;
            case "section":
                return BlockType.Section;
            case "thematic_break":
                return BlockType.ThematicBreak;
            case "raw_block":
                return BlockType.RawBlock;
            case "math_display":
                return BlockType.MathDisplay;
            default:
                throw new JsonException($"Unknown BlockType value: {wireName}");
        }
    }

    /// <summary>
    /// Emits <paramref name="value"/> as its wire-name string.
    /// </summary>
    /// <exception cref="JsonException">The value is not a declared member of the enum.</exception>
    public override void Write(Utf8JsonWriter writer, BlockType value, JsonSerializerOptions options)
    {
        // The name is resolved first so an undeclared value fails before anything is written.
        string wireName = ToWireName(value);
        writer.WriteStringValue(wireName);
    }

    /// <summary>Returns the wire name for a declared member, or throws for anything else.</summary>
    private static string ToWireName(BlockType value)
    {
        switch (value)
        {
            case BlockType.Paragraph:
                return "paragraph";
            case BlockType.Heading:
                return "heading";
            case BlockType.Blockquote:
                return "blockquote";
            case BlockType.CodeBlock:
                return "code_block";
            case BlockType.ListItem:
                return "list_item";
            case BlockType.OrderedList:
                return "ordered_list";
            case BlockType.BulletList:
                return "bullet_list";
            case BlockType.TaskList:
                return "task_list";
            case BlockType.DefinitionList:
                return "definition_list";
            case BlockType.DefinitionTerm:
                return "definition_term";
            case BlockType.DefinitionDescription:
                return "definition_description";
            case BlockType.Div:
                return "div";
            case BlockType.Section:
                return "section";
            case BlockType.ThematicBreak:
                return "thematic_break";
            case BlockType.RawBlock:
                return "raw_block";
            case BlockType.MathDisplay:
                return "math_display";
            default:
                throw new JsonException($"Unknown BlockType value: {value}");
        }
    }
}
|
||||
80
packages/csharp/src/Kreuzberg/BoundingBox.cs
generated
Normal file
80
packages/csharp/src/Kreuzberg/BoundingBox.cs
generated
Normal file
@@ -0,0 +1,80 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// Rectangle, given by two corner coordinates, used to position an element.
/// </summary>
public sealed record BoundingBox
{
    /// <summary>Reader-side options used by <see cref="FromJson"/>.</summary>
    private static readonly JsonSerializerOptions JsonOptions = new JsonSerializerOptions
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>
    /// Writer-side options for config/input objects sent over FFI: nulls are omitted
    /// (a null would override a required Rust field with a value it cannot deserialise),
    /// while explicit false/0 values are kept so the caller's intent survives.
    /// </summary>
    private static readonly JsonSerializerOptions JsonSerializationOptions = new JsonSerializerOptions
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>Left x-coordinate (JSON field <c>x0</c>); 0.0 when not supplied.</summary>
    [JsonPropertyName("x0")]
    public double X0 { get; init; }

    /// <summary>Bottom y-coordinate (JSON field <c>y0</c>); 0.0 when not supplied.</summary>
    [JsonPropertyName("y0")]
    public double Y0 { get; init; }

    /// <summary>Right x-coordinate (JSON field <c>x1</c>); 0.0 when not supplied.</summary>
    [JsonPropertyName("x1")]
    public double X1 { get; init; }

    /// <summary>Top y-coordinate (JSON field <c>y1</c>); 0.0 when not supplied.</summary>
    [JsonPropertyName("y1")]
    public double Y1 { get; init; }

    /// <summary>
    /// Builds a <see cref="BoundingBox"/> from its JSON text.
    /// </summary>
    /// <param name="json">JSON object text to deserialise.</param>
    /// <returns>The deserialised instance; never null.</returns>
    /// <exception cref="KreuzbergException">
    /// The deserialiser failed (the original error is kept as the inner exception) or produced null.
    /// </exception>
    public static BoundingBox FromJson(string json)
    {
        BoundingBox? parsed;
        try
        {
            parsed = JsonSerializer.Deserialize<BoundingBox>(json, JsonOptions);
        }
        catch (Exception e) when (e is not KreuzbergException)
        {
            // A KreuzbergException raised during deserialisation passes through unwrapped.
            throw new KreuzbergException($"Failed to parse BoundingBox from JSON: {e.Message}", e);
        }

        return parsed
            ?? throw new KreuzbergException("Failed to parse BoundingBox from JSON: deserializer returned null");
    }
}
|
||||
74
packages/csharp/src/Kreuzberg/ByteArrayToIntArrayConverter.cs
generated
Normal file
74
packages/csharp/src/Kreuzberg/ByteArrayToIntArrayConverter.cs
generated
Normal file
@@ -0,0 +1,74 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// Converts byte arrays to and from JSON integer arrays.
/// </summary>
/// <remarks>
/// System.Text.Json serializes byte[] as base64 strings by default, but Rust's serde
/// for Vec&lt;u8&gt; expects JSON arrays of integers [72, 101, 108, ...].
/// Apply this converter to byte[] fields that are serialized to FFI with
/// [JsonConverter(typeof(ByteArrayToIntArrayConverter))].
/// </remarks>
public sealed class ByteArrayToIntArrayConverter : JsonConverter<byte[]>
{
    /// <summary>
    /// Reads a JSON array of integers and converts it to a byte array.
    /// </summary>
    /// <param name="reader">Reader positioned on the array's start token.</param>
    /// <param name="typeToConvert">Unused; required by the converter contract.</param>
    /// <param name="options">Unused; required by the converter contract.</param>
    /// <returns>The bytes in array order; an empty array for <c>[]</c>.</returns>
    /// <exception cref="JsonException">
    /// The current token is not an array start, an element is not a number, or a number
    /// is not an integer in the range 0-255.
    /// </exception>
    public override byte[]? Read(
        ref Utf8JsonReader reader,
        Type typeToConvert,
        JsonSerializerOptions options)
    {
        if (reader.TokenType != JsonTokenType.StartArray)
        {
            throw new JsonException("Expected JSON array for byte[]");
        }

        var bytes = new List<byte>();
        while (reader.Read())
        {
            if (reader.TokenType == JsonTokenType.EndArray)
            {
                break;
            }

            if (reader.TokenType != JsonTokenType.Number)
            {
                throw new JsonException($"Unexpected token type: {reader.TokenType}");
            }

            // TryGetByte rejects anything that is not an integer in 0-255. An unchecked
            // (byte) cast would silently wrap such values (256 -> 0, -1 -> 255) and
            // corrupt the payload instead of reporting bad input.
            if (!reader.TryGetByte(out byte element))
            {
                throw new JsonException("Byte array element must be an integer between 0 and 255");
            }

            bytes.Add(element);
        }

        return bytes.ToArray();
    }

    /// <summary>
    /// Writes a byte array as a JSON array of integers.
    /// </summary>
    /// <param name="writer">Destination writer.</param>
    /// <param name="value">Bytes to emit, one JSON number per byte.</param>
    /// <param name="options">Unused; required by the converter contract.</param>
    public override void Write(
        Utf8JsonWriter writer,
        byte[] value,
        JsonSerializerOptions options)
    {
        writer.WriteStartArray();
        foreach (var b in value)
        {
            writer.WriteNumberValue(b);
        }
        writer.WriteEndArray();
    }
}
|
||||
14
packages/csharp/src/Kreuzberg/CacheException.cs
generated
Normal file
14
packages/csharp/src/Kreuzberg/CacheException.cs
generated
Normal file
@@ -0,0 +1,14 @@
|
||||
// This file is auto-generated by alef. DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// Member of the <see cref="KreuzbergErrorException"/> hierarchy; going by its name,
/// it represents cache failures.
/// </summary>
public class CacheException : KreuzbergErrorException
{
    /// <summary>Creates the exception with the given message.</summary>
    /// <param name="message">Text describing the failure.</param>
    public CacheException(string message)
        : base(message)
    {
    }

    /// <summary>Creates the exception with a message and the error that caused it.</summary>
    /// <param name="message">Text describing the failure.</param>
    /// <param name="innerException">Underlying cause, forwarded to the base class.</param>
    public CacheException(string message, Exception innerException)
        : base(message, innerException)
    {
    }
}
|
||||
68
packages/csharp/src/Kreuzberg/CacheStats.cs
generated
Normal file
68
packages/csharp/src/Kreuzberg/CacheStats.cs
generated
Normal file
@@ -0,0 +1,68 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// Cache statistics record deserialised from JSON. Field meanings below are read off
/// the field names; confirm against the producing side if exact semantics matter.
/// </summary>
public sealed record CacheStats
{
    /// <summary>Reader-side options used by <see cref="FromJson"/>.</summary>
    private static readonly JsonSerializerOptions JsonOptions = new JsonSerializerOptions
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>
    /// Writer-side options for config/input objects sent over FFI: nulls are omitted
    /// (a null would override a required Rust field with a value it cannot deserialise),
    /// while explicit false/0 values are kept so the caller's intent survives.
    /// </summary>
    private static readonly JsonSerializerOptions JsonSerializationOptions = new JsonSerializerOptions
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>JSON field <c>total_files</c>; presumably the number of cached files.</summary>
    [JsonPropertyName("total_files")]
    public ulong TotalFiles { get; init; }

    /// <summary>JSON field <c>total_size_mb</c>; presumably the cache size in megabytes.</summary>
    [JsonPropertyName("total_size_mb")]
    public double TotalSizeMb { get; init; }

    /// <summary>JSON field <c>available_space_mb</c>; presumably free space in megabytes.</summary>
    [JsonPropertyName("available_space_mb")]
    public double AvailableSpaceMb { get; init; }

    /// <summary>JSON field <c>oldest_file_age_days</c>; presumably the age of the oldest file in days.</summary>
    [JsonPropertyName("oldest_file_age_days")]
    public double OldestFileAgeDays { get; init; }

    /// <summary>JSON field <c>newest_file_age_days</c>; presumably the age of the newest file in days.</summary>
    [JsonPropertyName("newest_file_age_days")]
    public double NewestFileAgeDays { get; init; }

    /// <summary>
    /// Builds a <see cref="CacheStats"/> from its JSON text.
    /// </summary>
    /// <param name="json">JSON object text to deserialise.</param>
    /// <returns>The deserialised instance; never null.</returns>
    /// <exception cref="KreuzbergException">
    /// The deserialiser failed (the original error is kept as the inner exception) or produced null.
    /// </exception>
    public static CacheStats FromJson(string json)
    {
        CacheStats? parsed;
        try
        {
            parsed = JsonSerializer.Deserialize<CacheStats>(json, JsonOptions);
        }
        catch (Exception e) when (e is not KreuzbergException)
        {
            // A KreuzbergException raised during deserialisation passes through unwrapped.
            throw new KreuzbergException($"Failed to parse CacheStats from JSON: {e.Message}", e);
        }

        return parsed
            ?? throw new KreuzbergException("Failed to parse CacheStats from JSON: deserializer returned null");
    }
}
|
||||
14
packages/csharp/src/Kreuzberg/CancelledException.cs
generated
Normal file
14
packages/csharp/src/Kreuzberg/CancelledException.cs
generated
Normal file
@@ -0,0 +1,14 @@
|
||||
// This file is auto-generated by alef. DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// Member of the <see cref="KreuzbergErrorException"/> hierarchy; going by its name,
/// it represents a cancelled operation.
/// </summary>
public class CancelledException : KreuzbergErrorException
{
    /// <summary>Creates the exception with the given message.</summary>
    /// <param name="message">Text describing the failure.</param>
    public CancelledException(string message)
        : base(message)
    {
    }

    /// <summary>Creates the exception with a message and the error that caused it.</summary>
    /// <param name="message">Text describing the failure.</param>
    /// <param name="innerException">Underlying cause, forwarded to the base class.</param>
    public CancelledException(string message, Exception innerException)
        : base(message, innerException)
    {
    }
}
|
||||
84
packages/csharp/src/Kreuzberg/CellChange.cs
generated
Normal file
84
packages/csharp/src/Kreuzberg/CellChange.cs
generated
Normal file
@@ -0,0 +1,84 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// One changed cell inside a table.
///
/// The type lives here (not solely in `crate.diff`) so that `RevisionDelta` can refer to
/// it unconditionally, without the `diff` Cargo feature being enabled; `crate.diff`
/// re-exports it verbatim.
/// </summary>
public sealed record CellChange
{
    /// <summary>Reader-side options used by <see cref="FromJson"/>.</summary>
    private static readonly JsonSerializerOptions JsonOptions = new JsonSerializerOptions
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>
    /// Writer-side options for config/input objects sent over FFI: nulls are omitted
    /// (a null would override a required Rust field with a value it cannot deserialise),
    /// while explicit false/0 values are kept so the caller's intent survives.
    /// </summary>
    private static readonly JsonSerializerOptions JsonSerializationOptions = new JsonSerializerOptions
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>Row index, counted from zero (JSON field <c>row</c>).</summary>
    [JsonPropertyName("row")]
    public ulong Row { get; init; }

    /// <summary>Column index, counted from zero (JSON field <c>col</c>).</summary>
    [JsonPropertyName("col")]
    public ulong Col { get; init; }

    /// <summary>Cell value before the change (JSON field <c>from</c>).</summary>
    [JsonPropertyName("from")]
    public required string From { get; init; }

    /// <summary>Cell value after the change (JSON field <c>to</c>).</summary>
    [JsonPropertyName("to")]
    public required string To { get; init; }

    /// <summary>
    /// Builds a <see cref="CellChange"/> from its JSON text.
    /// </summary>
    /// <param name="json">JSON object text to deserialise.</param>
    /// <returns>The deserialised instance; never null.</returns>
    /// <exception cref="KreuzbergException">
    /// The deserialiser failed (the original error is kept as the inner exception) or produced null.
    /// </exception>
    public static CellChange FromJson(string json)
    {
        CellChange? parsed;
        try
        {
            parsed = JsonSerializer.Deserialize<CellChange>(json, JsonOptions);
        }
        catch (Exception e) when (e is not KreuzbergException)
        {
            // A KreuzbergException raised during deserialisation passes through unwrapped.
            throw new KreuzbergException($"Failed to parse CellChange from JSON: {e.Message}", e);
        }

        return parsed
            ?? throw new KreuzbergException("Failed to parse CellChange from JSON: deserializer returned null");
    }
}
|
||||
90
packages/csharp/src/Kreuzberg/Chunk.cs
generated
Normal file
90
packages/csharp/src/Kreuzberg/Chunk.cs
generated
Normal file
@@ -0,0 +1,90 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// A piece of text together with its metadata and, optionally, an embedding.
///
/// Chunks are produced when chunking is switched on in `ExtractionConfig`. Each one holds
/// the text, an embedding vector if embedding generation is configured, and metadata
/// describing where it sits in the document.
/// </summary>
public sealed record Chunk
{
    /// <summary>Reader-side options used by <see cref="FromJson"/>.</summary>
    private static readonly JsonSerializerOptions JsonOptions = new JsonSerializerOptions
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>
    /// Writer-side options for config/input objects sent over FFI: nulls are omitted
    /// (a null would override a required Rust field with a value it cannot deserialise),
    /// while explicit false/0 values are kept so the caller's intent survives.
    /// </summary>
    private static readonly JsonSerializerOptions JsonSerializationOptions = new JsonSerializerOptions
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>Text of the chunk (JSON field <c>content</c>).</summary>
    [JsonPropertyName("content")]
    public required string Content { get; init; }

    /// <summary>
    /// Semantic structural class of the chunk (JSON field <c>chunk_type</c>).
    ///
    /// Set by the heuristic classifier from content patterns and heading context; the
    /// classifier falls back to `ChunkType.Unknown` when no rule matches. Null on this
    /// record when the field is absent from the JSON.
    /// </summary>
    [JsonPropertyName("chunk_type")]
    public ChunkType? ChunkType { get; init; }

    /// <summary>
    /// Embedding vector for the chunk, if any (JSON field <c>embedding</c>).
    ///
    /// Present only when an `EmbeddingConfig` is supplied in the chunking configuration;
    /// its length depends on the embedding model chosen.
    /// </summary>
    [JsonPropertyName("embedding")]
    public List<float>? Embedding { get; init; }

    /// <summary>Position and other properties of the chunk (JSON field <c>metadata</c>).</summary>
    [JsonPropertyName("metadata")]
    public required ChunkMetadata Metadata { get; init; }

    /// <summary>
    /// Builds a <see cref="Chunk"/> from its JSON text.
    /// </summary>
    /// <param name="json">JSON object text to deserialise.</param>
    /// <returns>The deserialised instance; never null.</returns>
    /// <exception cref="KreuzbergException">
    /// The deserialiser failed (the original error is kept as the inner exception) or produced null.
    /// </exception>
    public static Chunk FromJson(string json)
    {
        Chunk? parsed;
        try
        {
            parsed = JsonSerializer.Deserialize<Chunk>(json, JsonOptions);
        }
        catch (Exception e) when (e is not KreuzbergException)
        {
            // A KreuzbergException raised during deserialisation passes through unwrapped.
            throw new KreuzbergException($"Failed to parse Chunk from JSON: {e.Message}", e);
        }

        return parsed
            ?? throw new KreuzbergException("Failed to parse Chunk from JSON: deserializer returned null");
    }
}
|
||||
123
packages/csharp/src/Kreuzberg/ChunkMetadata.cs
generated
Normal file
123
packages/csharp/src/Kreuzberg/ChunkMetadata.cs
generated
Normal file
@@ -0,0 +1,123 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// Describes where a chunk sits within the original document.
/// </summary>
public sealed record ChunkMetadata
{
    /// <summary>Reader-side options used by <see cref="FromJson"/>.</summary>
    private static readonly JsonSerializerOptions JsonOptions = new JsonSerializerOptions
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>
    /// Writer-side options for config/input objects sent over FFI: nulls are omitted
    /// (a null would override a required Rust field with a value it cannot deserialise),
    /// while explicit false/0 values are kept so the caller's intent survives.
    /// </summary>
    private static readonly JsonSerializerOptions JsonSerializationOptions = new JsonSerializerOptions
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>
    /// Byte offset in the original text at which the chunk begins, on a valid UTF-8
    /// boundary (JSON field <c>byte_start</c>).
    /// </summary>
    [JsonPropertyName("byte_start")]
    public ulong ByteStart { get; init; }

    /// <summary>
    /// Byte offset in the original text at which the chunk ends, on a valid UTF-8
    /// boundary (JSON field <c>byte_end</c>).
    /// </summary>
    [JsonPropertyName("byte_end")]
    public ulong ByteEnd { get; init; }

    /// <summary>
    /// Token count of the chunk, when available (JSON field <c>token_count</c>).
    ///
    /// Computed by the embedding model's tokenizer if embeddings are enabled.
    /// </summary>
    [JsonPropertyName("token_count")]
    public ulong? TokenCount { get; init; }

    /// <summary>Index of the chunk within the document, counted from zero (JSON field <c>chunk_index</c>).</summary>
    [JsonPropertyName("chunk_index")]
    public ulong ChunkIndex { get; init; }

    /// <summary>Number of chunks in the whole document (JSON field <c>total_chunks</c>).</summary>
    [JsonPropertyName("total_chunks")]
    public ulong TotalChunks { get; init; }

    /// <summary>
    /// First page covered by the chunk, 1-indexed (JSON field <c>first_page</c>).
    ///
    /// Set only when page tracking is enabled in the extraction configuration.
    /// </summary>
    [JsonPropertyName("first_page")]
    public uint? FirstPage { get; init; }

    /// <summary>
    /// Last page covered by the chunk, 1-indexed; the same as first_page for a chunk that
    /// stays on one page (JSON field <c>last_page</c>).
    ///
    /// Set only when page tracking is enabled in the extraction configuration.
    /// </summary>
    [JsonPropertyName("last_page")]
    public uint? LastPage { get; init; }

    /// <summary>
    /// Heading hierarchy the chunk falls under (JSON field <c>heading_context</c>).
    ///
    /// Set only when `ChunkerType.Markdown` is used.
    /// </summary>
    [JsonPropertyName("heading_context")]
    public HeadingContext? HeadingContext { get; init; }

    /// <summary>
    /// Zero-based indices into `ExtractionResult.images` (the top-level `images`
    /// collection) for every image whose `page_number` lies within
    /// `[first_page, last_page]` (JSON field <c>image_indices</c>).
    ///
    /// Empty when image extraction is disabled or none of the covered pages has images.
    /// </summary>
    [JsonPropertyName("image_indices")]
    public List<uint> ImageIndices { get; init; } = new List<uint>();

    /// <summary>
    /// Builds a <see cref="ChunkMetadata"/> from its JSON text.
    /// </summary>
    /// <param name="json">JSON object text to deserialise.</param>
    /// <returns>The deserialised instance; never null.</returns>
    /// <exception cref="KreuzbergException">
    /// The deserialiser failed (the original error is kept as the inner exception) or produced null.
    /// </exception>
    public static ChunkMetadata FromJson(string json)
    {
        ChunkMetadata? parsed;
        try
        {
            parsed = JsonSerializer.Deserialize<ChunkMetadata>(json, JsonOptions);
        }
        catch (Exception e) when (e is not KreuzbergException)
        {
            // A KreuzbergException raised during deserialisation passes through unwrapped.
            throw new KreuzbergException($"Failed to parse ChunkMetadata from JSON: {e.Message}", e);
        }

        return parsed
            ?? throw new KreuzbergException("Failed to parse ChunkMetadata from JSON: deserializer returned null");
    }
}
|
||||
155
packages/csharp/src/Kreuzberg/ChunkSizing.cs
generated
Normal file
155
packages/csharp/src/Kreuzberg/ChunkSizing.cs
generated
Normal file
@@ -0,0 +1,155 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// Unit in which chunk size is measured.
///
/// The default is `Characters` (a count of Unicode characters). With token-based sizing,
/// chunk size is a token count produced by the named tokenizer.
///
/// Token-based sizing loads HuggingFace tokenizers at runtime. Any tokenizer published
/// on HuggingFace Hub may be used, including OpenAI-compatible ones
/// (e.g., `Xenova/gpt-4o`, `Xenova/cl100k_base`).
/// </summary>
[JsonConverter(typeof(ChunkSizingJsonConverter))]
public abstract record ChunkSizing
{
    /// <summary>
    /// Size is counted in Unicode characters (the default).
    /// </summary>
    public sealed record Characters()
        : ChunkSizing;

    /// <summary>
    /// Size is counted in tokens produced by a HuggingFace tokenizer.
    /// </summary>
    /// <param name="Model">Tokenizer to use (JSON field <c>model</c>).</param>
    /// <param name="CacheDir">Optional cache directory (JSON field <c>cache_dir</c>).</param>
    public sealed record Tokenizer(
        [property: JsonPropertyName("model")] string Model,
        [property: JsonPropertyName("cache_dir")] string? CacheDir)
        : ChunkSizing;
}
|
||||
|
||||
/// <summary>
/// Custom converter for the <see cref="ChunkSizing"/> sealed union with flattened variant fields.
/// </summary>
/// <remarks>
/// Handles JSON objects that carry a discriminator field (<c>type</c>) alongside the
/// variant-specific fields at the same level, e.g.
/// <c>{"type":"tokenizer","model":"...","cache_dir":null}</c>. System.Text.Json's
/// [JsonPolymorphic] cannot handle this layout, so the object is taken apart manually.
/// </remarks>
public sealed class ChunkSizingJsonConverter : JsonConverter<ChunkSizing>
{
    /// <summary>Name of the JSON property holding the variant tag.</summary>
    private const string DiscriminatorField = "type";

    /// <summary>
    /// Reads one JSON object and returns the variant named by its <c>type</c> field.
    /// </summary>
    /// <param name="reader">Reader positioned on the object's start token.</param>
    /// <param name="typeToConvert">Unused; required by the converter contract.</param>
    /// <param name="options">Options forwarded when deserialising the variant's fields.</param>
    /// <returns>A <see cref="ChunkSizing.Characters"/> or <see cref="ChunkSizing.Tokenizer"/> instance.</returns>
    /// <exception cref="JsonException">
    /// The token is not an object; the discriminator is missing, null, not a string, or
    /// unknown; or the variant's fields deserialise to null.
    /// </exception>
    public override ChunkSizing Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options)
    {
        if (reader.TokenType != JsonTokenType.StartObject)
        {
            throw new JsonException($"Expected JSON object, got {reader.TokenType}");
        }

        using var doc = JsonDocument.ParseValue(ref reader);
        var root = doc.RootElement;

        if (!root.TryGetProperty(DiscriminatorField, out var tagElement))
        {
            throw new JsonException($"Missing discriminator field: type");
        }

        if (tagElement.ValueKind == JsonValueKind.Null)
        {
            throw new JsonException("Discriminator field is null");
        }

        // JsonElement.GetString() throws InvalidOperationException for numbers, booleans,
        // objects and arrays; report malformed input as a JsonException instead.
        if (tagElement.ValueKind != JsonValueKind.String)
        {
            throw new JsonException($"Discriminator field must be a string, got {tagElement.ValueKind}");
        }

        var tagValue = tagElement.GetString();

        // Both variants are either field-less or struct-like (positional components
        // annotated with [JsonPropertyName]), so the remaining fields are passed to the
        // variant deserialiser as-is. The stripped copy is built only when it is needed.
        return tagValue switch
        {
            "characters" => new ChunkSizing.Characters(),
            "tokenizer" => JsonSerializer.Deserialize<ChunkSizing.Tokenizer>(WithoutDiscriminator(root), options)
                ?? throw new JsonException("Failed to deserialize variant"),
            _ => throw new JsonException($"Unknown ChunkSizing discriminator: {tagValue}"),
        };
    }

    /// <summary>
    /// Writes the variant as a single flat object: the <c>type</c> tag followed by the
    /// variant's own fields.
    /// </summary>
    /// <remarks>
    /// The tag and fields share one level, e.g. <c>{"type":"tokenizer","model":...}</c>
    /// rather than <c>{"value":{...}}</c>; without the tag, Rust serde on the FFI side
    /// rejects the payload with "missing field type".
    /// </remarks>
    /// <param name="writer">Destination writer.</param>
    /// <param name="value">Variant to serialise.</param>
    /// <param name="options">Options forwarded when serialising the variant's fields.</param>
    /// <exception cref="JsonException"><paramref name="value"/> is null or not a known variant.</exception>
    public override void Write(Utf8JsonWriter writer, ChunkSizing value, JsonSerializerOptions options)
    {
        string tag;
        object? inner;
        switch (value)
        {
            case ChunkSizing.Characters _:
                tag = "characters";
                inner = null;
                break;
            case ChunkSizing.Tokenizer tokenizer:
                tag = "tokenizer";
                inner = tokenizer;
                break;
            default:
                // value may be null here, so avoid dereferencing it unconditionally.
                throw new JsonException($"Unknown ChunkSizing variant: {value?.GetType().Name ?? "null"}");
        }

        writer.WriteStartObject();
        writer.WriteString(DiscriminatorField, tag);
        if (inner != null)
        {
            // Serialise the variant by its runtime type, then copy its properties into
            // the object that already holds the tag.
            using var doc = JsonSerializer.SerializeToDocument(inner, inner.GetType(), options);
            if (doc.RootElement.ValueKind == JsonValueKind.Object)
            {
                foreach (var prop in doc.RootElement.EnumerateObject())
                {
                    writer.WritePropertyName(prop.Name);
                    prop.Value.WriteTo(writer);
                }
            }
        }
        writer.WriteEndObject();
    }

    /// <summary>
    /// Returns UTF-8 JSON for <paramref name="root"/> with every <c>type</c> property removed.
    /// </summary>
    private static byte[] WithoutDiscriminator(JsonElement root)
    {
        using var buffer = new MemoryStream();
        using var writer = new Utf8JsonWriter(buffer);
        writer.WriteStartObject();
        foreach (var prop in root.EnumerateObject())
        {
            if (prop.Name != DiscriminatorField)
            {
                writer.WritePropertyName(prop.Name);
                prop.Value.WriteTo(writer);
            }
        }
        writer.WriteEndObject();
        // The writer buffers internally; flush before reading the stream's contents.
        writer.Flush();
        return buffer.ToArray();
    }
}
|
||||
136
packages/csharp/src/Kreuzberg/ChunkType.cs
generated
Normal file
136
packages/csharp/src/Kreuzberg/ChunkType.cs
generated
Normal file
@@ -0,0 +1,136 @@
|
||||
// This file is auto-generated by alef. DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
/// <summary>
/// Semantic structural class assigned to a text chunk.
///
/// The value is set by the heuristic classifier in `chunking.classifier`, which falls
/// back to `Unknown` when no rule matches. The set of members is designed to be
/// extended in future versions without breaking changes.
/// </summary>
[JsonConverter(typeof(ChunkTypeJsonConverter))]
public enum ChunkType
{
    /// <summary>A section heading or the document title (wire name <c>heading</c>).</summary>
    [JsonPropertyName("heading")]
    Heading,

    /// <summary>A list of parties: names, addresses and signatories (wire name <c>party_list</c>).</summary>
    [JsonPropertyName("party_list")]
    PartyList,

    /// <summary>A definition clause such as "X means…" or "X shall mean…" (wire name <c>definitions</c>).</summary>
    [JsonPropertyName("definitions")]
    Definitions,

    /// <summary>An operative clause with legal/contractual action verbs (wire name <c>operative_clause</c>).</summary>
    [JsonPropertyName("operative_clause")]
    OperativeClause,

    /// <summary>A signature block: signatures, names and dates (wire name <c>signature_block</c>).</summary>
    [JsonPropertyName("signature_block")]
    SignatureBlock,

    /// <summary>A schedule, annex, appendix or exhibit section (wire name <c>schedule</c>).</summary>
    [JsonPropertyName("schedule")]
    Schedule,

    /// <summary>Table-like content: aligned columns or repeated patterns (wire name <c>table_like</c>).</summary>
    [JsonPropertyName("table_like")]
    TableLike,

    /// <summary>A mathematical formula or equation (wire name <c>formula</c>).</summary>
    [JsonPropertyName("formula")]
    Formula,

    /// <summary>A code block or other preformatted content (wire name <c>code_block</c>).</summary>
    [JsonPropertyName("code_block")]
    CodeBlock,

    /// <summary>Embedded or referenced image content (wire name <c>image</c>).</summary>
    [JsonPropertyName("image")]
    Image,

    /// <summary>An organizational chart or hierarchy diagram (wire name <c>org_chart</c>).</summary>
    [JsonPropertyName("org_chart")]
    OrgChart,

    /// <summary>A diagram, figure or visual illustration (wire name <c>diagram</c>).</summary>
    [JsonPropertyName("diagram")]
    Diagram,

    /// <summary>Unclassified or mixed content (wire name <c>unknown</c>).</summary>
    [JsonPropertyName("unknown")]
    Unknown,
}
|
||||
|
||||
|
||||
/// <summary>
|
||||
/// Custom JSON converter for <see cref="ChunkType"/> that respects explicit variant names.
|
||||
/// </summary>
|
||||
internal sealed class ChunkTypeJsonConverter : JsonConverter<ChunkType>
|
||||
{
|
||||
public override ChunkType Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options)
|
||||
{
|
||||
var value = reader.GetString();
|
||||
return value switch
|
||||
{
|
||||
"heading" => ChunkType.Heading,
|
||||
"party_list" => ChunkType.PartyList,
|
||||
"definitions" => ChunkType.Definitions,
|
||||
"operative_clause" => ChunkType.OperativeClause,
|
||||
"signature_block" => ChunkType.SignatureBlock,
|
||||
"schedule" => ChunkType.Schedule,
|
||||
"table_like" => ChunkType.TableLike,
|
||||
"formula" => ChunkType.Formula,
|
||||
"code_block" => ChunkType.CodeBlock,
|
||||
"image" => ChunkType.Image,
|
||||
"org_chart" => ChunkType.OrgChart,
|
||||
"diagram" => ChunkType.Diagram,
|
||||
"unknown" => ChunkType.Unknown,
|
||||
_ => throw new JsonException($"Unknown ChunkType value: {value}")
|
||||
};
|
||||
}
|
||||
|
||||
public override void Write(Utf8JsonWriter writer, ChunkType value, JsonSerializerOptions options)
|
||||
{
|
||||
var str = value switch
|
||||
{
|
||||
ChunkType.Heading => "heading",
|
||||
ChunkType.PartyList => "party_list",
|
||||
ChunkType.Definitions => "definitions",
|
||||
ChunkType.OperativeClause => "operative_clause",
|
||||
ChunkType.SignatureBlock => "signature_block",
|
||||
ChunkType.Schedule => "schedule",
|
||||
ChunkType.TableLike => "table_like",
|
||||
ChunkType.Formula => "formula",
|
||||
ChunkType.CodeBlock => "code_block",
|
||||
ChunkType.Image => "image",
|
||||
ChunkType.OrgChart => "org_chart",
|
||||
ChunkType.Diagram => "diagram",
|
||||
ChunkType.Unknown => "unknown",
|
||||
_ => throw new JsonException($"Unknown ChunkType value: {value}")
|
||||
};
|
||||
writer.WriteStringValue(str);
|
||||
}
|
||||
}
|
||||
70
packages/csharp/src/Kreuzberg/ChunkerType.cs
generated
Normal file
70
packages/csharp/src/Kreuzberg/ChunkerType.cs
generated
Normal file
@@ -0,0 +1,70 @@
|
||||
// This file is auto-generated by alef. DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
/// <summary>
/// Type of text chunker to use.
/// </summary>
[JsonConverter(typeof(ChunkerTypeJsonConverter))]
public enum ChunkerType
{
    /// <summary>
    /// Generic text splitter, splits on whitespace and punctuation.
    /// </summary>
    [JsonPropertyName("text")]
    Text,

    /// <summary>
    /// Markdown-aware splitter, preserves formatting and structure.
    /// </summary>
    [JsonPropertyName("markdown")]
    Markdown,

    /// <summary>
    /// YAML-aware splitter, creates one chunk per top-level key.
    /// </summary>
    [JsonPropertyName("yaml")]
    Yaml,

    /// <summary>
    /// Topic-aware chunker. With an <c>EmbeddingConfig</c>, splits at
    /// embedding-based topic shifts tuned by <c>topic_threshold</c> (default 0.75,
    /// lower = more splits). Without an embedding, falls back to a
    /// structural-boundary heuristic (ALL-CAPS headers, numbered sections,
    /// blank-line paragraphs) and merges groups into chunks capped at
    /// <c>max_characters</c> (default 1000). <c>topic_threshold</c> has no effect in the
    /// fallback path. For best results, pair with an embedding model.
    /// </summary>
    [JsonPropertyName("semantic")]
    Semantic,
}

/// <summary>
/// JSON converter for <see cref="ChunkerType"/> that reads and writes each member
/// as its explicit lowercase string name (for example <c>"markdown"</c>).
/// </summary>
internal sealed class ChunkerTypeJsonConverter : JsonConverter<ChunkerType>
{
    /// <summary>
    /// Parse a <see cref="ChunkerType"/> from the JSON string at the reader's current position.
    /// </summary>
    /// <param name="reader">Reader positioned on the value to convert.</param>
    /// <param name="typeToConvert">Unused; always <see cref="ChunkerType"/>.</param>
    /// <param name="options">Unused.</param>
    /// <returns>The enum member whose wire name matches the JSON string.</returns>
    /// <exception cref="JsonException">
    /// When the current token is not a JSON string, or the string is not a known variant name.
    /// </exception>
    public override ChunkerType Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options)
    {
        // Reject non-string tokens up front so the failure message names the real problem.
        if (reader.TokenType != JsonTokenType.String)
        {
            throw new JsonException($"Expected a JSON string for ChunkerType but found token {reader.TokenType}");
        }

        var value = reader.GetString();
        return value switch
        {
            "text" => ChunkerType.Text,
            "markdown" => ChunkerType.Markdown,
            "yaml" => ChunkerType.Yaml,
            "semantic" => ChunkerType.Semantic,
            _ => throw new JsonException($"Unknown ChunkerType value: {value}")
        };
    }

    /// <summary>
    /// Write <paramref name="value"/> as its lowercase string name.
    /// </summary>
    /// <param name="writer">Destination writer.</param>
    /// <param name="value">Enum member to serialize.</param>
    /// <param name="options">Unused.</param>
    /// <exception cref="JsonException">When <paramref name="value"/> is not a declared member.</exception>
    public override void Write(Utf8JsonWriter writer, ChunkerType value, JsonSerializerOptions options)
    {
        var str = value switch
        {
            ChunkerType.Text => "text",
            ChunkerType.Markdown => "markdown",
            ChunkerType.Yaml => "yaml",
            ChunkerType.Semantic => "semantic",
            _ => throw new JsonException($"Unknown ChunkerType value: {value}")
        };
        writer.WriteStringValue(str);
    }
}
|
||||
151
packages/csharp/src/Kreuzberg/ChunkingConfig.cs
generated
Normal file
151
packages/csharp/src/Kreuzberg/ChunkingConfig.cs
generated
Normal file
@@ -0,0 +1,151 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// Chunking configuration.
///
/// Configures text chunking for document content, including chunk size,
/// overlap, trimming behavior, and optional embeddings.
///
/// Start from <see cref="Default"/> and override individual properties with a
/// <c>with</c> expression so that fields added in future versions keep their defaults.
/// </summary>
public sealed record ChunkingConfig
{
    /// <summary>
    /// Maximum size per chunk (in units determined by <c>sizing</c>).
    ///
    /// When <c>sizing</c> is <c>Characters</c> (default), this is the max character count.
    /// When using token-based sizing, this is the max token count.
    ///
    /// Default: 1000
    /// </summary>
    [JsonPropertyName("max_chars")]
    public ulong MaxCharacters { get; init; } = 1000;

    /// <summary>
    /// Overlap between chunks (in units determined by <c>sizing</c>).
    ///
    /// Default: 200
    /// </summary>
    [JsonPropertyName("max_overlap")]
    public ulong Overlap { get; init; } = 200;

    /// <summary>
    /// Whether to trim whitespace from chunk boundaries.
    ///
    /// Default: true
    /// </summary>
    [JsonPropertyName("trim")]
    public bool Trim { get; init; } = true;

    /// <summary>
    /// Type of chunker to use (see <see cref="Kreuzberg.ChunkerType"/> for the available variants).
    ///
    /// Default: Text
    /// </summary>
    [JsonPropertyName("chunker_type")]
    public ChunkerType ChunkerType { get; init; } = ChunkerType.Text;

    /// <summary>
    /// Optional embedding configuration for chunk embeddings.
    /// </summary>
    [JsonPropertyName("embedding")]
    public EmbeddingConfig? Embedding { get; init; } = null;

    /// <summary>
    /// Use a preset configuration (overrides individual settings if provided).
    /// </summary>
    [JsonPropertyName("preset")]
    public string? Preset { get; init; } = null;

    /// <summary>
    /// How to measure chunk size.
    ///
    /// Default: <c>Characters</c> (Unicode character count).
    /// Enable <c>chunking-tiktoken</c> or <c>chunking-tokenizers</c> features for token-based sizing.
    /// </summary>
    [JsonPropertyName("sizing")]
    public ChunkSizing? Sizing { get; init; } = null;

    /// <summary>
    /// When <c>true</c> and <c>chunker_type</c> is <c>Markdown</c>, prepend the heading hierarchy
    /// path (e.g. <c>"# Title &gt; ## Section\n\n"</c>) to each chunk's content string.
    ///
    /// This is useful for RAG pipelines where each chunk needs self-contained
    /// context about its position in the document structure.
    ///
    /// Default: <c>false</c>
    /// </summary>
    [JsonPropertyName("prepend_heading_context")]
    public bool PrependHeadingContext { get; init; } = false;

    /// <summary>
    /// Optional cosine similarity threshold for semantic topic boundary detection.
    ///
    /// Only used when <c>chunker_type</c> is <c>Semantic</c> and an <c>EmbeddingConfig</c> is
    /// provided. You almost never need to set this. When omitted, defaults to
    /// <c>0.75</c> which works well for most documents. Lower values detect more
    /// topic boundaries (more, smaller chunks); higher values detect fewer.
    /// Range: <c>0.0..=1.0</c>.
    /// </summary>
    [JsonPropertyName("topic_threshold")]
    public float? TopicThreshold { get; init; } = null;

    /// <summary>
    /// Parse a <see cref="ChunkingConfig"/> from a JSON string.
    /// </summary>
    /// <param name="json">JSON text describing the configuration.</param>
    /// <returns>The deserialized configuration; never <c>null</c>.</returns>
    /// <exception cref="KreuzbergException">
    /// When the JSON cannot be deserialised, or deserialises to <c>null</c>.
    /// </exception>
    public static ChunkingConfig FromJson(string json)
    {
        try
        {
            return JsonSerializer.Deserialize<ChunkingConfig>(json, JsonOptions)
                ?? throw new KreuzbergException($"Failed to parse ChunkingConfig from JSON: deserializer returned null");
        }
        catch (KreuzbergException)
        {
            // Already the library's own exception type: pass through unwrapped.
            throw;
        }
        catch (Exception e)
        {
            throw new KreuzbergException($"Failed to parse ChunkingConfig from JSON: {e.Message}", e);
        }
    }

    /// <summary>Options used when deserializing a <see cref="ChunkingConfig"/> from JSON.</summary>
    private static readonly JsonSerializerOptions JsonOptions = new()
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>Options for serializing config/input objects to FFI. Strips nulls
    /// (nullable C# fields default to null and would override required Rust fields with
    /// non-deserialisable nulls) but preserves explicit false/0 so caller intent is kept.</summary>
    private static readonly JsonSerializerOptions JsonSerializationOptions = new()
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>
    /// Build the default configuration by asking the native library for its default
    /// <c>ChunkingConfig</c>, rendering it to JSON, and deserializing that JSON.
    /// </summary>
    /// <returns>The default configuration; never <c>null</c>.</returns>
    /// <exception cref="KreuzbergException">
    /// When the native library yields no JSON, or the JSON deserialises to <c>null</c>.
    /// </exception>
    public static ChunkingConfig Default()
    {
        var nativeResult = NativeMethods.ChunkingConfigDefault();
        string? json;
        try
        {
            var jsonPtr = NativeMethods.ChunkingConfigToJson(nativeResult);
            try
            {
                json = global::System.Runtime.InteropServices.Marshal.PtrToStringUTF8(jsonPtr);
            }
            finally
            {
                // Release the native string even if marshalling throws.
                NativeMethods.FreeString(jsonPtr);
            }
        }
        finally
        {
            // Release the native config handle on every path.
            NativeMethods.ChunkingConfigFree(nativeResult);
        }

        if (json is null)
        {
            throw new KreuzbergException("Failed to load default ChunkingConfig: native library returned no JSON");
        }

        // Surface a null result as an error instead of returning null from a non-nullable method.
        return JsonSerializer.Deserialize<ChunkingConfig>(json, JsonOptions)
            ?? throw new KreuzbergException("Failed to load default ChunkingConfig: deserializer returned null");
    }
}
|
||||
74
packages/csharp/src/Kreuzberg/CitationMetadata.cs
generated
Normal file
74
packages/csharp/src/Kreuzberg/CitationMetadata.cs
generated
Normal file
@@ -0,0 +1,74 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// Metadata extracted from a citation file (RIS, PubMed, EndNote).
/// </summary>
public sealed record CitationMetadata
{
    /// <summary>Mapped to the <c>citation_count</c> JSON property.</summary>
    [JsonPropertyName("citation_count")]
    public ulong CitationCount { get; init; }

    /// <summary>Mapped to the <c>format</c> JSON property; <c>null</c> when absent.</summary>
    [JsonPropertyName("format")]
    public string? Format { get; init; }

    /// <summary>Mapped to the <c>authors</c> JSON property; empty by default.</summary>
    [JsonPropertyName("authors")]
    public List<string> Authors { get; init; } = new();

    /// <summary>Mapped to the <c>year_range</c> JSON property; <c>null</c> when absent.</summary>
    [JsonPropertyName("year_range")]
    public YearRange? YearRange { get; init; }

    /// <summary>Mapped to the <c>dois</c> JSON property; empty by default.</summary>
    [JsonPropertyName("dois")]
    public List<string> Dois { get; init; } = new();

    /// <summary>Mapped to the <c>keywords</c> JSON property; empty by default.</summary>
    [JsonPropertyName("keywords")]
    public List<string> Keywords { get; init; } = new();

    /// <summary>Options used when deserializing a <see cref="CitationMetadata"/> from JSON.</summary>
    private static readonly JsonSerializerOptions JsonOptions = new JsonSerializerOptions
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>Options for serializing config/input objects to FFI: null members are
    /// omitted, while explicit false/0 values are still written.</summary>
    private static readonly JsonSerializerOptions JsonSerializationOptions = new JsonSerializerOptions
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>
    /// Deserialize a <see cref="CitationMetadata"/> from JSON text.
    /// </summary>
    /// <param name="json">JSON text to parse.</param>
    /// <returns>The parsed instance; never <c>null</c>.</returns>
    /// <exception cref="KreuzbergException">When the JSON cannot be deserialised.</exception>
    public static CitationMetadata FromJson(string json)
    {
        CitationMetadata? parsed;
        try
        {
            parsed = JsonSerializer.Deserialize<CitationMetadata>(json, JsonOptions);
        }
        catch (Exception e) when (e is not KreuzbergException)
        {
            throw new KreuzbergException($"Failed to parse CitationMetadata from JSON: {e.Message}", e);
        }

        if (parsed is null)
        {
            throw new KreuzbergException("Failed to parse CitationMetadata from JSON: deserializer returned null");
        }

        return parsed;
    }
}
|
||||
65
packages/csharp/src/Kreuzberg/CodeContentMode.cs
generated
Normal file
65
packages/csharp/src/Kreuzberg/CodeContentMode.cs
generated
Normal file
@@ -0,0 +1,65 @@
|
||||
// This file is auto-generated by alef. DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
/// <summary>
/// Content rendering mode for code extraction.
///
/// Controls how extracted code content is represented in the <c>content</c> field
/// of <c>ExtractionResult</c>.
/// </summary>
[JsonConverter(typeof(CodeContentModeJsonConverter))]
public enum CodeContentMode
{
    /// <summary>
    /// Use TSLP semantic chunks as content (default).
    /// </summary>
    [JsonPropertyName("chunks")]
    Chunks,

    /// <summary>
    /// Use raw source code as content.
    /// </summary>
    [JsonPropertyName("raw")]
    Raw,

    /// <summary>
    /// Emit function/class headings + docstrings (no code bodies).
    /// </summary>
    [JsonPropertyName("structure")]
    Structure,
}

/// <summary>
/// JSON converter for <see cref="CodeContentMode"/> that reads and writes each member
/// as its explicit lowercase string name (for example <c>"raw"</c>).
/// </summary>
internal sealed class CodeContentModeJsonConverter : JsonConverter<CodeContentMode>
{
    /// <summary>
    /// Parse a <see cref="CodeContentMode"/> from the JSON string at the reader's current position.
    /// </summary>
    /// <param name="reader">Reader positioned on the value to convert.</param>
    /// <param name="typeToConvert">Unused; always <see cref="CodeContentMode"/>.</param>
    /// <param name="options">Unused.</param>
    /// <returns>The enum member whose wire name matches the JSON string.</returns>
    /// <exception cref="JsonException">
    /// When the current token is not a JSON string, or the string is not a known variant name.
    /// </exception>
    public override CodeContentMode Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options)
    {
        // Reject non-string tokens up front so the failure message names the real problem.
        if (reader.TokenType != JsonTokenType.String)
        {
            throw new JsonException($"Expected a JSON string for CodeContentMode but found token {reader.TokenType}");
        }

        var value = reader.GetString();
        return value switch
        {
            "chunks" => CodeContentMode.Chunks,
            "raw" => CodeContentMode.Raw,
            "structure" => CodeContentMode.Structure,
            _ => throw new JsonException($"Unknown CodeContentMode value: {value}")
        };
    }

    /// <summary>
    /// Write <paramref name="value"/> as its lowercase string name.
    /// </summary>
    /// <param name="writer">Destination writer.</param>
    /// <param name="value">Enum member to serialize.</param>
    /// <param name="options">Unused.</param>
    /// <exception cref="JsonException">When <paramref name="value"/> is not a declared member.</exception>
    public override void Write(Utf8JsonWriter writer, CodeContentMode value, JsonSerializerOptions options)
    {
        var str = value switch
        {
            CodeContentMode.Chunks => "chunks",
            CodeContentMode.Raw => "raw",
            CodeContentMode.Structure => "structure",
            _ => throw new JsonException($"Unknown CodeContentMode value: {value}")
        };
        writer.WriteStringValue(str);
    }
}
|
||||
132
packages/csharp/src/Kreuzberg/ContentFilterConfig.cs
generated
Normal file
132
packages/csharp/src/Kreuzberg/ContentFilterConfig.cs
generated
Normal file
@@ -0,0 +1,132 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// Cross-extractor content filtering configuration.
///
/// Controls whether "furniture" content (headers, footers, page numbers,
/// watermarks, repeating text) is included in or stripped from extraction
/// results. Applies across all extractors (PDF, DOCX, RTF, ODT, HTML, etc.)
/// with format-specific implementation.
///
/// When left unset on <c>ExtractionConfig</c> (<c>None</c> in the Rust core), each
/// extractor uses its current default behavior unchanged.
/// </summary>
public sealed record ContentFilterConfig
{
    /// <summary>
    /// Include running headers in extraction output.
    ///
    /// - PDF: Disables top-margin furniture stripping and prevents the layout
    /// model from treating <c>PageHeader</c>-classified regions as furniture.
    /// - DOCX: Includes document headers in text output.
    /// - RTF/ODT: Headers already included; this is a no-op when true.
    /// - HTML/EPUB: Keeps <c>&lt;header&gt;</c> element content.
    ///
    /// Default: <c>false</c> (headers are stripped or excluded).
    /// </summary>
    [JsonPropertyName("include_headers")]
    public bool IncludeHeaders { get; init; } = false;

    /// <summary>
    /// Include running footers in extraction output.
    ///
    /// - PDF: Disables bottom-margin furniture stripping and prevents the layout
    /// model from treating <c>PageFooter</c>-classified regions as furniture.
    /// - DOCX: Includes document footers in text output.
    /// - RTF/ODT: Footers already included; this is a no-op when true.
    /// - HTML/EPUB: Keeps <c>&lt;footer&gt;</c> element content.
    ///
    /// Default: <c>false</c> (footers are stripped or excluded).
    /// </summary>
    [JsonPropertyName("include_footers")]
    public bool IncludeFooters { get; init; } = false;

    /// <summary>
    /// Enable the heuristic cross-page repeating text detector.
    ///
    /// When <c>true</c> (default), text that repeats verbatim across a supermajority
    /// of pages is classified as furniture and stripped. Disable this if brand
    /// names or repeated headings are being incorrectly removed by the heuristic.
    ///
    /// Note: when a layout-detection model is active, the model may independently
    /// classify page-header / page-footer regions as furniture on a per-page basis.
    /// To preserve those regions, set <c>include_headers = true</c>, <c>include_footers = true</c>,
    /// or both, in addition to disabling this flag.
    ///
    /// Primarily affects PDF extraction.
    ///
    /// Default: <c>true</c>.
    /// </summary>
    [JsonPropertyName("strip_repeating_text")]
    public bool StripRepeatingText { get; init; } = true;

    /// <summary>
    /// Include watermark text in extraction output.
    ///
    /// - PDF: Keeps watermark artifacts and arXiv identifiers.
    /// - Other formats: No effect currently.
    ///
    /// Default: <c>false</c> (watermarks are stripped).
    /// </summary>
    [JsonPropertyName("include_watermarks")]
    public bool IncludeWatermarks { get; init; } = false;

    /// <summary>
    /// Parse a <see cref="ContentFilterConfig"/> from a JSON string.
    /// </summary>
    /// <param name="json">JSON text describing the configuration.</param>
    /// <returns>The deserialized configuration; never <c>null</c>.</returns>
    /// <exception cref="KreuzbergException">
    /// When the JSON cannot be deserialised, or deserialises to <c>null</c>.
    /// </exception>
    public static ContentFilterConfig FromJson(string json)
    {
        try
        {
            return JsonSerializer.Deserialize<ContentFilterConfig>(json, JsonOptions)
                ?? throw new KreuzbergException($"Failed to parse ContentFilterConfig from JSON: deserializer returned null");
        }
        catch (KreuzbergException)
        {
            // Already the library's own exception type: pass through unwrapped.
            throw;
        }
        catch (Exception e)
        {
            throw new KreuzbergException($"Failed to parse ContentFilterConfig from JSON: {e.Message}", e);
        }
    }

    /// <summary>Options used when deserializing a <see cref="ContentFilterConfig"/> from JSON.</summary>
    private static readonly JsonSerializerOptions JsonOptions = new()
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>Options for serializing config/input objects to FFI. Strips nulls
    /// (nullable C# fields default to null and would override required Rust fields with
    /// non-deserialisable nulls) but preserves explicit false/0 so caller intent is kept.</summary>
    private static readonly JsonSerializerOptions JsonSerializationOptions = new()
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>
    /// Build the default configuration by asking the native library for its default
    /// <c>ContentFilterConfig</c>, rendering it to JSON, and deserializing that JSON.
    /// </summary>
    /// <returns>The default configuration; never <c>null</c>.</returns>
    /// <exception cref="KreuzbergException">
    /// When the native library yields no JSON, or the JSON deserialises to <c>null</c>.
    /// </exception>
    public static ContentFilterConfig Default()
    {
        var nativeResult = NativeMethods.ContentFilterConfigDefault();
        string? json;
        try
        {
            var jsonPtr = NativeMethods.ContentFilterConfigToJson(nativeResult);
            try
            {
                json = global::System.Runtime.InteropServices.Marshal.PtrToStringUTF8(jsonPtr);
            }
            finally
            {
                // Release the native string even if marshalling throws.
                NativeMethods.FreeString(jsonPtr);
            }
        }
        finally
        {
            // Release the native config handle on every path.
            NativeMethods.ContentFilterConfigFree(nativeResult);
        }

        if (json is null)
        {
            throw new KreuzbergException("Failed to load default ContentFilterConfig: native library returned no JSON");
        }

        // Surface a null result as an error instead of returning null from a non-nullable method.
        return JsonSerializer.Deserialize<ContentFilterConfig>(json, JsonOptions)
            ?? throw new KreuzbergException("Failed to load default ContentFilterConfig: deserializer returned null");
    }
}
|
||||
71
packages/csharp/src/Kreuzberg/ContentLayer.cs
generated
Normal file
71
packages/csharp/src/Kreuzberg/ContentLayer.cs
generated
Normal file
@@ -0,0 +1,71 @@
|
||||
// This file is auto-generated by alef. DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
/// <summary>
/// Content layer classification for document nodes.
///
/// Replaces separate body/furniture arrays with per-node granularity.
/// </summary>
[JsonConverter(typeof(ContentLayerJsonConverter))]
public enum ContentLayer
{
    /// <summary>
    /// Main document body content.
    /// </summary>
    [JsonPropertyName("body")]
    Body,

    /// <summary>
    /// Page/section header (running header).
    /// </summary>
    [JsonPropertyName("header")]
    Header,

    /// <summary>
    /// Page/section footer (running footer).
    /// </summary>
    [JsonPropertyName("footer")]
    Footer,

    /// <summary>
    /// Footnote content.
    /// </summary>
    [JsonPropertyName("footnote")]
    Footnote,
}

/// <summary>
/// JSON converter for <see cref="ContentLayer"/> that reads and writes each member
/// as its explicit lowercase string name (for example <c>"footnote"</c>).
/// </summary>
internal sealed class ContentLayerJsonConverter : JsonConverter<ContentLayer>
{
    /// <summary>
    /// Parse a <see cref="ContentLayer"/> from the JSON string at the reader's current position.
    /// </summary>
    /// <param name="reader">Reader positioned on the value to convert.</param>
    /// <param name="typeToConvert">Unused; always <see cref="ContentLayer"/>.</param>
    /// <param name="options">Unused.</param>
    /// <returns>The enum member whose wire name matches the JSON string.</returns>
    /// <exception cref="JsonException">
    /// When the current token is not a JSON string, or the string is not a known variant name.
    /// </exception>
    public override ContentLayer Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options)
    {
        // Reject non-string tokens up front so the failure message names the real problem.
        if (reader.TokenType != JsonTokenType.String)
        {
            throw new JsonException($"Expected a JSON string for ContentLayer but found token {reader.TokenType}");
        }

        var value = reader.GetString();
        return value switch
        {
            "body" => ContentLayer.Body,
            "header" => ContentLayer.Header,
            "footer" => ContentLayer.Footer,
            "footnote" => ContentLayer.Footnote,
            _ => throw new JsonException($"Unknown ContentLayer value: {value}")
        };
    }

    /// <summary>
    /// Write <paramref name="value"/> as its lowercase string name.
    /// </summary>
    /// <param name="writer">Destination writer.</param>
    /// <param name="value">Enum member to serialize.</param>
    /// <param name="options">Unused.</param>
    /// <exception cref="JsonException">When <paramref name="value"/> is not a declared member.</exception>
    public override void Write(Utf8JsonWriter writer, ContentLayer value, JsonSerializerOptions options)
    {
        var str = value switch
        {
            ContentLayer.Body => "body",
            ContentLayer.Header => "header",
            ContentLayer.Footer => "footer",
            ContentLayer.Footnote => "footnote",
            _ => throw new JsonException($"Unknown ContentLayer value: {value}")
        };
        writer.WriteStringValue(str);
    }
}
|
||||
62
packages/csharp/src/Kreuzberg/ContributorRole.cs
generated
Normal file
62
packages/csharp/src/Kreuzberg/ContributorRole.cs
generated
Normal file
@@ -0,0 +1,62 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// A JATS contributor together with their role.
/// </summary>
public sealed record ContributorRole
{
    /// <summary>Mapped to the <c>name</c> JSON property; must be supplied.</summary>
    [JsonPropertyName("name")]
    public required string Name { get; init; }

    /// <summary>Mapped to the <c>role</c> JSON property; <c>null</c> when absent.</summary>
    [JsonPropertyName("role")]
    public string? Role { get; init; }

    /// <summary>Options used when deserializing a <see cref="ContributorRole"/> from JSON.</summary>
    private static readonly JsonSerializerOptions JsonOptions = new JsonSerializerOptions
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>Options for serializing config/input objects to FFI: null members are
    /// omitted, while explicit false/0 values are still written.</summary>
    private static readonly JsonSerializerOptions JsonSerializationOptions = new JsonSerializerOptions
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>
    /// Deserialize a <see cref="ContributorRole"/> from JSON text.
    /// </summary>
    /// <param name="json">JSON text to parse.</param>
    /// <returns>The parsed instance; never <c>null</c>.</returns>
    /// <exception cref="KreuzbergException">When the JSON cannot be deserialised.</exception>
    public static ContributorRole FromJson(string json)
    {
        ContributorRole? parsed;
        try
        {
            parsed = JsonSerializer.Deserialize<ContributorRole>(json, JsonOptions);
        }
        catch (Exception e) when (e is not KreuzbergException)
        {
            throw new KreuzbergException($"Failed to parse ContributorRole from JSON: {e.Message}", e);
        }

        if (parsed is null)
        {
            throw new KreuzbergException("Failed to parse ContributorRole from JSON: deserializer returned null");
        }

        return parsed;
    }
}
|
||||
149
packages/csharp/src/Kreuzberg/CoreProperties.cs
generated
Normal file
149
packages/csharp/src/Kreuzberg/CoreProperties.cs
generated
Normal file
@@ -0,0 +1,149 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
|
||||
/// Dublin Core metadata from docProps/core.xml
|
||||
///
|
||||
/// Contains standard metadata fields defined by the Dublin Core standard
|
||||
/// and Office-specific extensions.
|
||||
/// </summary>
|
||||
public sealed record CoreProperties
|
||||
{
|
||||
/// <summary>
|
||||
/// Document title
|
||||
/// </summary>
|
||||
[JsonPropertyName("title")]
|
||||
public string? Title { get; init; } = null;
|
||||
|
||||
/// <summary>
|
||||
/// Document subject/topic
|
||||
/// </summary>
|
||||
[JsonPropertyName("subject")]
|
||||
public string? Subject { get; init; } = null;
|
||||
|
||||
/// <summary>
|
||||
/// Document creator/author
|
||||
/// </summary>
|
||||
[JsonPropertyName("creator")]
|
||||
public string? Creator { get; init; } = null;
|
||||
|
||||
/// <summary>
|
||||
/// Keywords or tags
|
||||
/// </summary>
|
||||
[JsonPropertyName("keywords")]
|
||||
public string? Keywords { get; init; } = null;
|
||||
|
||||
/// <summary>
|
||||
/// Document description/abstract
|
||||
/// </summary>
|
||||
[JsonPropertyName("description")]
|
||||
public string? Description { get; init; } = null;
|
||||
|
||||
/// <summary>
|
||||
/// User who last modified the document
|
||||
/// </summary>
|
||||
[JsonPropertyName("last_modified_by")]
|
||||
public string? LastModifiedBy { get; init; } = null;
|
||||
|
||||
/// <summary>
|
||||
/// Revision number
|
||||
/// </summary>
|
||||
[JsonPropertyName("revision")]
|
||||
public string? Revision { get; init; } = null;
|
||||
|
||||
/// <summary>
|
||||
/// Creation timestamp (ISO 8601)
|
||||
/// </summary>
|
||||
[JsonPropertyName("created")]
|
||||
public string? Created { get; init; } = null;
|
||||
|
||||
/// <summary>
|
||||
/// Last modification timestamp (ISO 8601)
|
||||
/// </summary>
|
||||
[JsonPropertyName("modified")]
|
||||
public string? Modified { get; init; } = null;
|
||||
|
||||
/// <summary>
|
||||
/// Document category
|
||||
/// </summary>
|
||||
[JsonPropertyName("category")]
|
||||
public string? Category { get; init; } = null;
|
||||
|
||||
/// <summary>
|
||||
/// Content status (Draft, Final, etc.)
|
||||
/// </summary>
|
||||
[JsonPropertyName("content_status")]
|
||||
public string? ContentStatus { get; init; } = null;
|
||||
|
||||
/// <summary>
|
||||
/// Document language
|
||||
/// </summary>
|
||||
[JsonPropertyName("language")]
|
||||
public string? Language { get; init; } = null;
|
||||
|
||||
/// <summary>
|
||||
/// Unique identifier
|
||||
/// </summary>
|
||||
[JsonPropertyName("identifier")]
|
||||
public string? Identifier { get; init; } = null;
|
||||
|
||||
/// <summary>
|
||||
/// Document version
|
||||
/// </summary>
|
||||
[JsonPropertyName("version")]
|
||||
public string? Version { get; init; } = null;
|
||||
|
||||
/// <summary>
|
||||
/// Last print timestamp (ISO 8601)
|
||||
/// </summary>
|
||||
[JsonPropertyName("last_printed")]
|
||||
public string? LastPrinted { get; init; } = null;
|
||||
|
||||
|
||||
/// <summary>
/// Deserialise a <see cref="CoreProperties"/> from its JSON text.
/// </summary>
/// <param name="json">JSON document to parse.</param>
/// <returns>The parsed <see cref="CoreProperties"/>.</returns>
/// <exception cref="KreuzbergException">When deserialisation throws or yields null.</exception>
public static CoreProperties FromJson(string json)
{
    CoreProperties? parsed;
    try
    {
        parsed = JsonSerializer.Deserialize<CoreProperties>(json, JsonOptions);
    }
    catch (Exception e) when (e is not KreuzbergException)
    {
        // Any non-Kreuzberg failure is wrapped; Kreuzberg failures pass through untouched.
        throw new KreuzbergException($"Failed to parse CoreProperties from JSON: {e.Message}", e);
    }

    return parsed
        ?? throw new KreuzbergException("Failed to parse CoreProperties from JSON: deserializer returned null");
}
|
||||
|
||||
/// <summary>Options passed to the deserializer in <c>FromJson</c>; enums are handled as
/// snake_case lower-case strings and default values are omitted when writing.</summary>
private static readonly JsonSerializerOptions JsonOptions = new JsonSerializerOptions
{
    Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
};

/// <summary>Options for serializing config/input objects to FFI. Nulls are stripped
/// (nullable C# fields default to null and would override required Rust fields with
/// non-deserialisable nulls), while explicit false/0 values are still written so the
/// caller's intent is kept.</summary>
private static readonly JsonSerializerOptions JsonSerializationOptions = new JsonSerializerOptions
{
    Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
};
|
||||
}
|
||||
71
packages/csharp/src/Kreuzberg/CsvMetadata.cs
generated
Normal file
71
packages/csharp/src/Kreuzberg/CsvMetadata.cs
generated
Normal file
@@ -0,0 +1,71 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// Metadata describing a CSV/TSV file.
/// </summary>
public sealed record CsvMetadata
{
    /// <summary>Options passed to the deserializer in <see cref="FromJson"/>; enums are handled as
    /// snake_case lower-case strings and default values are omitted when writing.</summary>
    private static readonly JsonSerializerOptions JsonOptions = new JsonSerializerOptions
    {
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
    };

    /// <summary>Options for serializing config/input objects to FFI. Nulls are stripped
    /// (nullable C# fields default to null and would override required Rust fields with
    /// non-deserialisable nulls), while explicit false/0 values are still written so the
    /// caller's intent is kept.</summary>
    private static readonly JsonSerializerOptions JsonSerializationOptions = new JsonSerializerOptions
    {
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
    };

    /// <summary>Value of the <c>row_count</c> JSON property.</summary>
    [JsonPropertyName("row_count")]
    public uint RowCount { get; init; }

    /// <summary>Value of the <c>column_count</c> JSON property.</summary>
    [JsonPropertyName("column_count")]
    public uint ColumnCount { get; init; }

    /// <summary>Value of the <c>delimiter</c> JSON property; null when absent.</summary>
    [JsonPropertyName("delimiter")]
    public string? Delimiter { get; init; }

    /// <summary>Value of the <c>has_header</c> JSON property.</summary>
    [JsonPropertyName("has_header")]
    public bool HasHeader { get; init; }

    /// <summary>Value of the <c>column_types</c> JSON property; null when absent.</summary>
    [JsonPropertyName("column_types")]
    public List<string>? ColumnTypes { get; init; }

    /// <summary>
    /// Deserialise a <see cref="CsvMetadata"/> from its JSON text.
    /// </summary>
    /// <param name="json">JSON document to parse.</param>
    /// <returns>The parsed <see cref="CsvMetadata"/>.</returns>
    /// <exception cref="KreuzbergException">When deserialisation throws or yields null.</exception>
    public static CsvMetadata FromJson(string json)
    {
        CsvMetadata? parsed;
        try
        {
            parsed = JsonSerializer.Deserialize<CsvMetadata>(json, JsonOptions);
        }
        catch (Exception e) when (e is not KreuzbergException)
        {
            // Any non-Kreuzberg failure is wrapped; Kreuzberg failures pass through untouched.
            throw new KreuzbergException($"Failed to parse CsvMetadata from JSON: {e.Message}", e);
        }

        return parsed
            ?? throw new KreuzbergException("Failed to parse CsvMetadata from JSON: deserializer returned null");
    }
}
|
||||
62
packages/csharp/src/Kreuzberg/DbfFieldInfo.cs
generated
Normal file
62
packages/csharp/src/Kreuzberg/DbfFieldInfo.cs
generated
Normal file
@@ -0,0 +1,62 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// Information about a single dBASE field.
/// </summary>
public sealed record DbfFieldInfo
{
    /// <summary>Options passed to the deserializer in <see cref="FromJson"/>; enums are handled as
    /// snake_case lower-case strings and default values are omitted when writing.</summary>
    private static readonly JsonSerializerOptions JsonOptions = new JsonSerializerOptions
    {
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
    };

    /// <summary>Options for serializing config/input objects to FFI. Nulls are stripped
    /// (nullable C# fields default to null and would override required Rust fields with
    /// non-deserialisable nulls), while explicit false/0 values are still written so the
    /// caller's intent is kept.</summary>
    private static readonly JsonSerializerOptions JsonSerializationOptions = new JsonSerializerOptions
    {
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
    };

    /// <summary>Value of the <c>name</c> JSON property (required).</summary>
    [JsonPropertyName("name")]
    public required string Name { get; init; }

    /// <summary>Value of the <c>field_type</c> JSON property (required).</summary>
    [JsonPropertyName("field_type")]
    public required string FieldType { get; init; }

    /// <summary>
    /// Deserialise a <see cref="DbfFieldInfo"/> from its JSON text.
    /// </summary>
    /// <param name="json">JSON document to parse.</param>
    /// <returns>The parsed <see cref="DbfFieldInfo"/>.</returns>
    /// <exception cref="KreuzbergException">When deserialisation throws or yields null.</exception>
    public static DbfFieldInfo FromJson(string json)
    {
        DbfFieldInfo? parsed;
        try
        {
            parsed = JsonSerializer.Deserialize<DbfFieldInfo>(json, JsonOptions);
        }
        catch (Exception e) when (e is not KreuzbergException)
        {
            // Any non-Kreuzberg failure is wrapped; Kreuzberg failures pass through untouched.
            throw new KreuzbergException($"Failed to parse DbfFieldInfo from JSON: {e.Message}", e);
        }

        return parsed
            ?? throw new KreuzbergException("Failed to parse DbfFieldInfo from JSON: deserializer returned null");
    }
}
|
||||
65
packages/csharp/src/Kreuzberg/DbfMetadata.cs
generated
Normal file
65
packages/csharp/src/Kreuzberg/DbfMetadata.cs
generated
Normal file
@@ -0,0 +1,65 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// Metadata describing a dBASE (DBF) file.
/// </summary>
public sealed record DbfMetadata
{
    /// <summary>Options passed to the deserializer in <see cref="FromJson"/>; enums are handled as
    /// snake_case lower-case strings and default values are omitted when writing.</summary>
    private static readonly JsonSerializerOptions JsonOptions = new JsonSerializerOptions
    {
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
    };

    /// <summary>Options for serializing config/input objects to FFI. Nulls are stripped
    /// (nullable C# fields default to null and would override required Rust fields with
    /// non-deserialisable nulls), while explicit false/0 values are still written so the
    /// caller's intent is kept.</summary>
    private static readonly JsonSerializerOptions JsonSerializationOptions = new JsonSerializerOptions
    {
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
    };

    /// <summary>Value of the <c>record_count</c> JSON property.</summary>
    [JsonPropertyName("record_count")]
    public ulong RecordCount { get; init; }

    /// <summary>Value of the <c>field_count</c> JSON property.</summary>
    [JsonPropertyName("field_count")]
    public ulong FieldCount { get; init; }

    /// <summary>Entries of the <c>fields</c> JSON property; an empty list by default.</summary>
    [JsonPropertyName("fields")]
    public List<DbfFieldInfo> Fields { get; init; } = new();

    /// <summary>
    /// Deserialise a <see cref="DbfMetadata"/> from its JSON text.
    /// </summary>
    /// <param name="json">JSON document to parse.</param>
    /// <returns>The parsed <see cref="DbfMetadata"/>.</returns>
    /// <exception cref="KreuzbergException">When deserialisation throws or yields null.</exception>
    public static DbfMetadata FromJson(string json)
    {
        DbfMetadata? parsed;
        try
        {
            parsed = JsonSerializer.Deserialize<DbfMetadata>(json, JsonOptions);
        }
        catch (Exception e) when (e is not KreuzbergException)
        {
            // Any non-Kreuzberg failure is wrapped; Kreuzberg failures pass through untouched.
            throw new KreuzbergException($"Failed to parse DbfMetadata from JSON: {e.Message}", e);
        }

        return parsed
            ?? throw new KreuzbergException("Failed to parse DbfMetadata from JSON: deserializer returned null");
    }
}
|
||||
68
packages/csharp/src/Kreuzberg/DetectResponse.cs
generated
Normal file
68
packages/csharp/src/Kreuzberg/DetectResponse.cs
generated
Normal file
@@ -0,0 +1,68 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// Response returned by MIME type detection.
/// </summary>
public sealed record DetectResponse
{
    /// <summary>Options passed to the deserializer in <see cref="FromJson"/>; enums are handled as
    /// snake_case lower-case strings and default values are omitted when writing.</summary>
    private static readonly JsonSerializerOptions JsonOptions = new JsonSerializerOptions
    {
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
    };

    /// <summary>Options for serializing config/input objects to FFI. Nulls are stripped
    /// (nullable C# fields default to null and would override required Rust fields with
    /// non-deserialisable nulls), while explicit false/0 values are still written so the
    /// caller's intent is kept.</summary>
    private static readonly JsonSerializerOptions JsonSerializationOptions = new JsonSerializerOptions
    {
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
    };

    /// <summary>
    /// The MIME type that was detected.
    /// </summary>
    [JsonPropertyName("mime_type")]
    public required string MimeType { get; init; }

    /// <summary>
    /// The original filename, when one was provided; otherwise null.
    /// </summary>
    [JsonPropertyName("filename")]
    public string? Filename { get; init; }

    /// <summary>
    /// Deserialise a <see cref="DetectResponse"/> from its JSON text.
    /// </summary>
    /// <param name="json">JSON document to parse.</param>
    /// <returns>The parsed <see cref="DetectResponse"/>.</returns>
    /// <exception cref="KreuzbergException">When deserialisation throws or yields null.</exception>
    public static DetectResponse FromJson(string json)
    {
        DetectResponse? parsed;
        try
        {
            parsed = JsonSerializer.Deserialize<DetectResponse>(json, JsonOptions);
        }
        catch (Exception e) when (e is not KreuzbergException)
        {
            // Any non-Kreuzberg failure is wrapped; Kreuzberg failures pass through untouched.
            throw new KreuzbergException($"Failed to parse DetectResponse from JSON: {e.Message}", e);
        }

        return parsed
            ?? throw new KreuzbergException("Failed to parse DetectResponse from JSON: deserializer returned null");
    }
}
|
||||
65
packages/csharp/src/Kreuzberg/DetectionResult.cs
generated
Normal file
65
packages/csharp/src/Kreuzberg/DetectionResult.cs
generated
Normal file
@@ -0,0 +1,65 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// Detection result for one page: every detection plus the page metadata.
/// </summary>
public sealed record DetectionResult
{
    /// <summary>Options passed to the deserializer in <see cref="FromJson"/>; enums are handled as
    /// snake_case lower-case strings and default values are omitted when writing.</summary>
    private static readonly JsonSerializerOptions JsonOptions = new JsonSerializerOptions
    {
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
    };

    /// <summary>Options for serializing config/input objects to FFI. Nulls are stripped
    /// (nullable C# fields default to null and would override required Rust fields with
    /// non-deserialisable nulls), while explicit false/0 values are still written so the
    /// caller's intent is kept.</summary>
    private static readonly JsonSerializerOptions JsonSerializationOptions = new JsonSerializerOptions
    {
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
    };

    /// <summary>Value of the <c>page_width</c> JSON property.</summary>
    [JsonPropertyName("page_width")]
    public uint PageWidth { get; init; }

    /// <summary>Value of the <c>page_height</c> JSON property.</summary>
    [JsonPropertyName("page_height")]
    public uint PageHeight { get; init; }

    /// <summary>Entries of the <c>detections</c> JSON property; an empty list by default.</summary>
    [JsonPropertyName("detections")]
    public List<LayoutDetection> Detections { get; init; } = new();

    /// <summary>
    /// Deserialise a <see cref="DetectionResult"/> from its JSON text.
    /// </summary>
    /// <param name="json">JSON document to parse.</param>
    /// <returns>The parsed <see cref="DetectionResult"/>.</returns>
    /// <exception cref="KreuzbergException">When deserialisation throws or yields null.</exception>
    public static DetectionResult FromJson(string json)
    {
        DetectionResult? parsed;
        try
        {
            parsed = JsonSerializer.Deserialize<DetectionResult>(json, JsonOptions);
        }
        catch (Exception e) when (e is not KreuzbergException)
        {
            // Any non-Kreuzberg failure is wrapped; Kreuzberg failures pass through untouched.
            throw new KreuzbergException($"Failed to parse DetectionResult from JSON: {e.Message}", e);
        }

        return parsed
            ?? throw new KreuzbergException("Failed to parse DetectionResult from JSON: deserializer returned null");
    }
}
|
||||
86
packages/csharp/src/Kreuzberg/DiffHunk.cs
generated
Normal file
86
packages/csharp/src/Kreuzberg/DiffHunk.cs
generated
Normal file
@@ -0,0 +1,86 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// One contiguous hunk of a unified diff.
/// </summary>
public sealed record DiffHunk
{
    /// <summary>Options passed to the deserializer in <see cref="FromJson"/>; enums are handled as
    /// snake_case lower-case strings and default values are omitted when writing.</summary>
    private static readonly JsonSerializerOptions JsonOptions = new JsonSerializerOptions
    {
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
    };

    /// <summary>Options for serializing config/input objects to FFI. Nulls are stripped
    /// (nullable C# fields default to null and would override required Rust fields with
    /// non-deserialisable nulls), while explicit false/0 values are still written so the
    /// caller's intent is kept.</summary>
    private static readonly JsonSerializerOptions JsonSerializationOptions = new JsonSerializerOptions
    {
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
    };

    /// <summary>
    /// First line of this hunk in the old content (0-indexed).
    /// </summary>
    [JsonPropertyName("from_line")]
    public ulong FromLine { get; init; }

    /// <summary>
    /// How many lines of the old content this hunk covers.
    /// </summary>
    [JsonPropertyName("from_count")]
    public ulong FromCount { get; init; }

    /// <summary>
    /// First line of this hunk in the new content (0-indexed).
    /// </summary>
    [JsonPropertyName("to_line")]
    public ulong ToLine { get; init; }

    /// <summary>
    /// How many lines of the new content this hunk covers.
    /// </summary>
    [JsonPropertyName("to_count")]
    public ulong ToCount { get; init; }

    /// <summary>
    /// The lines that make up this hunk; an empty list by default.
    /// </summary>
    [JsonPropertyName("lines")]
    public List<DiffLine> Lines { get; init; } = new();

    /// <summary>
    /// Deserialise a <see cref="DiffHunk"/> from its JSON text.
    /// </summary>
    /// <param name="json">JSON document to parse.</param>
    /// <returns>The parsed <see cref="DiffHunk"/>.</returns>
    /// <exception cref="KreuzbergException">When deserialisation throws or yields null.</exception>
    public static DiffHunk FromJson(string json)
    {
        DiffHunk? parsed;
        try
        {
            parsed = JsonSerializer.Deserialize<DiffHunk>(json, JsonOptions);
        }
        catch (Exception e) when (e is not KreuzbergException)
        {
            // Any non-Kreuzberg failure is wrapped; Kreuzberg failures pass through untouched.
            throw new KreuzbergException($"Failed to parse DiffHunk from JSON: {e.Message}", e);
        }

        return parsed
            ?? throw new KreuzbergException("Failed to parse DiffHunk from JSON: deserializer returned null");
    }
}
|
||||
168
packages/csharp/src/Kreuzberg/DiffLine.cs
generated
Normal file
168
packages/csharp/src/Kreuzberg/DiffLine.cs
generated
Normal file
@@ -0,0 +1,168 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// One line inside a unified-diff hunk.
///
/// The type is defined here (rather than only in `crate.diff`) so that `RevisionDelta`
/// can reference it unconditionally, without requiring the `diff` Cargo feature.
/// `crate.diff` re-exports this type verbatim.
/// </summary>
[JsonConverter(typeof(DiffLineJsonConverter))]
public abstract record DiffLine
{
    /// <summary>
    /// A context line that did not change.
    /// </summary>
    public sealed record Context(string Value) : DiffLine;

    /// <summary>
    /// A line that was added in the "after" version.
    /// </summary>
    public sealed record Added(string Value) : DiffLine;

    /// <summary>
    /// A line that was removed from the "before" version.
    /// </summary>
    public sealed record Removed(string Value) : DiffLine;

    /// <summary>The text of a <see cref="Context"/> variant, or null for any other variant.</summary>
    public string? AsContext => (this as Context)?.Value;

    /// <summary>The text of an <see cref="Added"/> variant, or null for any other variant.</summary>
    public string? AsAdded => (this as Added)?.Value;

    /// <summary>The text of a <see cref="Removed"/> variant, or null for any other variant.</summary>
    public string? AsRemoved => (this as Removed)?.Value;
}
|
||||
|
||||
/// <summary>
/// Custom converter for the <see cref="DiffLine"/> sealed union.
/// </summary>
/// <remarks>
/// Each variant is a JSON object carrying a discriminator field (<c>kind</c>) plus one
/// string payload field at the same level. Every <see cref="DiffLine"/> variant wraps a
/// plain <see cref="string"/>, so the payload is read and written as a JSON string
/// rather than being routed through a nested object (which can never bind to a
/// <c>string Value</c> record component).
/// </remarks>
public sealed class DiffLineJsonConverter : JsonConverter<DiffLine>
{
    /// <summary>Name of the discriminator field selecting the variant.</summary>
    private const string DiscriminatorPropertyName = "kind";

    // NOTE(review): the payload key emitted on write is assumed to be "value"; confirm
    // it matches the `content` key of the Rust serde representation. Reading does not
    // depend on this name: it accepts whichever single non-discriminator field exists.
    private const string PayloadPropertyName = "value";

    /// <summary>
    /// Read a <see cref="DiffLine"/> variant from a tagged JSON object.
    /// </summary>
    /// <param name="reader">Reader positioned on the start of the object.</param>
    /// <param name="typeToConvert">Requested target type (unused).</param>
    /// <param name="options">Serializer options (unused; the payload is a plain string).</param>
    /// <returns>The variant selected by the <c>kind</c> field.</returns>
    /// <exception cref="JsonException">
    /// When the token is not an object, the discriminator is missing, null, not a string
    /// or unknown, or the payload is missing, ambiguous or not a string.
    /// </exception>
    public override DiffLine Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options)
    {
        if (reader.TokenType != JsonTokenType.StartObject)
        {
            throw new JsonException($"Expected JSON object, got {reader.TokenType}");
        }

        using var doc = JsonDocument.ParseValue(ref reader);
        var root = doc.RootElement;

        if (!root.TryGetProperty(DiscriminatorPropertyName, out var tagElement))
        {
            throw new JsonException($"Missing discriminator field: kind");
        }

        if (tagElement.ValueKind == JsonValueKind.Null)
        {
            throw new JsonException("Discriminator field is null");
        }

        // GetString() would throw InvalidOperationException on a number/object/array;
        // converters are expected to report malformed input as JsonException.
        if (tagElement.ValueKind != JsonValueKind.String)
        {
            throw new JsonException($"Discriminator field must be a string, got {tagElement.ValueKind}");
        }

        var tagValue = tagElement.GetString();

        // The payload is resolved inside each arm so an unknown discriminator is
        // reported as such, even when the payload is also malformed.
        return tagValue switch
        {
            "context" => new DiffLine.Context(ReadPayload(root)),
            "added" => new DiffLine.Added(ReadPayload(root)),
            "removed" => new DiffLine.Removed(ReadPayload(root)),
            _ => throw new JsonException($"Unknown DiffLine discriminator: {tagValue}"),
        };
    }

    /// <summary>
    /// Write a <see cref="DiffLine"/> variant as a tagged JSON object containing the
    /// discriminator and the line text.
    /// </summary>
    /// <param name="writer">Destination writer.</param>
    /// <param name="value">Variant to serialise.</param>
    /// <param name="options">Serializer options (unused; the payload is a plain string).</param>
    /// <exception cref="JsonException">When <paramref name="value"/> is not a known variant.</exception>
    public override void Write(Utf8JsonWriter writer, DiffLine value, JsonSerializerOptions options)
    {
        var (tag, text) = value switch
        {
            DiffLine.Context context => ("context", context.Value),
            DiffLine.Added added => ("added", added.Value),
            DiffLine.Removed removed => ("removed", removed.Value),
            _ => throw new JsonException($"Unknown DiffLine variant: {value.GetType().Name}"),
        };

        writer.WriteStartObject();
        writer.WriteString(DiscriminatorPropertyName, tag);

        // Previously the string payload was serialised to a non-object document and then
        // skipped, so only the tag reached the output. Emit the text explicitly.
        if (text is not null)
        {
            writer.WriteString(PayloadPropertyName, text);
        }

        writer.WriteEndObject();
    }

    /// <summary>
    /// Return the string held by the single non-discriminator field of <paramref name="root"/>.
    /// </summary>
    private static string ReadPayload(JsonElement root)
    {
        string? payload = null;
        var found = false;

        foreach (var prop in root.EnumerateObject())
        {
            if (prop.Name == DiscriminatorPropertyName)
            {
                continue;
            }

            if (found)
            {
                throw new JsonException("DiffLine object has more than one payload field");
            }

            if (prop.Value.ValueKind != JsonValueKind.String)
            {
                throw new JsonException(
                    $"Expected string payload in field '{prop.Name}', got {prop.Value.ValueKind}");
            }

            payload = prop.Value.GetString();
            found = true;
        }

        return payload ?? throw new JsonException("Missing DiffLine payload field");
    }
}
|
||||
87
packages/csharp/src/Kreuzberg/DiffOptions.cs
generated
Normal file
87
packages/csharp/src/Kreuzberg/DiffOptions.cs
generated
Normal file
@@ -0,0 +1,87 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// Options controlling how two `ExtractionResult` values are compared.
/// </summary>
public sealed record DiffOptions
{
    /// <summary>
    /// Include metadata changes in the diff. Default: `true`.
    /// </summary>
    [JsonPropertyName("include_metadata")]
    public bool IncludeMetadata { get; init; } = true;

    /// <summary>
    /// Include embedded-children changes in the diff. Default: `true`.
    /// </summary>
    [JsonPropertyName("include_embedded")]
    public bool IncludeEmbedded { get; init; } = true;

    /// <summary>
    /// Truncate content to this many characters before diffing.
    ///
    /// Useful for very large documents where only the first N characters matter.
    /// `None` (null) means no truncation.
    /// </summary>
    [JsonPropertyName("max_content_chars")]
    public ulong? MaxContentChars { get; init; } = null;

    /// <summary>
    /// Parse a <see cref="DiffOptions"/> from a JSON string.
    /// </summary>
    /// <param name="json">JSON document to parse.</param>
    /// <returns>The parsed <see cref="DiffOptions"/>.</returns>
    /// <exception cref="KreuzbergException">When the JSON cannot be deserialised.</exception>
    public static DiffOptions FromJson(string json)
    {
        try
        {
            return JsonSerializer.Deserialize<DiffOptions>(json, JsonOptions)
                ?? throw new KreuzbergException($"Failed to parse DiffOptions from JSON: deserializer returned null");
        }
        catch (KreuzbergException)
        {
            throw;
        }
        catch (Exception e)
        {
            throw new KreuzbergException($"Failed to parse DiffOptions from JSON: {e.Message}", e);
        }
    }

    /// <summary>Options used when deserialising; enums are handled as snake_case
    /// lower-case strings and default values are omitted when writing.</summary>
    private static readonly JsonSerializerOptions JsonOptions = new()
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>Options for serializing config/input objects to FFI. Strips nulls
    /// (nullable C# fields default to null and would override required Rust fields with
    /// non-deserialisable nulls) but preserves explicit false/0 so caller intent is kept.</summary>
    private static readonly JsonSerializerOptions JsonSerializationOptions = new()
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>
    /// Build a <see cref="DiffOptions"/> from the defaults reported by the native library.
    /// </summary>
    /// <remarks>
    /// The native handle and the native JSON string are released in <c>finally</c> blocks,
    /// so they are freed even when an intermediate call throws.
    /// </remarks>
    /// <returns>The default options; never null.</returns>
    /// <exception cref="KreuzbergException">
    /// When the native library yields no JSON, so no instance can be built.
    /// </exception>
    public static DiffOptions Default()
    {
        string? json;
        var nativeResult = NativeMethods.DiffOptionsDefault();
        try
        {
            var jsonPtr = NativeMethods.DiffOptionsToJson(nativeResult);
            try
            {
                json = global::System.Runtime.InteropServices.Marshal.PtrToStringUTF8(jsonPtr);
            }
            finally
            {
                NativeMethods.FreeString(jsonPtr);
            }
        }
        finally
        {
            NativeMethods.DiffOptionsFree(nativeResult);
        }

        // A missing string deserialises "null" to a null reference; surface that as an
        // error instead of returning null through a non-nullable return type.
        return JsonSerializer.Deserialize<DiffOptions>(json ?? "null", JsonOptions)
            ?? throw new KreuzbergException("Failed to build default DiffOptions: native library returned no JSON");
    }
}
|
||||
114
packages/csharp/src/Kreuzberg/DjotContent.cs
generated
Normal file
114
packages/csharp/src/Kreuzberg/DjotContent.cs
generated
Normal file
@@ -0,0 +1,114 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// Comprehensive Djot document structure with semantic preservation.
///
/// The type captures the full richness of Djot markup, including:
/// - Block-level structures (headings, lists, blockquotes, code blocks, etc.)
/// - Inline formatting (emphasis, strong, highlight, subscript, superscript, etc.)
/// - Attributes (classes, IDs, key-value pairs)
/// - Links, images, footnotes
/// - Math expressions (inline and display)
/// - Tables with full structure
///
/// Available when the `djot` feature is enabled.
/// </summary>
public sealed record DjotContent
{
    /// <summary>Options passed to the deserializer in <see cref="FromJson"/>; enums are handled as
    /// snake_case lower-case strings and default values are omitted when writing.</summary>
    private static readonly JsonSerializerOptions JsonOptions = new JsonSerializerOptions
    {
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
    };

    /// <summary>Options for serializing config/input objects to FFI. Nulls are stripped
    /// (nullable C# fields default to null and would override required Rust fields with
    /// non-deserialisable nulls), while explicit false/0 values are still written so the
    /// caller's intent is kept.</summary>
    private static readonly JsonSerializerOptions JsonSerializationOptions = new JsonSerializerOptions
    {
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
    };

    /// <summary>
    /// Plain-text representation, kept for backwards compatibility.
    /// </summary>
    [JsonPropertyName("plain_text")]
    public required string PlainText { get; init; }

    /// <summary>
    /// Structured block-level content; an empty list by default.
    /// </summary>
    [JsonPropertyName("blocks")]
    public List<FormattedBlock> Blocks { get; init; } = new();

    /// <summary>
    /// Metadata taken from the YAML frontmatter.
    /// </summary>
    [JsonPropertyName("metadata")]
    public required Metadata Metadata { get; init; }

    /// <summary>
    /// Tables extracted as structured data; an empty list by default.
    /// </summary>
    [JsonPropertyName("tables")]
    public List<Table> Tables { get; init; } = new();

    /// <summary>
    /// Extracted images together with their metadata; an empty list by default.
    /// </summary>
    [JsonPropertyName("images")]
    public List<DjotImage> Images { get; init; } = new();

    /// <summary>
    /// Extracted links together with their URLs; an empty list by default.
    /// </summary>
    [JsonPropertyName("links")]
    public List<DjotLink> Links { get; init; } = new();

    /// <summary>
    /// Footnote definitions; an empty list by default.
    /// </summary>
    [JsonPropertyName("footnotes")]
    public List<Footnote> Footnotes { get; init; } = new();

    /// <summary>
    /// Attributes mapped by element identifier (if present); null otherwise.
    /// </summary>
    [JsonPropertyName("attributes")]
    public List<string>? Attributes { get; init; }

    /// <summary>
    /// Deserialise a <see cref="DjotContent"/> from its JSON text.
    /// </summary>
    /// <param name="json">JSON document to parse.</param>
    /// <returns>The parsed <see cref="DjotContent"/>.</returns>
    /// <exception cref="KreuzbergException">When deserialisation throws or yields null.</exception>
    public static DjotContent FromJson(string json)
    {
        DjotContent? parsed;
        try
        {
            parsed = JsonSerializer.Deserialize<DjotContent>(json, JsonOptions);
        }
        catch (Exception e) when (e is not KreuzbergException)
        {
            // Any non-Kreuzberg failure is wrapped; Kreuzberg failures pass through untouched.
            throw new KreuzbergException($"Failed to parse DjotContent from JSON: {e.Message}", e);
        }

        return parsed
            ?? throw new KreuzbergException("Failed to parse DjotContent from JSON: deserializer returned null");
    }
}
|
||||
80
packages/csharp/src/Kreuzberg/DjotImage.cs
generated
Normal file
80
packages/csharp/src/Kreuzberg/DjotImage.cs
generated
Normal file
@@ -0,0 +1,80 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// An image element found in a Djot document.
/// </summary>
public sealed record DjotImage
{
    /// <summary>
    /// Options passed to the deserializer by <see cref="FromJson"/>. The ignore condition only
    /// influences writing; for reading, the relevant part is the snake_case string enum converter.
    /// </summary>
    private static readonly JsonSerializerOptions JsonOptions = new JsonSerializerOptions
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>
    /// Serialization options intended for config/input objects sent over FFI: null members are
    /// omitted (a null would otherwise override a required Rust field with a value it cannot
    /// deserialise), while explicit false/0 values are still written.
    /// Not referenced by any other member of this record.
    /// </summary>
    private static readonly JsonSerializerOptions JsonSerializationOptions = new JsonSerializerOptions
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>
    /// Source of the image: a URL or a path.
    /// </summary>
    [JsonPropertyName("src")]
    public required string Src { get; init; }

    /// <summary>
    /// Alternative text for the image.
    /// </summary>
    [JsonPropertyName("alt")]
    public required string Alt { get; init; }

    /// <summary>
    /// Title of the image; null when none was given.
    /// </summary>
    [JsonPropertyName("title")]
    public string? Title { get; init; }

    /// <summary>
    /// Attributes attached to the element; null when absent.
    /// </summary>
    [JsonPropertyName("attributes")]
    public string? Attributes { get; init; }

    /// <summary>
    /// Deserialises a <see cref="DjotImage"/> from its JSON representation.
    /// </summary>
    /// <param name="json">JSON text describing the image.</param>
    /// <returns>The parsed instance; never null.</returns>
    /// <exception cref="KreuzbergException">
    /// The JSON could not be deserialised, or it deserialised to null.
    /// </exception>
    public static DjotImage FromJson(string json)
    {
        DjotImage? parsed;
        try
        {
            parsed = JsonSerializer.Deserialize<DjotImage>(json, JsonOptions);
        }
        catch (Exception ex) when (ex is not KreuzbergException)
        {
            // Anything that is not already a KreuzbergException is wrapped; the cause is preserved.
            throw new KreuzbergException($"Failed to parse DjotImage from JSON: {ex.Message}", ex);
        }

        return parsed
            ?? throw new KreuzbergException("Failed to parse DjotImage from JSON: deserializer returned null");
    }
}
|
||||
80
packages/csharp/src/Kreuzberg/DjotLink.cs
generated
Normal file
80
packages/csharp/src/Kreuzberg/DjotLink.cs
generated
Normal file
@@ -0,0 +1,80 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// A link element found in a Djot document.
/// </summary>
public sealed record DjotLink
{
    /// <summary>
    /// Options passed to the deserializer by <see cref="FromJson"/>. The ignore condition only
    /// influences writing; for reading, the relevant part is the snake_case string enum converter.
    /// </summary>
    private static readonly JsonSerializerOptions JsonOptions = new JsonSerializerOptions
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>
    /// Serialization options intended for config/input objects sent over FFI: null members are
    /// omitted (a null would otherwise override a required Rust field with a value it cannot
    /// deserialise), while explicit false/0 values are still written.
    /// Not referenced by any other member of this record.
    /// </summary>
    private static readonly JsonSerializerOptions JsonSerializationOptions = new JsonSerializerOptions
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>
    /// URL the link points to.
    /// </summary>
    [JsonPropertyName("url")]
    public required string Url { get; init; }

    /// <summary>
    /// Text content of the link.
    /// </summary>
    [JsonPropertyName("text")]
    public required string Text { get; init; }

    /// <summary>
    /// Title of the link; null when none was given.
    /// </summary>
    [JsonPropertyName("title")]
    public string? Title { get; init; }

    /// <summary>
    /// Attributes attached to the element; null when absent.
    /// </summary>
    [JsonPropertyName("attributes")]
    public string? Attributes { get; init; }

    /// <summary>
    /// Deserialises a <see cref="DjotLink"/> from its JSON representation.
    /// </summary>
    /// <param name="json">JSON text describing the link.</param>
    /// <returns>The parsed instance; never null.</returns>
    /// <exception cref="KreuzbergException">
    /// The JSON could not be deserialised, or it deserialised to null.
    /// </exception>
    public static DjotLink FromJson(string json)
    {
        DjotLink? parsed;
        try
        {
            parsed = JsonSerializer.Deserialize<DjotLink>(json, JsonOptions);
        }
        catch (Exception ex) when (ex is not KreuzbergException)
        {
            // Anything that is not already a KreuzbergException is wrapped; the cause is preserved.
            throw new KreuzbergException($"Failed to parse DjotLink from JSON: {ex.Message}", ex);
        }

        return parsed
            ?? throw new KreuzbergException("Failed to parse DjotLink from JSON: deserializer returned null");
    }
}
|
||||
124
packages/csharp/src/Kreuzberg/DocumentNode.cs
generated
Normal file
124
packages/csharp/src/Kreuzberg/DocumentNode.cs
generated
Normal file
@@ -0,0 +1,124 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// One node of the document tree.
///
/// A node carries a deterministic `id`, typed `content`, optional `parent`/`children`
/// links that form the tree, and metadata such as page number, bounding box, and content layer.
/// </summary>
public sealed record DocumentNode
{
    /// <summary>
    /// Options passed to the deserializer by <see cref="FromJson"/>. The ignore condition only
    /// influences writing; for reading, the relevant part is the snake_case string enum converter.
    /// </summary>
    private static readonly JsonSerializerOptions JsonOptions = new JsonSerializerOptions
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>
    /// Serialization options intended for config/input objects sent over FFI: null members are
    /// omitted (a null would otherwise override a required Rust field with a value it cannot
    /// deserialise), while explicit false/0 values are still written.
    /// Not referenced by any other member of this record.
    /// </summary>
    private static readonly JsonSerializerOptions JsonSerializationOptions = new JsonSerializerOptions
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>
    /// Deterministic identifier (hash of content + position).
    /// </summary>
    [JsonPropertyName("id")]
    public required string Id { get; init; }

    /// <summary>
    /// Content of the node — a tagged enum holding type-specific data only.
    /// </summary>
    [JsonPropertyName("content")]
    public required NodeContent Content { get; init; }

    /// <summary>
    /// Index of the parent node; null (`None`) marks a root-level node.
    /// </summary>
    [JsonPropertyName("parent")]
    public uint? Parent { get; init; }

    /// <summary>
    /// Indices of the child nodes, in reading order. Defaults to an empty list.
    /// </summary>
    [JsonPropertyName("children")]
    public List<uint> Children { get; init; } = new();

    /// <summary>
    /// Classification of the content layer; null when not provided.
    /// </summary>
    [JsonPropertyName("content_layer")]
    public ContentLayer? ContentLayer { get; init; }

    /// <summary>
    /// 1-indexed page number on which the node starts; null when not provided.
    /// </summary>
    [JsonPropertyName("page")]
    public uint? Page { get; init; }

    /// <summary>
    /// Page number on which the node ends (for multi-page tables/sections); null when not provided.
    /// </summary>
    [JsonPropertyName("page_end")]
    public uint? PageEnd { get; init; }

    /// <summary>
    /// Bounding box expressed in document coordinates; null when not provided.
    /// </summary>
    [JsonPropertyName("bbox")]
    public BoundingBox? Bbox { get; init; }

    /// <summary>
    /// Inline annotations (formatting, links) that apply to the node's text content.
    ///
    /// Only meaningful for text-carrying nodes; empty for containers.
    /// </summary>
    [JsonPropertyName("annotations")]
    public List<TextAnnotation> Annotations { get; init; } = new();

    /// <summary>
    /// Format-specific key-value attributes.
    ///
    /// An extensible bag for data that has no dedicated typed field: CSS classes,
    /// LaTeX environment names, Excel cell formulas, slide layout names, etc.
    /// </summary>
    [JsonPropertyName("attributes")]
    public Dictionary<string, string>? Attributes { get; init; }

    /// <summary>
    /// Deserialises a <see cref="DocumentNode"/> from its JSON representation.
    /// </summary>
    /// <param name="json">JSON text describing the node.</param>
    /// <returns>The parsed instance; never null.</returns>
    /// <exception cref="KreuzbergException">
    /// The JSON could not be deserialised, or it deserialised to null.
    /// </exception>
    public static DocumentNode FromJson(string json)
    {
        DocumentNode? parsed;
        try
        {
            parsed = JsonSerializer.Deserialize<DocumentNode>(json, JsonOptions);
        }
        catch (Exception ex) when (ex is not KreuzbergException)
        {
            // Anything that is not already a KreuzbergException is wrapped; the cause is preserved.
            throw new KreuzbergException($"Failed to parse DocumentNode from JSON: {ex.Message}", ex);
        }

        return parsed
            ?? throw new KreuzbergException("Failed to parse DocumentNode from JSON: deserializer returned null");
    }
}
|
||||
74
packages/csharp/src/Kreuzberg/DocumentRelationship.cs
generated
Normal file
74
packages/csharp/src/Kreuzberg/DocumentRelationship.cs
generated
Normal file
@@ -0,0 +1,74 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// A resolved relationship linking two nodes of the document tree.
/// </summary>
public sealed record DocumentRelationship
{
    /// <summary>
    /// Options passed to the deserializer by <see cref="FromJson"/>. The ignore condition only
    /// influences writing; for reading, the relevant part is the snake_case string enum converter.
    /// </summary>
    private static readonly JsonSerializerOptions JsonOptions = new JsonSerializerOptions
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>
    /// Serialization options intended for config/input objects sent over FFI: null members are
    /// omitted (a null would otherwise override a required Rust field with a value it cannot
    /// deserialise), while explicit false/0 values are still written.
    /// Not referenced by any other member of this record.
    /// </summary>
    private static readonly JsonSerializerOptions JsonSerializationOptions = new JsonSerializerOptions
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>
    /// Index of the source node (the referencing node). Defaults to 0.
    /// </summary>
    [JsonPropertyName("source")]
    public uint Source { get; init; }

    /// <summary>
    /// Index of the target node (the referenced node). Defaults to 0.
    /// </summary>
    [JsonPropertyName("target")]
    public uint Target { get; init; }

    /// <summary>
    /// Semantic kind of the relationship.
    /// </summary>
    [JsonPropertyName("kind")]
    public required RelationshipKind Kind { get; init; }

    /// <summary>
    /// Deserialises a <see cref="DocumentRelationship"/> from its JSON representation.
    /// </summary>
    /// <param name="json">JSON text describing the relationship.</param>
    /// <returns>The parsed instance; never null.</returns>
    /// <exception cref="KreuzbergException">
    /// The JSON could not be deserialised, or it deserialised to null.
    /// </exception>
    public static DocumentRelationship FromJson(string json)
    {
        DocumentRelationship? parsed;
        try
        {
            parsed = JsonSerializer.Deserialize<DocumentRelationship>(json, JsonOptions);
        }
        catch (Exception ex) when (ex is not KreuzbergException)
        {
            // Anything that is not already a KreuzbergException is wrapped; the cause is preserved.
            throw new KreuzbergException($"Failed to parse DocumentRelationship from JSON: {ex.Message}", ex);
        }

        return parsed
            ?? throw new KreuzbergException("Failed to parse DocumentRelationship from JSON: deserializer returned null");
    }
}
|
||||
110
packages/csharp/src/Kreuzberg/DocumentRevision.cs
generated
Normal file
110
packages/csharp/src/Kreuzberg/DocumentRevision.cs
generated
Normal file
@@ -0,0 +1,110 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// One tracked change embedded in a document.
///
/// Filled in by per-format extractors that understand change-tracking metadata
/// (DOCX `w:ins`/`w:del`/`w:rPrChange`, ODT `text:change-*`, …). Every
/// extractor defaults to `ExtractionResult.revisions = None` until a
/// format-specific implementation is added.
/// </summary>
public sealed record DocumentRevision
{
    /// <summary>
    /// Options passed to the deserializer by <see cref="FromJson"/>. The ignore condition only
    /// influences writing; for reading, the relevant part is the snake_case string enum converter.
    /// </summary>
    private static readonly JsonSerializerOptions JsonOptions = new JsonSerializerOptions
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>
    /// Serialization options intended for config/input objects sent over FFI: null members are
    /// omitted (a null would otherwise override a required Rust field with a value it cannot
    /// deserialise), while explicit false/0 values are still written.
    /// Not referenced by any other member of this record.
    /// </summary>
    private static readonly JsonSerializerOptions JsonSerializationOptions = new JsonSerializerOptions
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>
    /// Format-specific identifier of the revision.
    ///
    /// For DOCX this is the value of the `w:id` attribute on the change element
    /// (e.g. `"42"`). When the attribute is absent a synthetic fallback is
    /// generated (`"docx-ins-0"`, `"docx-del-3"`, …).
    /// </summary>
    [JsonPropertyName("revision_id")]
    public required string RevisionId { get; init; }

    /// <summary>
    /// Display name of the author of the change; null when not available.
    /// </summary>
    [JsonPropertyName("author")]
    public string? Author { get; init; }

    /// <summary>
    /// ISO-8601 timestamp of the change; null when not available.
    ///
    /// Kept as a plain string so the type stays FFI-friendly and
    /// unconditionally available without the `chrono` optional dep.
    /// DOCX fills this from the `w:date` attribute (e.g.
    /// `"2024-03-15T10:30:00Z"`).
    /// </summary>
    [JsonPropertyName("timestamp")]
    public string? Timestamp { get; init; }

    /// <summary>
    /// Semantic kind of the revision.
    /// </summary>
    [JsonPropertyName("kind")]
    public required RevisionKind Kind { get; init; }

    /// <summary>
    /// Best-effort location of the revision within the document.
    ///
    /// Resolution depends on the format and may be null (`None`) when the location
    /// cannot be determined (e.g. changes inside table cells before
    /// table-cell anchor support is added).
    /// </summary>
    [JsonPropertyName("anchor")]
    public RevisionAnchor? Anchor { get; init; }

    /// <summary>
    /// Content changes that make up the revision.
    /// </summary>
    [JsonPropertyName("delta")]
    public required RevisionDelta Delta { get; init; }

    /// <summary>
    /// Deserialises a <see cref="DocumentRevision"/> from its JSON representation.
    /// </summary>
    /// <param name="json">JSON text describing the revision.</param>
    /// <returns>The parsed instance; never null.</returns>
    /// <exception cref="KreuzbergException">
    /// The JSON could not be deserialised, or it deserialised to null.
    /// </exception>
    public static DocumentRevision FromJson(string json)
    {
        DocumentRevision? parsed;
        try
        {
            parsed = JsonSerializer.Deserialize<DocumentRevision>(json, JsonOptions);
        }
        catch (Exception ex) when (ex is not KreuzbergException)
        {
            // Anything that is not already a KreuzbergException is wrapped; the cause is preserved.
            throw new KreuzbergException($"Failed to parse DocumentRevision from JSON: {ex.Message}", ex);
        }

        return parsed
            ?? throw new KreuzbergException("Failed to parse DocumentRevision from JSON: deserializer returned null");
    }
}
|
||||
112
packages/csharp/src/Kreuzberg/DocumentStructure.cs
generated
Normal file
112
packages/csharp/src/Kreuzberg/DocumentStructure.cs
generated
Normal file
@@ -0,0 +1,112 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// Top-level structured document representation.
///
/// A flat array of nodes with index-based parent/child references forming a tree.
/// Root-level nodes have `parent: None` (null in C#).
///
/// The helpers `body_roots()`, `furniture_roots()`, `validate()` and
/// `finalize_node_types` named in this documentation belong to the native type this
/// record mirrors; this record itself does not define them.
/// </summary>
public sealed record DocumentStructure
{
    /// <summary>
    /// All nodes in document/reading order. Defaults to an empty list.
    /// </summary>
    [JsonPropertyName("nodes")]
    public List<DocumentNode> Nodes { get; init; } = [];

    /// <summary>
    /// Origin format identifier (e.g. "docx", "pptx", "html", "pdf"); null when not provided.
    ///
    /// Allows renderers to apply format-aware heuristics when converting
    /// the document tree to output formats.
    /// </summary>
    [JsonPropertyName("source_format")]
    public string? SourceFormat { get; init; } = null;

    /// <summary>
    /// Resolved relationships between nodes (footnote refs, citations, anchor links, etc.).
    ///
    /// Populated during derivation from the internal document representation.
    /// Empty when no relationships are detected.
    /// </summary>
    [JsonPropertyName("relationships")]
    public List<DocumentRelationship> Relationships { get; init; } = [];

    /// <summary>
    /// Sorted, deduplicated list of node type names present in this document.
    ///
    /// Each value is the snake_case `node_type` tag of the corresponding
    /// `NodeContent` variant (e.g. `"paragraph"`, `"heading"`, `"table"`, …).
    ///
    /// Computed from `nodes` via the native `DocumentStructure.finalize_node_types`.
    /// Empty until that method is called (internal construction paths call it
    /// at the end of derivation).
    /// </summary>
    [JsonPropertyName("node_types")]
    public List<string> NodeTypes { get; init; } = [];

    /// <summary>
    /// Parse a <see cref="DocumentStructure"/> from a JSON string.
    /// </summary>
    /// <param name="json">JSON text describing the document structure.</param>
    /// <returns>The parsed instance; never null.</returns>
    /// <exception cref="KreuzbergException">
    /// When the JSON cannot be deserialised, or when it deserialises to null.
    /// </exception>
    public static DocumentStructure FromJson(string json)
    {
        try
        {
            return JsonSerializer.Deserialize<DocumentStructure>(json, JsonOptions)
                ?? throw new KreuzbergException("Failed to parse DocumentStructure from JSON: deserializer returned null");
        }
        catch (Exception e) when (e is not KreuzbergException)
        {
            // KreuzbergException propagates untouched; everything else is wrapped with its cause.
            throw new KreuzbergException($"Failed to parse DocumentStructure from JSON: {e.Message}", e);
        }
    }

    /// <summary>
    /// Options used by <see cref="FromJson"/>. The ignore condition only influences writing;
    /// for reading, the relevant part is the snake_case string enum converter.
    /// </summary>
    private static readonly JsonSerializerOptions JsonOptions = new()
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>Options for serializing config/input objects to FFI. Strips nulls
    /// (nullable C# fields default to null and would override required Rust fields with
    /// non-deserialisable nulls) but preserves explicit false/0 so caller intent is kept.</summary>
    private static readonly JsonSerializerOptions JsonSerializationOptions = new()
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>
    /// Builds a <see cref="DocumentStructure"/> from the native library's default value.
    /// </summary>
    /// <remarks>
    /// The native default is serialised to JSON by the native library and then parsed with
    /// <see cref="FromJson"/>. The native string and the native handle are released in
    /// <c>finally</c> blocks (string first, then handle) so they are freed even when an
    /// intermediate step throws.
    /// </remarks>
    /// <returns>The default document structure; never null.</returns>
    /// <exception cref="KreuzbergException">
    /// When the native library yields no JSON, or the JSON cannot be deserialised.
    /// </exception>
    public static DocumentStructure Default()
    {
        var nativeResult = NativeMethods.DocumentStructureDefault();
        try
        {
            var jsonPtr = NativeMethods.DocumentStructureToJson(nativeResult);
            string? json;
            try
            {
                json = global::System.Runtime.InteropServices.Marshal.PtrToStringUTF8(jsonPtr);
            }
            finally
            {
                NativeMethods.FreeString(jsonPtr);
            }

            // A missing string used to be parsed as the literal "null" and the resulting null
            // reference was returned through the null-forgiving operator; fail loudly instead.
            if (json is null)
            {
                throw new KreuzbergException("Failed to obtain default DocumentStructure: native serializer returned no JSON");
            }

            // FromJson rejects a JSON null and wraps parser failures in KreuzbergException.
            return FromJson(json);
        }
        finally
        {
            NativeMethods.DocumentStructureFree(nativeResult);
        }
    }
}
|
||||
154
packages/csharp/src/Kreuzberg/DocxAppProperties.cs
generated
Normal file
154
packages/csharp/src/Kreuzberg/DocxAppProperties.cs
generated
Normal file
@@ -0,0 +1,154 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// Application properties read from docProps/app.xml of a DOCX file.
///
/// Holds Word-specific document statistics and metadata.
/// </summary>
public sealed record DocxAppProperties
{
    /// <summary>
    /// Options passed to the deserializer by <see cref="FromJson"/>. The ignore condition only
    /// influences writing; for reading, the relevant part is the snake_case string enum converter.
    /// </summary>
    private static readonly JsonSerializerOptions JsonOptions = new JsonSerializerOptions
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>
    /// Serialization options intended for config/input objects sent over FFI: null members are
    /// omitted (a null would otherwise override a required Rust field with a value it cannot
    /// deserialise), while explicit false/0 values are still written.
    /// Not referenced by any other member of this record.
    /// </summary>
    private static readonly JsonSerializerOptions JsonSerializationOptions = new JsonSerializerOptions
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>
    /// Name of the application (e.g., "Microsoft Office Word").
    /// </summary>
    [JsonPropertyName("application")]
    public string? Application { get; init; }

    /// <summary>
    /// Version of the application.
    /// </summary>
    [JsonPropertyName("app_version")]
    public string? AppVersion { get; init; }

    /// <summary>
    /// Filename of the template.
    /// </summary>
    [JsonPropertyName("template")]
    public string? Template { get; init; }

    /// <summary>
    /// Total editing time, in minutes.
    /// </summary>
    [JsonPropertyName("total_time")]
    public int? TotalTime { get; init; }

    /// <summary>
    /// Page count.
    /// </summary>
    [JsonPropertyName("pages")]
    public int? Pages { get; init; }

    /// <summary>
    /// Word count.
    /// </summary>
    [JsonPropertyName("words")]
    public int? Words { get; init; }

    /// <summary>
    /// Character count, excluding spaces.
    /// </summary>
    [JsonPropertyName("characters")]
    public int? Characters { get; init; }

    /// <summary>
    /// Character count, including spaces.
    /// </summary>
    [JsonPropertyName("characters_with_spaces")]
    public int? CharactersWithSpaces { get; init; }

    /// <summary>
    /// Line count.
    /// </summary>
    [JsonPropertyName("lines")]
    public int? Lines { get; init; }

    /// <summary>
    /// Paragraph count.
    /// </summary>
    [JsonPropertyName("paragraphs")]
    public int? Paragraphs { get; init; }

    /// <summary>
    /// Name of the company.
    /// </summary>
    [JsonPropertyName("company")]
    public string? Company { get; init; }

    /// <summary>
    /// Security level of the document.
    /// </summary>
    [JsonPropertyName("doc_security")]
    public int? DocSecurity { get; init; }

    /// <summary>
    /// The scale-crop flag.
    /// </summary>
    [JsonPropertyName("scale_crop")]
    public bool? ScaleCrop { get; init; }

    /// <summary>
    /// The links-up-to-date flag.
    /// </summary>
    [JsonPropertyName("links_up_to_date")]
    public bool? LinksUpToDate { get; init; }

    /// <summary>
    /// The shared-document flag.
    /// </summary>
    [JsonPropertyName("shared_doc")]
    public bool? SharedDoc { get; init; }

    /// <summary>
    /// The hyperlinks-changed flag.
    /// </summary>
    [JsonPropertyName("hyperlinks_changed")]
    public bool? HyperlinksChanged { get; init; }

    /// <summary>
    /// Deserialises a <see cref="DocxAppProperties"/> from its JSON representation.
    /// </summary>
    /// <param name="json">JSON text describing the application properties.</param>
    /// <returns>The parsed instance; never null.</returns>
    /// <exception cref="KreuzbergException">
    /// The JSON could not be deserialised, or it deserialised to null.
    /// </exception>
    public static DocxAppProperties FromJson(string json)
    {
        DocxAppProperties? parsed;
        try
        {
            parsed = JsonSerializer.Deserialize<DocxAppProperties>(json, JsonOptions);
        }
        catch (Exception ex) when (ex is not KreuzbergException)
        {
            // Anything that is not already a KreuzbergException is wrapped; the cause is preserved.
            throw new KreuzbergException($"Failed to parse DocxAppProperties from JSON: {ex.Message}", ex);
        }

        return parsed
            ?? throw new KreuzbergException("Failed to parse DocxAppProperties from JSON: deserializer returned null");
    }
}
|
||||
86
packages/csharp/src/Kreuzberg/DocxMetadata.cs
generated
Normal file
86
packages/csharp/src/Kreuzberg/DocxMetadata.cs
generated
Normal file
@@ -0,0 +1,86 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// Metadata of a Word document.
///
/// Extracted from DOCX files through the shared Office Open XML metadata extraction.
/// Integrates with the `office_metadata` module for core/app/custom properties.
/// </summary>
public sealed record DocxMetadata
{
    /// <summary>
    /// Options passed to the deserializer by <see cref="FromJson"/>. The ignore condition only
    /// influences writing; for reading, the relevant part is the snake_case string enum converter.
    /// </summary>
    private static readonly JsonSerializerOptions JsonOptions = new JsonSerializerOptions
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>
    /// Serialization options intended for config/input objects sent over FFI: null members are
    /// omitted (a null would otherwise override a required Rust field with a value it cannot
    /// deserialise), while explicit false/0 values are still written.
    /// Not referenced by any other member of this record.
    /// </summary>
    private static readonly JsonSerializerOptions JsonSerializationOptions = new JsonSerializerOptions
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>
    /// Core properties from docProps/core.xml (Dublin Core metadata).
    ///
    /// Holds title, creator, subject, keywords, dates, etc.
    /// The format is shared across DOCX/PPTX/XLSX documents.
    /// </summary>
    [JsonPropertyName("core_properties")]
    public CoreProperties? CoreProperties { get; init; }

    /// <summary>
    /// Application properties from docProps/app.xml (Word-specific statistics).
    ///
    /// Holds word count, page count, paragraph count, editing time, etc.
    /// This is the DOCX-specific variant of Office application properties.
    /// </summary>
    [JsonPropertyName("app_properties")]
    public DocxAppProperties? AppProperties { get; init; }

    /// <summary>
    /// Custom properties from docProps/custom.xml (user-defined properties).
    ///
    /// Holds key-value pairs defined by users or applications.
    /// Values can be strings, numbers, booleans, or dates.
    /// </summary>
    // NOTE(review): values are typed as string here, and System.Text.Json does not coerce JSON
    // numbers or booleans into string by default — confirm how the native side encodes
    // non-string values, otherwise FromJson would fail on them.
    [JsonPropertyName("custom_properties")]
    public Dictionary<string, string>? CustomProperties { get; init; }

    /// <summary>
    /// Deserialises a <see cref="DocxMetadata"/> from its JSON representation.
    /// </summary>
    /// <param name="json">JSON text describing the metadata.</param>
    /// <returns>The parsed instance; never null.</returns>
    /// <exception cref="KreuzbergException">
    /// The JSON could not be deserialised, or it deserialised to null.
    /// </exception>
    public static DocxMetadata FromJson(string json)
    {
        DocxMetadata? parsed;
        try
        {
            parsed = JsonSerializer.Deserialize<DocxMetadata>(json, JsonOptions);
        }
        catch (Exception ex) when (ex is not KreuzbergException)
        {
            // Anything that is not already a KreuzbergException is wrapped; the cause is preserved.
            throw new KreuzbergException($"Failed to parse DocxMetadata from JSON: {ex.Message}", ex);
        }

        return parsed
            ?? throw new KreuzbergException("Failed to parse DocxMetadata from JSON: deserializer returned null");
    }
}
|
||||
83
packages/csharp/src/Kreuzberg/Element.cs
generated
Normal file
83
packages/csharp/src/Kreuzberg/Element.cs
generated
Normal file
@@ -0,0 +1,83 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// A semantic element extracted from a document.
///
/// One logical unit of content, carrying its semantic classification, a unique
/// identifier, and metadata that tracks where it came from and where it sits.
/// </summary>
public sealed record Element
{
    /// <summary>Settings used by <see cref="FromJson"/>; enums are handled as snake_case strings.</summary>
    private static readonly JsonSerializerOptions JsonOptions = new()
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>
    /// Settings for serializing config/input objects sent across the FFI boundary.
    /// Null members are omitted (a nullable C# member left at null would otherwise
    /// overwrite a required Rust field with a null it cannot deserialise), while
    /// explicit false/0 values are still written so caller intent survives.
    /// </summary>
    private static readonly JsonSerializerOptions JsonSerializationOptions = new()
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>Unique identifier of this element.</summary>
    [JsonPropertyName("element_id")]
    public required string ElementId { get; init; }

    /// <summary>Semantic classification of this element.</summary>
    [JsonPropertyName("element_type")]
    public required ElementType ElementType { get; init; }

    /// <summary>Text content carried by the element.</summary>
    [JsonPropertyName("text")]
    public required string Text { get; init; }

    /// <summary>Metadata attached to the element.</summary>
    [JsonPropertyName("metadata")]
    public required ElementMetadata Metadata { get; init; }

    /// <summary>
    /// Build an <see cref="Element"/> from its JSON representation.
    /// </summary>
    /// <param name="json">JSON text to deserialise.</param>
    /// <returns>The deserialised instance; never null.</returns>
    /// <exception cref="KreuzbergException">
    /// When the deserialiser yields null or throws; any underlying exception is
    /// attached as the inner exception.
    /// </exception>
    public static Element FromJson(string json)
    {
        try
        {
            var parsed = JsonSerializer.Deserialize<Element>(json, JsonOptions);
            if (parsed is null)
            {
                throw new KreuzbergException("Failed to parse Element from JSON: deserializer returned null");
            }

            return parsed;
        }
        // KreuzbergException raised above must escape untouched, so only foreign failures are wrapped.
        catch (Exception e) when (e is not KreuzbergException)
        {
            throw new KreuzbergException($"Failed to parse Element from JSON: {e.Message}", e);
        }
    }
}
|
||||
86
packages/csharp/src/Kreuzberg/ElementMetadata.cs
generated
Normal file
86
packages/csharp/src/Kreuzberg/ElementMetadata.cs
generated
Normal file
@@ -0,0 +1,86 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// Metadata for a semantic element.
/// </summary>
public sealed record ElementMetadata
{
    /// <summary>
    /// Page number (1-indexed), or null when not provided.
    /// </summary>
    [JsonPropertyName("page_number")]
    public uint? PageNumber { get; init; } = null;

    /// <summary>
    /// Source filename or document name, or null when not provided.
    /// </summary>
    [JsonPropertyName("filename")]
    public string? Filename { get; init; } = null;

    /// <summary>
    /// Bounding box coordinates if available.
    /// </summary>
    [JsonPropertyName("coordinates")]
    public BoundingBox? Coordinates { get; init; } = null;

    /// <summary>
    /// Position index in the element sequence, or null when not provided.
    /// </summary>
    [JsonPropertyName("element_index")]
    public ulong? ElementIndex { get; init; } = null;

    /// <summary>
    /// Additional custom metadata.
    ///
    /// Starts as an empty dictionary so the non-nullable annotation holds even
    /// when the JSON omits the "additional" key or the record is created with
    /// an object initialiser that does not set it.
    /// </summary>
    [JsonPropertyName("additional")]
    public Dictionary<string, string> Additional { get; init; } = new();

    /// <summary>
    /// Parse a <see cref="ElementMetadata"/> from a JSON string.
    /// </summary>
    /// <param name="json">JSON text to deserialise.</param>
    /// <returns>The deserialised instance; never null.</returns>
    /// <exception cref="KreuzbergException">
    /// When the deserialiser yields null or throws; any underlying exception is
    /// attached as the inner exception.
    /// </exception>
    public static ElementMetadata FromJson(string json)
    {
        try
        {
            return JsonSerializer.Deserialize<ElementMetadata>(json, JsonOptions)
                ?? throw new KreuzbergException("Failed to parse ElementMetadata from JSON: deserializer returned null");
        }
        // KreuzbergException raised above must escape untouched, so only foreign failures are wrapped.
        catch (Exception e) when (e is not KreuzbergException)
        {
            throw new KreuzbergException($"Failed to parse ElementMetadata from JSON: {e.Message}", e);
        }
    }

    /// <summary>Settings used by <see cref="FromJson"/>; enums are handled as snake_case strings.</summary>
    private static readonly JsonSerializerOptions JsonOptions = new()
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>Options for serializing config/input objects to FFI. Strips nulls
    /// (nullable C# fields default to null and would override required Rust fields with
    /// non-deserialisable nulls) but preserves explicit false/0 so caller intent is kept.</summary>
    private static readonly JsonSerializerOptions JsonSerializationOptions = new()
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };
}
|
||||
121
packages/csharp/src/Kreuzberg/ElementType.cs
generated
Normal file
121
packages/csharp/src/Kreuzberg/ElementType.cs
generated
Normal file
@@ -0,0 +1,121 @@
|
||||
// This file is auto-generated by alef. DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
/// <summary>
/// Classification of a semantic element.
///
/// Sorts text content into semantic units for downstream processing, covering
/// the element types commonly found in Unstructured documents. The JSON form of
/// each member is produced by <see cref="ElementTypeJsonConverter"/>.
/// </summary>
[JsonConverter(typeof(ElementTypeJsonConverter))]
public enum ElementType
{
    /// <summary>The document title.</summary>
    [JsonPropertyName("title")]
    Title = 0,

    /// <summary>Body of main narrative text.</summary>
    [JsonPropertyName("narrative_text")]
    NarrativeText = 1,

    /// <summary>A section heading.</summary>
    [JsonPropertyName("heading")]
    Heading = 2,

    /// <summary>An item of a list (bullet, numbered, etc.).</summary>
    [JsonPropertyName("list_item")]
    ListItem = 3,

    /// <summary>A table.</summary>
    [JsonPropertyName("table")]
    Table = 4,

    /// <summary>An image.</summary>
    [JsonPropertyName("image")]
    Image = 5,

    /// <summary>Marker for a page break.</summary>
    [JsonPropertyName("page_break")]
    PageBreak = 6,

    /// <summary>A block of code.</summary>
    [JsonPropertyName("code_block")]
    CodeBlock = 7,

    /// <summary>A block quote.</summary>
    [JsonPropertyName("block_quote")]
    BlockQuote = 8,

    /// <summary>Footer text.</summary>
    [JsonPropertyName("footer")]
    Footer = 9,

    /// <summary>Header text.</summary>
    [JsonPropertyName("header")]
    Header = 10,
}
|
||||
|
||||
|
||||
/// <summary>
/// Custom JSON converter for <see cref="ElementType"/> that maps each member to
/// an explicit snake_case string and back.
/// </summary>
internal sealed class ElementTypeJsonConverter : JsonConverter<ElementType>
{
    /// <summary>
    /// Read an <see cref="ElementType"/> from the current JSON token.
    /// </summary>
    /// <param name="reader">Reader positioned on the value to convert.</param>
    /// <param name="typeToConvert">Unused; the target is always <see cref="ElementType"/>.</param>
    /// <param name="options">Unused.</param>
    /// <returns>The member whose wire name equals the JSON string.</returns>
    /// <exception cref="JsonException">
    /// When the token is not a JSON string or the string is not a known wire name.
    /// </exception>
    public override ElementType Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options)
    {
        // GetString() throws InvalidOperationException on non-string tokens; fail with a
        // JsonException instead so callers get one exception type for all malformed input.
        if (reader.TokenType != JsonTokenType.String)
        {
            throw new JsonException($"Expected a JSON string for ElementType but found {reader.TokenType}");
        }

        var value = reader.GetString();
        return value switch
        {
            "title" => ElementType.Title,
            "narrative_text" => ElementType.NarrativeText,
            "heading" => ElementType.Heading,
            "list_item" => ElementType.ListItem,
            "table" => ElementType.Table,
            "image" => ElementType.Image,
            "page_break" => ElementType.PageBreak,
            "code_block" => ElementType.CodeBlock,
            "block_quote" => ElementType.BlockQuote,
            "footer" => ElementType.Footer,
            "header" => ElementType.Header,
            _ => throw new JsonException($"Unknown ElementType value: {value}")
        };
    }

    /// <summary>
    /// Write an <see cref="ElementType"/> as its snake_case JSON string.
    /// </summary>
    /// <param name="writer">Writer that receives the string value.</param>
    /// <param name="value">Member to write.</param>
    /// <param name="options">Unused.</param>
    /// <exception cref="JsonException">When <paramref name="value"/> is not a declared member.</exception>
    public override void Write(Utf8JsonWriter writer, ElementType value, JsonSerializerOptions options)
    {
        var str = value switch
        {
            ElementType.Title => "title",
            ElementType.NarrativeText => "narrative_text",
            ElementType.Heading => "heading",
            ElementType.ListItem => "list_item",
            ElementType.Table => "table",
            ElementType.Image => "image",
            ElementType.PageBreak => "page_break",
            ElementType.CodeBlock => "code_block",
            ElementType.BlockQuote => "block_quote",
            ElementType.Footer => "footer",
            ElementType.Header => "header",
            _ => throw new JsonException($"Unknown ElementType value: {value}")
        };
        writer.WriteStringValue(str);
    }
}
|
||||
95
packages/csharp/src/Kreuzberg/EmailAttachment.cs
generated
Normal file
95
packages/csharp/src/Kreuzberg/EmailAttachment.cs
generated
Normal file
@@ -0,0 +1,95 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// An attachment of an email message.
///
/// Carries the attachment's metadata and, optionally, its content.
/// </summary>
public sealed record EmailAttachment
{
    /// <summary>Settings used by <see cref="FromJson"/>; enums are handled as snake_case strings.</summary>
    private static readonly JsonSerializerOptions JsonOptions = new()
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>
    /// Settings for serializing config/input objects sent across the FFI boundary.
    /// Null members are omitted (a nullable C# member left at null would otherwise
    /// overwrite a required Rust field with a null it cannot deserialise), while
    /// explicit false/0 values are still written so caller intent survives.
    /// </summary>
    private static readonly JsonSerializerOptions JsonSerializationOptions = new()
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>Name of the attachment, taken from the Content-Disposition header.</summary>
    [JsonPropertyName("name")]
    public string? Name { get; init; }

    /// <summary>Filename of the attachment.</summary>
    [JsonPropertyName("filename")]
    public string? Filename { get; init; }

    /// <summary>MIME type of the attachment.</summary>
    [JsonPropertyName("mime_type")]
    public string? MimeType { get; init; }

    /// <summary>Size of the attachment in bytes.</summary>
    [JsonPropertyName("size")]
    public ulong? Size { get; init; }

    /// <summary>True when the attachment is an image; defaults to false.</summary>
    [JsonPropertyName("is_image")]
    public bool IsImage { get; init; }

    /// <summary>
    /// Content of the attachment, when it was extracted.
    /// The upstream type is `bytes.Bytes`, chosen for cheap cloning of large buffers.
    /// </summary>
    /// <remarks>
    /// NOTE(review): System.Text.Json maps <c>byte[]</c> to a Base64 JSON string by
    /// default; confirm the producer emits that encoding rather than a number array.
    /// </remarks>
    [JsonPropertyName("data")]
    public byte[]? Data { get; init; }

    /// <summary>
    /// Build an <see cref="EmailAttachment"/> from its JSON representation.
    /// </summary>
    /// <param name="json">JSON text to deserialise.</param>
    /// <returns>The deserialised instance; never null.</returns>
    /// <exception cref="KreuzbergException">
    /// When the deserialiser yields null or throws; any underlying exception is
    /// attached as the inner exception.
    /// </exception>
    public static EmailAttachment FromJson(string json)
    {
        try
        {
            var parsed = JsonSerializer.Deserialize<EmailAttachment>(json, JsonOptions);
            if (parsed is null)
            {
                throw new KreuzbergException("Failed to parse EmailAttachment from JSON: deserializer returned null");
            }

            return parsed;
        }
        // KreuzbergException raised above must escape untouched, so only foreign failures are wrapped.
        catch (Exception e) when (e is not KreuzbergException)
        {
            throw new KreuzbergException($"Failed to parse EmailAttachment from JSON: {e.Message}", e);
        }
    }
}
|
||||
79
packages/csharp/src/Kreuzberg/EmailConfig.cs
generated
Normal file
79
packages/csharp/src/Kreuzberg/EmailConfig.cs
generated
Normal file
@@ -0,0 +1,79 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// Settings that control email extraction.
/// </summary>
public sealed record EmailConfig
{
    /// <summary>Settings used by <see cref="FromJson"/>; enums are handled as snake_case strings.</summary>
    private static readonly JsonSerializerOptions JsonOptions = new()
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>
    /// Settings for serializing config/input objects sent across the FFI boundary.
    /// Null members are omitted (a nullable C# member left at null would otherwise
    /// overwrite a required Rust field with a null it cannot deserialise), while
    /// explicit false/0 values are still written so caller intent survives.
    /// </summary>
    private static readonly JsonSerializerOptions JsonSerializationOptions = new()
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>
    /// Windows codepage number applied when an MSG file carries no codepage property.
    /// Left unset (null here, `None` upstream) the fallback is windows-1252.
    ///
    /// An unrecognized or invalid codepage number (including 0) silently falls
    /// back to windows-1252, exactly as happens when the MSG file itself names an
    /// unrecognized codepage. No error or warning is emitted, so verify the output
    /// when supplying unusual values.
    ///
    /// Common values:
    /// - 1250: Central European (Polish, Czech, Hungarian, etc.)
    /// - 1251: Cyrillic (Russian, Ukrainian, Bulgarian, etc.)
    /// - 1252: Western European (default)
    /// - 1253: Greek
    /// - 1254: Turkish
    /// - 1255: Hebrew
    /// - 1256: Arabic
    /// - 932: Japanese (Shift-JIS)
    /// - 936: Simplified Chinese (GBK)
    /// </summary>
    [JsonPropertyName("msg_fallback_codepage")]
    public uint? MsgFallbackCodepage { get; init; }

    /// <summary>
    /// Build an <see cref="EmailConfig"/> from its JSON representation.
    /// </summary>
    /// <param name="json">JSON text to deserialise.</param>
    /// <returns>The deserialised instance; never null.</returns>
    /// <exception cref="KreuzbergException">
    /// When the deserialiser yields null or throws; any underlying exception is
    /// attached as the inner exception.
    /// </exception>
    public static EmailConfig FromJson(string json)
    {
        try
        {
            var parsed = JsonSerializer.Deserialize<EmailConfig>(json, JsonOptions);
            if (parsed is null)
            {
                throw new KreuzbergException("Failed to parse EmailConfig from JSON: deserializer returned null");
            }

            return parsed;
        }
        // KreuzbergException raised above must escape untouched, so only foreign failures are wrapped.
        catch (Exception e) when (e is not KreuzbergException)
        {
            throw new KreuzbergException($"Failed to parse EmailConfig from JSON: {e.Message}", e);
        }
    }
}
|
||||
131
packages/csharp/src/Kreuzberg/EmailExtractionResult.cs
generated
Normal file
131
packages/csharp/src/Kreuzberg/EmailExtractionResult.cs
generated
Normal file
@@ -0,0 +1,131 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// Email extraction result.
///
/// Complete representation of an extracted email message (.eml or .msg)
/// including headers, body content, and attachments.
/// </summary>
public sealed record EmailExtractionResult
{
    /// <summary>
    /// Email subject line.
    /// </summary>
    [JsonPropertyName("subject")]
    public string? Subject { get; init; } = null;

    /// <summary>
    /// Sender email address.
    /// </summary>
    [JsonPropertyName("from_email")]
    public string? FromEmail { get; init; } = null;

    /// <summary>
    /// Primary recipient email addresses; empty when none are present.
    /// </summary>
    [JsonPropertyName("to_emails")]
    public List<string> ToEmails { get; init; } = [];

    /// <summary>
    /// CC recipient email addresses; empty when none are present.
    /// </summary>
    [JsonPropertyName("cc_emails")]
    public List<string> CcEmails { get; init; } = [];

    /// <summary>
    /// BCC recipient email addresses; empty when none are present.
    /// </summary>
    [JsonPropertyName("bcc_emails")]
    public List<string> BccEmails { get; init; } = [];

    /// <summary>
    /// Email date/timestamp, kept as the string received.
    /// </summary>
    [JsonPropertyName("date")]
    public string? Date { get; init; } = null;

    /// <summary>
    /// Message-ID header value.
    /// </summary>
    [JsonPropertyName("message_id")]
    public string? MessageId { get; init; } = null;

    /// <summary>
    /// Plain text version of the email body.
    /// </summary>
    [JsonPropertyName("plain_text")]
    public string? PlainText { get; init; } = null;

    /// <summary>
    /// HTML version of the email body.
    /// </summary>
    [JsonPropertyName("html_content")]
    public string? HtmlContent { get; init; } = null;

    /// <summary>
    /// Cleaned/processed text content. Aliased as `cleaned_text` for back-compat.
    /// </summary>
    [JsonPropertyName("content")]
    public required string Content { get; init; }

    /// <summary>
    /// List of email attachments; empty when none are present.
    /// </summary>
    [JsonPropertyName("attachments")]
    public List<EmailAttachment> Attachments { get; init; } = [];

    /// <summary>
    /// Additional email headers and metadata.
    ///
    /// Starts as an empty dictionary so the non-nullable annotation holds even
    /// when the JSON omits the "metadata" key or the record is created with an
    /// object initialiser that does not set it.
    /// </summary>
    [JsonPropertyName("metadata")]
    public Dictionary<string, string> Metadata { get; init; } = new();

    /// <summary>
    /// Parse a <see cref="EmailExtractionResult"/> from a JSON string.
    /// </summary>
    /// <param name="json">JSON text to deserialise.</param>
    /// <returns>The deserialised instance; never null.</returns>
    /// <exception cref="KreuzbergException">
    /// When the deserialiser yields null or throws; any underlying exception is
    /// attached as the inner exception.
    /// </exception>
    public static EmailExtractionResult FromJson(string json)
    {
        try
        {
            return JsonSerializer.Deserialize<EmailExtractionResult>(json, JsonOptions)
                ?? throw new KreuzbergException("Failed to parse EmailExtractionResult from JSON: deserializer returned null");
        }
        // KreuzbergException raised above must escape untouched, so only foreign failures are wrapped.
        catch (Exception e) when (e is not KreuzbergException)
        {
            throw new KreuzbergException($"Failed to parse EmailExtractionResult from JSON: {e.Message}", e);
        }
    }

    /// <summary>Settings used by <see cref="FromJson"/>; enums are handled as snake_case strings.</summary>
    private static readonly JsonSerializerOptions JsonOptions = new()
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>Options for serializing config/input objects to FFI. Strips nulls
    /// (nullable C# fields default to null and would override required Rust fields with
    /// non-deserialisable nulls) but preserves explicit false/0 so caller intent is kept.</summary>
    private static readonly JsonSerializerOptions JsonSerializationOptions = new()
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };
}
|
||||
100
packages/csharp/src/Kreuzberg/EmailMetadata.cs
generated
Normal file
100
packages/csharp/src/Kreuzberg/EmailMetadata.cs
generated
Normal file
@@ -0,0 +1,100 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// Metadata extracted from .eml and .msg email files.
///
/// Covers sender and recipient information, the message ID, and the list of
/// attachments.
/// </summary>
public sealed record EmailMetadata
{
    /// <summary>Settings used by <see cref="FromJson"/>; enums are handled as snake_case strings.</summary>
    private static readonly JsonSerializerOptions JsonOptions = new()
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>
    /// Settings for serializing config/input objects sent across the FFI boundary.
    /// Null members are omitted (a nullable C# member left at null would otherwise
    /// overwrite a required Rust field with a null it cannot deserialise), while
    /// explicit false/0 values are still written so caller intent survives.
    /// </summary>
    private static readonly JsonSerializerOptions JsonSerializationOptions = new()
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>Email address of the sender.</summary>
    [JsonPropertyName("from_email")]
    public string? FromEmail { get; init; }

    /// <summary>Display name of the sender.</summary>
    [JsonPropertyName("from_name")]
    public string? FromName { get; init; }

    /// <summary>Primary recipients; an empty list by default.</summary>
    [JsonPropertyName("to_emails")]
    public List<string> ToEmails { get; init; } = [];

    /// <summary>CC recipients; an empty list by default.</summary>
    [JsonPropertyName("cc_emails")]
    public List<string> CcEmails { get; init; } = [];

    /// <summary>BCC recipients; an empty list by default.</summary>
    [JsonPropertyName("bcc_emails")]
    public List<string> BccEmails { get; init; } = [];

    /// <summary>Value of the Message-ID header.</summary>
    [JsonPropertyName("message_id")]
    public string? MessageId { get; init; }

    /// <summary>Filenames of the attachments; an empty list by default.</summary>
    [JsonPropertyName("attachments")]
    public List<string> Attachments { get; init; } = [];

    /// <summary>
    /// Build an <see cref="EmailMetadata"/> from its JSON representation.
    /// </summary>
    /// <param name="json">JSON text to deserialise.</param>
    /// <returns>The deserialised instance; never null.</returns>
    /// <exception cref="KreuzbergException">
    /// When the deserialiser yields null or throws; any underlying exception is
    /// attached as the inner exception.
    /// </exception>
    public static EmailMetadata FromJson(string json)
    {
        try
        {
            var parsed = JsonSerializer.Deserialize<EmailMetadata>(json, JsonOptions);
            if (parsed is null)
            {
                throw new KreuzbergException("Failed to parse EmailMetadata from JSON: deserializer returned null");
            }

            return parsed;
        }
        // KreuzbergException raised above must escape untouched, so only foreign failures are wrapped.
        catch (Exception e) when (e is not KreuzbergException)
        {
            throw new KreuzbergException($"Failed to parse EmailMetadata from JSON: {e.Message}", e);
        }
    }
}
|
||||
76
packages/csharp/src/Kreuzberg/EmbeddedChanges.cs
generated
Normal file
76
packages/csharp/src/Kreuzberg/EmbeddedChanges.cs
generated
Normal file
@@ -0,0 +1,76 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// Differences in embedded archive children between two results.
/// </summary>
public sealed record EmbeddedChanges
{
    /// <summary>Settings used by <see cref="FromJson"/>; enums are handled as snake_case strings.</summary>
    private static readonly JsonSerializerOptions JsonOptions = new()
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>
    /// Settings for serializing config/input objects sent across the FFI boundary.
    /// Null members are omitted (a nullable C# member left at null would otherwise
    /// overwrite a required Rust field with a null it cannot deserialise), while
    /// explicit false/0 values are still written so caller intent survives.
    /// </summary>
    private static readonly JsonSerializerOptions JsonSerializationOptions = new()
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>
    /// Children found in `b` but missing from `a` (matched by `path`).
    /// </summary>
    [JsonPropertyName("added")]
    public List<ArchiveEntry> Added { get; init; } = [];

    /// <summary>
    /// Children found in `a` but missing from `b` (matched by `path`).
    /// </summary>
    [JsonPropertyName("removed")]
    public List<ArchiveEntry> Removed { get; init; } = [];

    /// <summary>
    /// Children found in both results whose content differs (matched by `path`).
    ///
    /// Every entry carries the diff of the nested `ExtractionResult`.
    /// </summary>
    [JsonPropertyName("changed")]
    public List<EmbeddedDiff> Changed { get; init; } = [];

    /// <summary>
    /// Build an <see cref="EmbeddedChanges"/> from its JSON representation.
    /// </summary>
    /// <param name="json">JSON text to deserialise.</param>
    /// <returns>The deserialised instance; never null.</returns>
    /// <exception cref="KreuzbergException">
    /// When the deserialiser yields null or throws; any underlying exception is
    /// attached as the inner exception.
    /// </exception>
    public static EmbeddedChanges FromJson(string json)
    {
        try
        {
            var parsed = JsonSerializer.Deserialize<EmbeddedChanges>(json, JsonOptions);
            if (parsed is null)
            {
                throw new KreuzbergException("Failed to parse EmbeddedChanges from JSON: deserializer returned null");
            }

            return parsed;
        }
        // KreuzbergException raised above must escape untouched, so only foreign failures are wrapped.
        catch (Exception e) when (e is not KreuzbergException)
        {
            throw new KreuzbergException($"Failed to parse EmbeddedChanges from JSON: {e.Message}", e);
        }
    }
}
|
||||
68
packages/csharp/src/Kreuzberg/EmbeddedDiff.cs
generated
Normal file
68
packages/csharp/src/Kreuzberg/EmbeddedDiff.cs
generated
Normal file
@@ -0,0 +1,68 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// Diff of one embedded archive entry that is present in both results.
/// </summary>
public sealed record EmbeddedDiff
{
    /// <summary>Settings used by <see cref="FromJson"/>; enums are handled as snake_case strings.</summary>
    private static readonly JsonSerializerOptions JsonOptions = new()
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>
    /// Settings for serializing config/input objects sent across the FFI boundary.
    /// Null members are omitted (a nullable C# member left at null would otherwise
    /// overwrite a required Rust field with a null it cannot deserialise), while
    /// explicit false/0 values are still written so caller intent survives.
    /// </summary>
    private static readonly JsonSerializerOptions JsonSerializationOptions = new()
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>Archive-relative path that identifies the entry.</summary>
    [JsonPropertyName("path")]
    public required string Path { get; init; }

    /// <summary>Recursive diff of the entry's extraction result.</summary>
    [JsonPropertyName("diff")]
    public required ExtractionDiff Diff { get; init; }

    /// <summary>
    /// Build an <see cref="EmbeddedDiff"/> from its JSON representation.
    /// </summary>
    /// <param name="json">JSON text to deserialise.</param>
    /// <returns>The deserialised instance; never null.</returns>
    /// <exception cref="KreuzbergException">
    /// When the deserialiser yields null or throws; any underlying exception is
    /// attached as the inner exception.
    /// </exception>
    public static EmbeddedDiff FromJson(string json)
    {
        try
        {
            var parsed = JsonSerializer.Deserialize<EmbeddedDiff>(json, JsonOptions);
            if (parsed is null)
            {
                throw new KreuzbergException("Failed to parse EmbeddedDiff from JSON: deserializer returned null");
            }

            return parsed;
        }
        // KreuzbergException raised above must escape untouched, so only foreign failures are wrapped.
        catch (Exception e) when (e is not KreuzbergException)
        {
            throw new KreuzbergException($"Failed to parse EmbeddedDiff from JSON: {e.Message}", e);
        }
    }
}
|
||||
84
packages/csharp/src/Kreuzberg/EmbeddedFile.cs
generated
Normal file
84
packages/csharp/src/Kreuzberg/EmbeddedFile.cs
generated
Normal file
@@ -0,0 +1,84 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// Embedded file descriptor extracted from the PDF name tree.
/// </summary>
public sealed record EmbeddedFile
{
    /// <summary>
    /// Settings used by <see cref="FromJson"/>; enum values are mapped to snake_case strings.
    /// </summary>
    private static readonly JsonSerializerOptions JsonOptions = new()
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>
    /// Settings intended for serializing config/input objects to FFI: null members are
    /// omitted while explicit false/0 values are still written. Not referenced inside this record.
    /// </summary>
    private static readonly JsonSerializerOptions JsonSerializationOptions = new()
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>
    /// The filename as stored in the PDF name tree.
    /// </summary>
    [JsonPropertyName("name")]
    public required string Name { get; init; }

    /// <summary>
    /// Raw file bytes from the embedded stream (already decompressed by lopdf).
    /// Converted to and from JSON by <c>ByteArrayToIntArrayConverter</c>.
    /// </summary>
    [JsonPropertyName("data")]
    [JsonConverter(typeof(ByteArrayToIntArrayConverter))]
    public byte[] Data { get; init; } = [];

    /// <summary>
    /// Compressed byte count of the original stream (before decompression).
    ///
    /// Lets callers compute the decompression ratio and detect zip-bomb-style
    /// attacks that embed a tiny compressed stream expanding to gigabytes of data.
    /// </summary>
    [JsonPropertyName("compressed_size")]
    public ulong CompressedSize { get; init; }

    /// <summary>
    /// MIME type if specified in the filespec, otherwise <c>null</c>.
    /// </summary>
    [JsonPropertyName("mime_type")]
    public string? MimeType { get; init; }

    /// <summary>
    /// Parse an <see cref="EmbeddedFile"/> from a JSON string.
    /// </summary>
    /// <param name="json">JSON text describing the embedded file.</param>
    /// <returns>The deserialized instance; never null.</returns>
    /// <exception cref="KreuzbergException">When the JSON cannot be deserialised.</exception>
    public static EmbeddedFile FromJson(string json)
    {
        EmbeddedFile? parsed;
        try
        {
            parsed = JsonSerializer.Deserialize<EmbeddedFile>(json, JsonOptions);
        }
        catch (Exception cause) when (cause is not KreuzbergException)
        {
            // Anything that is not already a KreuzbergException is wrapped so callers see one error type.
            throw new KreuzbergException($"Failed to parse EmbeddedFile from JSON: {cause.Message}", cause);
        }

        return parsed
            ?? throw new KreuzbergException("Failed to parse EmbeddedFile from JSON: deserializer returned null");
    }
}
|
||||
127
packages/csharp/src/Kreuzberg/EmbeddingConfig.cs
generated
Normal file
127
packages/csharp/src/Kreuzberg/EmbeddingConfig.cs
generated
Normal file
@@ -0,0 +1,127 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// Embedding configuration for text chunks.
///
/// Configures embedding generation using ONNX models via the vendored embedding engine.
/// Requires the `embeddings` feature to be enabled.
/// </summary>
public sealed record EmbeddingConfig
{
    /// <summary>
    /// The embedding model to use (defaults to "balanced" preset if not specified)
    /// </summary>
    [JsonPropertyName("model")]
    public EmbeddingModelType? Model { get; init; } = null;

    /// <summary>
    /// Whether to normalize embedding vectors (recommended for cosine similarity)
    /// </summary>
    [JsonPropertyName("normalize")]
    public bool Normalize { get; init; } = true;

    /// <summary>
    /// Batch size for embedding generation
    /// </summary>
    [JsonPropertyName("batch_size")]
    public ulong BatchSize { get; init; } = 32;

    /// <summary>
    /// Show model download progress
    /// </summary>
    [JsonPropertyName("show_download_progress")]
    public bool ShowDownloadProgress { get; init; } = false;

    /// <summary>
    /// Custom cache directory for model files
    ///
    /// Defaults to `~/.cache/kreuzberg/embeddings/` if not specified.
    /// Allows full customization of model download location.
    /// </summary>
    [JsonPropertyName("cache_dir")]
    public string? CacheDir { get; init; } = null;

    /// <summary>
    /// Hardware acceleration for the embedding ONNX model.
    ///
    /// When set, controls which execution provider (CPU, CUDA, CoreML, TensorRT)
    /// is used for inference. Defaults to <c>null</c> (auto-select per platform).
    /// </summary>
    [JsonPropertyName("acceleration")]
    public AccelerationConfig? Acceleration { get; init; } = null;

    /// <summary>
    /// Maximum wall-clock duration (in seconds) for a single `embed()` call when
    /// using <see cref="EmbeddingModelType.Plugin"/>.
    ///
    /// Applies only to the in-process plugin path — protects against hung
    /// host-language backends (e.g. a Python callback deadlocked on the GIL,
    /// a model stuck on CUDA OOM retries, etc.). On timeout the dispatcher
    /// reports a plugin error instead of blocking forever.
    ///
    /// This property defaults to <c>null</c> on the C# side.
    /// NOTE(review): the upstream description mentions a 60-second default and says an
    /// absent value disables the timeout — confirm how the native layer treats an omitted field.
    /// </summary>
    [JsonPropertyName("max_embed_duration_secs")]
    public ulong? MaxEmbedDurationSecs { get; init; } = null;

    /// <summary>
    /// Parse an <see cref="EmbeddingConfig"/> from a JSON string.
    /// </summary>
    /// <param name="json">JSON text describing the configuration.</param>
    /// <returns>The deserialized instance; never null.</returns>
    /// <exception cref="KreuzbergException">When the JSON cannot be deserialised.</exception>
    public static EmbeddingConfig FromJson(string json)
    {
        try
        {
            return JsonSerializer.Deserialize<EmbeddingConfig>(json, JsonOptions)
                ?? throw new KreuzbergException($"Failed to parse EmbeddingConfig from JSON: deserializer returned null");
        }
        catch (KreuzbergException)
        {
            throw;
        }
        catch (Exception e)
        {
            throw new KreuzbergException($"Failed to parse EmbeddingConfig from JSON: {e.Message}", e);
        }
    }

    /// <summary>
    /// Settings used when reading JSON; enum values are mapped to snake_case strings.
    /// </summary>
    private static readonly JsonSerializerOptions JsonOptions = new()
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>Options for serializing config/input objects to FFI. Strips nulls
    /// (nullable C# fields default to null and would override required Rust fields with
    /// non-deserialisable nulls) but preserves explicit false/0 so caller intent is kept.</summary>
    private static readonly JsonSerializerOptions JsonSerializationOptions = new()
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>
    /// Build the default configuration by asking the native library for its default
    /// value, exporting it as JSON and deserializing that JSON.
    /// </summary>
    /// <returns>The native default configuration; never null.</returns>
    /// <exception cref="KreuzbergException">
    /// When the native layer yields no JSON or the JSON cannot be deserialised.
    /// </exception>
    public static EmbeddingConfig Default()
    {
        var nativeResult = NativeMethods.EmbeddingConfigDefault();
        try
        {
            var jsonPtr = NativeMethods.EmbeddingConfigToJson(nativeResult);
            string? json;
            try
            {
                json = global::System.Runtime.InteropServices.Marshal.PtrToStringUTF8(jsonPtr);
            }
            finally
            {
                // The managed string is a copy, so the native buffer can be released right away.
                NativeMethods.FreeString(jsonPtr);
            }

            // Route through FromJson so a missing or invalid payload surfaces as a
            // KreuzbergException instead of a null return value or a raw JsonException.
            return FromJson(json ?? "null");
        }
        finally
        {
            // Release the native config handle even when JSON export or parsing fails.
            NativeMethods.EmbeddingConfigFree(nativeResult);
        }
    }
}
|
||||
14
packages/csharp/src/Kreuzberg/EmbeddingException.cs
generated
Normal file
14
packages/csharp/src/Kreuzberg/EmbeddingException.cs
generated
Normal file
@@ -0,0 +1,14 @@
|
||||
// This file is auto-generated by alef. DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// Exception subtype of <see cref="KreuzbergErrorException"/> for the embedding error category.
/// </summary>
public class EmbeddingException : KreuzbergErrorException
{
    /// <summary>Creates the exception with an error message.</summary>
    /// <param name="message">Text forwarded to the base exception.</param>
    public EmbeddingException(string message)
        : base(message)
    {
    }

    /// <summary>Creates the exception with an error message and the exception that caused it.</summary>
    /// <param name="message">Text forwarded to the base exception.</param>
    /// <param name="innerException">Underlying cause forwarded to the base exception.</param>
    public EmbeddingException(string message, Exception innerException)
        : base(message, innerException)
    {
    }
}
|
||||
185
packages/csharp/src/Kreuzberg/EmbeddingModelType.cs
generated
Normal file
185
packages/csharp/src/Kreuzberg/EmbeddingModelType.cs
generated
Normal file
@@ -0,0 +1,185 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// Embedding model types supported by Kreuzberg.
/// </summary>
/// <remarks>
/// A closed set of nested variant records; JSON conversion is delegated to
/// <see cref="EmbeddingModelTypeJsonConverter"/>.
/// </remarks>
[JsonConverter(typeof(EmbeddingModelTypeJsonConverter))]
public abstract record EmbeddingModelType
{
    /// <summary>
    /// Use a preset model configuration (recommended)
    /// </summary>
    public sealed record Preset([property: JsonPropertyName("name")] string Name) : EmbeddingModelType;

    /// <summary>
    /// Use a custom ONNX model from HuggingFace
    /// </summary>
    public sealed record Custom(
        [property: JsonPropertyName("model_id")] string ModelId,
        [property: JsonPropertyName("dimensions")] ulong Dimensions) : EmbeddingModelType;

    /// <summary>
    /// Provider-hosted embedding model via liter-llm.
    ///
    /// Uses the model specified in the nested `LlmConfig` (e.g.,
    /// `"openai/text-embedding-3-small"`).
    /// </summary>
    public sealed record Llm([property: JsonPropertyName("llm")] LlmConfig Value) : EmbeddingModelType;

    /// <summary>
    /// In-process embedding backend registered via the plugin system.
    ///
    /// The caller registers an `EmbeddingBackend` once (e.g. a wrapper around an
    /// already-loaded `llama-cpp-python`, `sentence-transformers`, or tuned ONNX model),
    /// then references it by name in config. Kreuzberg calls back into the registered
    /// backend during chunking and standalone embed requests — no HuggingFace download,
    /// no ONNX Runtime requirement, no HTTP sidecar.
    ///
    /// When this variant is selected, only the following `EmbeddingConfig` fields
    /// apply: `normalize` (post-call L2 normalization) and `max_embed_duration_secs`
    /// (dispatcher timeout). Model-loading fields (`batch_size`, `cache_dir`,
    /// `show_download_progress`, `acceleration`) are ignored — the host owns the
    /// model lifecycle.
    ///
    /// Semantic chunking falls back to `ChunkingConfig.max_characters` when this variant
    /// is used, since there is no preset to look a chunk-size ceiling up against — size your
    /// context window via `max_characters` directly.
    ///
    /// See `register_embedding_backend`.
    /// </summary>
    public sealed record Plugin([property: JsonPropertyName("name")] string Name) : EmbeddingModelType;
}
|
||||
|
||||
/// <summary>
/// Custom converter for EmbeddingModelType sealed union with flattened variant fields.
/// </summary>
/// <remarks>
/// Handles JSON objects with a discriminator field (type) and variant-specific
/// fields at the same level. System.Text.Json's [JsonPolymorphic] cannot handle
/// this layout, so we manually deserialize here.
/// </remarks>
public sealed class EmbeddingModelTypeJsonConverter : JsonConverter<EmbeddingModelType>
{
    /// <summary>Name of the JSON property that selects the variant.</summary>
    private const string DiscriminatorField = "type";

    /// <summary>
    /// Reads a tagged JSON object and materialises the variant named by its <c>type</c> field.
    /// </summary>
    /// <param name="reader">Reader positioned on the start of the JSON object.</param>
    /// <param name="typeToConvert">Requested target type (not inspected).</param>
    /// <param name="options">Serializer options forwarded to the variant deserialization.</param>
    /// <returns>The deserialized variant.</returns>
    /// <exception cref="JsonException">
    /// When the token is not an object, the discriminator is missing, null, not a string
    /// or unknown, or the variant deserializes to null.
    /// </exception>
    public override EmbeddingModelType Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options)
    {
        if (reader.TokenType != JsonTokenType.StartObject)
        {
            throw new JsonException($"Expected JSON object, got {reader.TokenType}");
        }

        using var doc = JsonDocument.ParseValue(ref reader);
        var root = doc.RootElement;

        if (!root.TryGetProperty(DiscriminatorField, out var tagElement))
        {
            throw new JsonException("Missing discriminator field: type");
        }

        if (tagElement.ValueKind == JsonValueKind.Null)
        {
            throw new JsonException("Discriminator field is null");
        }

        // GetString() throws InvalidOperationException for non-string tokens; report a
        // JsonException instead so malformed input fails like every other parse error here.
        if (tagElement.ValueKind != JsonValueKind.String)
        {
            throw new JsonException($"Discriminator field must be a string, got {tagElement.ValueKind}");
        }

        var tagValue = tagElement.GetString()!;

        // Every variant record maps its components with [JsonPropertyName(...)], so the
        // object minus the discriminator can be handed to the variant type directly.
        var flatJson = WriteWithoutDiscriminator(root);

        EmbeddingModelType? variant = tagValue switch
        {
            "preset" => JsonSerializer.Deserialize<EmbeddingModelType.Preset>(flatJson, options),
            "custom" => JsonSerializer.Deserialize<EmbeddingModelType.Custom>(flatJson, options),
            "llm" => JsonSerializer.Deserialize<EmbeddingModelType.Llm>(flatJson, options),
            "plugin" => JsonSerializer.Deserialize<EmbeddingModelType.Plugin>(flatJson, options),
            _ => throw new JsonException($"Unknown EmbeddingModelType discriminator: {tagValue}"),
        };

        return variant ?? throw new JsonException("Failed to deserialize variant");
    }

    /// <summary>
    /// Writes the variant as a single JSON object: the <c>type</c> discriminator followed by
    /// the variant's own properties at the same level.
    /// </summary>
    /// <param name="writer">Destination writer.</param>
    /// <param name="value">Variant instance to serialize.</param>
    /// <param name="options">Serializer options forwarded to the variant serialization.</param>
    /// <exception cref="JsonException">When <paramref name="value"/> is not a known variant.</exception>
    public override void Write(Utf8JsonWriter writer, EmbeddingModelType value, JsonSerializerOptions options)
    {
        var tag = value switch
        {
            EmbeddingModelType.Preset => "preset",
            EmbeddingModelType.Custom => "custom",
            EmbeddingModelType.Llm => "llm",
            EmbeddingModelType.Plugin => "plugin",
            _ => throw new JsonException($"Unknown EmbeddingModelType variant: {value.GetType().Name}"),
        };

        writer.WriteStartObject();
        writer.WriteString(DiscriminatorField, tag);

        // Serialize with the runtime variant type so the variant's own properties are
        // produced, then copy them next to the tag rather than nesting them.
        using var doc = JsonSerializer.SerializeToDocument(value, value.GetType(), options);
        if (doc.RootElement.ValueKind == JsonValueKind.Object)
        {
            foreach (var prop in doc.RootElement.EnumerateObject())
            {
                writer.WritePropertyName(prop.Name);
                prop.Value.WriteTo(writer);
            }
        }

        writer.WriteEndObject();
    }

    /// <summary>Copies <paramref name="root"/> to UTF-8 JSON, leaving out the discriminator property.</summary>
    private static byte[] WriteWithoutDiscriminator(JsonElement root)
    {
        using var buffer = new MemoryStream();
        using (var writer = new Utf8JsonWriter(buffer))
        {
            writer.WriteStartObject();
            foreach (var prop in root.EnumerateObject())
            {
                if (prop.Name != DiscriminatorField)
                {
                    writer.WritePropertyName(prop.Name);
                    prop.Value.WriteTo(writer);
                }
            }

            writer.WriteEndObject();
            writer.Flush();
        }

        return buffer.ToArray();
    }
}
|
||||
95
packages/csharp/src/Kreuzberg/EmbeddingPreset.cs
generated
Normal file
95
packages/csharp/src/Kreuzberg/EmbeddingPreset.cs
generated
Normal file
@@ -0,0 +1,95 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// Preset configurations for common RAG use cases.
///
/// Each preset combines chunk size, overlap, and embedding model
/// to provide an optimized configuration for specific scenarios.
///
/// All string fields are owned `String` for FFI compatibility — instances
/// are safe to clone and pass across language boundaries.
/// </summary>
public sealed record EmbeddingPreset
{
    /// <summary>
    /// Settings used by <see cref="FromJson"/>; enum values are mapped to snake_case strings.
    /// </summary>
    private static readonly JsonSerializerOptions JsonOptions = new()
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>
    /// Settings intended for serializing config/input objects to FFI: null members are
    /// omitted while explicit false/0 values are still written. Not referenced inside this record.
    /// </summary>
    private static readonly JsonSerializerOptions JsonSerializationOptions = new()
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>Preset name (JSON field <c>name</c>).</summary>
    [JsonPropertyName("name")]
    public required string Name { get; init; }

    /// <summary>Chunk size of the preset (JSON field <c>chunk_size</c>); the unit is not stated in this file.</summary>
    [JsonPropertyName("chunk_size")]
    public ulong ChunkSize { get; init; }

    /// <summary>Chunk overlap of the preset (JSON field <c>overlap</c>); the unit is not stated in this file.</summary>
    [JsonPropertyName("overlap")]
    public ulong Overlap { get; init; }

    /// <summary>
    /// HuggingFace repository name for the model.
    /// </summary>
    [JsonPropertyName("model_repo")]
    public required string ModelRepo { get; init; }

    /// <summary>
    /// Pooling strategy: "cls" or "mean".
    /// </summary>
    [JsonPropertyName("pooling")]
    public required string Pooling { get; init; }

    /// <summary>
    /// Path to the ONNX model file within the repo.
    /// </summary>
    [JsonPropertyName("model_file")]
    public required string ModelFile { get; init; }

    /// <summary>Value of the JSON field <c>dimensions</c>.</summary>
    [JsonPropertyName("dimensions")]
    public ulong Dimensions { get; init; }

    /// <summary>Value of the JSON field <c>description</c>.</summary>
    [JsonPropertyName("description")]
    public required string Description { get; init; }

    /// <summary>
    /// Parse an <see cref="EmbeddingPreset"/> from a JSON string.
    /// </summary>
    /// <param name="json">JSON text describing the preset.</param>
    /// <returns>The deserialized instance; never null.</returns>
    /// <exception cref="KreuzbergException">When the JSON cannot be deserialised.</exception>
    public static EmbeddingPreset FromJson(string json)
    {
        EmbeddingPreset? parsed;
        try
        {
            parsed = JsonSerializer.Deserialize<EmbeddingPreset>(json, JsonOptions);
        }
        catch (Exception cause) when (cause is not KreuzbergException)
        {
            // Anything that is not already a KreuzbergException is wrapped so callers see one error type.
            throw new KreuzbergException($"Failed to parse EmbeddingPreset from JSON: {cause.Message}", cause);
        }

        return parsed
            ?? throw new KreuzbergException("Failed to parse EmbeddingPreset from JSON: deserializer returned null");
    }
}
|
||||
74
packages/csharp/src/Kreuzberg/EpubMetadata.cs
generated
Normal file
74
packages/csharp/src/Kreuzberg/EpubMetadata.cs
generated
Normal file
@@ -0,0 +1,74 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// EPUB metadata (Dublin Core extensions).
/// </summary>
/// <remarks>Every field is optional and defaults to <c>null</c>.</remarks>
public sealed record EpubMetadata
{
    /// <summary>
    /// Settings used by <see cref="FromJson"/>; enum values are mapped to snake_case strings.
    /// </summary>
    private static readonly JsonSerializerOptions JsonOptions = new()
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>
    /// Settings intended for serializing config/input objects to FFI: null members are
    /// omitted while explicit false/0 values are still written. Not referenced inside this record.
    /// </summary>
    private static readonly JsonSerializerOptions JsonSerializationOptions = new()
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>Value of the JSON field <c>coverage</c>, when present.</summary>
    [JsonPropertyName("coverage")]
    public string? Coverage { get; init; }

    /// <summary>Value of the JSON field <c>dc_format</c>, when present.</summary>
    [JsonPropertyName("dc_format")]
    public string? DcFormat { get; init; }

    /// <summary>Value of the JSON field <c>relation</c>, when present.</summary>
    [JsonPropertyName("relation")]
    public string? Relation { get; init; }

    /// <summary>Value of the JSON field <c>source</c>, when present.</summary>
    [JsonPropertyName("source")]
    public string? Source { get; init; }

    /// <summary>Value of the JSON field <c>dc_type</c>, when present.</summary>
    [JsonPropertyName("dc_type")]
    public string? DcType { get; init; }

    /// <summary>Value of the JSON field <c>cover_image</c>, when present.</summary>
    [JsonPropertyName("cover_image")]
    public string? CoverImage { get; init; }

    /// <summary>
    /// Parse an <see cref="EpubMetadata"/> from a JSON string.
    /// </summary>
    /// <param name="json">JSON text describing the metadata.</param>
    /// <returns>The deserialized instance; never null.</returns>
    /// <exception cref="KreuzbergException">When the JSON cannot be deserialised.</exception>
    public static EpubMetadata FromJson(string json)
    {
        EpubMetadata? parsed;
        try
        {
            parsed = JsonSerializer.Deserialize<EpubMetadata>(json, JsonOptions);
        }
        catch (Exception cause) when (cause is not KreuzbergException)
        {
            // Anything that is not already a KreuzbergException is wrapped so callers see one error type.
            throw new KreuzbergException($"Failed to parse EpubMetadata from JSON: {cause.Message}", cause);
        }

        return parsed
            ?? throw new KreuzbergException("Failed to parse EpubMetadata from JSON: deserializer returned null");
    }
}
|
||||
62
packages/csharp/src/Kreuzberg/ErrorMetadata.cs
generated
Normal file
62
packages/csharp/src/Kreuzberg/ErrorMetadata.cs
generated
Normal file
@@ -0,0 +1,62 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// Error metadata (for batch operations).
/// </summary>
public sealed record ErrorMetadata
{
    /// <summary>
    /// Settings used by <see cref="FromJson"/>; enum values are mapped to snake_case strings.
    /// </summary>
    private static readonly JsonSerializerOptions JsonOptions = new()
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>
    /// Settings intended for serializing config/input objects to FFI: null members are
    /// omitted while explicit false/0 values are still written. Not referenced inside this record.
    /// </summary>
    private static readonly JsonSerializerOptions JsonSerializationOptions = new()
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>Value of the JSON field <c>error_type</c>.</summary>
    [JsonPropertyName("error_type")]
    public required string ErrorType { get; init; }

    /// <summary>Value of the JSON field <c>message</c>.</summary>
    [JsonPropertyName("message")]
    public required string Message { get; init; }

    /// <summary>
    /// Parse an <see cref="ErrorMetadata"/> from a JSON string.
    /// </summary>
    /// <param name="json">JSON text describing the error.</param>
    /// <returns>The deserialized instance; never null.</returns>
    /// <exception cref="KreuzbergException">When the JSON cannot be deserialised.</exception>
    public static ErrorMetadata FromJson(string json)
    {
        ErrorMetadata? parsed;
        try
        {
            parsed = JsonSerializer.Deserialize<ErrorMetadata>(json, JsonOptions);
        }
        catch (Exception cause) when (cause is not KreuzbergException)
        {
            // Anything that is not already a KreuzbergException is wrapped so callers see one error type.
            throw new KreuzbergException($"Failed to parse ErrorMetadata from JSON: {cause.Message}", cause);
        }

        return parsed
            ?? throw new KreuzbergException("Failed to parse ErrorMetadata from JSON: deserializer returned null");
    }
}
|
||||
71
packages/csharp/src/Kreuzberg/ExcelMetadata.cs
generated
Normal file
71
packages/csharp/src/Kreuzberg/ExcelMetadata.cs
generated
Normal file
@@ -0,0 +1,71 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// Excel/spreadsheet format metadata.
///
/// Identifies the document as a spreadsheet source via the `FormatMetadata.Excel`
/// discriminant. Sheet count and sheet names are stored inside this struct.
/// </summary>
public sealed record ExcelMetadata
{
    /// <summary>
    /// Settings used by <see cref="FromJson"/>; enum values are mapped to snake_case strings.
    /// </summary>
    private static readonly JsonSerializerOptions JsonOptions = new()
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>
    /// Settings intended for serializing config/input objects to FFI: null members are
    /// omitted while explicit false/0 values are still written. Not referenced inside this record.
    /// </summary>
    private static readonly JsonSerializerOptions JsonSerializationOptions = new()
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>
    /// Number of sheets in the workbook.
    /// </summary>
    [JsonPropertyName("sheet_count")]
    public uint? SheetCount { get; init; }

    /// <summary>
    /// Names of all sheets in the workbook.
    /// </summary>
    [JsonPropertyName("sheet_names")]
    public List<string>? SheetNames { get; init; }

    /// <summary>
    /// Parse an <see cref="ExcelMetadata"/> from a JSON string.
    /// </summary>
    /// <param name="json">JSON text describing the metadata.</param>
    /// <returns>The deserialized instance; never null.</returns>
    /// <exception cref="KreuzbergException">When the JSON cannot be deserialised.</exception>
    public static ExcelMetadata FromJson(string json)
    {
        ExcelMetadata? parsed;
        try
        {
            parsed = JsonSerializer.Deserialize<ExcelMetadata>(json, JsonOptions);
        }
        catch (Exception cause) when (cause is not KreuzbergException)
        {
            // Anything that is not already a KreuzbergException is wrapped so callers see one error type.
            throw new KreuzbergException($"Failed to parse ExcelMetadata from JSON: {cause.Message}", cause);
        }

        return parsed
            ?? throw new KreuzbergException("Failed to parse ExcelMetadata from JSON: deserializer returned null");
    }
}
|
||||
97
packages/csharp/src/Kreuzberg/ExcelSheet.cs
generated
Normal file
97
packages/csharp/src/Kreuzberg/ExcelSheet.cs
generated
Normal file
@@ -0,0 +1,97 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// One worksheet taken from an Excel workbook.
///
/// Carries the sheet's Markdown rendering together with its row, column and
/// cell counts, plus the optional pre-extracted cell grid.
/// </summary>
public sealed record ExcelSheet
{
    /// <summary>
    /// Name of the sheet, as shown in Excel.
    /// </summary>
    [JsonPropertyName("name")]
    public required string Name { get; init; }

    /// <summary>
    /// The sheet's content rendered as Markdown tables.
    /// </summary>
    [JsonPropertyName("markdown")]
    public required string Markdown { get; init; }

    /// <summary>
    /// Row count. Zero when not supplied.
    /// </summary>
    [JsonPropertyName("row_count")]
    public ulong RowCount { get; init; }

    /// <summary>
    /// Column count. Zero when not supplied.
    /// </summary>
    [JsonPropertyName("col_count")]
    public ulong ColCount { get; init; }

    /// <summary>
    /// Total count of non-empty cells. Zero when not supplied.
    /// </summary>
    [JsonPropertyName("cell_count")]
    public ulong CellCount { get; init; }

    /// <summary>
    /// Pre-extracted table cells as a two-dimensional list of cell values.
    /// Filled in while the Markdown is generated so consumers need not re-parse it.
    /// <c>null</c> for empty sheets.
    /// </summary>
    [JsonPropertyName("table_cells")]
    public List<List<string>>? TableCells { get; init; }

    /// <summary>
    /// Deserialise an <see cref="ExcelSheet"/> from its JSON text.
    /// </summary>
    /// <param name="json">JSON document describing the sheet.</param>
    /// <returns>The parsed sheet; never <c>null</c>.</returns>
    /// <exception cref="KreuzbergException">
    /// When deserialisation fails or yields <c>null</c>. The underlying exception,
    /// if any, is attached as the inner exception.
    /// </exception>
    public static ExcelSheet FromJson(string json)
    {
        ExcelSheet? parsed;
        try
        {
            parsed = JsonSerializer.Deserialize<ExcelSheet>(json, JsonOptions);
        }
        // A KreuzbergException raised during deserialisation passes through untouched;
        // every other failure is wrapped.
        catch (Exception failure) when (failure is not KreuzbergException)
        {
            throw new KreuzbergException($"Failed to parse ExcelSheet from JSON: {failure.Message}", failure);
        }

        return parsed
            ?? throw new KreuzbergException("Failed to parse ExcelSheet from JSON: deserializer returned null");
    }

    /// <summary>Options used by <see cref="FromJson"/>: snake_case enum names, default values skipped on write.</summary>
    private static readonly JsonSerializerOptions JsonOptions = new JsonSerializerOptions
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>
    /// Options for serializing config/input objects to FFI. Nulls are stripped
    /// (nullable C# fields default to null and would otherwise override required Rust
    /// fields with non-deserialisable nulls), while explicit false/0 values are kept so
    /// caller intent survives.
    /// </summary>
    private static readonly JsonSerializerOptions JsonSerializationOptions = new JsonSerializerOptions
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };
}
|
||||
84
packages/csharp/src/Kreuzberg/ExcelWorkbook.cs
generated
Normal file
84
packages/csharp/src/Kreuzberg/ExcelWorkbook.cs
generated
Normal file
@@ -0,0 +1,84 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// Excel workbook representation.
///
/// Contains all sheets from an Excel file (.xlsx, .xls, etc.) with
/// extracted content and metadata.
/// </summary>
public sealed record ExcelWorkbook
{
    /// <summary>
    /// All sheets in the workbook. Empty when the JSON carries no <c>sheets</c> key.
    /// </summary>
    [JsonPropertyName("sheets")]
    public List<ExcelSheet> Sheets { get; init; } = [];

    /// <summary>
    /// Workbook-level metadata (author, creation date, etc.).
    /// Defaults to an empty dictionary so that this non-nullable property is not
    /// <c>null</c> when the JSON omits the <c>metadata</c> key.
    /// </summary>
    [JsonPropertyName("metadata")]
    public Dictionary<string, string> Metadata { get; init; } = new();

    /// <summary>
    /// Collaborative-edit revision headers from <c>xl/revisions/revisionHeaders.xml</c>.
    ///
    /// Populated for legacy shared-workbook <c>.xlsx</c> files that contain the
    /// <c>xl/revisions/</c> directory. Each <c>&lt;header&gt;</c> element maps to one
    /// <c>DocumentRevision { kind: FormatChange }</c> carrying the header's <c>guid</c>
    /// (→ <c>revision_id</c>), <c>userName</c> (→ <c>author</c>), and <c>dateTime</c>
    /// (→ <c>timestamp</c>). <c>anchor</c> and <c>delta</c> are empty for v1 (per-cell
    /// log parsing is a follow-up). <c>null</c> when
    /// <c>xl/revisions/revisionHeaders.xml</c> is absent.
    /// </summary>
    [JsonPropertyName("revisions")]
    public List<DocumentRevision>? Revisions { get; init; }

    /// <summary>
    /// Parse a <see cref="ExcelWorkbook"/> from a JSON string.
    /// </summary>
    /// <param name="json">JSON document describing the workbook.</param>
    /// <returns>The parsed workbook; never <c>null</c>.</returns>
    /// <exception cref="KreuzbergException">
    /// When the JSON cannot be deserialised or deserialises to <c>null</c>. The
    /// underlying exception, if any, is attached as the inner exception.
    /// </exception>
    public static ExcelWorkbook FromJson(string json)
    {
        ExcelWorkbook? workbook;
        try
        {
            workbook = JsonSerializer.Deserialize<ExcelWorkbook>(json, JsonOptions);
        }
        // Let a KreuzbergException raised during deserialisation propagate unchanged;
        // wrap anything else so callers only ever see KreuzbergException.
        catch (Exception e) when (e is not KreuzbergException)
        {
            throw new KreuzbergException($"Failed to parse ExcelWorkbook from JSON: {e.Message}", e);
        }

        return workbook
            ?? throw new KreuzbergException("Failed to parse ExcelWorkbook from JSON: deserializer returned null");
    }

    /// <summary>Options used by <see cref="FromJson"/>: snake_case enum names, default values skipped on write.</summary>
    private static readonly JsonSerializerOptions JsonOptions = new()
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>Options for serializing config/input objects to FFI. Strips nulls
    /// (nullable C# fields default to null and would override required Rust fields with
    /// non-deserialisable nulls) but preserves explicit false/0 so caller intent is kept.</summary>
    private static readonly JsonSerializerOptions JsonSerializationOptions = new()
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };
}
|
||||
79
packages/csharp/src/Kreuzberg/ExecutionProviderType.cs
generated
Normal file
79
packages/csharp/src/Kreuzberg/ExecutionProviderType.cs
generated
Normal file
@@ -0,0 +1,79 @@
|
||||
// This file is auto-generated by alef. DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
/// <summary>
/// Execution provider for ONNX Runtime, i.e. the hardware backend that runs
/// model inference.
///
/// <see cref="Auto"/> is the default and picks the best provider available on
/// the current platform. JSON conversion is handled by
/// <see cref="ExecutionProviderTypeJsonConverter"/>.
/// </summary>
[JsonConverter(typeof(ExecutionProviderTypeJsonConverter))]
public enum ExecutionProviderType
{
    /// <summary>
    /// Automatic selection: CoreML on macOS, CUDA on Linux, CPU everywhere else.
    /// </summary>
    [JsonPropertyName("auto")]
    Auto = 0,

    /// <summary>
    /// The CPU provider, which is always available.
    /// </summary>
    [JsonPropertyName("cpu")]
    Cpu = 1,

    /// <summary>
    /// Apple CoreML (Neural Engine and GPU on macOS/iOS).
    /// </summary>
    [JsonPropertyName("coreml")]
    CoreMl = 2,

    /// <summary>
    /// GPU acceleration through NVIDIA CUDA.
    /// </summary>
    [JsonPropertyName("cuda")]
    Cuda = 3,

    /// <summary>
    /// NVIDIA TensorRT (optimized CUDA inference).
    /// </summary>
    [JsonPropertyName("tensorrt")]
    TensorRt = 4,
}
|
||||
|
||||
|
||||
/// <summary>
/// JSON converter for <see cref="ExecutionProviderType"/> that maps each member to
/// and from its explicit lowercase wire name
/// (<c>auto</c>, <c>cpu</c>, <c>coreml</c>, <c>cuda</c>, <c>tensorrt</c>).
/// </summary>
internal sealed class ExecutionProviderTypeJsonConverter : JsonConverter<ExecutionProviderType>
{
    /// <summary>
    /// Reads the current token as a string and maps it to the matching enum member.
    /// </summary>
    /// <exception cref="JsonException">When the string is not one of the known wire names.</exception>
    public override ExecutionProviderType Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options)
    {
        string? wireName = reader.GetString();
        switch (wireName)
        {
            case "auto":
                return ExecutionProviderType.Auto;
            case "cpu":
                return ExecutionProviderType.Cpu;
            case "coreml":
                return ExecutionProviderType.CoreMl;
            case "cuda":
                return ExecutionProviderType.Cuda;
            case "tensorrt":
                return ExecutionProviderType.TensorRt;
            default:
                throw new JsonException($"Unknown ExecutionProviderType value: {wireName}");
        }
    }

    /// <summary>
    /// Writes the wire name of <paramref name="value"/> as a JSON string.
    /// </summary>
    /// <exception cref="JsonException">When <paramref name="value"/> is not a declared member.</exception>
    public override void Write(Utf8JsonWriter writer, ExecutionProviderType value, JsonSerializerOptions options)
    {
        // The name is resolved first, so nothing is written when the value is unknown.
        string wireName = ToWireName(value);
        writer.WriteStringValue(wireName);
    }

    /// <summary>Maps an enum member to its wire name, throwing for undeclared values.</summary>
    private static string ToWireName(ExecutionProviderType value)
    {
        switch (value)
        {
            case ExecutionProviderType.Auto:
                return "auto";
            case ExecutionProviderType.Cpu:
                return "cpu";
            case ExecutionProviderType.CoreMl:
                return "coreml";
            case ExecutionProviderType.Cuda:
                return "cuda";
            case ExecutionProviderType.TensorRt:
                return "tensorrt";
            default:
                throw new JsonException($"Unknown ExecutionProviderType value: {value}");
        }
    }
}
|
||||
166
packages/csharp/src/Kreuzberg/ExtractedImage.cs
generated
Normal file
166
packages/csharp/src/Kreuzberg/ExtractedImage.cs
generated
Normal file
@@ -0,0 +1,166 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// An image pulled out of a document.
///
/// Holds the raw image bytes, descriptive metadata, and optionally the nested
/// result of running OCR on the image. Exposing raw bytes keeps the type
/// language-neutral: callers can hand them to whatever imaging library they use.
/// </summary>
public sealed record ExtractedImage
{
    /// <summary>
    /// Raw image bytes (PNG, JPEG, WebP, etc.). Converted to and from JSON by
    /// <see cref="ByteArrayToIntArrayConverter"/>. Per the upstream documentation the
    /// native side keeps this as a <c>Bytes</c> buffer for cheap cloning of large buffers.
    /// </summary>
    [JsonConverter(typeof(ByteArrayToIntArrayConverter))]
    [JsonPropertyName("data")]
    public byte[] Data { get; init; } = [];

    /// <summary>
    /// Image format, e.g. "jpeg", "png", "webp". Per the upstream documentation the
    /// native side uses a <c>Cow</c> string here to avoid allocation for static literals.
    /// </summary>
    [JsonPropertyName("format")]
    public required string Format { get; init; }

    /// <summary>
    /// Zero-indexed position of the image within the document/page.
    /// </summary>
    [JsonPropertyName("image_index")]
    public uint ImageIndex { get; init; }

    /// <summary>
    /// One-indexed page or slide number on which the image was found.
    /// </summary>
    [JsonPropertyName("page_number")]
    public uint? PageNumber { get; init; }

    /// <summary>
    /// Width of the image in pixels.
    /// </summary>
    [JsonPropertyName("width")]
    public uint? Width { get; init; }

    /// <summary>
    /// Height of the image in pixels.
    /// </summary>
    [JsonPropertyName("height")]
    public uint? Height { get; init; }

    /// <summary>
    /// Colorspace information, e.g. "RGB", "CMYK", "Gray".
    /// </summary>
    [JsonPropertyName("colorspace")]
    public string? Colorspace { get; init; }

    /// <summary>
    /// Bits per color component, e.g. 8 or 16.
    /// </summary>
    [JsonPropertyName("bits_per_component")]
    public uint? BitsPerComponent { get; init; }

    /// <summary>
    /// Whether the image is a mask image.
    /// </summary>
    [JsonPropertyName("is_mask")]
    public bool IsMask { get; init; }

    /// <summary>
    /// Optional description of the image.
    /// </summary>
    [JsonPropertyName("description")]
    public string? Description { get; init; }

    /// <summary>
    /// Nested OCR extraction result, present when the image was OCRed.
    ///
    /// The result is embedded here instead of in a separate collection so that the
    /// link between an image and its OCR output is explicit.
    /// </summary>
    [JsonPropertyName("ocr_result")]
    public ExtractionResult? OcrResult { get; init; }

    /// <summary>
    /// Bounding box of the image on the page, in PDF coordinates
    /// (x0=left, y0=bottom, x1=right, y1=top). Only populated for PDF-extracted
    /// images when the PDF extractor provides position data.
    /// </summary>
    [JsonPropertyName("bounding_box")]
    public BoundingBox? BoundingBox { get; init; }

    /// <summary>
    /// Original path of the image inside the document archive
    /// (e.g. "media/image1.png" in DOCX). Used to render image references when the
    /// binary data is not extracted.
    /// </summary>
    [JsonPropertyName("source_path")]
    public string? SourcePath { get; init; }

    /// <summary>
    /// Heuristic classification of what the image likely depicts.
    /// <c>null</c> when classification was disabled or inconclusive.
    /// </summary>
    [JsonPropertyName("image_kind")]
    public ImageKind? ImageKind { get; init; }

    /// <summary>
    /// Confidence score for <see cref="ImageKind"/>, in the range 0.0 to 1.0.
    /// </summary>
    [JsonPropertyName("kind_confidence")]
    public float? KindConfidence { get; init; }

    /// <summary>
    /// Identifier shared by images that together form one logical figure
    /// (e.g. all raster tiles of one technical drawing). <c>null</c> for singletons.
    /// </summary>
    [JsonPropertyName("cluster_id")]
    public uint? ClusterId { get; init; }

    /// <summary>
    /// Deserialise an <see cref="ExtractedImage"/> from its JSON text.
    /// </summary>
    /// <param name="json">JSON document describing the image.</param>
    /// <returns>The parsed image; never <c>null</c>.</returns>
    /// <exception cref="KreuzbergException">
    /// When deserialisation fails or yields <c>null</c>. The underlying exception,
    /// if any, is attached as the inner exception.
    /// </exception>
    public static ExtractedImage FromJson(string json)
    {
        ExtractedImage? parsed;
        try
        {
            parsed = JsonSerializer.Deserialize<ExtractedImage>(json, JsonOptions);
        }
        // A KreuzbergException raised during deserialisation passes through untouched;
        // every other failure is wrapped.
        catch (Exception failure) when (failure is not KreuzbergException)
        {
            throw new KreuzbergException($"Failed to parse ExtractedImage from JSON: {failure.Message}", failure);
        }

        return parsed
            ?? throw new KreuzbergException("Failed to parse ExtractedImage from JSON: deserializer returned null");
    }

    /// <summary>Options used by <see cref="FromJson"/>: snake_case enum names, default values skipped on write.</summary>
    private static readonly JsonSerializerOptions JsonOptions = new JsonSerializerOptions
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>
    /// Options for serializing config/input objects to FFI. Nulls are stripped
    /// (nullable C# fields default to null and would otherwise override required Rust
    /// fields with non-deserialisable nulls), while explicit false/0 values are kept so
    /// caller intent survives.
    /// </summary>
    private static readonly JsonSerializerOptions JsonSerializationOptions = new JsonSerializerOptions
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };
}
|
||||
84
packages/csharp/src/Kreuzberg/ExtractedUri.cs
generated
Normal file
84
packages/csharp/src/Kreuzberg/ExtractedUri.cs
generated
Normal file
@@ -0,0 +1,84 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// A URI found in a document during extraction.
///
/// Covers any link, reference, or resource pointer. <see cref="Kind"/> gives the
/// semantic classification of the URI and <see cref="Label"/> holds optional
/// human-readable display text.
/// </summary>
public sealed record ExtractedUri
{
    /// <summary>
    /// The URL or path string.
    /// </summary>
    [JsonPropertyName("url")]
    public required string Url { get; init; }

    /// <summary>
    /// Display text / label of the link, if any.
    /// </summary>
    [JsonPropertyName("label")]
    public string? Label { get; init; }

    /// <summary>
    /// One-indexed page number on which the URI was found, if known.
    /// </summary>
    [JsonPropertyName("page")]
    public uint? Page { get; init; }

    /// <summary>
    /// Semantic classification of the URI.
    /// </summary>
    [JsonPropertyName("kind")]
    public required UriKind Kind { get; init; }

    /// <summary>
    /// Deserialise an <see cref="ExtractedUri"/> from its JSON text.
    /// </summary>
    /// <param name="json">JSON document describing the URI.</param>
    /// <returns>The parsed URI record; never <c>null</c>.</returns>
    /// <exception cref="KreuzbergException">
    /// When deserialisation fails or yields <c>null</c>. The underlying exception,
    /// if any, is attached as the inner exception.
    /// </exception>
    public static ExtractedUri FromJson(string json)
    {
        ExtractedUri? parsed;
        try
        {
            parsed = JsonSerializer.Deserialize<ExtractedUri>(json, JsonOptions);
        }
        // A KreuzbergException raised during deserialisation passes through untouched;
        // every other failure is wrapped.
        catch (Exception failure) when (failure is not KreuzbergException)
        {
            throw new KreuzbergException($"Failed to parse ExtractedUri from JSON: {failure.Message}", failure);
        }

        return parsed
            ?? throw new KreuzbergException("Failed to parse ExtractedUri from JSON: deserializer returned null");
    }

    /// <summary>Options used by <see cref="FromJson"/>: snake_case enum names, default values skipped on write.</summary>
    private static readonly JsonSerializerOptions JsonOptions = new JsonSerializerOptions
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>
    /// Options for serializing config/input objects to FFI. Nulls are stripped
    /// (nullable C# fields default to null and would otherwise override required Rust
    /// fields with non-deserialisable nulls), while explicit false/0 values are kept so
    /// caller intent survives.
    /// </summary>
    private static readonly JsonSerializerOptions JsonSerializationOptions = new JsonSerializerOptions
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };
}
|
||||
405
packages/csharp/src/Kreuzberg/ExtractionConfig.cs
generated
Normal file
405
packages/csharp/src/Kreuzberg/ExtractionConfig.cs
generated
Normal file
@@ -0,0 +1,405 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
|
||||
/// Main extraction configuration.
|
||||
///
|
||||
/// This struct contains all configuration options for the extraction process.
|
||||
/// It can be loaded from TOML, YAML, or JSON files, or created programmatically.
|
||||
/// </summary>
|
||||
public sealed record ExtractionConfig
|
||||
{
|
||||
/// <summary>
|
||||
/// Enable caching of extraction results
|
||||
/// </summary>
|
||||
[JsonPropertyName("use_cache")]
|
||||
public bool UseCache { get; init; } = true;
|
||||
|
||||
/// <summary>
|
||||
/// Enable quality post-processing
|
||||
/// </summary>
|
||||
[JsonPropertyName("enable_quality_processing")]
|
||||
public bool EnableQualityProcessing { get; init; } = true;
|
||||
|
||||
/// <summary>
|
||||
/// OCR configuration (null = OCR disabled)
|
||||
/// </summary>
|
||||
[JsonPropertyName("ocr")]
|
||||
public OcrConfig? Ocr { get; init; } = null;
|
||||
|
||||
/// <summary>
|
||||
/// Force OCR even for searchable PDFs
|
||||
/// </summary>
|
||||
[JsonPropertyName("force_ocr")]
|
||||
public bool ForceOcr { get; init; } = false;
|
||||
|
||||
/// <summary>
|
||||
/// Force OCR on specific pages only (1-indexed page numbers, must be >= 1).
|
||||
///
|
||||
/// When set, only the listed pages are OCR'd regardless of text layer quality.
|
||||
/// Unlisted pages use native text extraction. Ignored when `force_ocr` is `true`.
|
||||
/// Only applies to PDF documents. Duplicates are automatically deduplicated.
|
||||
/// An `ocr` config is recommended for backend/language selection; defaults are used if absent.
|
||||
/// </summary>
|
||||
[JsonPropertyName("force_ocr_pages")]
|
||||
public List<uint>? ForceOcrPages { get; init; } = null;
|
||||
|
||||
/// <summary>
|
||||
/// Disable OCR entirely, even for images.
|
||||
///
|
||||
/// When `true`, OCR is skipped for all document types. Images return metadata
|
||||
/// only (dimensions, format, EXIF) without text extraction. PDFs use only
|
||||
/// native text extraction without OCR fallback.
|
||||
///
|
||||
/// Cannot be `true` simultaneously with `force_ocr`.
|
||||
///
|
||||
/// *Added in v4.7.0.*
|
||||
/// </summary>
|
||||
[JsonPropertyName("disable_ocr")]
|
||||
public bool DisableOcr { get; init; } = false;
|
||||
|
||||
/// <summary>
|
||||
/// Text chunking configuration (null = chunking disabled)
|
||||
/// </summary>
|
||||
[JsonPropertyName("chunking")]
|
||||
public ChunkingConfig? Chunking { get; init; } = null;
|
||||
|
||||
/// <summary>
|
||||
/// Content filtering configuration (null = use extractor defaults).
|
||||
///
|
||||
/// Controls whether document "furniture" (headers, footers, watermarks,
|
||||
/// repeating text) is included in or stripped from extraction results.
|
||||
/// See `ContentFilterConfig` for per-field documentation.
|
||||
/// </summary>
|
||||
[JsonPropertyName("content_filter")]
|
||||
public ContentFilterConfig? ContentFilter { get; init; } = null;
|
||||
|
||||
/// <summary>
|
||||
/// Image extraction configuration (null = no image extraction)
|
||||
/// </summary>
|
||||
[JsonPropertyName("images")]
|
||||
public ImageExtractionConfig? Images { get; init; } = null;
|
||||
|
||||
/// <summary>
|
||||
/// PDF-specific options (null = use defaults)
|
||||
/// </summary>
|
||||
[JsonPropertyName("pdf_options")]
|
||||
public PdfConfig? PdfOptions { get; init; } = null;
|
||||
|
||||
/// <summary>
|
||||
/// Token reduction configuration (null = no token reduction)
|
||||
/// </summary>
|
||||
[JsonPropertyName("token_reduction")]
|
||||
public TokenReductionOptions? TokenReduction { get; init; } = null;
|
||||
|
||||
/// <summary>
|
||||
/// Language detection configuration (null = no language detection)
|
||||
/// </summary>
|
||||
[JsonPropertyName("language_detection")]
|
||||
public LanguageDetectionConfig? LanguageDetection { get; init; } = null;
|
||||
|
||||
/// <summary>
|
||||
/// Page extraction configuration (null = no page tracking)
|
||||
/// </summary>
|
||||
[JsonPropertyName("pages")]
|
||||
public PageConfig? Pages { get; init; } = null;
|
||||
|
||||
/// <summary>
|
||||
/// Keyword extraction configuration (null = no keyword extraction)
|
||||
/// </summary>
|
||||
[JsonPropertyName("keywords")]
|
||||
public KeywordConfig? Keywords { get; init; } = null;
|
||||
|
||||
/// <summary>
|
||||
/// Post-processor configuration (null = use defaults)
|
||||
/// </summary>
|
||||
[JsonPropertyName("postprocessor")]
|
||||
public PostProcessorConfig? Postprocessor { get; init; } = null;
|
||||
|
||||
/// <summary>
|
||||
/// HTML to Markdown conversion options (null = use defaults)
|
||||
///
|
||||
/// Configure how HTML documents are converted to Markdown, including heading styles,
|
||||
/// list formatting, code block styles, and preprocessing options.
|
||||
/// </summary>
|
||||
[JsonPropertyName("html_options")]
|
||||
public string? HtmlOptions { get; init; } = null;
|
||||
|
||||
/// <summary>
|
||||
/// Styled HTML output configuration.
|
||||
///
|
||||
/// When set alongside `output_format = OutputFormat.Html`, the extraction
|
||||
/// pipeline uses `StyledHtmlRenderer`(crate.rendering.StyledHtmlRenderer)
|
||||
/// which emits stable `kb-*` CSS class hooks on every structural element
|
||||
/// and optionally embeds theme CSS or user-supplied CSS in a `<style>` block.
|
||||
///
|
||||
/// When `None`, the existing plain comrak-based HTML renderer is used.
|
||||
/// </summary>
|
||||
[JsonPropertyName("html_output")]
|
||||
public HtmlOutputConfig? HtmlOutput { get; init; } = null;
|
||||
|
||||
/// <summary>
|
||||
/// Default per-file timeout in seconds for batch extraction.
|
||||
///
|
||||
/// When set, each file in a batch will be canceled after this duration
|
||||
/// unless overridden by `FileExtractionConfig.timeout_secs`.
|
||||
///
|
||||
/// Defaults to `Some(60)` to prevent pathological files (e.g. deeply
|
||||
/// nested archives, documents with millions of cells) from running
|
||||
/// indefinitely and exhausting caller resources. Set to `None` to
|
||||
/// disable the timeout for trusted input or long-running workloads.
|
||||
/// </summary>
|
||||
[JsonPropertyName("extraction_timeout_secs")]
|
||||
public ulong? ExtractionTimeoutSecs { get; init; } = null;
|
||||
|
||||
/// <summary>
|
||||
/// Maximum concurrent extractions in batch operations (null = (num_cpus × 1.5).ceil()).
|
||||
///
|
||||
/// Limits parallelism to prevent resource exhaustion when processing
|
||||
/// large batches. Defaults to (num_cpus × 1.5).ceil() when not set.
|
||||
/// </summary>
|
||||
[JsonPropertyName("max_concurrent_extractions")]
|
||||
public ulong? MaxConcurrentExtractions { get; init; } = null;
|
||||
|
||||
/// <summary>
|
||||
/// Result structure format
|
||||
///
|
||||
/// Controls whether results are returned in unified format (default) with all
|
||||
/// content in the `content` field, or element-based format with semantic
|
||||
/// elements (for Unstructured-compatible output).
|
||||
/// </summary>
|
||||
[JsonPropertyName("result_format")]
|
||||
public ResultFormat? ResultFormat { get; init; } = null;
|
||||
|
||||
/// <summary>
|
||||
/// Security limits for archive extraction.
|
||||
///
|
||||
/// Controls maximum archive size, compression ratio, file count, and other
|
||||
/// security thresholds to prevent decompression bomb attacks. Also caps
|
||||
/// nesting depth, iteration count, entity / token length, total
|
||||
/// content size, and table cell count for every extraction path that
|
||||
/// ingests user-controlled bytes.
|
||||
/// When `None`, default limits are used.
|
||||
/// </summary>
|
||||
[JsonPropertyName("security_limits")]
|
||||
public SecurityLimits? SecurityLimits { get; init; } = null;
|
||||
|
||||
/// <summary>
|
||||
/// Maximum uncompressed size in bytes for a single embedded file before
|
||||
/// recursive extraction is attempted (default: 50 MiB).
|
||||
///
|
||||
/// Applies to embedded objects inside OOXML containers (DOCX, PPTX) and
|
||||
/// to email attachments processed via recursive extraction. Files that
|
||||
/// exceed this limit are skipped with a `ProcessingWarning` rather than
|
||||
/// passed to the extraction pipeline, preventing a single oversized
|
||||
/// embedded object from consuming unbounded memory or time.
|
||||
///
|
||||
/// Set to `None` to disable the per-embedded-file cap (falls back to
|
||||
/// `security_limits.max_archive_size` as the only guard).
|
||||
/// </summary>
|
||||
[JsonPropertyName("max_embedded_file_bytes")]
|
||||
public ulong? MaxEmbeddedFileBytes { get; init; } = null;
|
||||
|
||||
/// <summary>
|
||||
/// Content text format (default: Plain).
|
||||
///
|
||||
/// Controls the format of the extracted content:
|
||||
/// - `Plain`: Raw extracted text (default)
|
||||
/// - `Markdown`: Markdown formatted output
|
||||
/// - `Djot`: Djot markup format (requires djot feature)
|
||||
/// - `Html`: HTML formatted output
|
||||
///
|
||||
/// When set to a structured format, extraction results will include
|
||||
/// formatted output. The `formatted_content` field may be populated
|
||||
/// when format conversion is applied.
|
||||
/// </summary>
|
||||
[JsonPropertyName("output_format")]
|
||||
public OutputFormat OutputFormat { get; init; } = OutputFormat.Plain;
|
||||
|
||||
/// <summary>
|
||||
/// Layout detection configuration (null = layout detection disabled).
|
||||
///
|
||||
/// When set, PDF pages and images are analyzed for document structure
|
||||
/// (headings, code, formulas, tables, figures, etc.) using RT-DETR models
|
||||
/// via ONNX Runtime. For PDFs, layout hints override paragraph classification
|
||||
/// in the markdown pipeline. For images, per-region OCR is performed with
|
||||
/// markdown formatting based on detected layout classes.
|
||||
/// Requires the `layout-detection` feature to run inference; the field is
|
||||
/// present whenever the `layout-types` feature is active (which includes
|
||||
/// `layout-detection` as well as the no-ORT target groups).
|
||||
/// </summary>
|
||||
[JsonPropertyName("layout")]
|
||||
public LayoutDetectionConfig? Layout { get; init; } = null;
|
||||
|
||||
/// <summary>
|
||||
/// Run layout detection on the non-OCR PDF markdown path.
|
||||
///
|
||||
/// When `true` and `layout` is `Some(_)`, layout regions inform heading,
|
||||
/// table, list, and figure detection in the structure pipeline that would
|
||||
/// otherwise rely on font-clustering heuristics alone. Significantly
|
||||
/// improves SF1 (structural F1) at the cost of inference latency
|
||||
/// (~150-300ms/page CPU, ~20-50ms/page GPU). Default: `false`.
|
||||
/// Requires the `layout-detection` feature.
|
||||
/// </summary>
|
||||
[JsonPropertyName("use_layout_for_markdown")]
|
||||
public bool UseLayoutForMarkdown { get; init; } = false;
|
||||
|
||||
/// <summary>
|
||||
/// Enable structured document tree output.
|
||||
///
|
||||
/// When true, populates the `document` field on `ExtractionResult` with a
|
||||
/// hierarchical `DocumentStructure` containing heading-driven section nesting,
|
||||
/// table grids, content layer classification, and inline annotations.
|
||||
///
|
||||
/// Independent of `result_format` — can be combined with Unified or ElementBased.
|
||||
/// </summary>
|
||||
[JsonPropertyName("include_document_structure")]
|
||||
public bool IncludeDocumentStructure { get; init; } = false;
|
||||
|
||||
/// <summary>
|
||||
/// Hardware acceleration configuration for ONNX Runtime models.
|
||||
///
|
||||
/// Controls execution provider selection for layout detection and embedding
|
||||
/// models. When `None`, uses platform defaults (CoreML on macOS, CUDA on
|
||||
/// Linux, CPU on Windows).
|
||||
/// </summary>
|
||||
[JsonPropertyName("acceleration")]
|
||||
public AccelerationConfig? Acceleration { get; init; } = null;
|
||||
|
||||
/// <summary>
|
||||
/// Cache namespace for tenant isolation.
|
||||
///
|
||||
/// When set, cache entries are stored under `{cache_dir}/{namespace}/`.
|
||||
/// Must be alphanumeric, hyphens, or underscores only (max 64 chars).
|
||||
/// Different namespaces have isolated cache spaces on the same filesystem.
|
||||
/// </summary>
|
||||
[JsonPropertyName("cache_namespace")]
|
||||
public string? CacheNamespace { get; init; } = null;
|
||||
|
||||
/// <summary>
|
||||
/// Per-request cache TTL in seconds.
|
||||
///
|
||||
/// Overrides the global `max_age_days` for this specific extraction.
|
||||
/// When `0`, caching is completely skipped (no read or write).
|
||||
/// When `None`, the global TTL applies.
|
||||
/// </summary>
|
||||
[JsonPropertyName("cache_ttl_secs")]
|
||||
public ulong? CacheTtlSecs { get; init; } = null;
|
||||
|
||||
/// <summary>
|
||||
/// Email extraction configuration (null = use defaults).
|
||||
///
|
||||
/// Currently supports configuring the fallback codepage for MSG files
|
||||
/// that do not specify one. See `EmailConfig` for details.
|
||||
/// </summary>
|
||||
[JsonPropertyName("email")]
|
||||
public EmailConfig? Email { get; init; } = null;
|
||||
|
||||
/// <summary>
|
||||
/// Concurrency limits for constrained environments (null = use defaults).
|
||||
///
|
||||
/// Controls Rayon thread pool size, ONNX Runtime intra-op threads, and
|
||||
/// (when `max_concurrent_extractions` is unset) the batch concurrency
|
||||
/// semaphore. See `ConcurrencyConfig` for details.
|
||||
/// </summary>
|
||||
[JsonPropertyName("concurrency")]
|
||||
public string? Concurrency { get; init; } = null;
|
||||
|
||||
/// <summary>
/// Maximum recursion depth for archive extraction (default: 3).
/// Set to 0 to disable recursive extraction (legacy behavior).
/// </summary>
// The initializer mirrors the documented default. With an initializer of 0 a
// config created via `new ExtractionConfig()` would carry the "recursion
// disabled" value instead of the documented default of 3.
[JsonPropertyName("max_archive_depth")]
public ulong MaxArchiveDepth { get; init; } = 3;
|
||||
|
||||
/// <summary>
|
||||
/// Tree-sitter language pack configuration (null = tree-sitter disabled).
|
||||
///
|
||||
/// When set, enables code file extraction using tree-sitter parsers.
|
||||
/// Controls grammar download behavior and code analysis options.
|
||||
/// </summary>
|
||||
[JsonPropertyName("tree_sitter")]
|
||||
public TreeSitterConfig? TreeSitter { get; init; } = null;
|
||||
|
||||
/// <summary>
|
||||
/// Structured extraction via LLM (null = disabled).
|
||||
///
|
||||
/// When set, the extracted document content is sent to an LLM with the
|
||||
/// provided JSON schema. The structured response is stored in
|
||||
/// `ExtractionResult.structured_output`.
|
||||
/// </summary>
|
||||
[JsonPropertyName("structured_extraction")]
|
||||
public StructuredExtractionConfig? StructuredExtraction { get; init; } = null;
|
||||
|
||||
/// <summary>
/// Cancellation token for this extraction (null = no external cancellation).
///
/// Pass a `CancellationToken` clone here and call `CancellationToken.cancel`
/// from another thread / task to abort the extraction in progress. The extractor
/// checks the token at safe checkpoints (before lock acquisition, between pages,
/// between batch items) and returns `KreuzbergError.Cancelled` when set.
///
/// The field is excluded from serialization because `CancellationToken` is a
/// runtime handle, not a configuration value.
/// </summary>
// [JsonIgnore] enforces the "excluded from serialization" contract stated above;
// the previous [JsonPropertyName("cancel_token")] caused the value to be written
// to and read from JSON.
// NOTE(review): the property is typed as string?, which cannot carry a runtime
// handle — confirm the intended binding type with the generator.
[JsonIgnore]
public string? CancelToken { get; init; } = null;
|
||||
|
||||
|
||||
/// <summary>
/// Deserialises a JSON document into a <see cref="ExtractionConfig"/>.
/// </summary>
/// <param name="json">JSON text describing the configuration.</param>
/// <returns>The deserialised configuration; never <c>null</c>.</returns>
/// <exception cref="KreuzbergException">
/// When the JSON cannot be deserialised, or when the deserialiser yields <c>null</c>.
/// </exception>
public static ExtractionConfig FromJson(string json)
{
    ExtractionConfig? parsed;
    try
    {
        parsed = JsonSerializer.Deserialize<ExtractionConfig>(json, JsonOptions);
    }
    catch (Exception e) when (e is not KreuzbergException)
    {
        // Any failure other than our own exception type is wrapped so callers
        // only have to handle KreuzbergException.
        throw new KreuzbergException($"Failed to parse ExtractionConfig from JSON: {e.Message}", e);
    }

    if (parsed is null)
    {
        throw new KreuzbergException("Failed to parse ExtractionConfig from JSON: deserializer returned null");
    }

    return parsed;
}
|
||||
|
||||
private static readonly JsonSerializerOptions JsonOptions = new()
|
||||
{
|
||||
DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
|
||||
Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
|
||||
};
|
||||
|
||||
/// <summary>Options for serializing config/input objects to FFI. Strips nulls
|
||||
/// (nullable C# fields default to null and would override required Rust fields with
|
||||
/// non-deserialisable nulls) but preserves explicit false/0 so caller intent is kept.</summary>
|
||||
private static readonly JsonSerializerOptions JsonSerializationOptions = new()
|
||||
{
|
||||
DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
|
||||
Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
|
||||
};
|
||||
|
||||
/// <summary>
/// Builds an <see cref="ExtractionConfig"/> from the default configuration
/// reported by the native library.
/// </summary>
/// <returns>
/// The configuration deserialised from the JSON that the native library emits
/// for its default config; never <c>null</c>.
/// </returns>
/// <exception cref="KreuzbergException">
/// When the native library returns a null handle or a null JSON string, or when
/// the returned JSON cannot be deserialised.
/// </exception>
public static ExtractionConfig Default()
{
    var nativeConfig = NativeMethods.ExtractionConfigDefault();
    if (nativeConfig == global::System.IntPtr.Zero)
    {
        throw new KreuzbergException("ExtractionConfigDefault returned a null handle");
    }

    string? json;
    try
    {
        var jsonPtr = NativeMethods.ExtractionConfigToJson(nativeConfig);
        try
        {
            // PtrToStringUTF8 copies into a managed string, so the native
            // buffer can be released immediately afterwards.
            json = global::System.Runtime.InteropServices.Marshal.PtrToStringUTF8(jsonPtr);
        }
        finally
        {
            NativeMethods.FreeString(jsonPtr);
        }
    }
    finally
    {
        // Release the native config even if the JSON conversion throws.
        NativeMethods.ExtractionConfigFree(nativeConfig);
    }

    if (json is null)
    {
        throw new KreuzbergException("ExtractionConfigToJson returned a null string");
    }

    // FromJson wraps parse failures in KreuzbergException and rejects a null
    // result instead of returning it behind a null-forgiving operator.
    return FromJson(json);
}
|
||||
}
|
||||
102
packages/csharp/src/Kreuzberg/ExtractionDiff.cs
generated
Normal file
102
packages/csharp/src/Kreuzberg/ExtractionDiff.cs
generated
Normal file
@@ -0,0 +1,102 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// The complete diff between two `ExtractionResult` values (referred to as
/// `a` and `b` in the member documentation).
/// </summary>
public sealed record ExtractionDiff
{
    /// <summary>
    /// Unified-diff hunks for the `content` field.
    ///
    /// Empty when the content is identical.
    /// </summary>
    [JsonPropertyName("content_diff")]
    public List<DiffHunk> ContentDiff { get; init; } = [];

    /// <summary>
    /// Tables present in `b` but not in `a` (by index position, excess right-side tables).
    /// </summary>
    [JsonPropertyName("tables_added")]
    public List<Table> TablesAdded { get; init; } = [];

    /// <summary>
    /// Tables present in `a` but not in `b` (by index position, excess left-side tables).
    /// </summary>
    [JsonPropertyName("tables_removed")]
    public List<Table> TablesRemoved { get; init; } = [];

    /// <summary>
    /// Cell-level changes for table pairs that share the same index and dimensions.
    /// </summary>
    [JsonPropertyName("tables_changed")]
    public List<TableDiff> TablesChanged { get; init; } = [];

    /// <summary>
    /// Metadata difference, encoded as a JSON object with three top-level keys:
    /// `added` (keys present in `b` but not `a`), `removed` (keys present in `a`
    /// but not `b`), and `changed` (keys whose values differ — each entry is
    /// `{ "from": &lt;value-in-a&gt;, "to": &lt;value-in-b&gt; }`).
    ///
    /// This is NOT RFC 6902 JSON Patch — we deliberately chose a flatter shape
    /// to avoid pulling in a json-patch crate. If you need RFC 6902 semantics
    /// (with JSON Pointer paths) feed `a.metadata` and `b.metadata` to your
    /// preferred json-patch impl directly.
    /// </summary>
    // NOTE(review): the property is a string although the payload is described
    // as a JSON object — confirm the native side emits it as a JSON string.
    [JsonPropertyName("metadata_changed")]
    public required string MetadataChanged { get; init; }

    /// <summary>
    /// Changes to embedded archive children.
    /// </summary>
    [JsonPropertyName("embedded_changes")]
    public required EmbeddedChanges EmbeddedChanges { get; init; }

    /// <summary>
    /// Deserialises a JSON document into a <see cref="ExtractionDiff"/>.
    /// </summary>
    /// <param name="json">JSON text describing the diff.</param>
    /// <returns>The deserialised diff; never <c>null</c>.</returns>
    /// <exception cref="KreuzbergException">
    /// When the JSON cannot be deserialised, or when the deserialiser yields <c>null</c>.
    /// </exception>
    public static ExtractionDiff FromJson(string json)
    {
        ExtractionDiff? parsed;
        try
        {
            parsed = JsonSerializer.Deserialize<ExtractionDiff>(json, JsonOptions);
        }
        catch (Exception e) when (e is not KreuzbergException)
        {
            // Wrap everything except our own exception type so callers only
            // have to handle KreuzbergException.
            throw new KreuzbergException($"Failed to parse ExtractionDiff from JSON: {e.Message}", e);
        }

        if (parsed is null)
        {
            throw new KreuzbergException("Failed to parse ExtractionDiff from JSON: deserializer returned null");
        }

        return parsed;
    }

    /// <summary>Options used by <see cref="FromJson"/>: snake_case enum names,
    /// default values omitted when writing.</summary>
    private static readonly JsonSerializerOptions JsonOptions = new JsonSerializerOptions
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>Options for serializing config/input objects to FFI. Strips nulls
    /// (nullable C# fields default to null and would override required Rust fields with
    /// non-deserialisable nulls) but preserves explicit false/0 so caller intent is kept.</summary>
    private static readonly JsonSerializerOptions JsonSerializationOptions = new JsonSerializerOptions
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };
}
|
||||
53
packages/csharp/src/Kreuzberg/ExtractionMethod.cs
generated
Normal file
53
packages/csharp/src/Kreuzberg/ExtractionMethod.cs
generated
Normal file
@@ -0,0 +1,53 @@
|
||||
// This file is auto-generated by alef. DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
/// <summary>
/// How the extracted text was produced.
/// </summary>
/// <remarks>
/// JSON (de)serialisation goes through <see cref="ExtractionMethodJsonConverter"/>,
/// which maps each member to the lowercase wire name shown in its attribute.
/// </remarks>
[JsonConverter(typeof(ExtractionMethodJsonConverter))]
public enum ExtractionMethod
{
    /// <summary>Wire name <c>native</c>.</summary>
    [JsonPropertyName("native")]
    Native,

    /// <summary>Wire name <c>ocr</c>.</summary>
    [JsonPropertyName("ocr")]
    Ocr,

    /// <summary>Wire name <c>mixed</c>.</summary>
    [JsonPropertyName("mixed")]
    Mixed,
}

/// <summary>
/// Custom JSON converter for <see cref="ExtractionMethod"/> that respects explicit variant names.
/// </summary>
internal sealed class ExtractionMethodJsonConverter : JsonConverter<ExtractionMethod>
{
    /// <summary>
    /// Reads an <see cref="ExtractionMethod"/> from its string wire name.
    /// </summary>
    /// <param name="reader">Reader positioned on the value token.</param>
    /// <param name="typeToConvert">Unused; always <see cref="ExtractionMethod"/>.</param>
    /// <param name="options">Unused serializer options.</param>
    /// <returns>The enum member matching the wire name.</returns>
    /// <exception cref="JsonException">
    /// When the token is not a JSON string or the string is not a known wire name.
    /// </exception>
    public override ExtractionMethod Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options)
    {
        // GetString() throws InvalidOperationException for non-string tokens;
        // reject them up front so every malformed input surfaces as JsonException.
        if (reader.TokenType != JsonTokenType.String)
        {
            throw new JsonException($"Expected a JSON string for ExtractionMethod but found token {reader.TokenType}");
        }

        var value = reader.GetString();
        return value switch
        {
            "native" => ExtractionMethod.Native,
            "ocr" => ExtractionMethod.Ocr,
            "mixed" => ExtractionMethod.Mixed,
            _ => throw new JsonException($"Unknown ExtractionMethod value: {value}")
        };
    }

    /// <summary>
    /// Writes an <see cref="ExtractionMethod"/> as its string wire name.
    /// </summary>
    /// <param name="writer">Destination writer.</param>
    /// <param name="value">Enum member to write.</param>
    /// <param name="options">Unused serializer options.</param>
    /// <exception cref="JsonException">When <paramref name="value"/> is not a declared member.</exception>
    public override void Write(Utf8JsonWriter writer, ExtractionMethod value, JsonSerializerOptions options)
    {
        var str = value switch
        {
            ExtractionMethod.Native => "native",
            ExtractionMethod.Ocr => "ocr",
            ExtractionMethod.Mixed => "mixed",
            _ => throw new JsonException($"Unknown ExtractionMethod value: {value}")
        };
        writer.WriteStringValue(str);
    }
}
|
||||
332
packages/csharp/src/Kreuzberg/ExtractionResult.cs
generated
Normal file
332
packages/csharp/src/Kreuzberg/ExtractionResult.cs
generated
Normal file
@@ -0,0 +1,332 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// General extraction result used by the core extraction API.
///
/// This is the main result type returned by all extraction functions.
/// </summary>
public sealed record ExtractionResult
{
    /// <summary>
    /// Extracted text content (JSON key <c>content</c>). Defaults to the empty string.
    /// </summary>
    [JsonPropertyName("content")]
    public string Content { get; init; } = "";

    /// <summary>
    /// MIME type reported with the result (JSON key <c>mime_type</c>). Defaults to the empty string.
    /// </summary>
    [JsonPropertyName("mime_type")]
    public string MimeType { get; init; } = "";

    /// <summary>
    /// Metadata attached to the result (JSON key <c>metadata</c>).
    /// </summary>
    // Initialised with default! so it is null until set by deserialisation or an
    // object initializer.
    [JsonPropertyName("metadata")]
    public Metadata Metadata { get; init; } = default!;

    /// <summary>
    /// Extraction strategy used to produce the returned text.
    ///
    /// Populated when the extractor can reliably distinguish native text extraction,
    /// OCR-only extraction, or mixed native/OCR output.
    /// </summary>
    [JsonPropertyName("extraction_method")]
    public ExtractionMethod? ExtractionMethod { get; init; } = null;

    /// <summary>
    /// Tables attached to the result (JSON key <c>tables</c>). Defaults to an empty list.
    /// </summary>
    [JsonPropertyName("tables")]
    public List<Table> Tables { get; init; } = [];

    /// <summary>
    /// Detected languages (JSON key <c>detected_languages</c>), or <c>null</c> when absent.
    /// </summary>
    [JsonPropertyName("detected_languages")]
    public List<string>? DetectedLanguages { get; init; } = null;

    /// <summary>
    /// Text chunks when chunking is enabled.
    ///
    /// When chunking configuration is provided, the content is split into
    /// overlapping chunks for efficient processing. Each chunk contains the text,
    /// optional embeddings (if enabled), and metadata about its position.
    /// </summary>
    [JsonPropertyName("chunks")]
    public List<Chunk>? Chunks { get; init; } = null;

    /// <summary>
    /// Extracted images from the document.
    ///
    /// When image extraction is enabled via `ImageExtractionConfig`, this field
    /// contains all images found in the document with their raw data and metadata.
    /// Each image may optionally contain a nested `ocr_result` if OCR was performed.
    /// </summary>
    [JsonPropertyName("images")]
    public List<ExtractedImage>? Images { get; init; } = null;

    /// <summary>
    /// Per-page content when page extraction is enabled.
    ///
    /// When page extraction is configured, the document is split into per-page content
    /// with tables and images mapped to their respective pages.
    /// </summary>
    [JsonPropertyName("pages")]
    public List<PageContent>? Pages { get; init; } = null;

    /// <summary>
    /// Semantic elements when element-based result format is enabled.
    ///
    /// When result_format is set to ElementBased, this field contains semantic
    /// elements with type classification, unique identifiers, and metadata for
    /// Unstructured-compatible element-based processing.
    /// </summary>
    [JsonPropertyName("elements")]
    public List<Element>? Elements { get; init; } = null;

    /// <summary>
    /// Rich Djot content structure (when extracting Djot documents).
    ///
    /// When extracting Djot documents with structured extraction enabled,
    /// this field contains the full semantic structure including:
    /// - Block-level elements with nesting
    /// - Inline formatting with attributes
    /// - Links, images, footnotes
    /// - Math expressions
    /// - Complete attribute information
    ///
    /// The `content` field still contains plain text for backward compatibility.
    ///
    /// Always `None` for non-Djot documents.
    /// </summary>
    [JsonPropertyName("djot_content")]
    public DjotContent? DjotContent { get; init; } = null;

    /// <summary>
    /// OCR elements with full spatial and confidence metadata.
    ///
    /// When OCR is performed with element extraction enabled, this field contains
    /// the structured representation of detected text including:
    /// - Bounding geometry (rectangles or quadrilaterals)
    /// - Confidence scores (detection and recognition)
    /// - Rotation information
    /// - Hierarchical relationships (Tesseract only)
    ///
    /// This field preserves all metadata that would otherwise be lost when
    /// converting to plain text or markdown output formats.
    ///
    /// Only populated when `OcrElementConfig.include_elements` is true.
    /// </summary>
    [JsonPropertyName("ocr_elements")]
    public List<OcrElement>? OcrElements { get; init; } = null;

    /// <summary>
    /// Structured document tree (when document structure extraction is enabled).
    ///
    /// When `include_document_structure` is true in `ExtractionConfig`, this field
    /// contains the full hierarchical representation of the document including:
    /// - Heading-driven section nesting
    /// - Table grids with cell-level metadata
    /// - Content layer classification (body, header, footer, footnote)
    /// - Inline text annotations (formatting, links)
    /// - Bounding boxes and page numbers
    ///
    /// Independent of `result_format` — can be combined with Unified or ElementBased.
    /// </summary>
    [JsonPropertyName("document")]
    public DocumentStructure? Document { get; init; } = null;

    /// <summary>
    /// Extracted keywords when keyword extraction is enabled.
    ///
    /// When keyword extraction (RAKE or YAKE) is configured, this field contains
    /// the extracted keywords with scores, algorithm info, and position data.
    /// Previously stored in `metadata.additional["keywords"]`.
    /// </summary>
    [JsonPropertyName("extracted_keywords")]
    public List<Keyword>? ExtractedKeywords { get; init; } = null;

    /// <summary>
    /// Document quality score from quality analysis.
    ///
    /// A value between 0.0 and 1.0 indicating the overall text quality.
    /// Previously stored in `metadata.additional["quality_score"]`.
    /// </summary>
    [JsonPropertyName("quality_score")]
    public double? QualityScore { get; init; } = null;

    /// <summary>
    /// Non-fatal warnings collected during processing pipeline stages.
    ///
    /// Captures errors from optional pipeline features (embedding, chunking,
    /// language detection, output formatting) that don't prevent extraction
    /// but may indicate degraded results.
    /// Previously stored as individual keys in `metadata.additional`.
    /// </summary>
    [JsonPropertyName("processing_warnings")]
    public List<ProcessingWarning> ProcessingWarnings { get; init; } = [];

    /// <summary>
    /// PDF annotations extracted from the document.
    ///
    /// When annotation extraction is enabled via `PdfConfig.extract_annotations`,
    /// this field contains text notes, highlights, links, stamps, and other
    /// annotations found in PDF documents.
    /// </summary>
    [JsonPropertyName("annotations")]
    public List<PdfAnnotation>? Annotations { get; init; } = null;

    /// <summary>
    /// Nested extraction results from archive contents.
    ///
    /// When extracting archives, each processable file inside produces its own
    /// full extraction result. Set to `None` for non-archive formats.
    /// Use `max_archive_depth` in config to control recursion depth.
    /// </summary>
    [JsonPropertyName("children")]
    public List<ArchiveEntry>? Children { get; init; } = null;

    /// <summary>
    /// URIs/links discovered during document extraction.
    ///
    /// Contains hyperlinks, image references, citations, email addresses, and
    /// other URI-like references found in the document. Always extracted when
    /// present in the source document.
    /// </summary>
    [JsonPropertyName("uris")]
    public List<ExtractedUri>? Uris { get; init; } = null;

    /// <summary>
    /// Tracked changes embedded in the source document.
    ///
    /// Populated by per-format extractors that understand change-tracking
    /// metadata (DOCX `w:ins`/`w:del`/`w:rPrChange`, ODT `text:change-*`,
    /// …). Every extractor defaults to `None` until its format-specific
    /// implementation is added. Extractors that do populate this field follow
    /// the "accepted-changes" convention: inserted text is present in
    /// `content`, deleted text is absent — the revision list is the separate
    /// audit trail.
    /// </summary>
    [JsonPropertyName("revisions")]
    public List<DocumentRevision>? Revisions { get; init; } = null;

    /// <summary>
    /// Structured extraction output from LLM-based JSON schema extraction.
    ///
    /// When `structured_extraction` is configured in `ExtractionConfig`, the
    /// extracted document content is sent to a VLM with the provided JSON schema.
    /// The response is parsed and stored here as a JSON value matching the schema.
    /// </summary>
    [JsonPropertyName("structured_output")]
    public string? StructuredOutput { get; init; } = null;

    /// <summary>
    /// Code intelligence results from tree-sitter analysis.
    ///
    /// Populated when extracting source code files with the `tree-sitter` feature.
    /// Contains metrics, structural analysis, imports/exports, comments,
    /// docstrings, symbols, diagnostics, and optionally chunked code segments.
    ///
    /// Stored as an opaque JSON value so that all language bindings (Go, Java,
    /// C#, …) can deserialize it as a raw JSON object rather than a typed struct.
    /// The underlying type is `tree_sitter_language_pack.ProcessResult`.
    /// </summary>
    // NOTE(review): typed as string? although described as a raw JSON object —
    // confirm the native side emits this value as a JSON string.
    [JsonPropertyName("code_intelligence")]
    public string? CodeIntelligence { get; init; } = null;

    /// <summary>
    /// LLM token usage and cost data for all LLM calls made during this extraction.
    ///
    /// Contains one entry per LLM call. Multiple entries are produced when
    /// VLM OCR, structured extraction, or LLM embeddings run during
    /// the same extraction.
    ///
    /// `None` when no LLM was used.
    /// </summary>
    [JsonPropertyName("llm_usage")]
    public List<LlmUsage>? LlmUsage { get; init; } = null;

    /// <summary>
    /// Pre-rendered content in the requested output format.
    ///
    /// Populated during `derive_extraction_result` before tree derivation consumes
    /// element data. `apply_output_format` swaps this into `content` at the end
    /// of the pipeline, after post-processors have operated on plain text.
    /// </summary>
    [JsonPropertyName("formatted_content")]
    public string? FormattedContent { get; init; } = null;

    /// <summary>
    /// Structured hOCR document for the OCR+layout pipeline.
    ///
    /// When tesseract produces hOCR output, the parsed `InternalDocument` carries
    /// paragraph structure with bounding boxes and confidence scores. The layout
    /// classification step enriches these elements before final rendering.
    /// </summary>
    [JsonPropertyName("ocr_internal_document")]
    public string? OcrInternalDocument { get; init; } = null;

    /// <summary>
    /// Parse a <see cref="ExtractionResult"/> from a JSON string.
    /// </summary>
    /// <param name="json">JSON text describing the result.</param>
    /// <returns>The deserialised result; never <c>null</c>.</returns>
    /// <exception cref="KreuzbergException">When the JSON cannot be deserialised.</exception>
    public static ExtractionResult FromJson(string json)
    {
        try
        {
            return JsonSerializer.Deserialize<ExtractionResult>(json, JsonOptions)
                ?? throw new KreuzbergException("Failed to parse ExtractionResult from JSON: deserializer returned null");
        }
        catch (KreuzbergException)
        {
            throw;
        }
        catch (Exception e)
        {
            throw new KreuzbergException($"Failed to parse ExtractionResult from JSON: {e.Message}", e);
        }
    }

    private static readonly JsonSerializerOptions JsonOptions = new()
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>Options for serializing config/input objects to FFI. Strips nulls
    /// (nullable C# fields default to null and would override required Rust fields with
    /// non-deserialisable nulls) but preserves explicit false/0 so caller intent is kept.</summary>
    private static readonly JsonSerializerOptions JsonSerializationOptions = new()
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>
    /// Convert from an OCR result.
    /// </summary>
    /// <param name="ocr">OCR result to convert; it is serialised to JSON and handed to the native library.</param>
    /// <returns>The converted extraction result; never <c>null</c>.</returns>
    /// <exception cref="KreuzbergException">
    /// When the native library rejects the OCR JSON, returns a null result handle
    /// or a null JSON string, or when the returned JSON cannot be deserialised.
    /// </exception>
    public static ExtractionResult FromOcr(OcrExtractionResult ocr)
    {
        var ocrJson = JsonSerializer.Serialize(ocr, JsonSerializationOptions);
        var ocrHandle = NativeMethods.OcrExtractionResultFromJson(ocrJson);
        if (ocrHandle == IntPtr.Zero)
        {
            var ec = NativeMethods.LastErrorCode();
            var ctxPtr = NativeMethods.LastErrorContext();
            var msg = global::System.Runtime.InteropServices.Marshal.PtrToStringUTF8(ctxPtr) ?? "OcrExtractionResultFromJson failed";
            throw new KreuzbergException(ec, msg);
        }

        string? json;
        try
        {
            var nativeResult = NativeMethods.ExtractionResultFromOcr(ocrHandle);
            if (nativeResult == IntPtr.Zero)
            {
                // Do not pass a null handle on to ExtractionResultToJson / ExtractionResultFree.
                throw new KreuzbergException("ExtractionResultFromOcr returned a null handle");
            }

            try
            {
                var jsonPtr = NativeMethods.ExtractionResultToJson(nativeResult);
                try
                {
                    // PtrToStringUTF8 copies into a managed string, so the native
                    // buffer can be released immediately afterwards.
                    json = global::System.Runtime.InteropServices.Marshal.PtrToStringUTF8(jsonPtr);
                }
                finally
                {
                    NativeMethods.FreeString(jsonPtr);
                }
            }
            finally
            {
                // Release the native result even if the JSON conversion throws.
                NativeMethods.ExtractionResultFree(nativeResult);
            }
        }
        finally
        {
            NativeMethods.OcrExtractionResultFree(ocrHandle);
        }

        if (json is null)
        {
            throw new KreuzbergException("ExtractionResultToJson returned a null string");
        }

        // FromJson wraps parse failures in KreuzbergException and rejects a null
        // result instead of returning it behind a null-forgiving operator.
        return FromJson(json);
    }
}
|
||||
65
packages/csharp/src/Kreuzberg/FictionBookMetadata.cs
generated
Normal file
65
packages/csharp/src/Kreuzberg/FictionBookMetadata.cs
generated
Normal file
@@ -0,0 +1,65 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// FictionBook (FB2) metadata.
/// </summary>
public sealed record FictionBookMetadata
{
    /// <summary>
    /// Genre entries (JSON key <c>genres</c>). Defaults to an empty list.
    /// </summary>
    [JsonPropertyName("genres")]
    public List<string> Genres { get; init; } = [];

    /// <summary>
    /// Sequence entries (JSON key <c>sequences</c>). Defaults to an empty list.
    /// </summary>
    [JsonPropertyName("sequences")]
    public List<string> Sequences { get; init; } = [];

    /// <summary>
    /// Annotation text (JSON key <c>annotation</c>), or <c>null</c> when absent.
    /// </summary>
    [JsonPropertyName("annotation")]
    public string? Annotation { get; init; } = null;

    /// <summary>
    /// Deserialises a JSON document into a <see cref="FictionBookMetadata"/>.
    /// </summary>
    /// <param name="json">JSON text describing the metadata.</param>
    /// <returns>The deserialised metadata; never <c>null</c>.</returns>
    /// <exception cref="KreuzbergException">
    /// When the JSON cannot be deserialised, or when the deserialiser yields <c>null</c>.
    /// </exception>
    public static FictionBookMetadata FromJson(string json)
    {
        FictionBookMetadata? parsed;
        try
        {
            parsed = JsonSerializer.Deserialize<FictionBookMetadata>(json, JsonOptions);
        }
        catch (Exception e) when (e is not KreuzbergException)
        {
            // Wrap everything except our own exception type so callers only
            // have to handle KreuzbergException.
            throw new KreuzbergException($"Failed to parse FictionBookMetadata from JSON: {e.Message}", e);
        }

        if (parsed is null)
        {
            throw new KreuzbergException("Failed to parse FictionBookMetadata from JSON: deserializer returned null");
        }

        return parsed;
    }

    /// <summary>Options used by <see cref="FromJson"/>: snake_case enum names,
    /// default values omitted when writing.</summary>
    private static readonly JsonSerializerOptions JsonOptions = new JsonSerializerOptions
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>Options for serializing config/input objects to FFI. Strips nulls
    /// (nullable C# fields default to null and would override required Rust fields with
    /// non-deserialisable nulls) but preserves explicit false/0 so caller intent is kept.</summary>
    private static readonly JsonSerializerOptions JsonSerializationOptions = new JsonSerializerOptions
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };
}
|
||||
210
packages/csharp/src/Kreuzberg/FileExtractionConfig.cs
generated
Normal file
210
packages/csharp/src/Kreuzberg/FileExtractionConfig.cs
generated
Normal file
@@ -0,0 +1,210 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
|
||||
/// Per-file extraction configuration overrides for batch processing.
|
||||
///
|
||||
/// All fields are `Option&lt;T&gt;` — `None` means "use the batch-level default."
|
||||
/// This type is used with `batch_extract_files` and
|
||||
/// `batch_extract_bytes` to allow heterogeneous
|
||||
/// extraction settings within a single batch.
|
||||
///
|
||||
/// # Excluded Fields
|
||||
///
|
||||
/// The following `ExtractionConfig` fields are batch-level only and
|
||||
/// cannot be overridden per file:
|
||||
/// - `max_concurrent_extractions` — controls batch parallelism
|
||||
/// - `use_cache` — global caching policy
|
||||
/// - `acceleration` — shared ONNX execution provider
|
||||
/// - `security_limits` — global archive security policy
|
||||
/// </summary>
|
||||
public sealed record FileExtractionConfig
|
||||
{
|
||||
/// <summary>
|
||||
/// Override quality post-processing for this file.
|
||||
/// </summary>
|
||||
[JsonPropertyName("enable_quality_processing")]
|
||||
public bool? EnableQualityProcessing { get; init; } = null;
|
||||
|
||||
/// <summary>
|
||||
/// Override OCR configuration for this file (null in the Option = use batch default).
|
||||
/// </summary>
|
||||
[JsonPropertyName("ocr")]
|
||||
public OcrConfig? Ocr { get; init; } = null;
|
||||
|
||||
/// <summary>
|
||||
/// Override force OCR for this file.
|
||||
/// </summary>
|
||||
[JsonPropertyName("force_ocr")]
|
||||
public bool? ForceOcr { get; init; } = null;
|
||||
|
||||
/// <summary>
|
||||
/// Override force OCR pages for this file (1-indexed page numbers).
|
||||
/// </summary>
|
||||
[JsonPropertyName("force_ocr_pages")]
|
||||
public List<uint>? ForceOcrPages { get; init; } = null;
|
||||
|
||||
/// <summary>
|
||||
/// Override disable OCR for this file.
|
||||
/// </summary>
|
||||
[JsonPropertyName("disable_ocr")]
|
||||
public bool? DisableOcr { get; init; } = null;
|
||||
|
||||
/// <summary>
|
||||
/// Override chunking configuration for this file.
|
||||
/// </summary>
|
||||
[JsonPropertyName("chunking")]
|
||||
public ChunkingConfig? Chunking { get; init; } = null;
|
||||
|
||||
/// <summary>
|
||||
/// Override content filtering configuration for this file.
|
||||
/// </summary>
|
||||
[JsonPropertyName("content_filter")]
|
||||
public ContentFilterConfig? ContentFilter { get; init; } = null;
|
||||
|
||||
/// <summary>
|
||||
/// Override image extraction configuration for this file.
|
||||
/// </summary>
|
||||
[JsonPropertyName("images")]
|
||||
public ImageExtractionConfig? Images { get; init; } = null;
|
||||
|
||||
/// <summary>
|
||||
/// Override PDF options for this file.
|
||||
/// </summary>
|
||||
[JsonPropertyName("pdf_options")]
|
||||
public PdfConfig? PdfOptions { get; init; } = null;
|
||||
|
||||
/// <summary>
|
||||
/// Override token reduction for this file.
|
||||
/// </summary>
|
||||
[JsonPropertyName("token_reduction")]
|
||||
public TokenReductionOptions? TokenReduction { get; init; } = null;
|
||||
|
||||
/// <summary>
|
||||
/// Override language detection for this file.
|
||||
/// </summary>
|
||||
[JsonPropertyName("language_detection")]
|
||||
public LanguageDetectionConfig? LanguageDetection { get; init; } = null;
|
||||
|
||||
/// <summary>
|
||||
/// Override page extraction for this file.
|
||||
/// </summary>
|
||||
[JsonPropertyName("pages")]
|
||||
public PageConfig? Pages { get; init; } = null;
|
||||
|
||||
/// <summary>
|
||||
/// Override keyword extraction for this file.
|
||||
/// </summary>
|
||||
[JsonPropertyName("keywords")]
|
||||
public KeywordConfig? Keywords { get; init; } = null;
|
||||
|
||||
/// <summary>
|
||||
/// Override post-processor for this file.
|
||||
/// </summary>
|
||||
[JsonPropertyName("postprocessor")]
|
||||
public PostProcessorConfig? Postprocessor { get; init; } = null;
|
||||
|
||||
/// <summary>
|
||||
/// Override HTML conversion options for this file.
|
||||
/// </summary>
|
||||
[JsonPropertyName("html_options")]
|
||||
public string? HtmlOptions { get; init; } = null;
|
||||
|
||||
/// <summary>
|
||||
/// Override result format for this file.
|
||||
/// </summary>
|
||||
[JsonPropertyName("result_format")]
|
||||
public ResultFormat? ResultFormat { get; init; } = null;
|
||||
|
||||
/// <summary>
|
||||
/// Override output content format for this file.
|
||||
/// </summary>
|
||||
[JsonPropertyName("output_format")]
|
||||
public OutputFormat? OutputFormat { get; init; } = null;
|
||||
|
||||
/// <summary>
|
||||
/// Override document structure output for this file.
|
||||
/// </summary>
|
||||
[JsonPropertyName("include_document_structure")]
|
||||
public bool? IncludeDocumentStructure { get; init; } = null;
|
||||
|
||||
/// <summary>
|
||||
/// Override layout detection for this file.
|
||||
/// </summary>
|
||||
[JsonPropertyName("layout")]
|
||||
public LayoutDetectionConfig? Layout { get; init; } = null;
|
||||
|
||||
/// <summary>
|
||||
/// Override per-file extraction timeout in seconds.
|
||||
///
|
||||
/// When set, the extraction for this file will be canceled after the
|
||||
/// specified duration. A timed-out file produces an error result without
|
||||
/// affecting other files in the batch.
|
||||
/// </summary>
|
||||
[JsonPropertyName("timeout_secs")]
|
||||
public ulong? TimeoutSecs { get; init; } = null;
|
||||
|
||||
/// <summary>
|
||||
/// Override tree-sitter configuration for this file.
|
||||
/// </summary>
|
||||
[JsonPropertyName("tree_sitter")]
|
||||
public TreeSitterConfig? TreeSitter { get; init; } = null;
|
||||
|
||||
/// <summary>
|
||||
/// Override structured extraction configuration for this file.
|
||||
///
|
||||
/// When set, enables LLM-based structured extraction with a JSON schema
|
||||
/// for this specific file. The extracted content is sent to a VLM/LLM
|
||||
/// and the response is parsed according to the provided schema.
|
||||
/// </summary>
|
||||
[JsonPropertyName("structured_extraction")]
|
||||
public StructuredExtractionConfig? StructuredExtraction { get; init; } = null;
|
||||
|
||||
|
||||
/// <summary>
|
||||
/// Parse a <see cref="FileExtractionConfig"/> from a JSON string.
|
||||
/// </summary>
|
||||
/// <exception cref="KreuzbergException">When the JSON cannot be deserialised.</exception>
|
||||
public static FileExtractionConfig FromJson(string json)
|
||||
{
|
||||
try
|
||||
{
|
||||
return JsonSerializer.Deserialize<FileExtractionConfig>(json, JsonOptions)
|
||||
?? throw new KreuzbergException($"Failed to parse FileExtractionConfig from JSON: deserializer returned null");
|
||||
}
|
||||
catch (KreuzbergException)
|
||||
{
|
||||
throw;
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
throw new KreuzbergException($"Failed to parse FileExtractionConfig from JSON: {e.Message}", e);
|
||||
}
|
||||
}
|
||||
|
||||
private static readonly JsonSerializerOptions JsonOptions = new()
|
||||
{
|
||||
DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
|
||||
Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
|
||||
};
|
||||
|
||||
/// <summary>Options for serializing config/input objects to FFI. Strips nulls
|
||||
/// (nullable C# fields default to null and would override required Rust fields with
|
||||
/// non-deserialisable nulls) but preserves explicit false/0 so caller intent is kept.</summary>
|
||||
private static readonly JsonSerializerOptions JsonSerializationOptions = new()
|
||||
{
|
||||
DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
|
||||
Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
|
||||
};
|
||||
}
|
||||
68
packages/csharp/src/Kreuzberg/Footnote.cs
generated
Normal file
68
packages/csharp/src/Kreuzberg/Footnote.cs
generated
Normal file
@@ -0,0 +1,68 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// A footnote in a Djot document.
/// </summary>
public sealed record Footnote
{
    /// <summary>Label of the footnote.</summary>
    [JsonPropertyName("label")]
    public required string Label { get; init; }

    /// <summary>Content blocks of the footnote; starts out as an empty list.</summary>
    [JsonPropertyName("content")]
    public List<FormattedBlock> Content { get; init; } = new();

    /// <summary>
    /// Build a <see cref="Footnote"/> from its JSON representation.
    /// </summary>
    /// <param name="json">JSON text to deserialize.</param>
    /// <returns>The deserialized footnote; never null.</returns>
    /// <exception cref="KreuzbergException">When the JSON cannot be deserialised.</exception>
    public static Footnote FromJson(string json)
    {
        Footnote? parsed;
        try
        {
            parsed = JsonSerializer.Deserialize<Footnote>(json, JsonOptions);
        }
        catch (Exception cause) when (cause is not KreuzbergException)
        {
            // Any non-Kreuzberg failure is wrapped so callers only deal with one exception type.
            throw new KreuzbergException($"Failed to parse Footnote from JSON: {cause.Message}", cause);
        }

        // A JSON literal `null` deserializes to a null reference; report it as a parse failure.
        return parsed
            ?? throw new KreuzbergException("Failed to parse Footnote from JSON: deserializer returned null");
    }

    /// <summary>Serializer options used by <see cref="FromJson"/>; enums are read as snake_case strings.</summary>
    private static readonly JsonSerializerOptions JsonOptions = new JsonSerializerOptions
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>Options for serializing config/input objects to FFI. Nulls are stripped
    /// (nullable C# fields default to null and would override required Rust fields with
    /// non-deserialisable nulls), while explicit false/0 values are kept so caller intent
    /// is preserved.</summary>
    private static readonly JsonSerializerOptions JsonSerializationOptions = new JsonSerializerOptions
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };
}
|
||||
294
packages/csharp/src/Kreuzberg/FormatMetadata.cs
generated
Normal file
294
packages/csharp/src/Kreuzberg/FormatMetadata.cs
generated
Normal file
@@ -0,0 +1,294 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// Format-specific metadata, modelled as a discriminated union.
///
/// An extraction result carries at most one format type, which gives
/// type-safe, clean metadata without nested optionals. Each nested record
/// below is one variant; the <c>As*</c> properties give null-returning access
/// to a specific variant's payload.
/// </summary>
[JsonConverter(typeof(FormatMetadataJsonConverter))]
public abstract record FormatMetadata
{
    /// <summary>Variant carrying <see cref="PdfMetadata"/>.</summary>
    public sealed record Pdf(PdfMetadata Value) : FormatMetadata;

    /// <summary>Variant carrying <see cref="DocxMetadata"/>.</summary>
    public sealed record Docx(DocxMetadata Value) : FormatMetadata;

    /// <summary>Variant carrying <see cref="ExcelMetadata"/>.</summary>
    public sealed record Excel(ExcelMetadata Value) : FormatMetadata;

    /// <summary>Variant carrying <see cref="EmailMetadata"/>.</summary>
    public sealed record Email(EmailMetadata Value) : FormatMetadata;

    /// <summary>Variant carrying <see cref="PptxMetadata"/>.</summary>
    public sealed record Pptx(PptxMetadata Value) : FormatMetadata;

    /// <summary>Variant carrying <see cref="ArchiveMetadata"/>.</summary>
    public sealed record Archive(ArchiveMetadata Value) : FormatMetadata;

    /// <summary>Variant carrying <see cref="ImageMetadata"/>.</summary>
    public sealed record Image(ImageMetadata Value) : FormatMetadata;

    /// <summary>Variant carrying <see cref="XmlMetadata"/>.</summary>
    public sealed record Xml(XmlMetadata Value) : FormatMetadata;

    /// <summary>Variant carrying <see cref="TextMetadata"/>.</summary>
    public sealed record Text(TextMetadata Value) : FormatMetadata;

    /// <summary>Variant carrying <see cref="HtmlMetadata"/>.</summary>
    public sealed record Html(HtmlMetadata Value) : FormatMetadata;

    /// <summary>Variant carrying <see cref="OcrMetadata"/>.</summary>
    public sealed record Ocr(OcrMetadata Value) : FormatMetadata;

    /// <summary>Variant carrying <see cref="CsvMetadata"/>.</summary>
    public sealed record Csv(CsvMetadata Value) : FormatMetadata;

    /// <summary>Variant carrying <see cref="BibtexMetadata"/>.</summary>
    public sealed record Bibtex(BibtexMetadata Value) : FormatMetadata;

    /// <summary>Variant carrying <see cref="CitationMetadata"/>.</summary>
    public sealed record Citation(CitationMetadata Value) : FormatMetadata;

    /// <summary>Variant carrying <see cref="FictionBookMetadata"/>.</summary>
    public sealed record FictionBook(FictionBookMetadata Value) : FormatMetadata;

    /// <summary>Variant carrying <see cref="DbfMetadata"/>.</summary>
    public sealed record Dbf(DbfMetadata Value) : FormatMetadata;

    /// <summary>Variant carrying <see cref="JatsMetadata"/>.</summary>
    public sealed record Jats(JatsMetadata Value) : FormatMetadata;

    /// <summary>Variant carrying <see cref="EpubMetadata"/>.</summary>
    public sealed record Epub(EpubMetadata Value) : FormatMetadata;

    /// <summary>Variant carrying <see cref="PstMetadata"/>.</summary>
    public sealed record Pst(PstMetadata Value) : FormatMetadata;

    /// <summary>Variant whose payload is untyped (<c>object</c>).</summary>
    public sealed record Code(object Value) : FormatMetadata;

    /// <summary>The payload when this instance is the <see cref="Pdf"/> variant; otherwise null.</summary>
    public PdfMetadata? AsPdf => (this as Pdf)?.Value;

    /// <summary>The payload when this instance is the <see cref="Docx"/> variant; otherwise null.</summary>
    public DocxMetadata? AsDocx => (this as Docx)?.Value;

    /// <summary>The payload when this instance is the <see cref="Excel"/> variant; otherwise null.</summary>
    public ExcelMetadata? AsExcel => (this as Excel)?.Value;

    /// <summary>The payload when this instance is the <see cref="Email"/> variant; otherwise null.</summary>
    public EmailMetadata? AsEmail => (this as Email)?.Value;

    /// <summary>The payload when this instance is the <see cref="Pptx"/> variant; otherwise null.</summary>
    public PptxMetadata? AsPptx => (this as Pptx)?.Value;

    /// <summary>The payload when this instance is the <see cref="Archive"/> variant; otherwise null.</summary>
    public ArchiveMetadata? AsArchive => (this as Archive)?.Value;

    /// <summary>The payload when this instance is the <see cref="Image"/> variant; otherwise null.</summary>
    public ImageMetadata? AsImage => (this as Image)?.Value;

    /// <summary>The payload when this instance is the <see cref="Xml"/> variant; otherwise null.</summary>
    public XmlMetadata? AsXml => (this as Xml)?.Value;

    /// <summary>The payload when this instance is the <see cref="Text"/> variant; otherwise null.</summary>
    public TextMetadata? AsText => (this as Text)?.Value;

    /// <summary>The payload when this instance is the <see cref="Html"/> variant; otherwise null.</summary>
    public HtmlMetadata? AsHtml => (this as Html)?.Value;

    /// <summary>The payload when this instance is the <see cref="Ocr"/> variant; otherwise null.</summary>
    public OcrMetadata? AsOcr => (this as Ocr)?.Value;

    /// <summary>The payload when this instance is the <see cref="Csv"/> variant; otherwise null.</summary>
    public CsvMetadata? AsCsv => (this as Csv)?.Value;

    /// <summary>The payload when this instance is the <see cref="Bibtex"/> variant; otherwise null.</summary>
    public BibtexMetadata? AsBibtex => (this as Bibtex)?.Value;

    /// <summary>The payload when this instance is the <see cref="Citation"/> variant; otherwise null.</summary>
    public CitationMetadata? AsCitation => (this as Citation)?.Value;

    /// <summary>The payload when this instance is the <see cref="FictionBook"/> variant; otherwise null.</summary>
    public FictionBookMetadata? AsFictionBook => (this as FictionBook)?.Value;

    /// <summary>The payload when this instance is the <see cref="Dbf"/> variant; otherwise null.</summary>
    public DbfMetadata? AsDbf => (this as Dbf)?.Value;

    /// <summary>The payload when this instance is the <see cref="Jats"/> variant; otherwise null.</summary>
    public JatsMetadata? AsJats => (this as Jats)?.Value;

    /// <summary>The payload when this instance is the <see cref="Epub"/> variant; otherwise null.</summary>
    public EpubMetadata? AsEpub => (this as Epub)?.Value;

    /// <summary>The payload when this instance is the <see cref="Pst"/> variant; otherwise null.</summary>
    public PstMetadata? AsPst => (this as Pst)?.Value;

    /// <summary>The payload when this instance is the <see cref="Code"/> variant; otherwise null.</summary>
    public object? AsCode => (this as Code)?.Value;
}
|
||||
|
||||
/// <summary>
/// Custom converter for the <see cref="FormatMetadata"/> sealed union with flattened variant fields.
/// </summary>
/// <remarks>
/// The JSON form is a single object holding a discriminator field (<c>format_type</c>) and the
/// variant-specific fields at the same level. System.Text.Json's [JsonPolymorphic] cannot handle
/// this layout, so (de)serialization is done manually here.
/// </remarks>
public sealed class FormatMetadataJsonConverter : JsonConverter<FormatMetadata>
{
    /// <summary>Name of the JSON property that selects the union variant.</summary>
    private const string DiscriminatorName = "format_type";

    /// <summary>
    /// Reads a flattened, tagged JSON object and materializes the matching <see cref="FormatMetadata"/> variant.
    /// </summary>
    /// <param name="reader">Reader positioned on the start of the JSON object.</param>
    /// <param name="typeToConvert">Requested target type (not used for dispatch; the discriminator decides).</param>
    /// <param name="options">Serializer options forwarded to the variant deserialization.</param>
    /// <returns>The variant named by the <c>format_type</c> discriminator.</returns>
    /// <exception cref="JsonException">
    /// The token is not an object, the discriminator is missing, null, not a string, or unknown,
    /// or the variant deserializes to null.
    /// </exception>
    public override FormatMetadata Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options)
    {
        if (reader.TokenType != JsonTokenType.StartObject)
        {
            throw new JsonException($"Expected JSON object, got {reader.TokenType}");
        }

        using var doc = JsonDocument.ParseValue(ref reader);
        var root = doc.RootElement;

        if (!root.TryGetProperty(DiscriminatorName, out var tagElement))
        {
            throw new JsonException("Missing discriminator field: format_type");
        }

        // JsonElement.GetString() is only valid for String and Null values; reject anything
        // else up front with a message that names the actual problem.
        if (tagElement.ValueKind is not (JsonValueKind.String or JsonValueKind.Null))
        {
            throw new JsonException($"Discriminator field format_type must be a string, got {tagElement.ValueKind}");
        }

        var tagValue = tagElement.GetString() ?? throw new JsonException("Discriminator field is null");

        // Every variant of FormatMetadata is a tuple-style record with a single positional
        // "Value" component, so the remaining fields are always re-nested under "Value".
        var wrappedJson = WrapVariantFields(root);

        return tagValue switch
        {
            "pdf" => ReadVariant<FormatMetadata.Pdf>(wrappedJson, options),
            "docx" => ReadVariant<FormatMetadata.Docx>(wrappedJson, options),
            "excel" => ReadVariant<FormatMetadata.Excel>(wrappedJson, options),
            "email" => ReadVariant<FormatMetadata.Email>(wrappedJson, options),
            "pptx" => ReadVariant<FormatMetadata.Pptx>(wrappedJson, options),
            "archive" => ReadVariant<FormatMetadata.Archive>(wrappedJson, options),
            "image" => ReadVariant<FormatMetadata.Image>(wrappedJson, options),
            "xml" => ReadVariant<FormatMetadata.Xml>(wrappedJson, options),
            "text" => ReadVariant<FormatMetadata.Text>(wrappedJson, options),
            "html" => ReadVariant<FormatMetadata.Html>(wrappedJson, options),
            "ocr" => ReadVariant<FormatMetadata.Ocr>(wrappedJson, options),
            "csv" => ReadVariant<FormatMetadata.Csv>(wrappedJson, options),
            "bibtex" => ReadVariant<FormatMetadata.Bibtex>(wrappedJson, options),
            "citation" => ReadVariant<FormatMetadata.Citation>(wrappedJson, options),
            "fiction_book" => ReadVariant<FormatMetadata.FictionBook>(wrappedJson, options),
            "dbf" => ReadVariant<FormatMetadata.Dbf>(wrappedJson, options),
            "jats" => ReadVariant<FormatMetadata.Jats>(wrappedJson, options),
            "epub" => ReadVariant<FormatMetadata.Epub>(wrappedJson, options),
            "pst" => ReadVariant<FormatMetadata.Pst>(wrappedJson, options),
            "code" => ReadVariant<FormatMetadata.Code>(wrappedJson, options),
            _ => throw new JsonException($"Unknown FormatMetadata discriminator: {tagValue}"),
        };
    }

    /// <summary>
    /// Writes a variant as one flat JSON object: the <c>format_type</c> tag followed by the
    /// fields of the variant's inner value.
    /// </summary>
    /// <param name="writer">Destination writer.</param>
    /// <param name="value">Variant to serialize.</param>
    /// <param name="options">Serializer options used to serialize the inner value.</param>
    /// <exception cref="JsonException">The runtime type of <paramref name="value"/> is not a known variant.</exception>
    public override void Write(Utf8JsonWriter writer, FormatMetadata value, JsonSerializerOptions options)
    {
        // Emit the discriminator tag plus the inner value's fields flattened at the same
        // level: `FormatMetadata.Pdf(PdfMetadata Value)` becomes
        // `{"format_type":"pdf", ...fields of Value...}` rather than `{"Value":{...}}`,
        // which is the same layout Read expects.
        string tag;
        object? inner;
        switch (value)
        {
            case FormatMetadata.Pdf pdf:
                tag = "pdf";
                inner = pdf.Value;
                break;
            case FormatMetadata.Docx docx:
                tag = "docx";
                inner = docx.Value;
                break;
            case FormatMetadata.Excel excel:
                tag = "excel";
                inner = excel.Value;
                break;
            case FormatMetadata.Email email:
                tag = "email";
                inner = email.Value;
                break;
            case FormatMetadata.Pptx pptx:
                tag = "pptx";
                inner = pptx.Value;
                break;
            case FormatMetadata.Archive archive:
                tag = "archive";
                inner = archive.Value;
                break;
            case FormatMetadata.Image image:
                tag = "image";
                inner = image.Value;
                break;
            case FormatMetadata.Xml xml:
                tag = "xml";
                inner = xml.Value;
                break;
            case FormatMetadata.Text text:
                tag = "text";
                inner = text.Value;
                break;
            case FormatMetadata.Html html:
                tag = "html";
                inner = html.Value;
                break;
            case FormatMetadata.Ocr ocr:
                tag = "ocr";
                inner = ocr.Value;
                break;
            case FormatMetadata.Csv csv:
                tag = "csv";
                inner = csv.Value;
                break;
            case FormatMetadata.Bibtex bibtex:
                tag = "bibtex";
                inner = bibtex.Value;
                break;
            case FormatMetadata.Citation citation:
                tag = "citation";
                inner = citation.Value;
                break;
            case FormatMetadata.FictionBook fictionBook:
                tag = "fiction_book";
                inner = fictionBook.Value;
                break;
            case FormatMetadata.Dbf dbf:
                tag = "dbf";
                inner = dbf.Value;
                break;
            case FormatMetadata.Jats jats:
                tag = "jats";
                inner = jats.Value;
                break;
            case FormatMetadata.Epub epub:
                tag = "epub";
                inner = epub.Value;
                break;
            case FormatMetadata.Pst pst:
                tag = "pst";
                inner = pst.Value;
                break;
            case FormatMetadata.Code code:
                tag = "code";
                inner = code.Value;
                break;
            default:
                throw new JsonException($"Unknown FormatMetadata variant: {value.GetType().Name}");
        }

        writer.WriteStartObject();
        writer.WriteString(DiscriminatorName, tag);
        if (inner != null)
        {
            // Serialize using the runtime type so the untyped Code payload keeps its real shape.
            using var doc = JsonSerializer.SerializeToDocument(inner, inner.GetType(), options);

            // NOTE(review): an inner value that does not serialize to a JSON object (e.g. a
            // string or array held by the untyped Code variant) contributes no fields, so only
            // the tag is written. Confirm this is intended.
            if (doc.RootElement.ValueKind == JsonValueKind.Object)
            {
                foreach (var prop in doc.RootElement.EnumerateObject())
                {
                    writer.WritePropertyName(prop.Name);
                    prop.Value.WriteTo(writer);
                }
            }
        }
        writer.WriteEndObject();
    }

    /// <summary>
    /// Re-encodes <paramref name="root"/> as <c>{"Value":{...}}</c>, copying every property
    /// except the discriminator into the nested object.
    /// </summary>
    private static byte[] WrapVariantFields(JsonElement root)
    {
        using var buffer = new MemoryStream();
        using var wrapped = new Utf8JsonWriter(buffer);

        wrapped.WriteStartObject();
        wrapped.WritePropertyName("Value");
        wrapped.WriteStartObject();
        foreach (var prop in root.EnumerateObject())
        {
            if (prop.Name != DiscriminatorName)
            {
                wrapped.WritePropertyName(prop.Name);
                prop.Value.WriteTo(wrapped);
            }
        }
        wrapped.WriteEndObject();
        wrapped.WriteEndObject();

        // Flush before copying: the writer buffers output until flushed.
        wrapped.Flush();
        return buffer.ToArray();
    }

    /// <summary>
    /// Deserializes the wrapped payload as the concrete variant <typeparamref name="TVariant"/>,
    /// treating a null result as a failure.
    /// </summary>
    private static FormatMetadata ReadVariant<TVariant>(byte[] wrappedJson, JsonSerializerOptions options)
        where TVariant : FormatMetadata
    {
        return JsonSerializer.Deserialize<TVariant>(wrappedJson, options)
            ?? throw new JsonException("Failed to deserialize variant");
    }
}
|
||||
100
packages/csharp/src/Kreuzberg/FormattedBlock.cs
generated
Normal file
100
packages/csharp/src/Kreuzberg/FormattedBlock.cs
generated
Normal file
@@ -0,0 +1,100 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// A block-level element of a Djot document.
///
/// Covers structural elements such as headings, paragraphs, lists, code blocks, etc.
/// </summary>
public sealed record FormattedBlock
{
    /// <summary>Kind of block element.</summary>
    [JsonPropertyName("block_type")]
    public required BlockType BlockType { get; init; }

    /// <summary>Heading level (1-6) for headings, or nesting level for lists.</summary>
    [JsonPropertyName("level")]
    public ulong? Level { get; init; }

    /// <summary>Inline content of the block; starts out as an empty list.</summary>
    [JsonPropertyName("inline_content")]
    public List<InlineElement> InlineContent { get; init; } = new();

    /// <summary>Element attributes (classes, IDs, key-value pairs).</summary>
    [JsonPropertyName("attributes")]
    public string? Attributes { get; init; }

    /// <summary>Language identifier, for code blocks.</summary>
    [JsonPropertyName("language")]
    public string? Language { get; init; }

    /// <summary>Raw code content, for code blocks.</summary>
    [JsonPropertyName("code")]
    public string? Code { get; init; }

    /// <summary>Nested blocks for containers (blockquotes, list items, divs); starts out as an empty list.</summary>
    [JsonPropertyName("children")]
    public List<FormattedBlock> Children { get; init; } = new();

    /// <summary>
    /// Build a <see cref="FormattedBlock"/> from its JSON representation.
    /// </summary>
    /// <param name="json">JSON text to deserialize.</param>
    /// <returns>The deserialized block; never null.</returns>
    /// <exception cref="KreuzbergException">When the JSON cannot be deserialised.</exception>
    public static FormattedBlock FromJson(string json)
    {
        FormattedBlock? parsed;
        try
        {
            parsed = JsonSerializer.Deserialize<FormattedBlock>(json, JsonOptions);
        }
        catch (Exception cause) when (cause is not KreuzbergException)
        {
            // Any non-Kreuzberg failure is wrapped so callers only deal with one exception type.
            throw new KreuzbergException($"Failed to parse FormattedBlock from JSON: {cause.Message}", cause);
        }

        // A JSON literal `null` deserializes to a null reference; report it as a parse failure.
        return parsed
            ?? throw new KreuzbergException("Failed to parse FormattedBlock from JSON: deserializer returned null");
    }

    /// <summary>Serializer options used by <see cref="FromJson"/>; enums are read as snake_case strings.</summary>
    private static readonly JsonSerializerOptions JsonOptions = new JsonSerializerOptions
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>Options for serializing config/input objects to FFI. Nulls are stripped
    /// (nullable C# fields default to null and would override required Rust fields with
    /// non-deserialisable nulls), while explicit false/0 values are kept so caller intent
    /// is preserved.</summary>
    private static readonly JsonSerializerOptions JsonSerializationOptions = new JsonSerializerOptions
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };
}
|
||||
98
packages/csharp/src/Kreuzberg/GridCell.cs
generated
Normal file
98
packages/csharp/src/Kreuzberg/GridCell.cs
generated
Normal file
@@ -0,0 +1,98 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// A single grid cell together with its position and span metadata.
/// </summary>
public sealed record GridCell
{
    /// <summary>Text content of the cell.</summary>
    [JsonPropertyName("content")]
    public required string Content { get; init; }

    /// <summary>Zero-indexed row position.</summary>
    [JsonPropertyName("row")]
    public uint Row { get; init; }

    /// <summary>Zero-indexed column position.</summary>
    [JsonPropertyName("col")]
    public uint Col { get; init; }

    /// <summary>Number of rows this cell spans.</summary>
    [JsonPropertyName("row_span")]
    public uint RowSpan { get; init; }

    /// <summary>Number of columns this cell spans.</summary>
    [JsonPropertyName("col_span")]
    public uint ColSpan { get; init; }

    /// <summary>Whether this is a header cell.</summary>
    [JsonPropertyName("is_header")]
    public bool IsHeader { get; init; }

    /// <summary>Bounding box of the cell, if available.</summary>
    [JsonPropertyName("bbox")]
    public BoundingBox? Bbox { get; init; }

    /// <summary>
    /// Build a <see cref="GridCell"/> from its JSON representation.
    /// </summary>
    /// <param name="json">JSON text to deserialize.</param>
    /// <returns>The deserialized cell; never null.</returns>
    /// <exception cref="KreuzbergException">When the JSON cannot be deserialised.</exception>
    public static GridCell FromJson(string json)
    {
        GridCell? parsed;
        try
        {
            parsed = JsonSerializer.Deserialize<GridCell>(json, JsonOptions);
        }
        catch (Exception cause) when (cause is not KreuzbergException)
        {
            // Any non-Kreuzberg failure is wrapped so callers only deal with one exception type.
            throw new KreuzbergException($"Failed to parse GridCell from JSON: {cause.Message}", cause);
        }

        // A JSON literal `null` deserializes to a null reference; report it as a parse failure.
        return parsed
            ?? throw new KreuzbergException("Failed to parse GridCell from JSON: deserializer returned null");
    }

    /// <summary>Serializer options used by <see cref="FromJson"/>; enums are read as snake_case strings.</summary>
    private static readonly JsonSerializerOptions JsonOptions = new JsonSerializerOptions
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>Options for serializing config/input objects to FFI. Nulls are stripped
    /// (nullable C# fields default to null and would override required Rust fields with
    /// non-deserialisable nulls), while explicit false/0 values are kept so caller intent
    /// is preserved.</summary>
    private static readonly JsonSerializerOptions JsonSerializationOptions = new JsonSerializerOptions
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };
}
|
||||
86
packages/csharp/src/Kreuzberg/HeaderMetadata.cs
generated
Normal file
86
packages/csharp/src/Kreuzberg/HeaderMetadata.cs
generated
Normal file
@@ -0,0 +1,86 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// Metadata describing a single header/heading element.
/// </summary>
public sealed record HeaderMetadata
{
    /// <summary>Serializer settings used by <see cref="FromJson"/>: snake_case enum names,
    /// default values omitted when writing.</summary>
    private static readonly JsonSerializerOptions JsonOptions = new JsonSerializerOptions
    {
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
    };

    /// <summary>Settings for serializing config/input objects to FFI. Null members are
    /// omitted (nullable C# fields default to null and would otherwise override required Rust
    /// fields with non-deserialisable nulls), while explicit false/0 values are still written
    /// so caller intent is kept.</summary>
    private static readonly JsonSerializerOptions JsonSerializationOptions = new JsonSerializerOptions
    {
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
    };

    /// <summary>
    /// Header level, from 1 (h1) through 6 (h6).
    /// </summary>
    [JsonPropertyName("level")]
    public byte Level { get; init; }

    /// <summary>
    /// Normalized text content of the header.
    /// </summary>
    [JsonPropertyName("text")]
    public required string Text { get; init; }

    /// <summary>
    /// HTML <c>id</c> attribute, or <c>null</c> when the element has none.
    /// </summary>
    [JsonPropertyName("id")]
    public string? Id { get; init; }

    /// <summary>
    /// Depth of the header element in the document tree.
    /// </summary>
    [JsonPropertyName("depth")]
    public uint Depth { get; init; }

    /// <summary>
    /// Byte offset of the header in the original HTML document.
    /// </summary>
    [JsonPropertyName("html_offset")]
    public uint HtmlOffset { get; init; }

    /// <summary>
    /// Deserialises a <see cref="HeaderMetadata"/> from its JSON text.
    /// </summary>
    /// <param name="json">JSON document to parse.</param>
    /// <returns>The parsed <see cref="HeaderMetadata"/>.</returns>
    /// <exception cref="KreuzbergException">When the JSON cannot be deserialised or yields <c>null</c>.</exception>
    public static HeaderMetadata FromJson(string json)
    {
        HeaderMetadata? parsed;
        try
        {
            parsed = JsonSerializer.Deserialize<HeaderMetadata>(json, JsonOptions);
        }
        catch (Exception e) when (e is not KreuzbergException)
        {
            // Any non-Kreuzberg failure is wrapped; KreuzbergException passes through untouched.
            throw new KreuzbergException($"Failed to parse HeaderMetadata from JSON: {e.Message}", e);
        }

        return parsed
            ?? throw new KreuzbergException("Failed to parse HeaderMetadata from JSON: deserializer returned null");
    }
}
|
||||
65
packages/csharp/src/Kreuzberg/HeadingContext.cs
generated
Normal file
65
packages/csharp/src/Kreuzberg/HeadingContext.cs
generated
Normal file
@@ -0,0 +1,65 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// Heading context for a chunk within a Markdown document: the chain of headings
/// leading from the document root to the section that contains the chunk.
/// </summary>
public sealed record HeadingContext
{
    /// <summary>Serializer settings used by <see cref="FromJson"/>: snake_case enum names,
    /// default values omitted when writing.</summary>
    private static readonly JsonSerializerOptions JsonOptions = new JsonSerializerOptions
    {
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
    };

    /// <summary>Settings for serializing config/input objects to FFI. Null members are
    /// omitted (nullable C# fields default to null and would otherwise override required Rust
    /// fields with non-deserialisable nulls), while explicit false/0 values are still written
    /// so caller intent is kept.</summary>
    private static readonly JsonSerializerOptions JsonSerializationOptions = new JsonSerializerOptions
    {
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
    };

    /// <summary>
    /// Heading hierarchy from the document root to this chunk's section.
    /// Index 0 is the outermost heading (h1); the last element is the most specific.
    /// </summary>
    [JsonPropertyName("headings")]
    public List<HeadingLevel> Headings { get; init; } = new();

    /// <summary>
    /// Deserialises a <see cref="HeadingContext"/> from its JSON text.
    /// </summary>
    /// <param name="json">JSON document to parse.</param>
    /// <returns>The parsed <see cref="HeadingContext"/>.</returns>
    /// <exception cref="KreuzbergException">When the JSON cannot be deserialised or yields <c>null</c>.</exception>
    public static HeadingContext FromJson(string json)
    {
        HeadingContext? parsed;
        try
        {
            parsed = JsonSerializer.Deserialize<HeadingContext>(json, JsonOptions);
        }
        catch (Exception e) when (e is not KreuzbergException)
        {
            // Any non-Kreuzberg failure is wrapped; KreuzbergException passes through untouched.
            throw new KreuzbergException($"Failed to parse HeadingContext from JSON: {e.Message}", e);
        }

        return parsed
            ?? throw new KreuzbergException("Failed to parse HeadingContext from JSON: deserializer returned null");
    }
}
|
||||
68
packages/csharp/src/Kreuzberg/HeadingLevel.cs
generated
Normal file
68
packages/csharp/src/Kreuzberg/HeadingLevel.cs
generated
Normal file
@@ -0,0 +1,68 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// One heading entry in a heading hierarchy.
/// </summary>
public sealed record HeadingLevel
{
    /// <summary>Serializer settings used by <see cref="FromJson"/>: snake_case enum names,
    /// default values omitted when writing.</summary>
    private static readonly JsonSerializerOptions JsonOptions = new JsonSerializerOptions
    {
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
    };

    /// <summary>Settings for serializing config/input objects to FFI. Null members are
    /// omitted (nullable C# fields default to null and would otherwise override required Rust
    /// fields with non-deserialisable nulls), while explicit false/0 values are still written
    /// so caller intent is kept.</summary>
    private static readonly JsonSerializerOptions JsonSerializationOptions = new JsonSerializerOptions
    {
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
    };

    /// <summary>
    /// Heading depth (1 = h1, 2 = h2, and so on).
    /// </summary>
    [JsonPropertyName("level")]
    public byte Level { get; init; }

    /// <summary>
    /// Text content of the heading.
    /// </summary>
    [JsonPropertyName("text")]
    public required string Text { get; init; }

    /// <summary>
    /// Deserialises a <see cref="HeadingLevel"/> from its JSON text.
    /// </summary>
    /// <param name="json">JSON document to parse.</param>
    /// <returns>The parsed <see cref="HeadingLevel"/>.</returns>
    /// <exception cref="KreuzbergException">When the JSON cannot be deserialised or yields <c>null</c>.</exception>
    public static HeadingLevel FromJson(string json)
    {
        HeadingLevel? parsed;
        try
        {
            parsed = JsonSerializer.Deserialize<HeadingLevel>(json, JsonOptions);
        }
        catch (Exception e) when (e is not KreuzbergException)
        {
            // Any non-Kreuzberg failure is wrapped; KreuzbergException passes through untouched.
            throw new KreuzbergException($"Failed to parse HeadingLevel from JSON: {e.Message}", e);
        }

        return parsed
            ?? throw new KreuzbergException("Failed to parse HeadingLevel from JSON: deserializer returned null");
    }
}
|
||||
94
packages/csharp/src/Kreuzberg/HierarchicalBlock.cs
generated
Normal file
94
packages/csharp/src/Kreuzberg/HierarchicalBlock.cs
generated
Normal file
@@ -0,0 +1,94 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// A block of text together with its assigned hierarchy level.
///
/// The semantic heading information is extracted from font size clustering
/// and hierarchical analysis.
/// </summary>
public sealed record HierarchicalBlock
{
    /// <summary>Serializer settings used by <see cref="FromJson"/>: snake_case enum names,
    /// default values omitted when writing.</summary>
    private static readonly JsonSerializerOptions JsonOptions = new JsonSerializerOptions
    {
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
    };

    /// <summary>Settings for serializing config/input objects to FFI. Null members are
    /// omitted (nullable C# fields default to null and would otherwise override required Rust
    /// fields with non-deserialisable nulls), while explicit false/0 values are still written
    /// so caller intent is kept.</summary>
    private static readonly JsonSerializerOptions JsonSerializationOptions = new JsonSerializerOptions
    {
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
    };

    /// <summary>
    /// Text content of this block.
    /// </summary>
    [JsonPropertyName("text")]
    public required string Text { get; init; }

    /// <summary>
    /// Font size of the text in this block.
    /// </summary>
    [JsonPropertyName("font_size")]
    public float FontSize { get; init; }

    /// <summary>
    /// Hierarchy level of this block (H1-H6 or Body).
    ///
    /// Levels correspond to HTML heading tags:
    /// - "h1": Top-level heading
    /// - "h2": Secondary heading
    /// - "h3": Tertiary heading
    /// - "h4": Quaternary heading
    /// - "h5": Quinary heading
    /// - "h6": Senary heading
    /// - "body": Body text (no heading level)
    /// </summary>
    [JsonPropertyName("level")]
    public required string Level { get; init; }

    /// <summary>
    /// Bounding box of the block, or <c>null</c> when not provided.
    ///
    /// Coordinates are given as (left, top, right, bottom) in PDF units.
    /// </summary>
    [JsonPropertyName("bbox")]
    public List<float>? Bbox { get; init; }

    /// <summary>
    /// Deserialises a <see cref="HierarchicalBlock"/> from its JSON text.
    /// </summary>
    /// <param name="json">JSON document to parse.</param>
    /// <returns>The parsed <see cref="HierarchicalBlock"/>.</returns>
    /// <exception cref="KreuzbergException">When the JSON cannot be deserialised or yields <c>null</c>.</exception>
    public static HierarchicalBlock FromJson(string json)
    {
        HierarchicalBlock? parsed;
        try
        {
            parsed = JsonSerializer.Deserialize<HierarchicalBlock>(json, JsonOptions);
        }
        catch (Exception e) when (e is not KreuzbergException)
        {
            // Any non-Kreuzberg failure is wrapped; KreuzbergException passes through untouched.
            throw new KreuzbergException($"Failed to parse HierarchicalBlock from JSON: {e.Message}", e);
        }

        return parsed
            ?? throw new KreuzbergException("Failed to parse HierarchicalBlock from JSON: deserializer returned null");
    }
}
|
||||
101
packages/csharp/src/Kreuzberg/HierarchyConfig.cs
generated
Normal file
101
packages/csharp/src/Kreuzberg/HierarchyConfig.cs
generated
Normal file
@@ -0,0 +1,101 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// Hierarchy extraction configuration for PDF text structure analysis.
///
/// Enables extraction of document hierarchy levels (H1-H6) based on font size
/// clustering and semantic analysis. When enabled, hierarchical blocks are
/// included in page content.
/// </summary>
public sealed record HierarchyConfig
{
    /// <summary>
    /// Enable hierarchy extraction.
    /// </summary>
    [JsonPropertyName("enabled")]
    public bool Enabled { get; init; } = true;

    /// <summary>
    /// Number of font size clusters to use for hierarchy levels (1-7).
    ///
    /// Default: 6, which provides H1-H6 heading levels with body text.
    /// Larger values create more fine-grained hierarchy levels.
    /// </summary>
    /// <remarks>
    /// NOTE(review): the initializer below is 3 while the summary states a default of 6;
    /// confirm which value the native library uses (compare with <see cref="Default"/>).
    /// </remarks>
    [JsonPropertyName("k_clusters")]
    public ulong KClusters { get; init; } = 3;

    /// <summary>
    /// Include bounding box information in hierarchy blocks.
    /// </summary>
    [JsonPropertyName("include_bbox")]
    public bool IncludeBbox { get; init; } = true;

    /// <summary>
    /// OCR coverage threshold for smart OCR triggering (0.0-1.0).
    ///
    /// Determines when OCR should be triggered based on text block coverage.
    /// OCR is triggered when text blocks cover less than this fraction of the page.
    /// Default: 0.5 (trigger OCR if less than 50% of page has text).
    /// </summary>
    /// <remarks>
    /// The property itself initialises to <c>null</c>; presumably the documented 0.5 is
    /// applied by the native side when no value is sent — TODO confirm.
    /// </remarks>
    [JsonPropertyName("ocr_coverage_threshold")]
    public float? OcrCoverageThreshold { get; init; } = null;

    /// <summary>
    /// Deserialises a <see cref="HierarchyConfig"/> from its JSON text.
    /// </summary>
    /// <param name="json">JSON document to parse.</param>
    /// <returns>The parsed <see cref="HierarchyConfig"/>.</returns>
    /// <exception cref="KreuzbergException">When the JSON cannot be deserialised or yields <c>null</c>.</exception>
    public static HierarchyConfig FromJson(string json)
    {
        try
        {
            return JsonSerializer.Deserialize<HierarchyConfig>(json, JsonOptions)
                ?? throw new KreuzbergException("Failed to parse HierarchyConfig from JSON: deserializer returned null");
        }
        catch (Exception e) when (e is not KreuzbergException)
        {
            // Any non-Kreuzberg failure is wrapped; KreuzbergException passes through untouched.
            throw new KreuzbergException($"Failed to parse HierarchyConfig from JSON: {e.Message}", e);
        }
    }

    /// <summary>Serializer settings used by <see cref="FromJson"/> and <see cref="Default"/>:
    /// snake_case enum names, default values omitted when writing.</summary>
    private static readonly JsonSerializerOptions JsonOptions = new()
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>Options for serializing config/input objects to FFI. Strips nulls
    /// (nullable C# fields default to null and would override required Rust fields with
    /// non-deserialisable nulls) but preserves explicit false/0 so caller intent is kept.</summary>
    private static readonly JsonSerializerOptions JsonSerializationOptions = new()
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>
    /// Builds a <see cref="HierarchyConfig"/> from the native library's default value by
    /// round-tripping it through JSON.
    /// </summary>
    /// <returns>The configuration described by the native default.</returns>
    /// <exception cref="KreuzbergException">
    /// When the native call yields no JSON, or JSON that deserialises to <c>null</c>.
    /// </exception>
    public static HierarchyConfig Default()
    {
        var nativeResult = NativeMethods.HierarchyConfigDefault();
        try
        {
            var jsonPtr = NativeMethods.HierarchyConfigToJson(nativeResult);
            string? json;
            try
            {
                json = global::System.Runtime.InteropServices.Marshal.PtrToStringUTF8(jsonPtr);
            }
            finally
            {
                // Release the native string even if marshalling throws.
                NativeMethods.FreeString(jsonPtr);
            }

            // A missing string is parsed as the JSON literal null, which yields a null object;
            // surface that as an error instead of handing null to the caller.
            return JsonSerializer.Deserialize<HierarchyConfig>(json ?? "null", JsonOptions)
                ?? throw new KreuzbergException("Failed to load default HierarchyConfig: native library returned no value");
        }
        finally
        {
            // Release the native handle on every path, including deserialisation failures.
            NativeMethods.HierarchyConfigFree(nativeResult);
        }
    }
}
|
||||
153
packages/csharp/src/Kreuzberg/HtmlMetadata.cs
generated
Normal file
153
packages/csharp/src/Kreuzberg/HtmlMetadata.cs
generated
Normal file
@@ -0,0 +1,153 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// Metadata extracted from an HTML document.
///
/// Covers document-level metadata, Open Graph data, Twitter Card metadata,
/// and extracted structural elements (headers, links, images, structured data).
/// </summary>
public sealed record HtmlMetadata
{
    /// <summary>Serializer settings used by <see cref="FromJson"/>: snake_case enum names,
    /// default values omitted when writing.</summary>
    private static readonly JsonSerializerOptions JsonOptions = new JsonSerializerOptions
    {
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
    };

    /// <summary>Settings for serializing config/input objects to FFI. Null members are
    /// omitted (nullable C# fields default to null and would otherwise override required Rust
    /// fields with non-deserialisable nulls), while explicit false/0 values are still written
    /// so caller intent is kept.</summary>
    private static readonly JsonSerializerOptions JsonSerializationOptions = new JsonSerializerOptions
    {
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
    };

    /// <summary>
    /// Document title from the <c>&lt;title&gt;</c> tag.
    /// </summary>
    [JsonPropertyName("title")]
    public string? Title { get; init; }

    /// <summary>
    /// Document description from the <c>&lt;meta name="description"&gt;</c> tag.
    /// </summary>
    [JsonPropertyName("description")]
    public string? Description { get; init; }

    /// <summary>
    /// Document keywords from the <c>&lt;meta name="keywords"&gt;</c> tag, split on commas.
    /// </summary>
    [JsonPropertyName("keywords")]
    public List<string> Keywords { get; init; } = new();

    /// <summary>
    /// Document author from the <c>&lt;meta name="author"&gt;</c> tag.
    /// </summary>
    [JsonPropertyName("author")]
    public string? Author { get; init; }

    /// <summary>
    /// Canonical URL from the <c>&lt;link rel="canonical"&gt;</c> tag.
    /// </summary>
    [JsonPropertyName("canonical_url")]
    public string? CanonicalUrl { get; init; }

    /// <summary>
    /// Base URL from the <c>&lt;base href=""&gt;</c> tag, used for resolving relative URLs.
    /// </summary>
    [JsonPropertyName("base_href")]
    public string? BaseHref { get; init; }

    /// <summary>
    /// Document language from the <c>lang</c> attribute.
    /// </summary>
    [JsonPropertyName("language")]
    public string? Language { get; init; }

    /// <summary>
    /// Document text direction from the <c>dir</c> attribute.
    /// </summary>
    [JsonPropertyName("text_direction")]
    [JsonConverter(typeof(TextDirectionJsonConverter))]
    public TextDirection? TextDirection { get; init; }

    /// <summary>
    /// Open Graph metadata (og:* properties) for social media.
    /// Keys like "title", "description", "image", "url", etc.
    /// </summary>
    [JsonPropertyName("open_graph")]
    public Dictionary<string, string> OpenGraph { get; init; } = new();

    /// <summary>
    /// Twitter Card metadata (twitter:* properties).
    /// Keys like "card", "site", "creator", "title", "description", "image", etc.
    /// </summary>
    [JsonPropertyName("twitter_card")]
    public Dictionary<string, string> TwitterCard { get; init; } = new();

    /// <summary>
    /// Additional meta tags not covered by specific fields.
    /// Keys are meta name/property attributes, values are content.
    /// </summary>
    [JsonPropertyName("meta_tags")]
    public Dictionary<string, string> MetaTags { get; init; } = new();

    /// <summary>
    /// Extracted header elements with hierarchy.
    /// </summary>
    [JsonPropertyName("headers")]
    public List<HeaderMetadata> Headers { get; init; } = new();

    /// <summary>
    /// Extracted hyperlinks with type classification.
    /// </summary>
    [JsonPropertyName("links")]
    public List<LinkMetadata> Links { get; init; } = new();

    /// <summary>
    /// Extracted images with source and dimensions.
    /// </summary>
    [JsonPropertyName("images")]
    public List<ImageMetadataType> Images { get; init; } = new();

    /// <summary>
    /// Extracted structured data blocks.
    /// </summary>
    [JsonPropertyName("structured_data")]
    public List<StructuredData> StructuredData { get; init; } = new();

    /// <summary>
    /// Deserialises a <see cref="HtmlMetadata"/> from its JSON text.
    /// </summary>
    /// <param name="json">JSON document to parse.</param>
    /// <returns>The parsed <see cref="HtmlMetadata"/>.</returns>
    /// <exception cref="KreuzbergException">When the JSON cannot be deserialised or yields <c>null</c>.</exception>
    public static HtmlMetadata FromJson(string json)
    {
        HtmlMetadata? parsed;
        try
        {
            parsed = JsonSerializer.Deserialize<HtmlMetadata>(json, JsonOptions);
        }
        catch (Exception e) when (e is not KreuzbergException)
        {
            // Any non-Kreuzberg failure is wrapped; KreuzbergException passes through untouched.
            throw new KreuzbergException($"Failed to parse HtmlMetadata from JSON: {e.Message}", e);
        }

        return parsed
            ?? throw new KreuzbergException("Failed to parse HtmlMetadata from JSON: deserializer returned null");
    }
}
|
||||
110
packages/csharp/src/Kreuzberg/HtmlOutputConfig.cs
generated
Normal file
110
packages/csharp/src/Kreuzberg/HtmlOutputConfig.cs
generated
Normal file
@@ -0,0 +1,110 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// Configuration for styled HTML output.
///
/// When set on <c>ExtractionConfig.html_output</c> alongside
/// <c>output_format = OutputFormat.Html</c>, the pipeline builds a
/// <c>StyledHtmlRenderer</c> instead of the plain comrak-based renderer.
/// </summary>
public sealed record HtmlOutputConfig
{
    /// <summary>
    /// Inline CSS string injected into the output after the theme stylesheet.
    /// Concatenated after <c>css_file</c> content when both are set.
    /// </summary>
    [JsonPropertyName("css")]
    public string? Css { get; init; } = null;

    /// <summary>
    /// Path to a CSS file loaded once at renderer construction time.
    /// Concatenated before <c>css</c> when both are set.
    /// </summary>
    [JsonPropertyName("css_file")]
    public string? CssFile { get; init; } = null;

    /// <summary>
    /// Built-in colour/typography theme. Default: <see cref="HtmlTheme.Unstyled"/>.
    /// </summary>
    [JsonPropertyName("theme")]
    public HtmlTheme Theme { get; init; } = HtmlTheme.Unstyled;

    /// <summary>
    /// CSS class prefix applied to every emitted class name.
    ///
    /// Default: <c>"kb-"</c>. Change this if your host application already uses
    /// classes that start with <c>kb-</c>.
    /// </summary>
    [JsonPropertyName("class_prefix")]
    public string ClassPrefix { get; init; } = "kb-"; // matches the documented default; an empty prefix would be sent to the native side as an explicit override

    /// <summary>
    /// When <c>true</c> (default), write the resolved CSS into a <c>&lt;style&gt;</c> block
    /// immediately after the opening <c>&lt;div class="{prefix}doc"&gt;</c>.
    ///
    /// Set to <c>false</c> to emit only the structural markup and wire up your
    /// own stylesheet targeting the <c>kb-*</c> class names.
    /// </summary>
    [JsonPropertyName("embed_css")]
    public bool EmbedCss { get; init; } = true;

    /// <summary>
    /// Deserialises a <see cref="HtmlOutputConfig"/> from its JSON text.
    /// </summary>
    /// <param name="json">JSON document to parse.</param>
    /// <returns>The parsed <see cref="HtmlOutputConfig"/>.</returns>
    /// <exception cref="KreuzbergException">When the JSON cannot be deserialised or yields <c>null</c>.</exception>
    public static HtmlOutputConfig FromJson(string json)
    {
        try
        {
            return JsonSerializer.Deserialize<HtmlOutputConfig>(json, JsonOptions)
                ?? throw new KreuzbergException("Failed to parse HtmlOutputConfig from JSON: deserializer returned null");
        }
        catch (Exception e) when (e is not KreuzbergException)
        {
            // Any non-Kreuzberg failure is wrapped; KreuzbergException passes through untouched.
            throw new KreuzbergException($"Failed to parse HtmlOutputConfig from JSON: {e.Message}", e);
        }
    }

    /// <summary>Serializer settings used by <see cref="FromJson"/> and <see cref="Default"/>:
    /// snake_case enum names, default values omitted when writing.</summary>
    private static readonly JsonSerializerOptions JsonOptions = new()
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>Options for serializing config/input objects to FFI. Strips nulls
    /// (nullable C# fields default to null and would override required Rust fields with
    /// non-deserialisable nulls) but preserves explicit false/0 so caller intent is kept.</summary>
    private static readonly JsonSerializerOptions JsonSerializationOptions = new()
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>
    /// Builds an <see cref="HtmlOutputConfig"/> from the native library's default value by
    /// round-tripping it through JSON.
    /// </summary>
    /// <returns>The configuration described by the native default.</returns>
    /// <exception cref="KreuzbergException">
    /// When the native call yields no JSON, or JSON that deserialises to <c>null</c>.
    /// </exception>
    public static HtmlOutputConfig Default()
    {
        var nativeResult = NativeMethods.HtmlOutputConfigDefault();
        try
        {
            var jsonPtr = NativeMethods.HtmlOutputConfigToJson(nativeResult);
            string? json;
            try
            {
                json = global::System.Runtime.InteropServices.Marshal.PtrToStringUTF8(jsonPtr);
            }
            finally
            {
                // Release the native string even if marshalling throws.
                NativeMethods.FreeString(jsonPtr);
            }

            // A missing string is parsed as the JSON literal null, which yields a null object;
            // surface that as an error instead of handing null to the caller.
            return JsonSerializer.Deserialize<HtmlOutputConfig>(json ?? "null", JsonOptions)
                ?? throw new KreuzbergException("Failed to load default HtmlOutputConfig: native library returned no value");
        }
        finally
        {
            // Release the native handle on every path, including deserialisation failures.
            NativeMethods.HtmlOutputConfigFree(nativeResult);
        }
    }
}
|
||||
79
packages/csharp/src/Kreuzberg/HtmlTheme.cs
generated
Normal file
79
packages/csharp/src/Kreuzberg/HtmlTheme.cs
generated
Normal file
@@ -0,0 +1,79 @@
|
||||
// This file is auto-generated by alef. DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
/// <summary>
/// Selects one of the built-in HTML themes.
/// </summary>
[JsonConverter(typeof(HtmlThemeJsonConverter))]
public enum HtmlTheme
{
    /// <summary>
    /// Sensible defaults: system font stack, neutral colours, readable line
    /// measure. All CSS custom properties (<c>--kb-*</c>) are defined, so user CSS
    /// can override individual values.
    /// </summary>
    [JsonPropertyName("default")]
    Default = 0,

    /// <summary>
    /// Palette and spacing inspired by GitHub Markdown.
    /// </summary>
    [JsonPropertyName("github")]
    GitHub = 1,

    /// <summary>
    /// Dark background with light text.
    /// </summary>
    [JsonPropertyName("dark")]
    Dark = 2,

    /// <summary>
    /// Minimal light theme with generous whitespace.
    /// </summary>
    [JsonPropertyName("light")]
    Light = 3,

    /// <summary>
    /// No built-in stylesheet is emitted. CSS custom properties are still defined
    /// on <c>:root</c>, so user stylesheets can reference <c>var(--kb-*)</c> tokens.
    /// </summary>
    [JsonPropertyName("unstyled")]
    Unstyled = 4,
}
|
||||
|
||||
|
||||
/// <summary>
/// JSON converter for <see cref="HtmlTheme"/> that maps each variant to its explicit
/// lowercase wire name.
/// </summary>
internal sealed class HtmlThemeJsonConverter : JsonConverter<HtmlTheme>
{
    /// <summary>
    /// Reads a string token and maps it to the matching <see cref="HtmlTheme"/> variant.
    /// </summary>
    /// <exception cref="JsonException">When the string is not a known theme name.</exception>
    public override HtmlTheme Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options)
    {
        var name = reader.GetString();
        switch (name)
        {
            case "default":
                return HtmlTheme.Default;
            case "github":
                return HtmlTheme.GitHub;
            case "dark":
                return HtmlTheme.Dark;
            case "light":
                return HtmlTheme.Light;
            case "unstyled":
                return HtmlTheme.Unstyled;
            default:
                throw new JsonException($"Unknown HtmlTheme value: {name}");
        }
    }

    /// <summary>
    /// Writes the wire name of <paramref name="value"/> as a JSON string.
    /// </summary>
    /// <exception cref="JsonException">When <paramref name="value"/> is not a declared variant.</exception>
    public override void Write(Utf8JsonWriter writer, HtmlTheme value, JsonSerializerOptions options)
    {
        string wireName;
        switch (value)
        {
            case HtmlTheme.Default:
                wireName = "default";
                break;
            case HtmlTheme.GitHub:
                wireName = "github";
                break;
            case HtmlTheme.Dark:
                wireName = "dark";
                break;
            case HtmlTheme.Light:
                wireName = "light";
                break;
            case HtmlTheme.Unstyled:
                wireName = "unstyled";
                break;
            default:
                throw new JsonException($"Unknown HtmlTheme value: {value}");
        }

        writer.WriteStringValue(wireName);
    }
}
|
||||
172
packages/csharp/src/Kreuzberg/ImageExtractionConfig.cs
generated
Normal file
172
packages/csharp/src/Kreuzberg/ImageExtractionConfig.cs
generated
Normal file
@@ -0,0 +1,172 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// Image extraction configuration.
///
/// Immutable settings record. Each property is bound to a snake_case JSON key
/// through <see cref="JsonPropertyNameAttribute"/>.
/// </summary>
public sealed record ImageExtractionConfig
{
    /// <summary>
    /// Extract images from documents.
    /// </summary>
    [JsonPropertyName("extract_images")]
    public bool ExtractImages { get; init; } = true;

    /// <summary>
    /// Target DPI for image normalization.
    /// </summary>
    [JsonPropertyName("target_dpi")]
    public int TargetDpi { get; init; } = 300;

    /// <summary>
    /// Maximum dimension for images (width or height).
    /// </summary>
    [JsonPropertyName("max_image_dimension")]
    public int MaxImageDimension { get; init; } = 4096;

    /// <summary>
    /// Whether to inject image reference placeholders into markdown output.
    /// When <c>true</c> (default), image references (Markdown image syntax such as
    /// <c>![alt](target)</c>) are appended to the markdown. Set to <c>false</c> to
    /// extract images as data without polluting the markdown output.
    /// </summary>
    [JsonPropertyName("inject_placeholders")]
    public bool InjectPlaceholders { get; init; } = true;

    /// <summary>
    /// Automatically adjust DPI based on image content.
    /// </summary>
    [JsonPropertyName("auto_adjust_dpi")]
    public bool AutoAdjustDpi { get; init; } = true;

    /// <summary>
    /// Minimum DPI threshold.
    /// </summary>
    [JsonPropertyName("min_dpi")]
    public int MinDpi { get; init; } = 72;

    /// <summary>
    /// Maximum DPI threshold.
    /// </summary>
    [JsonPropertyName("max_dpi")]
    public int MaxDpi { get; init; } = 600;

    /// <summary>
    /// Maximum number of image objects to extract per PDF page.
    ///
    /// Some PDFs (e.g. technical diagrams stored as thousands of raster fragments)
    /// can trigger extremely long or indefinite extraction times when every image
    /// object on a dense page is decoded individually via the PDF extractor. Setting this
    /// limit causes kreuzberg to stop collecting individual images once the count
    /// per page reaches the cap and emit a warning instead.
    ///
    /// <c>null</c> (default) means no limit — all images are extracted.
    /// </summary>
    [JsonPropertyName("max_images_per_page")]
    public uint? MaxImagesPerPage { get; init; } = null;

    /// <summary>
    /// When <c>true</c> (default), extracted images are classified by kind and grouped
    /// into clusters where they appear to belong to one figure.
    /// </summary>
    [JsonPropertyName("classify")]
    public bool Classify { get; init; } = true;

    /// <summary>
    /// When <c>true</c>, full-page renders produced during OCR preprocessing are captured
    /// and returned as <c>ImageKind.PageRaster</c> entries in <c>ExtractionResult.images</c>.
    ///
    /// **PDF + OCR only.** No rasters are captured for non-PDF inputs or when the
    /// document-level OCR bypass is active (whole-document backend). When OCR is
    /// enabled and this flag is set but the active backend skips per-page rendering,
    /// a <c>ProcessingWarning</c> is emitted in <c>ExtractionResult.processing_warnings</c>.
    ///
    /// Defaults to <c>false</c>. Enable when downstream consumers need page thumbnails
    /// (e.g. citation previews, visual grounding).
    /// </summary>
    [JsonPropertyName("include_page_rasters")]
    public bool IncludePageRasters { get; init; } = false;

    /// <summary>
    /// Run OCR on extracted images and include the recognized text in the document content.
    ///
    /// When <c>true</c> (default) and <c>ExtractionConfig.ocr</c> is configured, extracted images
    /// are processed with the configured OCR backend. Set to <c>false</c> to extract images
    /// without OCR processing, even when OCR is enabled.
    /// </summary>
    [JsonPropertyName("run_ocr_on_images")]
    public bool RunOcrOnImages { get; init; } = true;

    /// <summary>
    /// When <c>true</c>, image OCR results are rendered as plain text without the
    /// Markdown image placeholder (<c>![alt](target)</c> form). Only takes effect when
    /// <c>run_ocr_on_images</c> is also <c>true</c>.
    /// </summary>
    [JsonPropertyName("ocr_text_only")]
    public bool OcrTextOnly { get; init; } = false;

    /// <summary>
    /// When <c>true</c> and <c>ocr_text_only</c> is <c>false</c>, append the OCR text after
    /// the image placeholder in the rendered output.
    /// </summary>
    [JsonPropertyName("append_ocr_text")]
    public bool AppendOcrText { get; init; } = false;

    /// <summary>
    /// Parse a <see cref="ImageExtractionConfig"/> from a JSON string.
    /// </summary>
    /// <param name="json">JSON text describing the configuration.</param>
    /// <returns>The deserialized configuration; never <c>null</c>.</returns>
    /// <exception cref="KreuzbergException">
    /// When the JSON cannot be deserialised or deserialises to <c>null</c>. Any other
    /// exception raised during parsing is wrapped and kept as the inner exception.
    /// </exception>
    public static ImageExtractionConfig FromJson(string json)
    {
        try
        {
            return JsonSerializer.Deserialize<ImageExtractionConfig>(json, JsonOptions)
                ?? throw new KreuzbergException($"Failed to parse ImageExtractionConfig from JSON: deserializer returned null");
        }
        catch (Exception e) when (e is not KreuzbergException)
        {
            // KreuzbergException passes through untouched; everything else is wrapped.
            throw new KreuzbergException($"Failed to parse ImageExtractionConfig from JSON: {e.Message}", e);
        }
    }

    /// <summary>Options used by this record when deserializing JSON.</summary>
    private static readonly JsonSerializerOptions JsonOptions = new()
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>Options for serializing config/input objects to FFI. Strips nulls
    /// (nullable C# fields default to null and would override required Rust fields with
    /// non-deserialisable nulls) but preserves explicit false/0 so caller intent is kept.</summary>
    private static readonly JsonSerializerOptions JsonSerializationOptions = new()
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>
    /// Build the default configuration by asking the native library for its default
    /// instance, converting that instance to JSON, and deserializing the JSON here.
    /// </summary>
    /// <returns>The default configuration; never <c>null</c>.</returns>
    /// <exception cref="KreuzbergException">
    /// When the native layer yields no JSON text or the JSON deserialises to <c>null</c>.
    /// </exception>
    public static ImageExtractionConfig Default()
    {
        var nativeResult = NativeMethods.ImageExtractionConfigDefault();
        string? json;
        try
        {
            var jsonPtr = NativeMethods.ImageExtractionConfigToJson(nativeResult);
            try
            {
                json = global::System.Runtime.InteropServices.Marshal.PtrToStringUTF8(jsonPtr);
            }
            finally
            {
                // Release the native string even if marshalling throws.
                NativeMethods.FreeString(jsonPtr);
            }
        }
        finally
        {
            // Release the native config handle on every path.
            NativeMethods.ImageExtractionConfigFree(nativeResult);
        }

        // PtrToStringUTF8 yields null for a zero pointer; report it instead of
        // returning null from a method whose return type is non-nullable.
        if (json is null)
        {
            throw new KreuzbergException("Failed to load default ImageExtractionConfig: native layer returned no JSON");
        }

        return JsonSerializer.Deserialize<ImageExtractionConfig>(json, JsonOptions)
            ?? throw new KreuzbergException("Failed to load default ImageExtractionConfig: deserializer returned null");
    }
}
|
||||
125
packages/csharp/src/Kreuzberg/ImageKind.cs
generated
Normal file
125
packages/csharp/src/Kreuzberg/ImageKind.cs
generated
Normal file
@@ -0,0 +1,125 @@
|
||||
// This file is auto-generated by alef. DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
/// <summary>
/// Heuristic classification of what an image likely depicts.
/// Serialized as a snake_case string by <see cref="ImageKindJsonConverter"/>.
/// </summary>
[JsonConverter(typeof(ImageKindJsonConverter))]
public enum ImageKind
{
    /// <summary>Photographic image (natural scene, photograph).</summary>
    [JsonPropertyName("photograph")]
    Photograph = 0,

    /// <summary>Technical or schematic diagram.</summary>
    [JsonPropertyName("diagram")]
    Diagram = 1,

    /// <summary>Chart, graph, or plot.</summary>
    [JsonPropertyName("chart")]
    Chart = 2,

    /// <summary>Freehand or technical drawing.</summary>
    [JsonPropertyName("drawing")]
    Drawing = 3,

    /// <summary>Text-heavy image (scanned text, document).</summary>
    [JsonPropertyName("text_block")]
    TextBlock = 4,

    /// <summary>Decorative element or border.</summary>
    [JsonPropertyName("decoration")]
    Decoration = 5,

    /// <summary>Logo or brand mark.</summary>
    [JsonPropertyName("logo")]
    Logo = 6,

    /// <summary>Small icon.</summary>
    [JsonPropertyName("icon")]
    Icon = 7,

    /// <summary>Fragment of a larger tiled image (tile of a technical drawing).</summary>
    [JsonPropertyName("tile_fragment")]
    TileFragment = 8,

    /// <summary>Mask or transparency map.</summary>
    [JsonPropertyName("mask")]
    Mask = 9,

    /// <summary>Full-page render produced during OCR preprocessing; used as a citation thumbnail.</summary>
    [JsonPropertyName("page_raster")]
    PageRaster = 10,

    /// <summary>Could not classify with reasonable confidence.</summary>
    [JsonPropertyName("unknown")]
    Unknown = 11,
}

/// <summary>
/// JSON converter for <see cref="ImageKind"/> that reads and writes each variant
/// under its explicit wire name.
/// </summary>
internal sealed class ImageKindJsonConverter : JsonConverter<ImageKind>
{
    /// <summary>Reads the current token as a string and maps it to a variant.</summary>
    /// <exception cref="JsonException">When the string matches no known wire name.</exception>
    public override ImageKind Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options)
    {
        return FromWireName(reader.GetString());
    }

    /// <summary>Writes the wire name of <paramref name="value"/> as a JSON string.</summary>
    /// <exception cref="JsonException">When <paramref name="value"/> is not a declared variant.</exception>
    public override void Write(Utf8JsonWriter writer, ImageKind value, JsonSerializerOptions options)
    {
        // Resolve the name first so nothing is written when the value is invalid.
        string wireName = ToWireName(value);
        writer.WriteStringValue(wireName);
    }

    /// <summary>Maps a wire name to its variant, rejecting anything unrecognised (including null).</summary>
    private static ImageKind FromWireName(string? value)
    {
        switch (value)
        {
            case "photograph": return ImageKind.Photograph;
            case "diagram": return ImageKind.Diagram;
            case "chart": return ImageKind.Chart;
            case "drawing": return ImageKind.Drawing;
            case "text_block": return ImageKind.TextBlock;
            case "decoration": return ImageKind.Decoration;
            case "logo": return ImageKind.Logo;
            case "icon": return ImageKind.Icon;
            case "tile_fragment": return ImageKind.TileFragment;
            case "mask": return ImageKind.Mask;
            case "page_raster": return ImageKind.PageRaster;
            case "unknown": return ImageKind.Unknown;
            default: throw new JsonException($"Unknown ImageKind value: {value}");
        }
    }

    /// <summary>Maps a variant to its wire name.</summary>
    private static string ToWireName(ImageKind value)
    {
        switch (value)
        {
            case ImageKind.Photograph: return "photograph";
            case ImageKind.Diagram: return "diagram";
            case ImageKind.Chart: return "chart";
            case ImageKind.Drawing: return "drawing";
            case ImageKind.TextBlock: return "text_block";
            case ImageKind.Decoration: return "decoration";
            case ImageKind.Logo: return "logo";
            case ImageKind.Icon: return "icon";
            case ImageKind.TileFragment: return "tile_fragment";
            case ImageKind.Mask: return "mask";
            case ImageKind.PageRaster: return "page_raster";
            case ImageKind.Unknown: return "unknown";
            default: throw new JsonException($"Unknown ImageKind value: {value}");
        }
    }
}
|
||||
82
packages/csharp/src/Kreuzberg/ImageMetadata.cs
generated
Normal file
82
packages/csharp/src/Kreuzberg/ImageMetadata.cs
generated
Normal file
@@ -0,0 +1,82 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// Image metadata extracted from image files: dimensions, format, and EXIF data.
/// </summary>
public sealed record ImageMetadata
{
    /// <summary>Options used by <see cref="FromJson"/> when deserializing.</summary>
    private static readonly JsonSerializerOptions JsonOptions = new()
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>
    /// Options for serializing config/input objects to FFI: null members are omitted,
    /// while explicit false/0 values are still written.
    /// </summary>
    private static readonly JsonSerializerOptions JsonSerializationOptions = new()
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>Image width in pixels.</summary>
    [JsonPropertyName("width")]
    public uint Width { get; init; }

    /// <summary>Image height in pixels.</summary>
    [JsonPropertyName("height")]
    public uint Height { get; init; }

    /// <summary>Image format (e.g., "PNG", "JPEG", "TIFF").</summary>
    [JsonPropertyName("format")]
    public string Format { get; init; } = "";

    /// <summary>EXIF metadata tags; starts out as an empty dictionary.</summary>
    [JsonPropertyName("exif")]
    public Dictionary<string, string> Exif { get; init; } = new();

    /// <summary>
    /// Parse a <see cref="ImageMetadata"/> from a JSON string.
    /// </summary>
    /// <param name="json">JSON text to deserialize.</param>
    /// <returns>The deserialized instance; never <c>null</c>.</returns>
    /// <exception cref="KreuzbergException">
    /// When the JSON cannot be deserialised or deserialises to <c>null</c>.
    /// </exception>
    public static ImageMetadata FromJson(string json)
    {
        ImageMetadata? parsed;
        try
        {
            parsed = JsonSerializer.Deserialize<ImageMetadata>(json, JsonOptions);
        }
        catch (Exception ex) when (ex is not KreuzbergException)
        {
            // Wrap everything except KreuzbergException, which propagates unchanged.
            throw new KreuzbergException("Failed to parse ImageMetadata from JSON: " + ex.Message, ex);
        }

        if (parsed is null)
        {
            throw new KreuzbergException("Failed to parse ImageMetadata from JSON: deserializer returned null");
        }

        return parsed;
    }
}
|
||||
93
packages/csharp/src/Kreuzberg/ImageMetadataType.cs
generated
Normal file
93
packages/csharp/src/Kreuzberg/ImageMetadataType.cs
generated
Normal file
@@ -0,0 +1,93 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// Image element metadata.
/// </summary>
public sealed record ImageMetadataType
{
    /// <summary>Options used by <see cref="FromJson"/> when deserializing.</summary>
    private static readonly JsonSerializerOptions JsonOptions = new()
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>
    /// Options for serializing config/input objects to FFI: null members are omitted,
    /// while explicit false/0 values are still written.
    /// </summary>
    private static readonly JsonSerializerOptions JsonSerializationOptions = new()
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>Image source (URL, data URI, or SVG content). Required.</summary>
    [JsonPropertyName("src")]
    public required string Src { get; init; }

    /// <summary>Alternative text from the alt attribute, if any.</summary>
    [JsonPropertyName("alt")]
    public string? Alt { get; init; }

    /// <summary>Title attribute, if any.</summary>
    [JsonPropertyName("title")]
    public string? Title { get; init; }

    /// <summary>Image dimensions as (width, height) if available.</summary>
    [JsonPropertyName("dimensions")]
    public List<uint>? Dimensions { get; init; }

    /// <summary>Image type classification. Required.</summary>
    [JsonConverter(typeof(ImageTypeJsonConverter))]
    [JsonPropertyName("image_type")]
    public required ImageType ImageType { get; init; }

    /// <summary>Additional attributes as key-value pairs; starts out empty.</summary>
    [JsonPropertyName("attributes")]
    public List<List<string>> Attributes { get; init; } = new();

    /// <summary>
    /// Parse a <see cref="ImageMetadataType"/> from a JSON string.
    /// </summary>
    /// <param name="json">JSON text to deserialize.</param>
    /// <returns>The deserialized instance; never <c>null</c>.</returns>
    /// <exception cref="KreuzbergException">
    /// When the JSON cannot be deserialised or deserialises to <c>null</c>.
    /// </exception>
    public static ImageMetadataType FromJson(string json)
    {
        ImageMetadataType? parsed;
        try
        {
            parsed = JsonSerializer.Deserialize<ImageMetadataType>(json, JsonOptions);
        }
        catch (Exception ex) when (ex is not KreuzbergException)
        {
            // Wrap everything except KreuzbergException, which propagates unchanged.
            throw new KreuzbergException("Failed to parse ImageMetadataType from JSON: " + ex.Message, ex);
        }

        if (parsed is null)
        {
            throw new KreuzbergException("Failed to parse ImageMetadataType from JSON: deserializer returned null");
        }

        return parsed;
    }
}
|
||||
112
packages/csharp/src/Kreuzberg/ImagePreprocessingConfig.cs
generated
Normal file
112
packages/csharp/src/Kreuzberg/ImagePreprocessingConfig.cs
generated
Normal file
@@ -0,0 +1,112 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// Image preprocessing configuration for OCR.
///
/// These settings control how images are preprocessed before OCR to improve
/// text recognition quality. Different preprocessing strategies work better
/// for different document types.
/// </summary>
public sealed record ImagePreprocessingConfig
{
    /// <summary>
    /// Target DPI for the image (300 is standard, 600 for small text).
    /// </summary>
    [JsonPropertyName("target_dpi")]
    public int TargetDpi { get; init; } = 300;

    /// <summary>
    /// Auto-detect and correct image rotation.
    /// </summary>
    [JsonPropertyName("auto_rotate")]
    public bool AutoRotate { get; init; } = true;

    /// <summary>
    /// Correct skew (tilted images).
    /// </summary>
    [JsonPropertyName("deskew")]
    public bool Deskew { get; init; } = true;

    /// <summary>
    /// Remove noise from the image.
    /// </summary>
    [JsonPropertyName("denoise")]
    public bool Denoise { get; init; } = false;

    /// <summary>
    /// Enhance contrast for better text visibility.
    /// </summary>
    [JsonPropertyName("contrast_enhance")]
    public bool ContrastEnhance { get; init; } = false;

    /// <summary>
    /// Binarization method: "otsu", "sauvola", "adaptive".
    /// </summary>
    [JsonPropertyName("binarization_method")]
    public string BinarizationMethod { get; init; } = "otsu";

    /// <summary>
    /// Invert colors (white text on black → black on white).
    /// </summary>
    [JsonPropertyName("invert_colors")]
    public bool InvertColors { get; init; } = false;

    /// <summary>
    /// Parse a <see cref="ImagePreprocessingConfig"/> from a JSON string.
    /// </summary>
    /// <param name="json">JSON text describing the configuration.</param>
    /// <returns>The deserialized configuration; never <c>null</c>.</returns>
    /// <exception cref="KreuzbergException">
    /// When the JSON cannot be deserialised or deserialises to <c>null</c>. Any other
    /// exception raised during parsing is wrapped and kept as the inner exception.
    /// </exception>
    public static ImagePreprocessingConfig FromJson(string json)
    {
        try
        {
            return JsonSerializer.Deserialize<ImagePreprocessingConfig>(json, JsonOptions)
                ?? throw new KreuzbergException($"Failed to parse ImagePreprocessingConfig from JSON: deserializer returned null");
        }
        catch (Exception e) when (e is not KreuzbergException)
        {
            // KreuzbergException passes through untouched; everything else is wrapped.
            throw new KreuzbergException($"Failed to parse ImagePreprocessingConfig from JSON: {e.Message}", e);
        }
    }

    /// <summary>Options used by this record when deserializing JSON.</summary>
    private static readonly JsonSerializerOptions JsonOptions = new()
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>Options for serializing config/input objects to FFI. Strips nulls
    /// (nullable C# fields default to null and would override required Rust fields with
    /// non-deserialisable nulls) but preserves explicit false/0 so caller intent is kept.</summary>
    private static readonly JsonSerializerOptions JsonSerializationOptions = new()
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>
    /// Build the default configuration by asking the native library for its default
    /// instance, converting that instance to JSON, and deserializing the JSON here.
    /// </summary>
    /// <returns>The default configuration; never <c>null</c>.</returns>
    /// <exception cref="KreuzbergException">
    /// When the native layer yields no JSON text or the JSON deserialises to <c>null</c>.
    /// </exception>
    public static ImagePreprocessingConfig Default()
    {
        var nativeResult = NativeMethods.ImagePreprocessingConfigDefault();
        string? json;
        try
        {
            var jsonPtr = NativeMethods.ImagePreprocessingConfigToJson(nativeResult);
            try
            {
                json = global::System.Runtime.InteropServices.Marshal.PtrToStringUTF8(jsonPtr);
            }
            finally
            {
                // Release the native string even if marshalling throws.
                NativeMethods.FreeString(jsonPtr);
            }
        }
        finally
        {
            // Release the native config handle on every path.
            NativeMethods.ImagePreprocessingConfigFree(nativeResult);
        }

        // PtrToStringUTF8 yields null for a zero pointer; report it instead of
        // returning null from a method whose return type is non-nullable.
        if (json is null)
        {
            throw new KreuzbergException("Failed to load default ImagePreprocessingConfig: native layer returned no JSON");
        }

        return JsonSerializer.Deserialize<ImagePreprocessingConfig>(json, JsonOptions)
            ?? throw new KreuzbergException("Failed to load default ImagePreprocessingConfig: deserializer returned null");
    }
}
|
||||
131
packages/csharp/src/Kreuzberg/ImagePreprocessingMetadata.cs
generated
Normal file
131
packages/csharp/src/Kreuzberg/ImagePreprocessingMetadata.cs
generated
Normal file
@@ -0,0 +1,131 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// Image preprocessing metadata.
///
/// Records the transformations applied to an image during OCR preprocessing,
/// including DPI normalization, resizing, and resampling.
/// </summary>
public sealed record ImagePreprocessingMetadata
{
    /// <summary>Options used by <see cref="FromJson"/> when deserializing.</summary>
    private static readonly JsonSerializerOptions JsonOptions = new()
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>
    /// Options for serializing config/input objects to FFI: null members are omitted,
    /// while explicit false/0 values are still written.
    /// </summary>
    private static readonly JsonSerializerOptions JsonSerializationOptions = new()
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>Original image dimensions (width, height) in pixels.</summary>
    [JsonPropertyName("original_dimensions")]
    public List<ulong> OriginalDimensions { get; init; } = new();

    /// <summary>Original image DPI (horizontal, vertical).</summary>
    [JsonPropertyName("original_dpi")]
    public List<double> OriginalDpi { get; init; } = new();

    /// <summary>Target DPI from configuration.</summary>
    [JsonPropertyName("target_dpi")]
    public int TargetDpi { get; init; }

    /// <summary>Scaling factor applied to the image.</summary>
    [JsonPropertyName("scale_factor")]
    public double ScaleFactor { get; init; }

    /// <summary>Whether DPI was auto-adjusted based on content.</summary>
    [JsonPropertyName("auto_adjusted")]
    public bool AutoAdjusted { get; init; }

    /// <summary>Final DPI after processing.</summary>
    [JsonPropertyName("final_dpi")]
    public int FinalDpi { get; init; }

    /// <summary>New dimensions after resizing (if resized).</summary>
    [JsonPropertyName("new_dimensions")]
    public List<ulong>? NewDimensions { get; init; }

    /// <summary>Resampling algorithm used ("LANCZOS3", "CATMULLROM", etc.). Required.</summary>
    [JsonPropertyName("resample_method")]
    public required string ResampleMethod { get; init; }

    /// <summary>Whether dimensions were clamped to max_image_dimension.</summary>
    [JsonPropertyName("dimension_clamped")]
    public bool DimensionClamped { get; init; }

    /// <summary>Calculated optimal DPI (if auto_adjust_dpi enabled).</summary>
    [JsonPropertyName("calculated_dpi")]
    public int? CalculatedDpi { get; init; }

    /// <summary>Whether resize was skipped (dimensions already optimal).</summary>
    [JsonPropertyName("skipped_resize")]
    public bool SkippedResize { get; init; }

    /// <summary>Error message if resize failed.</summary>
    [JsonPropertyName("resize_error")]
    public string? ResizeError { get; init; }

    /// <summary>
    /// Parse a <see cref="ImagePreprocessingMetadata"/> from a JSON string.
    /// </summary>
    /// <param name="json">JSON text to deserialize.</param>
    /// <returns>The deserialized instance; never <c>null</c>.</returns>
    /// <exception cref="KreuzbergException">
    /// When the JSON cannot be deserialised or deserialises to <c>null</c>.
    /// </exception>
    public static ImagePreprocessingMetadata FromJson(string json)
    {
        ImagePreprocessingMetadata? parsed;
        try
        {
            parsed = JsonSerializer.Deserialize<ImagePreprocessingMetadata>(json, JsonOptions);
        }
        catch (Exception ex) when (ex is not KreuzbergException)
        {
            // Wrap everything except KreuzbergException, which propagates unchanged.
            throw new KreuzbergException("Failed to parse ImagePreprocessingMetadata from JSON: " + ex.Message, ex);
        }

        if (parsed is null)
        {
            throw new KreuzbergException("Failed to parse ImagePreprocessingMetadata from JSON: deserializer returned null");
        }

        return parsed;
    }
}
|
||||
14
packages/csharp/src/Kreuzberg/ImageProcessingException.cs
generated
Normal file
14
packages/csharp/src/Kreuzberg/ImageProcessingException.cs
generated
Normal file
@@ -0,0 +1,14 @@
|
||||
// This file is auto-generated by alef. DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// Exception type derived from <see cref="KreuzbergErrorException"/>.
/// Presumably raised for image-processing failures — confirm against the throwing code.
/// </summary>
public class ImageProcessingException : KreuzbergErrorException
{
    /// <summary>Creates the exception with a message.</summary>
    /// <param name="message">Description of the failure, forwarded to the base class.</param>
    public ImageProcessingException(string message)
        : base(message)
    {
    }

    /// <summary>Creates the exception with a message and the exception that caused it.</summary>
    /// <param name="message">Description of the failure, forwarded to the base class.</param>
    /// <param name="innerException">Underlying cause, forwarded to the base class.</param>
    public ImageProcessingException(string message, Exception innerException)
        : base(message, innerException)
    {
    }
}
|
||||
69
packages/csharp/src/Kreuzberg/ImageType.cs
generated
Normal file
69
packages/csharp/src/Kreuzberg/ImageType.cs
generated
Normal file
@@ -0,0 +1,69 @@
|
||||
// This file is auto-generated by alef. DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
/// <summary>
/// Image type classification.
/// Serialized as a kebab-case/lowercase string by <see cref="ImageTypeJsonConverter"/>.
/// </summary>
[JsonConverter(typeof(ImageTypeJsonConverter))]
public enum ImageType
{
    /// <summary>Data URI image.</summary>
    [JsonPropertyName("data-uri")]
    DataUri = 0,

    /// <summary>Inline SVG.</summary>
    [JsonPropertyName("inline-svg")]
    InlineSvg = 1,

    /// <summary>External image URL.</summary>
    [JsonPropertyName("external")]
    External = 2,

    /// <summary>Relative path image.</summary>
    [JsonPropertyName("relative")]
    Relative = 3,
}

/// <summary>
/// JSON converter for <see cref="ImageType"/> that reads and writes each variant
/// under its explicit wire name.
/// </summary>
internal sealed class ImageTypeJsonConverter : JsonConverter<ImageType>
{
    /// <summary>Reads the current token as a string and maps it to a variant.</summary>
    /// <exception cref="JsonException">When the string matches no known wire name.</exception>
    public override ImageType Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options)
    {
        return FromWireName(reader.GetString());
    }

    /// <summary>Writes the wire name of <paramref name="value"/> as a JSON string.</summary>
    /// <exception cref="JsonException">When <paramref name="value"/> is not a declared variant.</exception>
    public override void Write(Utf8JsonWriter writer, ImageType value, JsonSerializerOptions options)
    {
        // Resolve the name first so nothing is written when the value is invalid.
        string wireName = ToWireName(value);
        writer.WriteStringValue(wireName);
    }

    /// <summary>Maps a wire name to its variant, rejecting anything unrecognised (including null).</summary>
    private static ImageType FromWireName(string? value)
    {
        switch (value)
        {
            case "data-uri": return ImageType.DataUri;
            case "inline-svg": return ImageType.InlineSvg;
            case "external": return ImageType.External;
            case "relative": return ImageType.Relative;
            default: throw new JsonException($"Unknown ImageType value: {value}");
        }
    }

    /// <summary>Maps a variant to its wire name.</summary>
    private static string ToWireName(ImageType value)
    {
        switch (value)
        {
            case ImageType.DataUri: return "data-uri";
            case ImageType.InlineSvg: return "inline-svg";
            case ImageType.External: return "external";
            case ImageType.Relative: return "relative";
            default: throw new JsonException($"Unknown ImageType value: {value}");
        }
    }
}
|
||||
82
packages/csharp/src/Kreuzberg/InlineElement.cs
generated
Normal file
82
packages/csharp/src/Kreuzberg/InlineElement.cs
generated
Normal file
@@ -0,0 +1,82 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// Inline element within a block: text with formatting, links, images, etc.
/// </summary>
public sealed record InlineElement
{
    /// <summary>Options used by <see cref="FromJson"/> when deserializing.</summary>
    private static readonly JsonSerializerOptions JsonOptions = new()
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>
    /// Options for serializing config/input objects to FFI: null members are omitted,
    /// while explicit false/0 values are still written.
    /// </summary>
    private static readonly JsonSerializerOptions JsonSerializationOptions = new()
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>Type of inline element. Required.</summary>
    [JsonPropertyName("element_type")]
    public required InlineType ElementType { get; init; }

    /// <summary>Text content. Required.</summary>
    [JsonPropertyName("content")]
    public required string Content { get; init; }

    /// <summary>Element attributes, if any.</summary>
    [JsonPropertyName("attributes")]
    public string? Attributes { get; init; }

    /// <summary>Additional metadata (e.g., href for links, src/alt for images).</summary>
    [JsonPropertyName("metadata")]
    public Dictionary<string, string>? Metadata { get; init; }

    /// <summary>
    /// Parse a <see cref="InlineElement"/> from a JSON string.
    /// </summary>
    /// <param name="json">JSON text to deserialize.</param>
    /// <returns>The deserialized instance; never <c>null</c>.</returns>
    /// <exception cref="KreuzbergException">
    /// When the JSON cannot be deserialised or deserialises to <c>null</c>.
    /// </exception>
    public static InlineElement FromJson(string json)
    {
        InlineElement? parsed;
        try
        {
            parsed = JsonSerializer.Deserialize<InlineElement>(json, JsonOptions);
        }
        catch (Exception ex) when (ex is not KreuzbergException)
        {
            // Wrap everything except KreuzbergException, which propagates unchanged.
            throw new KreuzbergException("Failed to parse InlineElement from JSON: " + ex.Message, ex);
        }

        if (parsed is null)
        {
            throw new KreuzbergException("Failed to parse InlineElement from JSON: deserializer returned null");
        }

        return parsed;
    }
}
|
||||
105
packages/csharp/src/Kreuzberg/InlineType.cs
generated
Normal file
105
packages/csharp/src/Kreuzberg/InlineType.cs
generated
Normal file
@@ -0,0 +1,105 @@
|
||||
// This file is auto-generated by alef. DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
/// <summary>
/// Types of inline elements in Djot.
/// </summary>
/// <remarks>
/// JSON conversion is delegated to <see cref="InlineTypeJsonConverter"/>. The numeric
/// values are spelled out explicitly and equal the implicit declaration-order values.
/// </remarks>
[JsonConverter(typeof(InlineTypeJsonConverter))]
public enum InlineType
{
    /// <summary>JSON name: <c>text</c>.</summary>
    [JsonPropertyName("text")]
    Text = 0,

    /// <summary>JSON name: <c>strong</c>.</summary>
    [JsonPropertyName("strong")]
    Strong = 1,

    /// <summary>JSON name: <c>emphasis</c>.</summary>
    [JsonPropertyName("emphasis")]
    Emphasis = 2,

    /// <summary>JSON name: <c>highlight</c>.</summary>
    [JsonPropertyName("highlight")]
    Highlight = 3,

    /// <summary>JSON name: <c>subscript</c>.</summary>
    [JsonPropertyName("subscript")]
    Subscript = 4,

    /// <summary>JSON name: <c>superscript</c>.</summary>
    [JsonPropertyName("superscript")]
    Superscript = 5,

    /// <summary>JSON name: <c>insert</c>.</summary>
    [JsonPropertyName("insert")]
    Insert = 6,

    /// <summary>JSON name: <c>delete</c>.</summary>
    [JsonPropertyName("delete")]
    Delete = 7,

    /// <summary>JSON name: <c>code</c>.</summary>
    [JsonPropertyName("code")]
    Code = 8,

    /// <summary>JSON name: <c>link</c>.</summary>
    [JsonPropertyName("link")]
    Link = 9,

    /// <summary>JSON name: <c>image</c>.</summary>
    [JsonPropertyName("image")]
    Image = 10,

    /// <summary>JSON name: <c>span</c>.</summary>
    [JsonPropertyName("span")]
    Span = 11,

    /// <summary>JSON name: <c>math</c>.</summary>
    [JsonPropertyName("math")]
    Math = 12,

    /// <summary>JSON name: <c>raw_inline</c>.</summary>
    [JsonPropertyName("raw_inline")]
    RawInline = 13,

    /// <summary>JSON name: <c>footnote_ref</c>.</summary>
    [JsonPropertyName("footnote_ref")]
    FootnoteRef = 14,

    /// <summary>JSON name: <c>symbol</c>.</summary>
    [JsonPropertyName("symbol")]
    Symbol = 15,
}
|
||||
|
||||
|
||||
/// <summary>
/// JSON converter for <see cref="InlineType"/> that maps each member to and from its
/// explicit snake_case string name.
/// </summary>
internal sealed class InlineTypeJsonConverter : JsonConverter<InlineType>
{
    /// <summary>
    /// Reads the current token as a string and maps it to an <see cref="InlineType"/>.
    /// </summary>
    /// <exception cref="JsonException">When the string is not a known variant name (including null).</exception>
    public override InlineType Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options)
    {
        var wireName = reader.GetString();
        switch (wireName)
        {
            case "text":
                return InlineType.Text;
            case "strong":
                return InlineType.Strong;
            case "emphasis":
                return InlineType.Emphasis;
            case "highlight":
                return InlineType.Highlight;
            case "subscript":
                return InlineType.Subscript;
            case "superscript":
                return InlineType.Superscript;
            case "insert":
                return InlineType.Insert;
            case "delete":
                return InlineType.Delete;
            case "code":
                return InlineType.Code;
            case "link":
                return InlineType.Link;
            case "image":
                return InlineType.Image;
            case "span":
                return InlineType.Span;
            case "math":
                return InlineType.Math;
            case "raw_inline":
                return InlineType.RawInline;
            case "footnote_ref":
                return InlineType.FootnoteRef;
            case "symbol":
                return InlineType.Symbol;
            default:
                throw new JsonException($"Unknown InlineType value: {wireName}");
        }
    }

    /// <summary>
    /// Writes <paramref name="value"/> as its string name.
    /// </summary>
    /// <exception cref="JsonException">When <paramref name="value"/> is not a declared member.</exception>
    public override void Write(Utf8JsonWriter writer, InlineType value, JsonSerializerOptions options)
    {
        // The name is resolved before anything is written, so an unknown value
        // throws without emitting partial output.
        var wireName = ToWireName(value);
        writer.WriteStringValue(wireName);
    }

    /// <summary>Maps an <see cref="InlineType"/> member to its JSON string name.</summary>
    private static string ToWireName(InlineType kind)
    {
        switch (kind)
        {
            case InlineType.Text:
                return "text";
            case InlineType.Strong:
                return "strong";
            case InlineType.Emphasis:
                return "emphasis";
            case InlineType.Highlight:
                return "highlight";
            case InlineType.Subscript:
                return "subscript";
            case InlineType.Superscript:
                return "superscript";
            case InlineType.Insert:
                return "insert";
            case InlineType.Delete:
                return "delete";
            case InlineType.Code:
                return "code";
            case InlineType.Link:
                return "link";
            case InlineType.Image:
                return "image";
            case InlineType.Span:
                return "span";
            case InlineType.Math:
                return "math";
            case InlineType.RawInline:
                return "raw_inline";
            case InlineType.FootnoteRef:
                return "footnote_ref";
            case InlineType.Symbol:
                return "symbol";
            default:
                throw new JsonException($"Unknown InlineType value: {kind}");
        }
    }
}
|
||||
14
packages/csharp/src/Kreuzberg/IoException.cs
generated
Normal file
14
packages/csharp/src/Kreuzberg/IoException.cs
generated
Normal file
@@ -0,0 +1,14 @@
|
||||
// This file is auto-generated by alef. DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// Exception type derived from <see cref="KreuzbergErrorException"/>; both constructors
/// simply forward their arguments to the base class.
/// </summary>
public class IoException : KreuzbergErrorException
{
    /// <summary>Creates the exception with a message.</summary>
    /// <param name="message">Description of the failure.</param>
    public IoException(string message)
        : base(message)
    {
    }

    /// <summary>Creates the exception with a message and the exception that caused it.</summary>
    /// <param name="message">Description of the failure.</param>
    /// <param name="innerException">The underlying cause.</param>
    public IoException(string message, Exception innerException)
        : base(message, innerException)
    {
    }
}
|
||||
68
packages/csharp/src/Kreuzberg/JatsMetadata.cs
generated
Normal file
68
packages/csharp/src/Kreuzberg/JatsMetadata.cs
generated
Normal file
@@ -0,0 +1,68 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// Metadata for JATS (Journal Article Tag Suite) documents.
/// </summary>
public sealed record JatsMetadata
{
    /// <summary>Value of the <c>copyright</c> JSON field; <c>null</c> unless set.</summary>
    [JsonPropertyName("copyright")]
    public string? Copyright { get; init; }

    /// <summary>Value of the <c>license</c> JSON field; <c>null</c> unless set.</summary>
    [JsonPropertyName("license")]
    public string? License { get; init; }

    /// <summary>Value of the <c>history_dates</c> JSON field; starts as an empty dictionary.</summary>
    [JsonPropertyName("history_dates")]
    public Dictionary<string, string> HistoryDates { get; init; } = new();

    /// <summary>Value of the <c>contributor_roles</c> JSON field; starts as an empty list.</summary>
    [JsonPropertyName("contributor_roles")]
    public List<ContributorRole> ContributorRoles { get; init; } = new List<ContributorRole>();

    /// <summary>
    /// Deserialises a JSON string into a <see cref="JatsMetadata"/>.
    /// </summary>
    /// <param name="json">The JSON text to parse.</param>
    /// <returns>The parsed instance; never <c>null</c>.</returns>
    /// <exception cref="KreuzbergException">
    /// When deserialisation throws, or when the deserialiser yields <c>null</c>.
    /// </exception>
    public static JatsMetadata FromJson(string json)
    {
        JatsMetadata? parsed;
        try
        {
            parsed = JsonSerializer.Deserialize<JatsMetadata>(json, JsonOptions);
        }
        catch (Exception error) when (error is not KreuzbergException)
        {
            // Anything that is not already a KreuzbergException is wrapped so callers
            // only need to handle a single exception type.
            throw new KreuzbergException($"Failed to parse JatsMetadata from JSON: {error.Message}", error);
        }

        if (parsed is null)
        {
            throw new KreuzbergException("Failed to parse JatsMetadata from JSON: deserializer returned null");
        }

        return parsed;
    }

    /// <summary>Options used by <see cref="FromJson"/> when reading JSON.</summary>
    private static readonly JsonSerializerOptions JsonOptions = new JsonSerializerOptions
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>
    /// Options for serialising config/input objects to FFI. Null values are omitted
    /// (nullable C# fields default to null and would otherwise override required Rust
    /// fields with non-deserialisable nulls), while explicit false/0 values are kept
    /// so the caller's intent survives.
    /// </summary>
    private static readonly JsonSerializerOptions JsonSerializationOptions = new JsonSerializerOptions
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };
}
|
||||
80
packages/csharp/src/Kreuzberg/Keyword.cs
generated
Normal file
80
packages/csharp/src/Kreuzberg/Keyword.cs
generated
Normal file
@@ -0,0 +1,80 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// A keyword produced by extraction, together with its metadata.
/// </summary>
public sealed record Keyword
{
    /// <summary>
    /// Text of the keyword.
    /// </summary>
    [JsonPropertyName("text")]
    public required string Text { get; init; }

    /// <summary>
    /// Relevance score; higher is better and the range depends on the algorithm.
    /// </summary>
    [JsonPropertyName("score")]
    public float Score { get; init; } = 0f;

    /// <summary>
    /// The algorithm that extracted this keyword.
    /// </summary>
    [JsonPropertyName("algorithm")]
    public required KeywordAlgorithm Algorithm { get; init; }

    /// <summary>
    /// Optional character offsets at which the keyword appears in the text;
    /// <c>null</c> unless set.
    /// </summary>
    [JsonPropertyName("positions")]
    public List<ulong>? Positions { get; init; }

    /// <summary>
    /// Deserialises a JSON string into a <see cref="Keyword"/>.
    /// </summary>
    /// <param name="json">The JSON text to parse.</param>
    /// <returns>The parsed instance; never <c>null</c>.</returns>
    /// <exception cref="KreuzbergException">
    /// When deserialisation throws, or when the deserialiser yields <c>null</c>.
    /// </exception>
    public static Keyword FromJson(string json)
    {
        Keyword? parsed;
        try
        {
            parsed = JsonSerializer.Deserialize<Keyword>(json, JsonOptions);
        }
        catch (Exception error) when (error is not KreuzbergException)
        {
            // Anything that is not already a KreuzbergException is wrapped so callers
            // only need to handle a single exception type.
            throw new KreuzbergException($"Failed to parse Keyword from JSON: {error.Message}", error);
        }

        if (parsed is null)
        {
            throw new KreuzbergException("Failed to parse Keyword from JSON: deserializer returned null");
        }

        return parsed;
    }

    /// <summary>Options used by <see cref="FromJson"/> when reading JSON.</summary>
    private static readonly JsonSerializerOptions JsonOptions = new JsonSerializerOptions
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>
    /// Options for serialising config/input objects to FFI. Null values are omitted
    /// (nullable C# fields default to null and would otherwise override required Rust
    /// fields with non-deserialisable nulls), while explicit false/0 values are kept
    /// so the caller's intent survives.
    /// </summary>
    private static readonly JsonSerializerOptions JsonSerializationOptions = new JsonSerializerOptions
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };
}
|
||||
55
packages/csharp/src/Kreuzberg/KeywordAlgorithm.cs
generated
Normal file
55
packages/csharp/src/Kreuzberg/KeywordAlgorithm.cs
generated
Normal file
@@ -0,0 +1,55 @@
|
||||
// This file is auto-generated by alef. DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
/// <summary>
/// Selects the keyword extraction algorithm.
/// </summary>
/// <remarks>
/// JSON conversion is delegated to <see cref="KeywordAlgorithmJsonConverter"/>. The numeric
/// values are spelled out explicitly and equal the implicit declaration-order values.
/// </remarks>
[JsonConverter(typeof(KeywordAlgorithmJsonConverter))]
public enum KeywordAlgorithm
{
    /// <summary>
    /// YAKE (Yet Another Keyword Extractor), a statistical approach. JSON name: <c>yake</c>.
    /// </summary>
    [JsonPropertyName("yake")]
    Yake = 0,

    /// <summary>
    /// RAKE (Rapid Automatic Keyword Extraction), based on co-occurrence. JSON name: <c>rake</c>.
    /// </summary>
    [JsonPropertyName("rake")]
    Rake = 1,
}
|
||||
|
||||
|
||||
/// <summary>
/// JSON converter for <see cref="KeywordAlgorithm"/> that maps each member to and from
/// its explicit lowercase string name.
/// </summary>
internal sealed class KeywordAlgorithmJsonConverter : JsonConverter<KeywordAlgorithm>
{
    /// <summary>
    /// Reads the current token as a string and maps it to a <see cref="KeywordAlgorithm"/>.
    /// </summary>
    /// <exception cref="JsonException">When the string is not a known variant name (including null).</exception>
    public override KeywordAlgorithm Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options)
    {
        var wireName = reader.GetString();
        if (wireName == "yake")
        {
            return KeywordAlgorithm.Yake;
        }

        if (wireName == "rake")
        {
            return KeywordAlgorithm.Rake;
        }

        throw new JsonException($"Unknown KeywordAlgorithm value: {wireName}");
    }

    /// <summary>
    /// Writes <paramref name="value"/> as its string name.
    /// </summary>
    /// <exception cref="JsonException">When <paramref name="value"/> is not a declared member.</exception>
    public override void Write(Utf8JsonWriter writer, KeywordAlgorithm value, JsonSerializerOptions options)
    {
        // The name is resolved before anything is written, so an unknown value
        // throws without emitting partial output.
        string wireName;
        switch (value)
        {
            case KeywordAlgorithm.Yake:
                wireName = "yake";
                break;
            case KeywordAlgorithm.Rake:
                wireName = "rake";
                break;
            default:
                throw new JsonException($"Unknown KeywordAlgorithm value: {value}");
        }

        writer.WriteStringValue(wireName);
    }
}
|
||||
117
packages/csharp/src/Kreuzberg/KeywordConfig.cs
generated
Normal file
117
packages/csharp/src/Kreuzberg/KeywordConfig.cs
generated
Normal file
@@ -0,0 +1,117 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// Keyword extraction configuration.
/// </summary>
public sealed record KeywordConfig
{
    /// <summary>
    /// Algorithm to use for extraction.
    /// </summary>
    [JsonPropertyName("algorithm")]
    public KeywordAlgorithm? Algorithm { get; init; } = null;

    /// <summary>
    /// Maximum number of keywords to extract (default: 10).
    /// </summary>
    [JsonPropertyName("max_keywords")]
    public ulong MaxKeywords { get; init; } = 10;

    /// <summary>
    /// Minimum score threshold (0.0-1.0, default: 0.0).
    ///
    /// Keywords with scores below this threshold are filtered out.
    /// Note: Score ranges differ between algorithms.
    /// </summary>
    [JsonPropertyName("min_score")]
    public float MinScore { get; init; } = 0.0f;

    /// <summary>
    /// N-gram range for keyword extraction (min, max).
    ///
    /// (1, 1) = unigrams only
    /// (1, 2) = unigrams and bigrams
    /// (1, 3) = unigrams, bigrams, and trigrams (default)
    /// </summary>
    [JsonPropertyName("ngram_range")]
    public List<ulong>? NgramRange { get; init; } = null;

    /// <summary>
    /// Language code for stopword filtering (e.g., "en", "de", "fr").
    ///
    /// If null, no stopword filtering is applied.
    /// </summary>
    [JsonPropertyName("language")]
    public string? Language { get; init; } = null;

    /// <summary>
    /// YAKE-specific tuning parameters.
    /// </summary>
    [JsonPropertyName("yake_params")]
    public YakeParams? YakeParams { get; init; } = null;

    /// <summary>
    /// RAKE-specific tuning parameters.
    /// </summary>
    [JsonPropertyName("rake_params")]
    public RakeParams? RakeParams { get; init; } = null;

    /// <summary>
    /// Parse a <see cref="KeywordConfig"/> from a JSON string.
    /// </summary>
    /// <param name="json">The JSON text to parse.</param>
    /// <returns>The parsed configuration; never <c>null</c>.</returns>
    /// <exception cref="KreuzbergException">When the JSON cannot be deserialised.</exception>
    public static KeywordConfig FromJson(string json)
    {
        try
        {
            return JsonSerializer.Deserialize<KeywordConfig>(json, JsonOptions)
                ?? throw new KreuzbergException($"Failed to parse KeywordConfig from JSON: deserializer returned null");
        }
        catch (KreuzbergException)
        {
            throw;
        }
        catch (Exception e)
        {
            throw new KreuzbergException($"Failed to parse KeywordConfig from JSON: {e.Message}", e);
        }
    }

    /// <summary>Options used when reading JSON into a <see cref="KeywordConfig"/>.</summary>
    private static readonly JsonSerializerOptions JsonOptions = new()
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>Options for serializing config/input objects to FFI. Strips nulls
    /// (nullable C# fields default to null and would override required Rust fields with
    /// non-deserialisable nulls) but preserves explicit false/0 so caller intent is kept.</summary>
    private static readonly JsonSerializerOptions JsonSerializationOptions = new()
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>
    /// Builds the default configuration by asking the native library for its default
    /// value, converting that value to JSON, and deserialising the JSON.
    /// </summary>
    /// <returns>The default configuration; never <c>null</c>.</returns>
    /// <exception cref="KreuzbergException">
    /// When the native side yields no JSON (or the JSON literal <c>null</c>), so no
    /// configuration object can be produced.
    /// </exception>
    public static KeywordConfig Default()
    {
        var nativeResult = NativeMethods.KeywordConfigDefault();
        try
        {
            var jsonPtr = NativeMethods.KeywordConfigToJson(nativeResult);
            string? json;
            try
            {
                json = global::System.Runtime.InteropServices.Marshal.PtrToStringUTF8(jsonPtr);
            }
            finally
            {
                // Release the native string even if marshalling fails.
                NativeMethods.FreeString(jsonPtr);
            }

            // A null string is deserialised as JSON "null", which yields a null object.
            // Surface that as an error rather than returning null from a method whose
            // return type is declared non-nullable.
            return JsonSerializer.Deserialize<KeywordConfig>(json ?? "null", JsonOptions)
                ?? throw new KreuzbergException("Failed to load default KeywordConfig: native library returned no configuration");
        }
        finally
        {
            // Release the native handle on every path, including failures above.
            NativeMethods.KeywordConfigFree(nativeResult);
        }
    }
}
|
||||
35
packages/csharp/src/Kreuzberg/KreuzbergErrorException.cs
generated
Normal file
35
packages/csharp/src/Kreuzberg/KreuzbergErrorException.cs
generated
Normal file
@@ -0,0 +1,35 @@
|
||||
// This file is auto-generated by alef. DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// Main error type for all Kreuzberg operations. It preserves error chains and
/// provides context for debugging.
/// </summary>
/// <remarks>
/// Error variants:
/// <list type="bullet">
/// <item><description><c>Io</c>: file system and I/O errors (always bubble up)</description></item>
/// <item><description><c>Parsing</c>: document parsing errors (corrupt files, unsupported features)</description></item>
/// <item><description><c>Ocr</c>: OCR processing errors</description></item>
/// <item><description><c>Validation</c>: input validation errors (invalid paths, config, parameters)</description></item>
/// <item><description><c>Cache</c>: cache operation errors (non-fatal, can be ignored)</description></item>
/// <item><description><c>ImageProcessing</c>: image manipulation errors</description></item>
/// <item><description><c>Serialization</c>: JSON/MessagePack serialization errors</description></item>
/// <item><description><c>MissingDependency</c>: missing optional dependencies (tesseract, etc.)</description></item>
/// <item><description><c>Plugin</c>: plugin-specific errors</description></item>
/// <item><description><c>LockPoisoned</c>: Mutex/RwLock poisoning (should not happen in normal operation)</description></item>
/// <item><description><c>UnsupportedFormat</c>: unsupported MIME type or file format</description></item>
/// <item><description><c>Other</c>: catch-all for uncommon errors</description></item>
/// </list>
/// </remarks>
public class KreuzbergErrorException : KreuzbergException
{
    /// <summary>Creates the exception with a message.</summary>
    /// <param name="message">Description of the failure.</param>
    public KreuzbergErrorException(string message)
        : base(message)
    {
    }

    /// <summary>Creates the exception with a message and the exception that caused it.</summary>
    /// <param name="message">Description of the failure.</param>
    /// <param name="innerException">The underlying cause.</param>
    public KreuzbergErrorException(string message, Exception innerException)
        : base(message, innerException)
    {
    }
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user