This commit is contained in:
210
packages/csharp/src/Kreuzberg/FileExtractionConfig.cs
generated
Normal file
210
packages/csharp/src/Kreuzberg/FileExtractionConfig.cs
generated
Normal file
@@ -0,0 +1,210 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
#nullable enable
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// Per-file overrides of the extraction configuration, used in batch processing.
///
/// Every property is nullable; <c>null</c> means "use the batch-level default."
/// Instances are passed to <c>batch_extract_files</c> and <c>batch_extract_bytes</c>
/// so that files within a single batch can use different extraction settings.
///
/// The following <c>ExtractionConfig</c> fields are batch-level only and therefore
/// have no per-file counterpart here:
/// - <c>max_concurrent_extractions</c> — controls batch parallelism
/// - <c>use_cache</c> — global caching policy
/// - <c>acceleration</c> — shared ONNX execution provider
/// - <c>security_limits</c> — global archive security policy
/// </summary>
public sealed record FileExtractionConfig
{
    /// <summary>Per-file override for quality post-processing.</summary>
    [JsonPropertyName("enable_quality_processing")]
    public bool? EnableQualityProcessing { get; init; }

    /// <summary>Per-file override for the OCR configuration; <c>null</c> uses the batch default.</summary>
    [JsonPropertyName("ocr")]
    public OcrConfig? Ocr { get; init; }

    /// <summary>Per-file override for forcing OCR.</summary>
    [JsonPropertyName("force_ocr")]
    public bool? ForceOcr { get; init; }

    /// <summary>Per-file override for the pages on which OCR is forced (1-indexed page numbers).</summary>
    [JsonPropertyName("force_ocr_pages")]
    public List<uint>? ForceOcrPages { get; init; }

    /// <summary>Per-file override for disabling OCR.</summary>
    [JsonPropertyName("disable_ocr")]
    public bool? DisableOcr { get; init; }

    /// <summary>Per-file override for the chunking configuration.</summary>
    [JsonPropertyName("chunking")]
    public ChunkingConfig? Chunking { get; init; }

    /// <summary>Per-file override for the content filtering configuration.</summary>
    [JsonPropertyName("content_filter")]
    public ContentFilterConfig? ContentFilter { get; init; }

    /// <summary>Per-file override for the image extraction configuration.</summary>
    [JsonPropertyName("images")]
    public ImageExtractionConfig? Images { get; init; }

    /// <summary>Per-file override for the PDF options.</summary>
    [JsonPropertyName("pdf_options")]
    public PdfConfig? PdfOptions { get; init; }

    /// <summary>Per-file override for token reduction.</summary>
    [JsonPropertyName("token_reduction")]
    public TokenReductionOptions? TokenReduction { get; init; }

    /// <summary>Per-file override for language detection.</summary>
    [JsonPropertyName("language_detection")]
    public LanguageDetectionConfig? LanguageDetection { get; init; }

    /// <summary>Per-file override for page extraction.</summary>
    [JsonPropertyName("pages")]
    public PageConfig? Pages { get; init; }

    /// <summary>Per-file override for keyword extraction.</summary>
    [JsonPropertyName("keywords")]
    public KeywordConfig? Keywords { get; init; }

    /// <summary>Per-file override for the post-processor.</summary>
    [JsonPropertyName("postprocessor")]
    public PostProcessorConfig? Postprocessor { get; init; }

    /// <summary>Per-file override for the HTML conversion options.</summary>
    [JsonPropertyName("html_options")]
    public string? HtmlOptions { get; init; }

    /// <summary>Per-file override for the result format.</summary>
    [JsonPropertyName("result_format")]
    public ResultFormat? ResultFormat { get; init; }

    /// <summary>Per-file override for the output content format.</summary>
    [JsonPropertyName("output_format")]
    public OutputFormat? OutputFormat { get; init; }

    /// <summary>Per-file override for including the document structure in the output.</summary>
    [JsonPropertyName("include_document_structure")]
    public bool? IncludeDocumentStructure { get; init; }

    /// <summary>Per-file override for layout detection.</summary>
    [JsonPropertyName("layout")]
    public LayoutDetectionConfig? Layout { get; init; }

    /// <summary>
    /// Per-file extraction timeout, in seconds.
    ///
    /// When set, extraction of this file is canceled once the given duration has
    /// elapsed. A timed-out file yields an error result and does not affect the
    /// other files in the batch.
    /// </summary>
    [JsonPropertyName("timeout_secs")]
    public ulong? TimeoutSecs { get; init; }

    /// <summary>Per-file override for the tree-sitter configuration.</summary>
    [JsonPropertyName("tree_sitter")]
    public TreeSitterConfig? TreeSitter { get; init; }

    /// <summary>
    /// Per-file override for the structured extraction configuration.
    ///
    /// When set, LLM-based structured extraction with a JSON schema is enabled for
    /// this specific file: the extracted content is sent to a VLM/LLM and the
    /// response is parsed according to the provided schema.
    /// </summary>
    [JsonPropertyName("structured_extraction")]
    public StructuredExtractionConfig? StructuredExtraction { get; init; }

    /// <summary>
    /// Builds a <see cref="FileExtractionConfig"/> from its JSON representation.
    /// </summary>
    /// <param name="json">JSON text using the snake_case property names declared on this record.</param>
    /// <returns>The deserialised configuration; never <c>null</c>.</returns>
    /// <exception cref="KreuzbergException">When the JSON cannot be deserialised.</exception>
    public static FileExtractionConfig FromJson(string json)
    {
        FileExtractionConfig? parsed;
        try
        {
            parsed = JsonSerializer.Deserialize<FileExtractionConfig>(json, JsonOptions);
        }
        // A KreuzbergException raised during deserialisation propagates untouched;
        // every other failure is wrapped so callers only need to handle one type.
        catch (Exception e) when (e is not KreuzbergException)
        {
            throw new KreuzbergException($"Failed to parse FileExtractionConfig from JSON: {e.Message}", e);
        }

        // The JSON literal `null` deserialises to a null reference; report it as a parse failure.
        return parsed
            ?? throw new KreuzbergException("Failed to parse FileExtractionConfig from JSON: deserializer returned null");
    }

    /// <summary>Options used by <see cref="FromJson"/>; enum values are read as snake_case strings.</summary>
    private static readonly JsonSerializerOptions JsonOptions = new()
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>
    /// Options for serializing config/input objects to FFI. Null values are omitted
    /// (nullable C# properties default to null and would otherwise override required
    /// Rust fields with non-deserialisable nulls), while explicit false/0 values are
    /// written so caller intent is kept.
    /// NOTE(review): not referenced by any member of this record.
    /// </summary>
    private static readonly JsonSerializerOptions JsonSerializationOptions = new()
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };
}
|
||||
Reference in New Issue
Block a user