// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
#nullable enable

using System;
using System.Collections.Generic;
using System.Text.Json;
using System.Text.Json.Serialization;

namespace Kreuzberg;

/// <summary>
/// Per-file extraction configuration overrides for batch processing.
///
/// All fields are `Option&lt;T&gt;` — `None` means "use the batch-level default."
/// This type is used with `batch_extract_files` and
/// `batch_extract_bytes` to allow heterogeneous
/// extraction settings within a single batch.
///
/// # Excluded Fields
///
/// The following `ExtractionConfig` fields are batch-level only and
/// cannot be overridden per file:
/// - `max_concurrent_extractions` — controls batch parallelism
/// - `use_cache` — global caching policy
/// - `acceleration` — shared ONNX execution provider
/// - `security_limits` — global archive security policy
/// </summary>
/// <remarks>
/// Every property is nullable and defaults to <c>null</c>; each is bound to its
/// snake_case JSON name through <see cref="JsonPropertyNameAttribute"/>.
/// </remarks>
public sealed record FileExtractionConfig
{
    /// <summary>
    /// Override quality post-processing for this file.
    /// </summary>
    [JsonPropertyName("enable_quality_processing")]
    public bool? EnableQualityProcessing { get; init; } = null;

    /// <summary>
    /// Override OCR configuration for this file (null in the Option = use batch default).
    /// </summary>
    [JsonPropertyName("ocr")]
    public OcrConfig? Ocr { get; init; } = null;

    /// <summary>
    /// Override force OCR for this file.
    /// </summary>
    [JsonPropertyName("force_ocr")]
    public bool? ForceOcr { get; init; } = null;

    /// <summary>
    /// Override force OCR pages for this file (1-indexed page numbers).
    /// </summary>
    // NOTE(review): the element type argument was missing in the reviewed text;
    // `ulong` is assumed (it matches the unsigned TimeoutSecs property) —
    // confirm against the generator's type mapping before merging.
    [JsonPropertyName("force_ocr_pages")]
    public List<ulong>? ForceOcrPages { get; init; } = null;

    /// <summary>
    /// Override disable OCR for this file.
    /// </summary>
    [JsonPropertyName("disable_ocr")]
    public bool? DisableOcr { get; init; } = null;

    /// <summary>
    /// Override chunking configuration for this file.
    /// </summary>
    [JsonPropertyName("chunking")]
    public ChunkingConfig? Chunking { get; init; } = null;

    /// <summary>
    /// Override content filtering configuration for this file.
    /// </summary>
    [JsonPropertyName("content_filter")]
    public ContentFilterConfig? ContentFilter { get; init; } = null;

    /// <summary>
    /// Override image extraction configuration for this file.
    /// </summary>
    [JsonPropertyName("images")]
    public ImageExtractionConfig? Images { get; init; } = null;

    /// <summary>
    /// Override PDF options for this file.
    /// </summary>
    [JsonPropertyName("pdf_options")]
    public PdfConfig? PdfOptions { get; init; } = null;

    /// <summary>
    /// Override token reduction for this file.
    /// </summary>
    [JsonPropertyName("token_reduction")]
    public TokenReductionOptions? TokenReduction { get; init; } = null;

    /// <summary>
    /// Override language detection for this file.
    /// </summary>
    [JsonPropertyName("language_detection")]
    public LanguageDetectionConfig? LanguageDetection { get; init; } = null;

    /// <summary>
    /// Override page extraction for this file.
    /// </summary>
    [JsonPropertyName("pages")]
    public PageConfig? Pages { get; init; } = null;

    /// <summary>
    /// Override keyword extraction for this file.
    /// </summary>
    [JsonPropertyName("keywords")]
    public KeywordConfig? Keywords { get; init; } = null;

    /// <summary>
    /// Override post-processor for this file.
    /// </summary>
    [JsonPropertyName("postprocessor")]
    public PostProcessorConfig? Postprocessor { get; init; } = null;

    /// <summary>
    /// Override HTML conversion options for this file.
    /// </summary>
    [JsonPropertyName("html_options")]
    public string? HtmlOptions { get; init; } = null;

    /// <summary>
    /// Override result format for this file.
    /// </summary>
    [JsonPropertyName("result_format")]
    public ResultFormat? ResultFormat { get; init; } = null;

    /// <summary>
    /// Override output content format for this file.
    /// </summary>
    [JsonPropertyName("output_format")]
    public OutputFormat? OutputFormat { get; init; } = null;

    /// <summary>
    /// Override document structure output for this file.
    /// </summary>
    [JsonPropertyName("include_document_structure")]
    public bool? IncludeDocumentStructure { get; init; } = null;

    /// <summary>
    /// Override layout detection for this file.
    /// </summary>
    [JsonPropertyName("layout")]
    public LayoutDetectionConfig? Layout { get; init; } = null;

    /// <summary>
    /// Override per-file extraction timeout in seconds.
    ///
    /// When set, the extraction for this file will be canceled after the
    /// specified duration. A timed-out file produces an error result without
    /// affecting other files in the batch.
    /// </summary>
    [JsonPropertyName("timeout_secs")]
    public ulong? TimeoutSecs { get; init; } = null;

    /// <summary>
    /// Override tree-sitter configuration for this file.
    /// </summary>
    [JsonPropertyName("tree_sitter")]
    public TreeSitterConfig? TreeSitter { get; init; } = null;

    /// <summary>
    /// Override structured extraction configuration for this file.
    ///
    /// When set, enables LLM-based structured extraction with a JSON schema
    /// for this specific file. The extracted content is sent to a VLM/LLM
    /// and the response is parsed according to the provided schema.
    /// </summary>
    [JsonPropertyName("structured_extraction")]
    public StructuredExtractionConfig? StructuredExtraction { get; init; } = null;

    /// <summary>
    /// Parse a <see cref="FileExtractionConfig"/> from a JSON string.
    /// </summary>
    /// <param name="json">JSON text whose keys use the snake_case names declared on the properties.</param>
    /// <returns>The deserialized configuration; never <c>null</c>.</returns>
    /// <exception cref="KreuzbergException">
    /// When the JSON cannot be deserialised. Any exception raised by the serializer is
    /// wrapped (and kept as the inner exception); a null deserialization result is
    /// reported the same way.
    /// </exception>
    public static FileExtractionConfig FromJson(string json)
    {
        try
        {
            // The explicit type argument is required: it selects the generic overload
            // and tells the serializer which type to materialise.
            return JsonSerializer.Deserialize<FileExtractionConfig>(json, JsonOptions)
                ?? throw new KreuzbergException("Failed to parse FileExtractionConfig from JSON: deserializer returned null");
        }
        catch (KreuzbergException)
        {
            // Already the domain exception (the null-result case above); rethrow
            // untouched so it is not wrapped a second time by the handler below.
            throw;
        }
        catch (Exception e)
        {
            throw new KreuzbergException($"Failed to parse FileExtractionConfig from JSON: {e.Message}", e);
        }
    }

    /// <summary>
    /// Options used by <see cref="FromJson"/>. Enum values are converted as
    /// snake_case strings; the ignore condition only influences writing.
    /// </summary>
    private static readonly JsonSerializerOptions JsonOptions = new()
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>
    /// Options for serializing config/input objects to FFI. Strips nulls
    /// (nullable C# fields default to null and would override required Rust fields with
    /// non-deserialisable nulls) but preserves explicit false/0 so caller intent is kept.
    /// </summary>
    private static readonly JsonSerializerOptions JsonSerializationOptions = new()
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };
}