Nomad changes
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s

This commit is contained in:
Henrik Jess Nielsen
2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions

View File

@@ -0,0 +1,210 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
#nullable enable
using System;
using System.Collections.Generic;
using System.Text.Json;
using System.Text.Json.Serialization;
namespace Kreuzberg;
/// <summary>
/// Extraction settings that can be overridden for a single file within a batch.
/// <para>
/// Every field is an <c>Option&lt;T&gt;</c>; a value of <c>None</c> means
/// "use the batch-level default." The type is passed to <c>batch_extract_files</c>
/// and <c>batch_extract_bytes</c> so that one batch can mix different extraction
/// settings.
/// </para>
/// <para>
/// Excluded fields: these <c>ExtractionConfig</c> fields exist only at batch level
/// and cannot be overridden per file:
/// </para>
/// <list type="bullet">
/// <item><description><c>max_concurrent_extractions</c> — controls batch parallelism</description></item>
/// <item><description><c>use_cache</c> — global caching policy</description></item>
/// <item><description><c>acceleration</c> — shared ONNX execution provider</description></item>
/// <item><description><c>security_limits</c> — global archive security policy</description></item>
/// </list>
/// </summary>
public sealed record FileExtractionConfig
{
    /// <summary>
    /// Options handed to the deserializer by <see cref="FromJson"/>. Enum values are
    /// converted via a string enum converter using the snake_case (lower) naming policy.
    /// </summary>
    private static readonly JsonSerializerOptions JsonOptions = new JsonSerializerOptions
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>
    /// Options for serializing config/input objects to FFI. Null values are omitted
    /// (nullable C# fields default to null and would otherwise override required Rust
    /// fields with nulls that cannot be deserialised), while explicit false/0 values
    /// are written so the caller's intent is preserved.
    /// </summary>
    private static readonly JsonSerializerOptions JsonSerializationOptions = new JsonSerializerOptions
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>Per-file override for quality post-processing.</summary>
    [JsonPropertyName("enable_quality_processing")]
    public bool? EnableQualityProcessing { get; init; }

    /// <summary>
    /// Per-file override for the OCR configuration; <c>null</c> means the batch
    /// default is used.
    /// </summary>
    [JsonPropertyName("ocr")]
    public OcrConfig? Ocr { get; init; }

    /// <summary>Per-file override for force OCR.</summary>
    [JsonPropertyName("force_ocr")]
    public bool? ForceOcr { get; init; }

    /// <summary>
    /// Per-file override for the pages on which OCR is forced. Page numbers are 1-indexed.
    /// </summary>
    [JsonPropertyName("force_ocr_pages")]
    public List<uint>? ForceOcrPages { get; init; }

    /// <summary>Per-file override for disable OCR.</summary>
    [JsonPropertyName("disable_ocr")]
    public bool? DisableOcr { get; init; }

    /// <summary>Per-file override for the chunking configuration.</summary>
    [JsonPropertyName("chunking")]
    public ChunkingConfig? Chunking { get; init; }

    /// <summary>Per-file override for the content filtering configuration.</summary>
    [JsonPropertyName("content_filter")]
    public ContentFilterConfig? ContentFilter { get; init; }

    /// <summary>Per-file override for the image extraction configuration.</summary>
    [JsonPropertyName("images")]
    public ImageExtractionConfig? Images { get; init; }

    /// <summary>Per-file override for the PDF options.</summary>
    [JsonPropertyName("pdf_options")]
    public PdfConfig? PdfOptions { get; init; }

    /// <summary>Per-file override for token reduction.</summary>
    [JsonPropertyName("token_reduction")]
    public TokenReductionOptions? TokenReduction { get; init; }

    /// <summary>Per-file override for language detection.</summary>
    [JsonPropertyName("language_detection")]
    public LanguageDetectionConfig? LanguageDetection { get; init; }

    /// <summary>Per-file override for page extraction.</summary>
    [JsonPropertyName("pages")]
    public PageConfig? Pages { get; init; }

    /// <summary>Per-file override for keyword extraction.</summary>
    [JsonPropertyName("keywords")]
    public KeywordConfig? Keywords { get; init; }

    /// <summary>Per-file override for the post-processor.</summary>
    [JsonPropertyName("postprocessor")]
    public PostProcessorConfig? Postprocessor { get; init; }

    /// <summary>Per-file override for the HTML conversion options.</summary>
    [JsonPropertyName("html_options")]
    public string? HtmlOptions { get; init; }

    /// <summary>Per-file override for the result format.</summary>
    [JsonPropertyName("result_format")]
    public ResultFormat? ResultFormat { get; init; }

    /// <summary>Per-file override for the output content format.</summary>
    [JsonPropertyName("output_format")]
    public OutputFormat? OutputFormat { get; init; }

    /// <summary>Per-file override for document structure output.</summary>
    [JsonPropertyName("include_document_structure")]
    public bool? IncludeDocumentStructure { get; init; }

    /// <summary>Per-file override for layout detection.</summary>
    [JsonPropertyName("layout")]
    public LayoutDetectionConfig? Layout { get; init; }

    /// <summary>
    /// Per-file override for the extraction timeout, in seconds.
    /// <para>
    /// When set, extraction of this file is canceled once the given duration has
    /// elapsed. A file that times out yields an error result and does not affect
    /// the other files in the batch.
    /// </para>
    /// </summary>
    [JsonPropertyName("timeout_secs")]
    public ulong? TimeoutSecs { get; init; }

    /// <summary>Per-file override for the tree-sitter configuration.</summary>
    [JsonPropertyName("tree_sitter")]
    public TreeSitterConfig? TreeSitter { get; init; }

    /// <summary>
    /// Per-file override for the structured extraction configuration.
    /// <para>
    /// When set, LLM-based structured extraction with a JSON schema is enabled for
    /// this specific file: the extracted content is sent to a VLM/LLM and the
    /// response is parsed according to the provided schema.
    /// </para>
    /// </summary>
    [JsonPropertyName("structured_extraction")]
    public StructuredExtractionConfig? StructuredExtraction { get; init; }

    /// <summary>
    /// Build a <see cref="FileExtractionConfig"/> from its JSON representation.
    /// </summary>
    /// <param name="json">JSON text to deserialise.</param>
    /// <returns>The deserialised configuration; never <c>null</c>.</returns>
    /// <exception cref="KreuzbergException">
    /// When deserialisation throws (the original exception is attached as the inner
    /// exception) or when the deserializer yields <c>null</c>.
    /// </exception>
    public static FileExtractionConfig FromJson(string json)
    {
        FileExtractionConfig? parsed;
        try
        {
            parsed = JsonSerializer.Deserialize<FileExtractionConfig>(json, JsonOptions);
        }
        catch (Exception e) when (e is not KreuzbergException)
        {
            // A KreuzbergException passes through untouched; anything else is wrapped.
            throw new KreuzbergException($"Failed to parse FileExtractionConfig from JSON: {e.Message}", e);
        }

        if (parsed is null)
        {
            throw new KreuzbergException("Failed to parse FileExtractionConfig from JSON: deserializer returned null");
        }

        return parsed;
    }
}