packages/swift/Sources/Kreuzberg/Kreuzberg.swift

// Generated by alef. Do not edit by hand.
// swift-format-ignore-file

import Foundation
import RustBridge

/// Cache statistics value made of five numeric fields.
///
/// Encoded data uses snake_case keys (for example `total_files`); see `CodingKeys`.
public struct CacheStats: Codable, Sendable, Hashable {
    /// Serialized under the key `total_files`.
    public let totalFiles: UInt
    /// Serialized under the key `total_size_mb`.
    public let totalSizeMb: Double
    /// Serialized under the key `available_space_mb`.
    public let availableSpaceMb: Double
    /// Serialized under the key `oldest_file_age_days`.
    public let oldestFileAgeDays: Double
    /// Serialized under the key `newest_file_age_days`.
    public let newestFileAgeDays: Double

    /// Creates a value by storing each argument in the property of the same name.
    public init(
        totalFiles: UInt,
        totalSizeMb: Double,
        availableSpaceMb: Double,
        oldestFileAgeDays: Double,
        newestFileAgeDays: Double
    ) {
        self.totalFiles = totalFiles
        self.totalSizeMb = totalSizeMb
        self.availableSpaceMb = availableSpaceMb
        self.oldestFileAgeDays = oldestFileAgeDays
        self.newestFileAgeDays = newestFileAgeDays
    }

    /// Maps each camelCase property to its snake_case key in encoded data.
    private enum CodingKeys: String, CodingKey {
        case totalFiles = "total_files"
        case totalSizeMb = "total_size_mb"
        case availableSpaceMb = "available_space_mb"
        case oldestFileAgeDays = "oldest_file_age_days"
        case newestFileAgeDays = "newest_file_age_days"
    }
}

// MARK: - Internal FFI conversions for CacheStats
internal extension CacheStats {
    /// Builds a `CacheStats` by calling each accessor on the bridged reference.
    ///
    /// - Parameter bridged: Bridge-side reference whose accessors supply the field values.
    init(_ bridged: RustBridge.CacheStatsRef) throws {
        self.init(
            totalFiles: bridged.totalFiles(),
            totalSizeMb: bridged.totalSizeMb(),
            availableSpaceMb: bridged.availableSpaceMb(),
            oldestFileAgeDays: bridged.oldestFileAgeDays(),
            newestFileAgeDays: bridged.newestFileAgeDays()
        )
    }

    /// Creates the `RustBridge.CacheStats` counterpart of this value.
    ///
    /// The fields are passed positionally, in property declaration order.
    func intoRust() throws -> RustBridge.CacheStats {
        RustBridge.CacheStats(
            totalFiles,
            totalSizeMb,
            availableSpaceMb,
            oldestFileAgeDays,
            newestFileAgeDays
        )
    }
}

/// Hardware acceleration configuration for ONNX Runtime models.
///
/// Controls which execution provider (CPU, CoreML, CUDA, TensorRT) is used
/// for inference in layout detection and embedding generation.
///
/// # Example
///
/// ```rust
/// use kreuzberg::AccelerationConfig;
///
/// // Auto-select: CoreML on macOS, CUDA on Linux, CPU elsewhere
/// let config = AccelerationConfig::default();
///
/// // Force CPU only
/// let config = AccelerationConfig {
///     provider: kreuzberg::ExecutionProviderType::Cpu,
///     ..Default::default()
/// };
/// ```
public struct AccelerationConfig: Codable, Sendable, Hashable {
    /// Which execution provider ONNX inference should use.
    public let provider: ExecutionProviderType
    /// GPU device ID for CUDA/TensorRT; ignored for CPU/CoreML/Auto.
    public let deviceId: UInt32

    /// Creates a configuration from an explicit provider and device ID.
    public init(provider: ExecutionProviderType, deviceId: UInt32) {
        self.provider = provider
        self.deviceId = deviceId
    }

    /// Serialized key names: `provider` and `device_id`.
    private enum CodingKeys: String, CodingKey {
        case provider
        case deviceId = "device_id"
    }

    /// Decodes a configuration from keyed data.
    ///
    /// `provider` must be present; a missing or null `device_id` decodes as `0`.
    // NOTE(review): the type-level example shows a default configuration, but decoding
    // has no fallback for `provider` — confirm this is intended.
    public init(from decoder: any Decoder) throws {
        let values = try decoder.container(keyedBy: CodingKeys.self)
        let decodedProvider = try values.decode(ExecutionProviderType.self, forKey: .provider)
        let decodedDeviceId = try values.decodeIfPresent(UInt32.self, forKey: .deviceId)
        self.init(provider: decodedProvider, deviceId: decodedDeviceId ?? 0)
    }
}

// MARK: - Internal FFI conversions for AccelerationConfig
internal extension AccelerationConfig {
    /// Builds an `AccelerationConfig` from the bridged reference.
    ///
    /// - Parameter rb: Bridge-side reference whose accessors supply the field values.
    /// - Throws: `DecodingError.dataCorrupted` when the provider name reported by the
    ///   bridge is not a raw value of `ExecutionProviderType`.
    init(_ rb: RustBridge.AccelerationConfigRef) throws {
        // Read the provider name once; it is needed for both the lookup and the error text.
        let providerName = rb.provider().toString()
        guard let provider = ExecutionProviderType(rawValue: providerName) else {
            // This initializer already throws, so report the mismatch to the caller
            // instead of terminating the whole process.
            throw DecodingError.dataCorrupted(
                DecodingError.Context(
                    codingPath: [],
                    debugDescription: "Unknown ExecutionProviderType: \(providerName)"
                )
            )
        }
        self.init(provider: provider, deviceId: rb.deviceId())
    }

    /// Creates the `RustBridge.AccelerationConfig` counterpart of this value.
    ///
    /// - Throws: Whatever `provider.intoRust()` throws.
    func intoRust() throws -> RustBridge.AccelerationConfig {
        return RustBridge.AccelerationConfig(try provider.intoRust(), deviceId)
    }
}

/// Cross-extractor content filtering configuration.
///
/// Controls whether "furniture" content (headers, footers, page numbers,
/// watermarks, repeating text) is included in or stripped from extraction
/// results. Applies across all extractors (PDF, DOCX, RTF, ODT, HTML, etc.)
/// with format-specific implementation.
///
/// When `None` on `ExtractionConfig`, each extractor uses its current
/// default behavior unchanged.
public struct ContentFilterConfig: Codable, Sendable, Hashable {
    /// Include running headers in extraction output.
    ///
    /// - PDF: Disables top-margin furniture stripping and prevents the layout
    ///   model from treating `PageHeader`-classified regions as furniture.
    /// - DOCX: Includes document headers in text output.
    /// - RTF/ODT: Headers already included; this is a no-op when true.
    /// - HTML/EPUB: Keeps `<header>` element content.
    ///
    /// Default: `false` (headers are stripped or excluded).
    public let includeHeaders: Bool
    /// Include running footers in extraction output.
    ///
    /// - PDF: Disables bottom-margin furniture stripping and prevents the layout
    ///   model from treating `PageFooter`-classified regions as furniture.
    /// - DOCX: Includes document footers in text output.
    /// - RTF/ODT: Footers already included; this is a no-op when true.
    /// - HTML/EPUB: Keeps `<footer>` element content.
    ///
    /// Default: `false` (footers are stripped or excluded).
    public let includeFooters: Bool
    /// Enable the heuristic cross-page repeating text detector.
    ///
    /// When `true` (default), text that repeats verbatim across a supermajority
    /// of pages is classified as furniture and stripped.  Disable this if brand
    /// names or repeated headings are being incorrectly removed by the heuristic.
    ///
    /// Note: when a layout-detection model is active, the model may independently
    /// classify page-header / page-footer regions as furniture on a per-page basis.
    /// To preserve those regions, set `include_headers = true`, `include_footers = true`,
    /// or both, in addition to disabling this flag.
    ///
    /// Primarily affects PDF extraction.
    ///
    /// Default: `true`.
    public let stripRepeatingText: Bool
    /// Include watermark text in extraction output.
    ///
    /// - PDF: Keeps watermark artifacts and arXiv identifiers.
    /// - Other formats: No effect currently.
    ///
    /// Default: `false` (watermarks are stripped).
    public let includeWatermarks: Bool

    /// Creates a content filter configuration.
    ///
    /// Each argument defaults to the value `init(from:)` uses for a missing key, so
    /// `ContentFilterConfig()` equals a configuration decoded from `{}`.
    ///
    /// - Parameters:
    ///   - includeHeaders: Stored in `includeHeaders`; defaults to `false`.
    ///   - includeFooters: Stored in `includeFooters`; defaults to `false`.
    ///   - stripRepeatingText: Stored in `stripRepeatingText`; defaults to `true`.
    ///   - includeWatermarks: Stored in `includeWatermarks`; defaults to `false`.
    public init(
        includeHeaders: Bool = false,
        includeFooters: Bool = false,
        stripRepeatingText: Bool = true,
        includeWatermarks: Bool = false
    ) {
        self.includeHeaders = includeHeaders
        self.includeFooters = includeFooters
        self.stripRepeatingText = stripRepeatingText
        self.includeWatermarks = includeWatermarks
    }

    /// Maps each property to its snake_case key in encoded data.
    private enum CodingKeys: String, CodingKey {
        case includeHeaders = "include_headers"
        case includeFooters = "include_footers"
        case stripRepeatingText = "strip_repeating_text"
        case includeWatermarks = "include_watermarks"
    }

    /// Decodes a configuration, substituting the documented default for every
    /// key that is missing or null.
    public init(from decoder: any Decoder) throws {
        let container = try decoder.container(keyedBy: CodingKeys.self)
        self.includeHeaders = try container.decodeIfPresent(Bool.self, forKey: .includeHeaders) ?? false
        self.includeFooters = try container.decodeIfPresent(Bool.self, forKey: .includeFooters) ?? false
        self.stripRepeatingText = try container.decodeIfPresent(Bool.self, forKey: .stripRepeatingText) ?? true
        self.includeWatermarks = try container.decodeIfPresent(Bool.self, forKey: .includeWatermarks) ?? false
    }
}

// MARK: - Internal FFI conversions for ContentFilterConfig
internal extension ContentFilterConfig {
    /// Builds a `ContentFilterConfig` by calling each accessor on the bridged reference.
    ///
    /// - Parameter bridged: Bridge-side reference whose accessors supply the four flags.
    init(_ bridged: RustBridge.ContentFilterConfigRef) throws {
        self.init(
            includeHeaders: bridged.includeHeaders(),
            includeFooters: bridged.includeFooters(),
            stripRepeatingText: bridged.stripRepeatingText(),
            includeWatermarks: bridged.includeWatermarks()
        )
    }

    /// Creates the `RustBridge.ContentFilterConfig` counterpart of this value.
    ///
    /// The flags are passed positionally, in property declaration order.
    func intoRust() throws -> RustBridge.ContentFilterConfig {
        RustBridge.ContentFilterConfig(
            includeHeaders,
            includeFooters,
            stripRepeatingText,
            includeWatermarks
        )
    }
}

/// Configuration for email extraction.
public struct EmailConfig: Codable, Sendable, Hashable {
    /// Windows codepage number applied when an MSG file carries no codepage property.
    ///
    /// `nil` is the default and results in a fallback to windows-1252.
    ///
    /// An unrecognized or invalid codepage number (including 0) also falls back to
    /// windows-1252 silently — the same outcome as when the MSG file itself contains
    /// an unrecognized codepage. No error or warning is emitted, so verify the output
    /// when supplying unusual values.
    ///
    /// Common values:
    /// - 1250: Central European (Polish, Czech, Hungarian, etc.)
    /// - 1251: Cyrillic (Russian, Ukrainian, Bulgarian, etc.)
    /// - 1252: Western European (default)
    /// - 1253: Greek
    /// - 1254: Turkish
    /// - 1255: Hebrew
    /// - 1256: Arabic
    /// - 932:  Japanese (Shift-JIS)
    /// - 936:  Simplified Chinese (GBK)
    public let msgFallbackCodepage: UInt32?

    /// Creates an email configuration; the codepage defaults to `nil`.
    public init(msgFallbackCodepage: UInt32? = nil) {
        self.msgFallbackCodepage = msgFallbackCodepage
    }

    /// Serialized key name: `msg_fallback_codepage`.
    private enum CodingKeys: String, CodingKey {
        case msgFallbackCodepage = "msg_fallback_codepage"
    }

    /// Decodes a configuration; a missing or null `msg_fallback_codepage` decodes as `nil`.
    public init(from decoder: any Decoder) throws {
        let values = try decoder.container(keyedBy: CodingKeys.self)
        let codepage = try values.decodeIfPresent(UInt32.self, forKey: .msgFallbackCodepage)
        self.init(msgFallbackCodepage: codepage)
    }
}

// MARK: - Internal FFI conversions for EmailConfig
internal extension EmailConfig {
    /// Builds an `EmailConfig` from the bridged reference's `msgFallbackCodepage()` value.
    ///
    /// - Parameter bridged: Bridge-side reference supplying the codepage.
    init(_ bridged: RustBridge.EmailConfigRef) throws {
        self.init(msgFallbackCodepage: bridged.msgFallbackCodepage())
    }

    /// Creates the `RustBridge.EmailConfig` counterpart of this value from the stored codepage.
    func intoRust() throws -> RustBridge.EmailConfig {
        RustBridge.EmailConfig(msgFallbackCodepage)
    }
}

/// Main extraction configuration.
///
/// This struct contains all configuration options for the extraction process.
/// It can be loaded from TOML, YAML, or JSON files, or created programmatically.
///
/// # Example
///
/// ```rust
/// use kreuzberg::core::config::ExtractionConfig;
///
/// // Create with defaults
/// let config = ExtractionConfig::default();
///
/// // Load from TOML file
/// // let config = ExtractionConfig::from_toml_file("kreuzberg.toml")?;
/// ```
///
/// In Swift, `ExtractionConfig` is a type alias for `RustBridge.ExtractionConfig`;
/// the example above is written in Rust, not Swift.
public typealias ExtractionConfig = RustBridge.ExtractionConfig

/// Per-file extraction configuration overrides for batch processing.
///
/// All fields are `Option<T>` — `None` means "use the batch-level default."
/// This type is used with `batch_extract_files` and
/// `batch_extract_bytes` to allow heterogeneous
/// extraction settings within a single batch.
///
/// # Excluded Fields
///
/// The following `ExtractionConfig` fields are batch-level only and
/// cannot be overridden per file:
/// - `max_concurrent_extractions` — controls batch parallelism
/// - `use_cache` — global caching policy
/// - `acceleration` — shared ONNX execution provider
/// - `security_limits` — global archive security policy
///
/// # Example
///
/// ```rust
/// use kreuzberg::FileExtractionConfig;
///
/// // Override just OCR forcing for a specific file
/// let config = FileExtractionConfig {
///     force_ocr: Some(true),
///     ..Default::default()
/// };
/// ```
///
/// In Swift, `FileExtractionConfig` is a type alias for
/// `RustBridge.FileExtractionConfig`; the example above is written in Rust, not Swift.
public typealias FileExtractionConfig = RustBridge.FileExtractionConfig

/// Batch item for byte array extraction.
///
/// Used with `batch_extract_bytes` and `batch_extract_bytes_sync`
/// to represent a single item in a batch extraction job.
///
/// In Swift, `BatchBytesItem` is a type alias for `RustBridge.BatchBytesItem`.
public typealias BatchBytesItem = RustBridge.BatchBytesItem

/// Batch item for file extraction.
///
/// Used with `batch_extract_files` and `batch_extract_files_sync`
/// to represent a single file in a batch extraction job.
///
/// In Swift, `BatchFileItem` is a type alias for `RustBridge.BatchFileItem`.
public typealias BatchFileItem = RustBridge.BatchFileItem

/// Image extraction configuration.
public struct ImageExtractionConfig: Codable, Sendable, Hashable {
    /// Extract images from documents
    public let extractImages: Bool
    /// Target DPI for image normalization
    public let targetDpi: Int32
    /// Maximum dimension for images (width or height)
    public let maxImageDimension: Int32
    /// Whether to inject image reference placeholders into markdown output.
    /// When `true` (default), image references like `![Image 1](embedded:p1_i0)`
    /// are appended to the markdown. Set to `false` to extract images as data
    /// without polluting the markdown output.
    public let injectPlaceholders: Bool
    /// Automatically adjust DPI based on image content
    public let autoAdjustDpi: Bool
    /// Minimum DPI threshold
    public let minDpi: Int32
    /// Maximum DPI threshold
    public let maxDpi: Int32
    /// Maximum number of image objects to extract per PDF page.
    ///
    /// Some PDFs (e.g. technical diagrams stored as thousands of raster fragments)
    /// can trigger extremely long or indefinite extraction times when every image
    /// object on a dense page is decoded individually via the PDF extractor. Setting this
    /// limit causes kreuzberg to stop collecting individual images once the count
    /// per page reaches the cap and emit a warning instead.
    ///
    /// `None` (default) means no limit — all images are extracted.
    public let maxImagesPerPage: UInt32?
    /// When `true` (default), extracted images are classified by kind and grouped
    /// into clusters where they appear to belong to one figure.
    public let classify: Bool
    /// When `true`, full-page renders produced during OCR preprocessing are captured
    /// and returned as `ImageKind::PageRaster` entries in `ExtractionResult.images`.
    ///
    /// **PDF + OCR only.** No rasters are captured for non-PDF inputs or when the
    /// document-level OCR bypass is active (whole-document backend). When OCR is
    /// enabled and this flag is set but the active backend skips per-page rendering,
    /// a `ProcessingWarning` is emitted in `ExtractionResult.processing_warnings`.
    ///
    /// Defaults to `false`. Enable when downstream consumers need page thumbnails
    /// (e.g. citation previews, visual grounding).
    public let includePageRasters: Bool
    /// Run OCR on extracted images and include the recognized text in the document content.
    ///
    /// When `true` (default) and `ExtractionConfig.ocr` is configured, extracted images
    /// are processed with the configured OCR backend. Set to `false` to extract images
    /// without OCR processing, even when OCR is enabled.
    public let runOcrOnImages: Bool
    /// When `true`, image OCR results are rendered as plain text without the
    /// `![...](...)` markdown placeholder. Only takes effect when `run_ocr_on_images`
    /// is also `true`.
    public let ocrTextOnly: Bool
    /// When `true` and `ocr_text_only` is `false`, append the OCR text after
    /// the image placeholder in the rendered output.
    public let appendOcrText: Bool

    /// Creates an image extraction configuration.
    ///
    /// Each argument defaults to the value `init(from:)` uses for a missing key, so
    /// `ImageExtractionConfig()` equals a configuration decoded from `{}` and callers
    /// only need to spell out the settings they want to change.
    ///
    /// - Parameters:
    ///   - extractImages: Defaults to `true`.
    ///   - targetDpi: Defaults to `300`.
    ///   - maxImageDimension: Defaults to `4096`.
    ///   - injectPlaceholders: Defaults to `true`.
    ///   - autoAdjustDpi: Defaults to `true`.
    ///   - minDpi: Defaults to `72`.
    ///   - maxDpi: Defaults to `600`.
    ///   - maxImagesPerPage: Defaults to `nil`.
    ///   - classify: Defaults to `true`.
    ///   - includePageRasters: Defaults to `false`.
    ///   - runOcrOnImages: Defaults to `true`.
    ///   - ocrTextOnly: Defaults to `false`.
    ///   - appendOcrText: Defaults to `false`.
    public init(
        extractImages: Bool = true,
        targetDpi: Int32 = 300,
        maxImageDimension: Int32 = 4096,
        injectPlaceholders: Bool = true,
        autoAdjustDpi: Bool = true,
        minDpi: Int32 = 72,
        maxDpi: Int32 = 600,
        maxImagesPerPage: UInt32? = nil,
        classify: Bool = true,
        includePageRasters: Bool = false,
        runOcrOnImages: Bool = true,
        ocrTextOnly: Bool = false,
        appendOcrText: Bool = false
    ) {
        self.extractImages = extractImages
        self.targetDpi = targetDpi
        self.maxImageDimension = maxImageDimension
        self.injectPlaceholders = injectPlaceholders
        self.autoAdjustDpi = autoAdjustDpi
        self.minDpi = minDpi
        self.maxDpi = maxDpi
        self.maxImagesPerPage = maxImagesPerPage
        self.classify = classify
        self.includePageRasters = includePageRasters
        self.runOcrOnImages = runOcrOnImages
        self.ocrTextOnly = ocrTextOnly
        self.appendOcrText = appendOcrText
    }

    /// Maps each property to its snake_case key in encoded data.
    private enum CodingKeys: String, CodingKey {
        case extractImages = "extract_images"
        case targetDpi = "target_dpi"
        case maxImageDimension = "max_image_dimension"
        case injectPlaceholders = "inject_placeholders"
        case autoAdjustDpi = "auto_adjust_dpi"
        case minDpi = "min_dpi"
        case maxDpi = "max_dpi"
        case maxImagesPerPage = "max_images_per_page"
        case classify = "classify"
        case includePageRasters = "include_page_rasters"
        case runOcrOnImages = "run_ocr_on_images"
        case ocrTextOnly = "ocr_text_only"
        case appendOcrText = "append_ocr_text"
    }

    /// Decodes a configuration, substituting a default for every key that is
    /// missing or null (the same defaults as the memberwise initializer).
    public init(from decoder: any Decoder) throws {
        let container = try decoder.container(keyedBy: CodingKeys.self)
        self.extractImages = try container.decodeIfPresent(Bool.self, forKey: .extractImages) ?? true
        self.targetDpi = try container.decodeIfPresent(Int32.self, forKey: .targetDpi) ?? 300
        self.maxImageDimension = try container.decodeIfPresent(Int32.self, forKey: .maxImageDimension) ?? 4096
        self.injectPlaceholders = try container.decodeIfPresent(Bool.self, forKey: .injectPlaceholders) ?? true
        self.autoAdjustDpi = try container.decodeIfPresent(Bool.self, forKey: .autoAdjustDpi) ?? true
        self.minDpi = try container.decodeIfPresent(Int32.self, forKey: .minDpi) ?? 72
        self.maxDpi = try container.decodeIfPresent(Int32.self, forKey: .maxDpi) ?? 600
        self.maxImagesPerPage = try container.decodeIfPresent(UInt32.self, forKey: .maxImagesPerPage)
        self.classify = try container.decodeIfPresent(Bool.self, forKey: .classify) ?? true
        self.includePageRasters = try container.decodeIfPresent(Bool.self, forKey: .includePageRasters) ?? false
        self.runOcrOnImages = try container.decodeIfPresent(Bool.self, forKey: .runOcrOnImages) ?? true
        self.ocrTextOnly = try container.decodeIfPresent(Bool.self, forKey: .ocrTextOnly) ?? false
        self.appendOcrText = try container.decodeIfPresent(Bool.self, forKey: .appendOcrText) ?? false
    }
}

// MARK: - Internal FFI conversions for ImageExtractionConfig
internal extension ImageExtractionConfig {
    /// Builds an `ImageExtractionConfig` by calling each accessor on the bridged reference.
    ///
    /// - Parameter bridged: Bridge-side reference whose accessors supply the field values.
    init(_ bridged: RustBridge.ImageExtractionConfigRef) throws {
        self.init(
            extractImages: bridged.extractImages(),
            targetDpi: bridged.targetDpi(),
            maxImageDimension: bridged.maxImageDimension(),
            injectPlaceholders: bridged.injectPlaceholders(),
            autoAdjustDpi: bridged.autoAdjustDpi(),
            minDpi: bridged.minDpi(),
            maxDpi: bridged.maxDpi(),
            maxImagesPerPage: bridged.maxImagesPerPage(),
            classify: bridged.classify(),
            includePageRasters: bridged.includePageRasters(),
            runOcrOnImages: bridged.runOcrOnImages(),
            ocrTextOnly: bridged.ocrTextOnly(),
            appendOcrText: bridged.appendOcrText()
        )
    }

    /// Creates the `RustBridge.ImageExtractionConfig` counterpart of this value.
    ///
    /// The fields are passed positionally, in property declaration order.
    func intoRust() throws -> RustBridge.ImageExtractionConfig {
        RustBridge.ImageExtractionConfig(
            extractImages,
            targetDpi,
            maxImageDimension,
            injectPlaceholders,
            autoAdjustDpi,
            minDpi,
            maxDpi,
            maxImagesPerPage,
            classify,
            includePageRasters,
            runOcrOnImages,
            ocrTextOnly,
            appendOcrText
        )
    }
}

/// Token reduction configuration.
public struct TokenReductionOptions: Codable, Sendable, Hashable {
    /// Name of the reduction mode: "off", "light", "moderate", "aggressive", "maximum".
    public let mode: String
    /// Whether important words (capitalized, technical terms) are preserved.
    public let preserveImportantWords: Bool

    /// Creates token reduction options from an explicit mode and preservation flag.
    public init(mode: String, preserveImportantWords: Bool) {
        self.mode = mode
        self.preserveImportantWords = preserveImportantWords
    }

    /// Serialized key names: `mode` and `preserve_important_words`.
    private enum CodingKeys: String, CodingKey {
        case mode
        case preserveImportantWords = "preserve_important_words"
    }

    /// Decodes token reduction options.
    ///
    /// A missing or null `mode` decodes as the empty string; a missing or null
    /// `preserve_important_words` decodes as `true`.
    public init(from decoder: any Decoder) throws {
        let values = try decoder.container(keyedBy: CodingKeys.self)
        // NOTE(review): "" is not one of the mode names listed on `mode`;
        // confirm how the consumer interprets an empty mode.
        let decodedMode = try values.decodeIfPresent(String.self, forKey: .mode)
        let decodedPreserve = try values.decodeIfPresent(Bool.self, forKey: .preserveImportantWords)
        self.init(mode: decodedMode ?? "", preserveImportantWords: decodedPreserve ?? true)
    }
}

// MARK: - Internal FFI conversions for TokenReductionOptions
internal extension TokenReductionOptions {
    /// Builds `TokenReductionOptions` from the bridged reference.
    ///
    /// The mode is obtained via `mode().toString()`; the flag is read directly.
    ///
    /// - Parameter bridged: Bridge-side reference whose accessors supply the field values.
    init(_ bridged: RustBridge.TokenReductionOptionsRef) throws {
        self.init(
            mode: bridged.mode().toString(),
            preserveImportantWords: bridged.preserveImportantWords()
        )
    }

    /// Creates the `RustBridge.TokenReductionOptions` counterpart of this value,
    /// wrapping `mode` in a `RustString`.
    func intoRust() throws -> RustBridge.TokenReductionOptions {
        RustBridge.TokenReductionOptions(RustString(mode), preserveImportantWords)
    }
}

/// Language detection configuration.
public struct LanguageDetectionConfig: Codable, Sendable, Hashable {
    /// Enable language detection
    public let enabled: Bool
    /// Minimum confidence threshold (0.0-1.0)
    public let minConfidence: Double
    /// Detect multiple languages in the document
    public let detectMultiple: Bool

    /// Creates a language detection configuration.
    ///
    /// Each argument defaults to the value `init(from:)` uses for a missing key, so
    /// `LanguageDetectionConfig()` equals a configuration decoded from `{}`.
    ///
    /// - Parameters:
    ///   - enabled: Stored in `enabled`; defaults to `true`.
    ///   - minConfidence: Stored in `minConfidence`; defaults to `0.8`.
    ///   - detectMultiple: Stored in `detectMultiple`; defaults to `false`.
    public init(enabled: Bool = true, minConfidence: Double = 0.8, detectMultiple: Bool = false) {
        self.enabled = enabled
        self.minConfidence = minConfidence
        self.detectMultiple = detectMultiple
    }

    /// Maps each property to its snake_case key in encoded data.
    private enum CodingKeys: String, CodingKey {
        case enabled = "enabled"
        case minConfidence = "min_confidence"
        case detectMultiple = "detect_multiple"
    }

    /// Decodes a configuration, substituting a default for every key that is
    /// missing or null (the same defaults as the memberwise initializer).
    public init(from decoder: any Decoder) throws {
        let container = try decoder.container(keyedBy: CodingKeys.self)
        self.enabled = try container.decodeIfPresent(Bool.self, forKey: .enabled) ?? true
        self.minConfidence = try container.decodeIfPresent(Double.self, forKey: .minConfidence) ?? 0.8
        self.detectMultiple = try container.decodeIfPresent(Bool.self, forKey: .detectMultiple) ?? false
    }
}

// MARK: - Internal FFI conversions for LanguageDetectionConfig
internal extension LanguageDetectionConfig {
    /// Builds a `LanguageDetectionConfig` by calling each accessor on the bridged reference.
    ///
    /// - Parameter bridged: Bridge-side reference whose accessors supply the field values.
    init(_ bridged: RustBridge.LanguageDetectionConfigRef) throws {
        self.init(
            enabled: bridged.enabled(),
            minConfidence: bridged.minConfidence(),
            detectMultiple: bridged.detectMultiple()
        )
    }

    /// Creates the `RustBridge.LanguageDetectionConfig` counterpart of this value.
    ///
    /// The fields are passed positionally, in property declaration order.
    func intoRust() throws -> RustBridge.LanguageDetectionConfig {
        RustBridge.LanguageDetectionConfig(enabled, minConfidence, detectMultiple)
    }
}

/// Configuration for styled HTML output.
///
/// When set on [`ExtractionConfig::html_output`] alongside
/// `output_format = OutputFormat::Html`, the pipeline builds a
/// [`StyledHtmlRenderer`](crate::rendering::StyledHtmlRenderer) instead of
/// the plain comrak-based renderer.
///
/// # Example
///
/// ```rust
/// use kreuzberg::core::config::{HtmlOutputConfig, HtmlTheme};
///
/// let config = HtmlOutputConfig {
///     theme: HtmlTheme::GitHub,
///     css: Some(".kb-p { font-size: 1.1rem; }".to_string()),
///     ..Default::default()
/// };
/// ```
///
/// In Swift, `HtmlOutputConfig` is a type alias for `RustBridge.HtmlOutputConfig`;
/// the example above is written in Rust, not Swift.
public typealias HtmlOutputConfig = RustBridge.HtmlOutputConfig

/// Layout detection configuration.
///
/// Controls layout detection behavior in the extraction pipeline.
/// When set on [`ExtractionConfig`](super::ExtractionConfig), layout detection
/// is enabled for PDF extraction.
public struct LayoutDetectionConfig: Codable, Sendable, Hashable {
    /// Optional confidence threshold override; `nil` means the model default is used.
    public let confidenceThreshold: Float?
    /// Whether postprocessing heuristics are applied (default: true).
    public let applyHeuristics: Bool
    /// Table structure recognition model.
    ///
    /// Selects the model used for table cell detection inside layout-detected
    /// table regions. Defaults to [`TableModel::Tatr`].
    public let tableModel: TableModel
    /// Hardware acceleration for ONNX models (layout detection + table structure).
    ///
    /// When set, selects the execution provider (CPU, CUDA, CoreML, TensorRT)
    /// used for inference. Defaults to `None` (auto-select per platform).
    public let acceleration: AccelerationConfig?

    /// Creates a layout detection configuration; the two optional settings default to `nil`.
    public init(
        confidenceThreshold: Float? = nil,
        applyHeuristics: Bool,
        tableModel: TableModel,
        acceleration: AccelerationConfig? = nil
    ) {
        self.confidenceThreshold = confidenceThreshold
        self.applyHeuristics = applyHeuristics
        self.tableModel = tableModel
        self.acceleration = acceleration
    }

    /// Maps each property to its snake_case key in encoded data.
    private enum CodingKeys: String, CodingKey {
        case confidenceThreshold = "confidence_threshold"
        case applyHeuristics = "apply_heuristics"
        case tableModel = "table_model"
        case acceleration
    }

    /// Decodes a layout detection configuration.
    ///
    /// `table_model` must be present. A missing or null `apply_heuristics` decodes as
    /// `true`; missing or null `confidence_threshold` and `acceleration` decode as `nil`.
    // NOTE(review): the `tableModel` doc mentions a default, but decoding has no
    // fallback for `table_model` — confirm this is intended.
    public init(from decoder: any Decoder) throws {
        let values = try decoder.container(keyedBy: CodingKeys.self)
        let threshold = try values.decodeIfPresent(Float.self, forKey: .confidenceThreshold)
        let heuristics = try values.decodeIfPresent(Bool.self, forKey: .applyHeuristics) ?? true
        let model = try values.decode(TableModel.self, forKey: .tableModel)
        let accel = try values.decodeIfPresent(AccelerationConfig.self, forKey: .acceleration)
        self.init(
            confidenceThreshold: threshold,
            applyHeuristics: heuristics,
            tableModel: model,
            acceleration: accel
        )
    }
}

// MARK: - Internal FFI conversions for LayoutDetectionConfig
internal extension LayoutDetectionConfig {
    /// Builds a `LayoutDetectionConfig` from the bridged reference.
    ///
    /// - Parameter rb: Bridge-side reference whose accessors supply the field values.
    /// - Throws: `DecodingError.dataCorrupted` when the table model name reported by the
    ///   bridge is not a raw value of `TableModel`, or any error thrown while converting
    ///   the nested acceleration configuration.
    init(_ rb: RustBridge.LayoutDetectionConfigRef) throws {
        let confidenceThreshold = rb.confidenceThreshold()
        let applyHeuristics = rb.applyHeuristics()

        // Read the model name once; it is needed for both the lookup and the error text.
        let tableModelName = rb.tableModel().toString()
        guard let tableModel = TableModel(rawValue: tableModelName) else {
            // This initializer already throws, so report the mismatch to the caller
            // instead of terminating the whole process.
            throw DecodingError.dataCorrupted(
                DecodingError.Context(
                    codingPath: [],
                    debugDescription: "Unknown TableModel: \(tableModelName)"
                )
            )
        }

        let acceleration = try rb.acceleration().map { try AccelerationConfig($0) }
        self.init(
            confidenceThreshold: confidenceThreshold,
            applyHeuristics: applyHeuristics,
            tableModel: tableModel,
            acceleration: acceleration
        )
    }

    /// Converts this value by encoding it as JSON and passing the JSON text to
    /// `RustBridge.layoutDetectionConfigFromJson`.
    ///
    /// - Throws: Any error from `JSONEncoder.encode(_:)` or from the bridge function.
    func intoRust() throws -> RustBridge.LayoutDetectionConfig {
        let data = try JSONEncoder().encode(self)
        // `String(decoding:as:)` cannot fail, so there is no fallback that would
        // silently replace the real configuration with "{}".
        let json = String(decoding: data, as: UTF8.self)
        return try RustBridge.layoutDetectionConfigFromJson(json)
    }
}

/// Configuration for an LLM provider/model via liter-llm.
///
/// Each feature (VLM OCR, VLM embeddings, structured extraction) carries
/// its own `LlmConfig`, allowing different providers per feature.
///
/// # Example
///
/// ```toml
/// [structured_extraction.llm]
/// model = "openai/gpt-4o"
/// api_key = "sk-..."  # or use KREUZBERG_LLM_API_KEY env var
/// ```
public struct LlmConfig: Codable, Sendable, Hashable {
    /// Provider/model string in liter-llm routing format.
    ///
    /// Examples: `"openai/gpt-4o"`, `"anthropic/claude-sonnet-4-20250514"`,
    /// `"groq/llama-3.1-70b-versatile"`.
    public let model: String
    /// API key for the provider. When `None`, liter-llm falls back to the
    /// provider's standard environment variable (e.g., `OPENAI_API_KEY`).
    public let apiKey: String?
    /// Custom base URL that overrides the provider endpoint.
    public let baseUrl: String?
    /// Request timeout in seconds (default: 60).
    public let timeoutSecs: UInt64?
    /// Maximum retry attempts (default: 3).
    public let maxRetries: UInt32?
    /// Sampling temperature for generation tasks.
    public let temperature: Double?
    /// Maximum tokens to generate.
    public let maxTokens: UInt64?

    /// Creates an LLM configuration; every setting except `model` defaults to `nil`.
    public init(
        model: String,
        apiKey: String? = nil,
        baseUrl: String? = nil,
        timeoutSecs: UInt64? = nil,
        maxRetries: UInt32? = nil,
        temperature: Double? = nil,
        maxTokens: UInt64? = nil
    ) {
        self.model = model
        self.apiKey = apiKey
        self.baseUrl = baseUrl
        self.timeoutSecs = timeoutSecs
        self.maxRetries = maxRetries
        self.temperature = temperature
        self.maxTokens = maxTokens
    }

    /// Maps each property to its snake_case key in encoded data.
    private enum CodingKeys: String, CodingKey {
        case model
        case apiKey = "api_key"
        case baseUrl = "base_url"
        case timeoutSecs = "timeout_secs"
        case maxRetries = "max_retries"
        case temperature
        case maxTokens = "max_tokens"
    }

    /// Decodes an LLM configuration.
    ///
    /// A missing or null `model` decodes as the empty string; every other missing or
    /// null key decodes as `nil`.
    public init(from decoder: any Decoder) throws {
        let values = try decoder.container(keyedBy: CodingKeys.self)
        let decodedModel = try values.decodeIfPresent(String.self, forKey: .model) ?? ""
        let decodedApiKey = try values.decodeIfPresent(String.self, forKey: .apiKey)
        let decodedBaseUrl = try values.decodeIfPresent(String.self, forKey: .baseUrl)
        let decodedTimeout = try values.decodeIfPresent(UInt64.self, forKey: .timeoutSecs)
        let decodedRetries = try values.decodeIfPresent(UInt32.self, forKey: .maxRetries)
        let decodedTemperature = try values.decodeIfPresent(Double.self, forKey: .temperature)
        let decodedMaxTokens = try values.decodeIfPresent(UInt64.self, forKey: .maxTokens)
        self.init(
            model: decodedModel,
            apiKey: decodedApiKey,
            baseUrl: decodedBaseUrl,
            timeoutSecs: decodedTimeout,
            maxRetries: decodedRetries,
            temperature: decodedTemperature,
            maxTokens: decodedMaxTokens
        )
    }
}

// MARK: - Internal FFI conversions for LlmConfig
internal extension LlmConfig {
    /// Builds an `LlmConfig` by calling each accessor on the bridged reference.
    ///
    /// String fields are obtained via `toString()`; for `apiKey` and `baseUrl` the
    /// conversion is optional-chained, so an absent value stays `nil`.
    ///
    /// - Parameter bridged: Bridge-side reference whose accessors supply the field values.
    init(_ bridged: RustBridge.LlmConfigRef) throws {
        self.init(
            model: bridged.model().toString(),
            apiKey: bridged.apiKey()?.toString(),
            baseUrl: bridged.baseUrl()?.toString(),
            timeoutSecs: bridged.timeoutSecs(),
            maxRetries: bridged.maxRetries(),
            temperature: bridged.temperature(),
            maxTokens: bridged.maxTokens()
        )
    }

    /// Creates the `RustBridge.LlmConfig` counterpart of this value.
    ///
    /// String fields are wrapped in `RustString`; the fields are passed positionally,
    /// in property declaration order.
    func intoRust() throws -> RustBridge.LlmConfig {
        RustBridge.LlmConfig(
            RustString(model),
            apiKey.map(RustString.init),
            baseUrl.map(RustString.init),
            timeoutSecs,
            maxRetries,
            temperature,
            maxTokens
        )
    }
}

/// Configuration for LLM-based structured data extraction.
///
/// Sends extracted document content to a VLM with a JSON schema,
/// returning structured data that conforms to the schema.
///
/// # Example
///
/// ```toml
/// [structured_extraction]
/// schema_name = "invoice_data"
/// strict = true
///
/// [structured_extraction.schema]
/// type = "object"
/// properties.vendor = { type = "string" }
/// properties.total = { type = "number" }
/// required = ["vendor", "total"]
///
/// [structured_extraction.llm]
/// model = "openai/gpt-4o"
/// ```
///
/// In Swift, `StructuredExtractionConfig` is a type alias for
/// `RustBridge.StructuredExtractionConfig`; the example above is a TOML
/// configuration snippet, not Swift code.
public typealias StructuredExtractionConfig = RustBridge.StructuredExtractionConfig

/// Tunable limits for OCR fallback decisions and for gating pipeline stage quality.
///
/// Every field has a default matching the previously hardcoded behavior. Decoding
/// applies that default for each key missing from the payload, so an empty object
/// decodes to the complete default threshold set.
public struct OcrQualityThresholds: Codable, Sendable, Hashable {
    /// Total non-whitespace character count needed for text to count as substantive.
    public let minTotalNonWhitespace: UInt
    /// Average non-whitespace characters required per page.
    public let minNonWhitespacePerPage: Double
    /// Character count a word needs to be treated as "meaningful".
    public let minMeaningfulWordLen: UInt
    /// Number of meaningful words required before text is accepted.
    public let minMeaningfulWords: UInt
    /// Lowest acceptable alphanumeric ratio among non-whitespace characters.
    public let minAlnumRatio: Double
    /// Count of Unicode replacement characters (U+FFFD) that triggers OCR fallback.
    public let minGarbageChars: UInt
    /// Highest fraction of short (1-2 char) words before text is considered fragmented.
    public let maxFragmentedWordRatio: Double
    /// Critical fragmentation level that triggers OCR regardless of meaningful words.
    /// Normal English text has ~20-30% short words; 80%+ is definitive garbage.
    public let criticalFragmentedWordRatio: Double
    /// Lowest acceptable average word length; lower values with enough words indicate
    /// garbled extraction.
    public let minAvgWordLength: Double
    /// Word count required before the average word length check applies.
    public let minWordsForAvgLengthCheck: UInt
    /// Consecutive word repetition ratio at which column scrambling is detected.
    public let minConsecutiveRepeatRatio: Double
    /// Word count required before the consecutive repetition check applies.
    public let minWordsForRepeatCheck: UInt
    /// Character count for the "substantive markdown" OCR skip gate.
    public let substantiveMinChars: UInt
    /// Character count for the "non-text content" OCR skip gate.
    public let nonTextMinChars: UInt
    /// Alphanumeric+whitespace ratio threshold used in skip decisions.
    public let alnumWsRatioThreshold: Double
    /// Quality score (0.0-1.0) a pipeline stage result must reach to be accepted.
    /// A backend result scoring below this causes the next backend to be tried.
    public let pipelineMinQuality: Double

    /// Creates a threshold set with every value supplied explicitly.
    public init(
        minTotalNonWhitespace: UInt,
        minNonWhitespacePerPage: Double,
        minMeaningfulWordLen: UInt,
        minMeaningfulWords: UInt,
        minAlnumRatio: Double,
        minGarbageChars: UInt,
        maxFragmentedWordRatio: Double,
        criticalFragmentedWordRatio: Double,
        minAvgWordLength: Double,
        minWordsForAvgLengthCheck: UInt,
        minConsecutiveRepeatRatio: Double,
        minWordsForRepeatCheck: UInt,
        substantiveMinChars: UInt,
        nonTextMinChars: UInt,
        alnumWsRatioThreshold: Double,
        pipelineMinQuality: Double
    ) {
        self.minTotalNonWhitespace = minTotalNonWhitespace
        self.minNonWhitespacePerPage = minNonWhitespacePerPage
        self.minMeaningfulWordLen = minMeaningfulWordLen
        self.minMeaningfulWords = minMeaningfulWords
        self.minAlnumRatio = minAlnumRatio
        self.minGarbageChars = minGarbageChars
        self.maxFragmentedWordRatio = maxFragmentedWordRatio
        self.criticalFragmentedWordRatio = criticalFragmentedWordRatio
        self.minAvgWordLength = minAvgWordLength
        self.minWordsForAvgLengthCheck = minWordsForAvgLengthCheck
        self.minConsecutiveRepeatRatio = minConsecutiveRepeatRatio
        self.minWordsForRepeatCheck = minWordsForRepeatCheck
        self.substantiveMinChars = substantiveMinChars
        self.nonTextMinChars = nonTextMinChars
        self.alnumWsRatioThreshold = alnumWsRatioThreshold
        self.pipelineMinQuality = pipelineMinQuality
    }

    /// Maps the camelCase Swift properties onto the snake_case wire keys.
    private enum CodingKeys: String, CodingKey {
        case minTotalNonWhitespace = "min_total_non_whitespace"
        case minNonWhitespacePerPage = "min_non_whitespace_per_page"
        case minMeaningfulWordLen = "min_meaningful_word_len"
        case minMeaningfulWords = "min_meaningful_words"
        case minAlnumRatio = "min_alnum_ratio"
        case minGarbageChars = "min_garbage_chars"
        case maxFragmentedWordRatio = "max_fragmented_word_ratio"
        case criticalFragmentedWordRatio = "critical_fragmented_word_ratio"
        case minAvgWordLength = "min_avg_word_length"
        case minWordsForAvgLengthCheck = "min_words_for_avg_length_check"
        case minConsecutiveRepeatRatio = "min_consecutive_repeat_ratio"
        case minWordsForRepeatCheck = "min_words_for_repeat_check"
        case substantiveMinChars = "substantive_min_chars"
        case nonTextMinChars = "non_text_min_chars"
        case alnumWsRatioThreshold = "alnum_ws_ratio_threshold"
        case pipelineMinQuality = "pipeline_min_quality"
    }

    /// Decodes the thresholds, substituting the default for any absent key.
    public init(from decoder: any Decoder) throws {
        let values = try decoder.container(keyedBy: CodingKeys.self)
        minTotalNonWhitespace = try values.decodeIfPresent(UInt.self, forKey: .minTotalNonWhitespace) ?? 64
        minNonWhitespacePerPage = try values.decodeIfPresent(Double.self, forKey: .minNonWhitespacePerPage) ?? 32.0
        minMeaningfulWordLen = try values.decodeIfPresent(UInt.self, forKey: .minMeaningfulWordLen) ?? 4
        minMeaningfulWords = try values.decodeIfPresent(UInt.self, forKey: .minMeaningfulWords) ?? 3
        minAlnumRatio = try values.decodeIfPresent(Double.self, forKey: .minAlnumRatio) ?? 0.3
        minGarbageChars = try values.decodeIfPresent(UInt.self, forKey: .minGarbageChars) ?? 5
        maxFragmentedWordRatio = try values.decodeIfPresent(Double.self, forKey: .maxFragmentedWordRatio) ?? 0.6
        criticalFragmentedWordRatio = try values.decodeIfPresent(Double.self, forKey: .criticalFragmentedWordRatio) ?? 0.8
        minAvgWordLength = try values.decodeIfPresent(Double.self, forKey: .minAvgWordLength) ?? 2.0
        minWordsForAvgLengthCheck = try values.decodeIfPresent(UInt.self, forKey: .minWordsForAvgLengthCheck) ?? 50
        minConsecutiveRepeatRatio = try values.decodeIfPresent(Double.self, forKey: .minConsecutiveRepeatRatio) ?? 0.08
        minWordsForRepeatCheck = try values.decodeIfPresent(UInt.self, forKey: .minWordsForRepeatCheck) ?? 50
        substantiveMinChars = try values.decodeIfPresent(UInt.self, forKey: .substantiveMinChars) ?? 100
        nonTextMinChars = try values.decodeIfPresent(UInt.self, forKey: .nonTextMinChars) ?? 20
        alnumWsRatioThreshold = try values.decodeIfPresent(Double.self, forKey: .alnumWsRatioThreshold) ?? 0.4
        pipelineMinQuality = try values.decodeIfPresent(Double.self, forKey: .pipelineMinQuality) ?? 0.5
    }
}

// MARK: - Internal FFI conversions for OcrQualityThresholds
internal extension OcrQualityThresholds {
    /// Copies every threshold out of a bridged `OcrQualityThresholdsRef`.
    init(_ rb: RustBridge.OcrQualityThresholdsRef) throws {
        self.init(
            minTotalNonWhitespace: rb.minTotalNonWhitespace(),
            minNonWhitespacePerPage: rb.minNonWhitespacePerPage(),
            minMeaningfulWordLen: rb.minMeaningfulWordLen(),
            minMeaningfulWords: rb.minMeaningfulWords(),
            minAlnumRatio: rb.minAlnumRatio(),
            minGarbageChars: rb.minGarbageChars(),
            maxFragmentedWordRatio: rb.maxFragmentedWordRatio(),
            criticalFragmentedWordRatio: rb.criticalFragmentedWordRatio(),
            minAvgWordLength: rb.minAvgWordLength(),
            minWordsForAvgLengthCheck: rb.minWordsForAvgLengthCheck(),
            minConsecutiveRepeatRatio: rb.minConsecutiveRepeatRatio(),
            minWordsForRepeatCheck: rb.minWordsForRepeatCheck(),
            substantiveMinChars: rb.substantiveMinChars(),
            nonTextMinChars: rb.nonTextMinChars(),
            alnumWsRatioThreshold: rb.alnumWsRatioThreshold(),
            pipelineMinQuality: rb.pipelineMinQuality()
        )
    }

    /// Creates the bridged `RustBridge.OcrQualityThresholds`, passing the fields
    /// positionally in declaration order.
    func intoRust() throws -> RustBridge.OcrQualityThresholds {
        RustBridge.OcrQualityThresholds(
            minTotalNonWhitespace,
            minNonWhitespacePerPage,
            minMeaningfulWordLen,
            minMeaningfulWords,
            minAlnumRatio,
            minGarbageChars,
            maxFragmentedWordRatio,
            criticalFragmentedWordRatio,
            minAvgWordLength,
            minWordsForAvgLengthCheck,
            minConsecutiveRepeatRatio,
            minWordsForRepeatCheck,
            substantiveMinChars,
            nonTextMinChars,
            alnumWsRatioThreshold,
            pipelineMinQuality
        )
    }
}

/// A single backend stage in the OCR pipeline.
///
/// Typealias for the bridged `RustBridge.OcrPipelineStage` type.
public typealias OcrPipelineStage = RustBridge.OcrPipelineStage

/// Multi-backend OCR pipeline with quality-based fallback.
///
/// Backends are tried in priority order (highest first). After each backend
/// produces output, quality is evaluated. If it meets `quality_thresholds.pipeline_min_quality`,
/// the result is accepted. Otherwise the next backend is tried.
///
/// Typealias for the bridged `RustBridge.OcrPipelineConfig` type.
public typealias OcrPipelineConfig = RustBridge.OcrPipelineConfig

/// OCR configuration.
///
/// Typealias for the bridged `RustBridge.OcrConfig` type.
public typealias OcrConfig = RustBridge.OcrConfig

/// Settings for page extraction and page tracking.
///
/// Governs how pages are extracted, tracked, and represented in extraction results.
/// When the configuration is absent (`None` on the Rust side), page tracking is disabled.
///
/// Page range tracking in chunk metadata (first_page/last_page) is enabled automatically
/// whenever page boundaries are available and chunking is configured.
public struct PageConfig: Codable, Sendable, Hashable {
    /// Whether pages are extracted as a separate array (ExtractionResult.pages).
    public let extractPages: Bool
    /// Whether page markers are inserted into the main content string.
    public let insertPageMarkers: Bool
    /// Format of the page marker (use the {page_num} placeholder).
    /// Default: "\n\n<!-- PAGE {page_num} -->\n\n"
    public let markerFormat: String

    /// Creates a page configuration with every value supplied explicitly.
    public init(
        extractPages: Bool,
        insertPageMarkers: Bool,
        markerFormat: String
    ) {
        self.extractPages = extractPages
        self.insertPageMarkers = insertPageMarkers
        self.markerFormat = markerFormat
    }

    /// Maps the camelCase Swift properties onto the snake_case wire keys.
    private enum CodingKeys: String, CodingKey {
        case extractPages = "extract_pages"
        case insertPageMarkers = "insert_page_markers"
        case markerFormat = "marker_format"
    }

    /// Decodes the configuration, substituting the default for any absent key.
    public init(from decoder: any Decoder) throws {
        let values = try decoder.container(keyedBy: CodingKeys.self)
        extractPages = try values.decodeIfPresent(Bool.self, forKey: .extractPages) ?? false
        insertPageMarkers = try values.decodeIfPresent(Bool.self, forKey: .insertPageMarkers) ?? false
        markerFormat = try values.decodeIfPresent(String.self, forKey: .markerFormat) ?? "\n\n<!-- PAGE {page_num} -->\n\n"
    }
}

// MARK: - Internal FFI conversions for PageConfig
internal extension PageConfig {
    /// Copies the fields out of a bridged `PageConfigRef`, converting the marker
    /// format with `toString()`.
    init(_ rb: RustBridge.PageConfigRef) throws {
        self.init(
            extractPages: rb.extractPages(),
            insertPageMarkers: rb.insertPageMarkers(),
            markerFormat: rb.markerFormat().toString()
        )
    }

    /// Creates the bridged `RustBridge.PageConfig`, wrapping the marker format in a `RustString`.
    func intoRust() throws -> RustBridge.PageConfig {
        let bridgedMarkerFormat = RustString(markerFormat)
        return RustBridge.PageConfig(extractPages, insertPageMarkers, bridgedMarkerFormat)
    }
}

/// Configuration that applies only to PDF extraction.
///
/// Decoding substitutes a default for every key absent from the payload; optional
/// fields decode to `nil` when absent.
public struct PdfConfig: Codable, Sendable, Hashable {
    /// Whether images are extracted from the PDF.
    public let extractImages: Bool
    /// Whether tables are extracted from the PDF.
    ///
    /// When `true` (default), runs pdf_oxide's native grid detector and, if it
    /// finds nothing, falls back to the heuristic text-layer reconstruction in
    /// `pdf::oxide::table::extract_tables_heuristic`. Set to `false` to skip
    /// both passes — `tables` will then be empty in the result.
    public let extractTables: Bool
    /// Passwords to try when opening encrypted PDFs.
    public let passwords: [String]?
    /// Whether PDF metadata is extracted.
    public let extractMetadata: Bool
    /// Hierarchy extraction settings (`nil` = hierarchy extraction disabled).
    public let hierarchy: HierarchyConfig?
    /// Whether PDF annotations (text notes, highlights, links, stamps) are extracted.
    /// Default: false
    public let extractAnnotations: Bool
    /// Fraction (0.0–1.0) of page height at the top to exclude headers/running heads.
    /// Default: 0.06 (6%)
    public let topMarginFraction: Float?
    /// Fraction (0.0–1.0) of page height at the bottom to exclude footers/page numbers.
    /// Default: 0.05 (5%)
    public let bottomMarginFraction: Float?
    /// Whether single-column pseudo tables are allowed in extraction results.
    ///
    /// By default, tables with fewer than 2 columns (layout-guided) or 3 columns
    /// (heuristic) are rejected. When `true`, the minimum column count is relaxed
    /// to 1, allowing single-column structured data (glossaries, itemized lists)
    /// to be emitted as tables. Other quality filters (density, sparsity, prose
    /// detection) still apply.
    public let allowSingleColumnTables: Bool
    /// Whether OCR is run on inline images extracted from PDF pages, attaching the
    /// recognized text to each `ExtractedImage.ocr_result`. Requires Tesseract
    /// to be available; if `ExtractionConfig.ocr` is `None` the extractor
    /// falls back to `TesseractConfig::default()`. Per-image failures degrade
    /// gracefully (the image is returned without OCR text rather than failing
    /// the whole extraction). Default: `false`.
    public let ocrInlineImages: Bool

    /// Creates a PDF configuration; the optional parameters default to `nil`.
    public init(
        extractImages: Bool,
        extractTables: Bool,
        passwords: [String]? = nil,
        extractMetadata: Bool,
        hierarchy: HierarchyConfig? = nil,
        extractAnnotations: Bool,
        topMarginFraction: Float? = nil,
        bottomMarginFraction: Float? = nil,
        allowSingleColumnTables: Bool,
        ocrInlineImages: Bool
    ) {
        self.extractImages = extractImages
        self.extractTables = extractTables
        self.passwords = passwords
        self.extractMetadata = extractMetadata
        self.hierarchy = hierarchy
        self.extractAnnotations = extractAnnotations
        self.topMarginFraction = topMarginFraction
        self.bottomMarginFraction = bottomMarginFraction
        self.allowSingleColumnTables = allowSingleColumnTables
        self.ocrInlineImages = ocrInlineImages
    }

    /// Maps the camelCase Swift properties onto the snake_case wire keys.
    private enum CodingKeys: String, CodingKey {
        case extractImages = "extract_images"
        case extractTables = "extract_tables"
        case passwords
        case extractMetadata = "extract_metadata"
        case hierarchy
        case extractAnnotations = "extract_annotations"
        case topMarginFraction = "top_margin_fraction"
        case bottomMarginFraction = "bottom_margin_fraction"
        case allowSingleColumnTables = "allow_single_column_tables"
        case ocrInlineImages = "ocr_inline_images"
    }

    /// Decodes the configuration, substituting the default for any absent key.
    public init(from decoder: any Decoder) throws {
        let values = try decoder.container(keyedBy: CodingKeys.self)
        extractImages = try values.decodeIfPresent(Bool.self, forKey: .extractImages) ?? false
        extractTables = try values.decodeIfPresent(Bool.self, forKey: .extractTables) ?? true
        passwords = try values.decodeIfPresent([String].self, forKey: .passwords)
        extractMetadata = try values.decodeIfPresent(Bool.self, forKey: .extractMetadata) ?? true
        hierarchy = try values.decodeIfPresent(HierarchyConfig.self, forKey: .hierarchy)
        extractAnnotations = try values.decodeIfPresent(Bool.self, forKey: .extractAnnotations) ?? false
        topMarginFraction = try values.decodeIfPresent(Float.self, forKey: .topMarginFraction)
        bottomMarginFraction = try values.decodeIfPresent(Float.self, forKey: .bottomMarginFraction)
        allowSingleColumnTables = try values.decodeIfPresent(Bool.self, forKey: .allowSingleColumnTables) ?? false
        ocrInlineImages = try values.decodeIfPresent(Bool.self, forKey: .ocrInlineImages) ?? false
    }
}

// MARK: - Internal FFI conversions for PdfConfig
internal extension PdfConfig {
    /// Copies the fields out of a bridged `PdfConfigRef`.
    ///
    /// Password entries are converted via `as_str().toString()`, and a present
    /// hierarchy reference is converted through `HierarchyConfig.init(_:)`.
    init(_ rb: RustBridge.PdfConfigRef) throws {
        try self.init(
            extractImages: rb.extractImages(),
            extractTables: rb.extractTables(),
            passwords: rb.passwords()?.map { $0.as_str().toString() },
            extractMetadata: rb.extractMetadata(),
            hierarchy: rb.hierarchy().map { try HierarchyConfig($0) },
            extractAnnotations: rb.extractAnnotations(),
            topMarginFraction: rb.topMarginFraction(),
            bottomMarginFraction: rb.bottomMarginFraction(),
            allowSingleColumnTables: rb.allowSingleColumnTables(),
            ocrInlineImages: rb.ocrInlineImages()
        )
    }

    /// Creates the bridged `RustBridge.PdfConfig` by JSON-encoding this value and
    /// handing the text to `RustBridge.pdfConfigFromJson`.
    func intoRust() throws -> RustBridge.PdfConfig {
        let encoded = try JSONEncoder().encode(self)
        let jsonText: String
        if let utf8Text = String(data: encoded, encoding: .utf8) {
            jsonText = utf8Text
        } else {
            // Falls back to an empty object when the bytes are not valid UTF-8.
            jsonText = "{}"
        }
        return try RustBridge.pdfConfigFromJson(jsonText)
    }
}

/// Hierarchy extraction configuration for PDF text structure analysis.
///
/// Enables extraction of document hierarchy levels (H1-H6) based on font size
/// clustering and semantic analysis. When enabled, hierarchical blocks are
/// included in page content.
///
/// Decoding substitutes the documented default for every key absent from the payload.
public struct HierarchyConfig: Codable, Sendable, Hashable {
    /// Enable hierarchy extraction
    public let enabled: Bool
    /// Number of font size clusters to use for hierarchy levels (1-7)
    ///
    /// Default: 6, which provides H1-H6 heading levels with body text.
    /// Larger values create more fine-grained hierarchy levels.
    public let kClusters: UInt
    /// Include bounding box information in hierarchy blocks
    public let includeBbox: Bool
    /// OCR coverage threshold for smart OCR triggering (0.0-1.0)
    ///
    /// Determines when OCR should be triggered based on text block coverage.
    /// OCR is triggered when text blocks cover less than this fraction of the page.
    /// Default: 0.5 (trigger OCR if less than 50% of page has text)
    public let ocrCoverageThreshold: Float?

    /// Creates a hierarchy configuration; `ocrCoverageThreshold` defaults to `nil`.
    public init(enabled: Bool, kClusters: UInt, includeBbox: Bool, ocrCoverageThreshold: Float? = nil) {
        self.enabled = enabled
        self.kClusters = kClusters
        self.includeBbox = includeBbox
        self.ocrCoverageThreshold = ocrCoverageThreshold
    }

    /// Maps the camelCase Swift properties onto the snake_case wire keys.
    private enum CodingKeys: String, CodingKey {
        case enabled = "enabled"
        case kClusters = "k_clusters"
        case includeBbox = "include_bbox"
        case ocrCoverageThreshold = "ocr_coverage_threshold"
    }

    /// Decodes the configuration, substituting the default for any absent key.
    ///
    /// - Throws: Any decoding error raised for a key that is present but malformed.
    public init(from decoder: any Decoder) throws {
        let container = try decoder.container(keyedBy: CodingKeys.self)
        self.enabled = try container.decodeIfPresent(Bool.self, forKey: .enabled) ?? true
        // Falls back to 6 clusters, the default documented on `kClusters`
        // (H1-H6 heading levels plus body text).
        self.kClusters = try container.decodeIfPresent(UInt.self, forKey: .kClusters) ?? 6
        self.includeBbox = try container.decodeIfPresent(Bool.self, forKey: .includeBbox) ?? true
        self.ocrCoverageThreshold = try container.decodeIfPresent(Float.self, forKey: .ocrCoverageThreshold)
    }
}

// MARK: - Internal FFI conversions for HierarchyConfig
internal extension HierarchyConfig {
    /// Copies the fields out of a bridged `HierarchyConfigRef`.
    init(_ rb: RustBridge.HierarchyConfigRef) throws {
        self.init(
            enabled: rb.enabled(),
            kClusters: rb.kClusters(),
            includeBbox: rb.includeBbox(),
            ocrCoverageThreshold: rb.ocrCoverageThreshold()
        )
    }

    /// Creates the bridged `RustBridge.HierarchyConfig`, passing the fields
    /// positionally in declaration order.
    func intoRust() throws -> RustBridge.HierarchyConfig {
        RustBridge.HierarchyConfig(
            enabled,
            kClusters,
            includeBbox,
            ocrCoverageThreshold
        )
    }
}

/// Configuration controlling which post-processors run.
///
/// Decoding treats a missing `enabled` key as `true`; the list fields decode to
/// `nil` when absent.
public struct PostProcessorConfig: Codable, Sendable, Hashable {
    /// Whether post-processors are enabled.
    public let enabled: Bool
    /// Whitelist of processor names to run (`nil` = all enabled).
    public let enabledProcessors: [String]?
    /// Blacklist of processor names to skip (`nil` = none disabled).
    public let disabledProcessors: [String]?
    /// Pre-computed set for O(1) enabled processor lookup (an AHashSet upstream),
    /// carried here as an optional array.
    public let enabledSet: [String]?
    /// Pre-computed set for O(1) disabled processor lookup (an AHashSet upstream),
    /// carried here as an optional array.
    public let disabledSet: [String]?

    /// Creates a post-processor configuration; every list parameter defaults to `nil`.
    public init(
        enabled: Bool,
        enabledProcessors: [String]? = nil,
        disabledProcessors: [String]? = nil,
        enabledSet: [String]? = nil,
        disabledSet: [String]? = nil
    ) {
        self.enabled = enabled
        self.enabledProcessors = enabledProcessors
        self.disabledProcessors = disabledProcessors
        self.enabledSet = enabledSet
        self.disabledSet = disabledSet
    }

    /// Maps the camelCase Swift properties onto the snake_case wire keys.
    private enum CodingKeys: String, CodingKey {
        case enabled
        case enabledProcessors = "enabled_processors"
        case disabledProcessors = "disabled_processors"
        case enabledSet = "enabled_set"
        case disabledSet = "disabled_set"
    }

    /// Decodes the configuration, substituting the default for any absent key.
    public init(from decoder: any Decoder) throws {
        let values = try decoder.container(keyedBy: CodingKeys.self)
        enabled = try values.decodeIfPresent(Bool.self, forKey: .enabled) ?? true
        enabledProcessors = try values.decodeIfPresent([String].self, forKey: .enabledProcessors)
        disabledProcessors = try values.decodeIfPresent([String].self, forKey: .disabledProcessors)
        enabledSet = try values.decodeIfPresent([String].self, forKey: .enabledSet)
        disabledSet = try values.decodeIfPresent([String].self, forKey: .disabledSet)
    }
}

// MARK: - Internal FFI conversions for PostProcessorConfig
internal extension PostProcessorConfig {
    /// Copies the fields out of a bridged `PostProcessorConfigRef`, converting each
    /// list entry via `as_str().toString()`.
    init(_ rb: RustBridge.PostProcessorConfigRef) throws {
        self.init(
            enabled: rb.enabled(),
            enabledProcessors: rb.enabledProcessors()?.map { $0.as_str().toString() },
            disabledProcessors: rb.disabledProcessors()?.map { $0.as_str().toString() },
            enabledSet: rb.enabledSet()?.map { $0.as_str().toString() },
            disabledSet: rb.disabledSet()?.map { $0.as_str().toString() }
        )
    }

    /// Creates the bridged `RustBridge.PostProcessorConfig` by JSON-encoding this
    /// value and handing the text to `RustBridge.postProcessorConfigFromJson`.
    func intoRust() throws -> RustBridge.PostProcessorConfig {
        let encoded = try JSONEncoder().encode(self)
        let jsonText: String
        if let utf8Text = String(data: encoded, encoding: .utf8) {
            jsonText = utf8Text
        } else {
            // Falls back to an empty object when the bytes are not valid UTF-8.
            jsonText = "{}"
        }
        return try RustBridge.postProcessorConfigFromJson(jsonText)
    }
}

/// Chunking configuration.
///
/// Configures text chunking for document content, including chunk size,
/// overlap, trimming behavior, and optional embeddings.
///
/// Typealias for the bridged `RustBridge.ChunkingConfig` type.
///
/// The guidance below is carried over from the Rust documentation and shows Rust
/// syntax, not Swift: use `..Default::default()` when constructing to allow for
/// future field additions:
/// ```rust
/// let config = ChunkingConfig {
///     max_characters: 500,
///     ..Default::default()
/// };
/// ```
public typealias ChunkingConfig = RustBridge.ChunkingConfig

/// Embedding configuration for text chunks.
///
/// Configures embedding generation using ONNX models via the vendored embedding engine.
/// Requires the `embeddings` feature to be enabled.
///
/// Typealias for the bridged `RustBridge.EmbeddingConfig` type.
public typealias EmbeddingConfig = RustBridge.EmbeddingConfig

/// Configuration for tree-sitter language pack integration.
///
/// Controls grammar download behavior and code analysis options.
///
/// Typealias for the bridged `RustBridge.TreeSitterConfig` type.
///
/// # Example (TOML)
///
/// The snippet below shows the TOML configuration form, not Swift code.
///
/// ```toml
/// [tree_sitter]
/// languages = ["python", "rust"]
/// groups = ["web"]
///
/// [tree_sitter.process]
/// structure = true
/// comments = true
/// docstrings = true
/// ```
public typealias TreeSitterConfig = RustBridge.TreeSitterConfig

/// Options selecting which tree-sitter analysis features run on code files.
///
/// Decoding substitutes a default for every absent boolean key, but `content_mode`
/// is required: decoding throws when that key is missing.
public struct TreeSitterProcessConfig: Codable, Sendable, Hashable {
    /// Whether structural items (functions, classes, structs, etc.) are extracted. Default: true.
    public let structure: Bool
    /// Whether import statements are extracted. Default: true.
    public let imports: Bool
    /// Whether export statements are extracted. Default: true.
    public let exports: Bool
    /// Whether comments are extracted. Default: false.
    public let comments: Bool
    /// Whether docstrings are extracted. Default: false.
    public let docstrings: Bool
    /// Whether symbol definitions are extracted. Default: false.
    public let symbols: Bool
    /// Whether parse diagnostics are included. Default: false.
    public let diagnostics: Bool
    /// Maximum chunk size in bytes; `nil` disables chunking.
    public let chunkMaxSize: UInt?
    /// Content rendering mode for code extraction.
    public let contentMode: CodeContentMode

    /// Creates a process configuration; `chunkMaxSize` defaults to `nil`.
    public init(
        structure: Bool,
        imports: Bool,
        exports: Bool,
        comments: Bool,
        docstrings: Bool,
        symbols: Bool,
        diagnostics: Bool,
        chunkMaxSize: UInt? = nil,
        contentMode: CodeContentMode
    ) {
        self.structure = structure
        self.imports = imports
        self.exports = exports
        self.comments = comments
        self.docstrings = docstrings
        self.symbols = symbols
        self.diagnostics = diagnostics
        self.chunkMaxSize = chunkMaxSize
        self.contentMode = contentMode
    }

    /// Wire keys; only the multi-word properties need an explicit snake_case name.
    private enum CodingKeys: String, CodingKey {
        case structure, imports, exports, comments, docstrings, symbols, diagnostics
        case chunkMaxSize = "chunk_max_size"
        case contentMode = "content_mode"
    }

    /// Decodes the configuration, substituting defaults for absent optional keys.
    public init(from decoder: any Decoder) throws {
        let values = try decoder.container(keyedBy: CodingKeys.self)
        structure = try values.decodeIfPresent(Bool.self, forKey: .structure) ?? true
        imports = try values.decodeIfPresent(Bool.self, forKey: .imports) ?? true
        exports = try values.decodeIfPresent(Bool.self, forKey: .exports) ?? true
        comments = try values.decodeIfPresent(Bool.self, forKey: .comments) ?? false
        docstrings = try values.decodeIfPresent(Bool.self, forKey: .docstrings) ?? false
        symbols = try values.decodeIfPresent(Bool.self, forKey: .symbols) ?? false
        diagnostics = try values.decodeIfPresent(Bool.self, forKey: .diagnostics) ?? false
        chunkMaxSize = try values.decodeIfPresent(UInt.self, forKey: .chunkMaxSize)
        // No fallback here: a payload without `content_mode` fails to decode.
        contentMode = try values.decode(CodeContentMode.self, forKey: .contentMode)
    }
}

// MARK: - Internal FFI conversions for TreeSitterProcessConfig
internal extension TreeSitterProcessConfig {
    /// Copies the fields out of a bridged `TreeSitterProcessConfigRef`.
    ///
    /// The bridged content mode is read as a string and mapped onto `CodeContentMode`
    /// by raw value.
    ///
    /// - Throws: `DecodingError.dataCorrupted` when the bridged content mode string
    ///   does not match any `CodeContentMode` raw value, so callers of this throwing
    ///   initializer can recover instead of the process being terminated.
    init(_ rb: RustBridge.TreeSitterProcessConfigRef) throws {
        self.structure = rb.structure()
        self.imports = rb.imports()
        self.exports = rb.exports()
        self.comments = rb.comments()
        self.docstrings = rb.docstrings()
        self.symbols = rb.symbols()
        self.diagnostics = rb.diagnostics()
        self.chunkMaxSize = rb.chunkMaxSize()
        // Read the bridged value once and reuse it for both the lookup and the error text.
        let rawContentMode = rb.contentMode().toString()
        guard let resolvedContentMode = CodeContentMode(rawValue: rawContentMode) else {
            throw DecodingError.dataCorrupted(
                DecodingError.Context(
                    codingPath: [],
                    debugDescription: "Unknown CodeContentMode: \(rawContentMode)"
                )
            )
        }
        self.contentMode = resolvedContentMode
    }

    /// Creates the bridged `RustBridge.TreeSitterProcessConfig`, passing the fields
    /// positionally in declaration order.
    ///
    /// - Throws: Whatever `CodeContentMode.intoRust()` throws.
    func intoRust() throws -> RustBridge.TreeSitterProcessConfig {
        return RustBridge.TreeSitterProcessConfig(self.structure, self.imports, self.exports, self.comments, self.docstrings, self.symbols, self.diagnostics, self.chunkMaxSize, try self.contentMode.intoRust())
    }
}

/// One entry in the list of supported document formats.
///
/// Pairs a file extension with the MIME type that Kreuzberg can process for it.
public struct SupportedFormat: Codable, Sendable, Hashable {
    /// File extension without the leading dot, e.g., "pdf", "docx".
    public let `extension`: String
    /// MIME type string, e.g., "application/pdf".
    public let mimeType: String

    /// Creates a format entry from an extension and its MIME type.
    public init(
        `extension`: String,
        mimeType: String
    ) {
        self.`extension` = `extension`
        self.mimeType = mimeType
    }

    /// Wire keys: `extension` is unchanged, `mimeType` is written as `mime_type`.
    private enum CodingKeys: String, CodingKey {
        case `extension` = "extension"
        case mimeType = "mime_type"
    }
}

// MARK: - Internal FFI conversions for SupportedFormat
internal extension SupportedFormat {
    /// Copies both fields out of a bridged `SupportedFormatRef`, converting each
    /// with `toString()`.
    init(_ rb: RustBridge.SupportedFormatRef) throws {
        self.init(
            extension: rb.extension_().toString(),
            mimeType: rb.mimeType().toString()
        )
    }

    /// Creates the bridged `RustBridge.SupportedFormat` by JSON-encoding this value
    /// and handing the text to `RustBridge.supportedFormatFromJson`.
    func intoRust() throws -> RustBridge.SupportedFormat {
        let encoded = try JSONEncoder().encode(self)
        let jsonText: String
        if let utf8Text = String(data: encoded, encoding: .utf8) {
            jsonText = utf8Text
        } else {
            // Falls back to an empty object when the bytes are not valid UTF-8.
            jsonText = "{}"
        }
        return try RustBridge.supportedFormatFromJson(jsonText)
    }
}

/// API server configuration.
///
/// This struct holds all configuration options for the Kreuzberg API server,
/// including host/port settings, CORS configuration, and upload limits.
///
/// # Defaults
///
/// Applied by `init(from:)` for every key absent from the decoded payload:
///
/// - `host`: "127.0.0.1" (localhost only)
/// - `port`: 8000
/// - `cors_origins`: empty vector (allows all origins)
/// - `max_request_body_bytes`: 104_857_600 (100 MB)
/// - `max_multipart_field_bytes`: 104_857_600 (100 MB)
public struct ServerConfig: Codable, Sendable, Hashable {
    /// Server host address (e.g., "127.0.0.1", "0.0.0.0")
    public let host: String
    /// Server port number
    public let port: UInt16
    /// CORS allowed origins. Empty vector means allow all origins.
    ///
    /// If this is an empty vector, the server will accept requests from any origin.
    /// If populated with specific origins (e.g., `"https://example.com"`), only
    /// those origins will be allowed.
    public let corsOrigins: [String]
    /// Maximum size of request body in bytes (default: 100 MB)
    public let maxRequestBodyBytes: UInt
    /// Maximum size of multipart fields in bytes (default: 100 MB)
    public let maxMultipartFieldBytes: UInt

    /// Creates a server configuration with every value supplied explicitly.
    public init(host: String, port: UInt16, corsOrigins: [String], maxRequestBodyBytes: UInt, maxMultipartFieldBytes: UInt) {
        self.host = host
        self.port = port
        self.corsOrigins = corsOrigins
        self.maxRequestBodyBytes = maxRequestBodyBytes
        self.maxMultipartFieldBytes = maxMultipartFieldBytes
    }

    /// Maps the camelCase Swift properties onto the snake_case wire keys.
    private enum CodingKeys: String, CodingKey {
        case host = "host"
        case port = "port"
        case corsOrigins = "cors_origins"
        case maxRequestBodyBytes = "max_request_body_bytes"
        case maxMultipartFieldBytes = "max_multipart_field_bytes"
    }

    /// Decodes the configuration, substituting the documented default for any absent key.
    ///
    /// Falling back to an empty host, port 0, or zero-byte limits would produce a
    /// configuration that contradicts the documented defaults, so a partial payload
    /// resolves to localhost:8000 with 100 MB limits instead.
    ///
    /// - Throws: Any decoding error raised for a key that is present but malformed.
    public init(from decoder: any Decoder) throws {
        let container = try decoder.container(keyedBy: CodingKeys.self)
        self.host = try container.decodeIfPresent(String.self, forKey: .host) ?? "127.0.0.1"
        self.port = try container.decodeIfPresent(UInt16.self, forKey: .port) ?? 8000
        self.corsOrigins = try container.decodeIfPresent([String].self, forKey: .corsOrigins) ?? []
        // 104_857_600 bytes = 100 MB.
        self.maxRequestBodyBytes = try container.decodeIfPresent(UInt.self, forKey: .maxRequestBodyBytes) ?? 104_857_600
        self.maxMultipartFieldBytes = try container.decodeIfPresent(UInt.self, forKey: .maxMultipartFieldBytes) ?? 104_857_600
    }
}

// MARK: - Internal FFI conversions for ServerConfig
internal extension ServerConfig {
    /// Copies the fields out of a bridged `ServerConfigRef`, converting the host with
    /// `toString()` and each CORS origin via `as_str().toString()`.
    init(_ rb: RustBridge.ServerConfigRef) throws {
        self.init(
            host: rb.host().toString(),
            port: rb.port(),
            corsOrigins: rb.corsOrigins().map { $0.as_str().toString() },
            maxRequestBodyBytes: rb.maxRequestBodyBytes(),
            maxMultipartFieldBytes: rb.maxMultipartFieldBytes()
        )
    }

    /// Creates the bridged `RustBridge.ServerConfig`.
    ///
    /// The CORS origins are pushed one by one into a fresh `RustVec<RustString>`
    /// before the bridged value is constructed.
    func intoRust() throws -> RustBridge.ServerConfig {
        let bridgedOrigins = RustVec<RustString>()
        corsOrigins.forEach { origin in
            bridgedOrigins.push(value: RustString(origin))
        }
        let bridgedHost = RustString(host)
        return RustBridge.ServerConfig(
            bridgedHost,
            port,
            bridgedOrigins,
            maxRequestBodyBytes,
            maxMultipartFieldBytes
        )
    }
}

/// Typealias for the bridged `RustBridge.StructuredDataResult` type.
///
/// NOTE(review): no description was generated for this type; presumably it is the
/// result of structured data extraction — confirm against the Rust definition.
public typealias StructuredDataResult = RustBridge.StructuredDataResult

/// Word-specific entries read from `docProps/app.xml` of a DOCX package.
///
/// Every field is optional: entries missing from the source are represented as `nil`.
/// JSON keys use snake_case (see `CodingKeys`).
public struct DocxAppProperties: Codable, Sendable, Hashable {
    /// Application name (e.g., "Microsoft Office Word")
    public let application: String?
    /// Application version
    public let appVersion: String?
    /// Template filename
    public let template: String?
    /// Total editing time in minutes
    public let totalTime: Int32?
    /// Number of pages
    public let pages: Int32?
    /// Number of words
    public let words: Int32?
    /// Number of characters (excluding spaces)
    public let characters: Int32?
    /// Number of characters (including spaces)
    public let charactersWithSpaces: Int32?
    /// Number of lines
    public let lines: Int32?
    /// Number of paragraphs
    public let paragraphs: Int32?
    /// Company name
    public let company: String?
    /// Document security level
    public let docSecurity: Int32?
    /// Scale crop flag
    public let scaleCrop: Bool?
    /// Links up to date flag
    public let linksUpToDate: Bool?
    /// Shared document flag
    public let sharedDoc: Bool?
    /// Hyperlinks changed flag
    public let hyperlinksChanged: Bool?

    /// Memberwise initializer; every argument defaults to `nil`.
    public init(
        application: String? = nil,
        appVersion: String? = nil,
        template: String? = nil,
        totalTime: Int32? = nil,
        pages: Int32? = nil,
        words: Int32? = nil,
        characters: Int32? = nil,
        charactersWithSpaces: Int32? = nil,
        lines: Int32? = nil,
        paragraphs: Int32? = nil,
        company: String? = nil,
        docSecurity: Int32? = nil,
        scaleCrop: Bool? = nil,
        linksUpToDate: Bool? = nil,
        sharedDoc: Bool? = nil,
        hyperlinksChanged: Bool? = nil
    ) {
        self.application = application
        self.appVersion = appVersion
        self.template = template
        self.totalTime = totalTime
        self.pages = pages
        self.words = words
        self.characters = characters
        self.charactersWithSpaces = charactersWithSpaces
        self.lines = lines
        self.paragraphs = paragraphs
        self.company = company
        self.docSecurity = docSecurity
        self.scaleCrop = scaleCrop
        self.linksUpToDate = linksUpToDate
        self.sharedDoc = sharedDoc
        self.hyperlinksChanged = hyperlinksChanged
    }

    /// Maps Swift property names to the snake_case keys used in JSON.
    private enum CodingKeys: String, CodingKey {
        // Single-word names already match their JSON key.
        case application, template, pages, words, characters, lines, paragraphs, company
        case appVersion = "app_version"
        case totalTime = "total_time"
        case charactersWithSpaces = "characters_with_spaces"
        case docSecurity = "doc_security"
        case scaleCrop = "scale_crop"
        case linksUpToDate = "links_up_to_date"
        case sharedDoc = "shared_doc"
        case hyperlinksChanged = "hyperlinks_changed"
    }

    /// Decodes the value, leaving any missing or `null` key as `nil`.
    public init(from decoder: any Decoder) throws {
        let fields = try decoder.container(keyedBy: CodingKeys.self)
        application = try fields.decodeIfPresent(String.self, forKey: .application)
        appVersion = try fields.decodeIfPresent(String.self, forKey: .appVersion)
        template = try fields.decodeIfPresent(String.self, forKey: .template)
        totalTime = try fields.decodeIfPresent(Int32.self, forKey: .totalTime)
        pages = try fields.decodeIfPresent(Int32.self, forKey: .pages)
        words = try fields.decodeIfPresent(Int32.self, forKey: .words)
        characters = try fields.decodeIfPresent(Int32.self, forKey: .characters)
        charactersWithSpaces = try fields.decodeIfPresent(Int32.self, forKey: .charactersWithSpaces)
        lines = try fields.decodeIfPresent(Int32.self, forKey: .lines)
        paragraphs = try fields.decodeIfPresent(Int32.self, forKey: .paragraphs)
        company = try fields.decodeIfPresent(String.self, forKey: .company)
        docSecurity = try fields.decodeIfPresent(Int32.self, forKey: .docSecurity)
        scaleCrop = try fields.decodeIfPresent(Bool.self, forKey: .scaleCrop)
        linksUpToDate = try fields.decodeIfPresent(Bool.self, forKey: .linksUpToDate)
        sharedDoc = try fields.decodeIfPresent(Bool.self, forKey: .sharedDoc)
        hyperlinksChanged = try fields.decodeIfPresent(Bool.self, forKey: .hyperlinksChanged)
    }
}

// MARK: - Internal FFI conversions for DocxAppProperties
internal extension DocxAppProperties {
    /// Copies each property out of a bridge-side `DocxAppPropertiesRef`.
    ///
    /// - Parameter rb: Bridge reference whose accessors supply the field values.
    init(_ rb: RustBridge.DocxAppPropertiesRef) throws {
        // Optional bridge strings are converted with `toString()`; other accessors are assigned as returned.
        application = rb.application()?.toString()
        appVersion = rb.appVersion()?.toString()
        template = rb.template()?.toString()
        totalTime = rb.totalTime()
        pages = rb.pages()
        words = rb.words()
        characters = rb.characters()
        charactersWithSpaces = rb.charactersWithSpaces()
        lines = rb.lines()
        paragraphs = rb.paragraphs()
        company = rb.company()?.toString()
        docSecurity = rb.docSecurity()
        scaleCrop = rb.scaleCrop()
        linksUpToDate = rb.linksUpToDate()
        sharedDoc = rb.sharedDoc()
        hyperlinksChanged = rb.hyperlinksChanged()
    }

    /// Produces a bridge-side `DocxAppProperties` carrying the same field values.
    ///
    /// - Returns: A new `RustBridge.DocxAppProperties`; arguments are passed positionally.
    func intoRust() throws -> RustBridge.DocxAppProperties {
        RustBridge.DocxAppProperties(
            application.map(RustString.init),
            appVersion.map(RustString.init),
            template.map(RustString.init),
            totalTime,
            pages,
            words,
            characters,
            charactersWithSpaces,
            lines,
            paragraphs,
            company.map(RustString.init),
            docSecurity,
            scaleCrop,
            linksUpToDate,
            sharedDoc,
            hyperlinksChanged
        )
    }
}

/// Application properties from docProps/app.xml for XLSX
///
/// Contains Excel-specific document metadata. All scalar fields are optional;
/// `worksheetNames` is always present and is empty when no names are supplied.
public struct XlsxAppProperties: Codable, Sendable, Hashable {
    /// Application name (e.g., "Microsoft Excel")
    public let application: String?
    /// Application version
    public let appVersion: String?
    /// Document security level
    public let docSecurity: Int32?
    /// Scale crop flag
    public let scaleCrop: Bool?
    /// Links up to date flag
    public let linksUpToDate: Bool?
    /// Shared document flag
    public let sharedDoc: Bool?
    /// Hyperlinks changed flag
    public let hyperlinksChanged: Bool?
    /// Company name
    public let company: String?
    /// Worksheet names
    public let worksheetNames: [String]

    /// Memberwise initializer.
    ///
    /// Optional fields default to `nil`. `worksheetNames` defaults to `[]`, the
    /// same value `init(from:)` uses when the key is absent, so
    /// `XlsxAppProperties()` equals the result of decoding `{}`.
    public init(
        application: String? = nil,
        appVersion: String? = nil,
        docSecurity: Int32? = nil,
        scaleCrop: Bool? = nil,
        linksUpToDate: Bool? = nil,
        sharedDoc: Bool? = nil,
        hyperlinksChanged: Bool? = nil,
        company: String? = nil,
        worksheetNames: [String] = []
    ) {
        self.application = application
        self.appVersion = appVersion
        self.docSecurity = docSecurity
        self.scaleCrop = scaleCrop
        self.linksUpToDate = linksUpToDate
        self.sharedDoc = sharedDoc
        self.hyperlinksChanged = hyperlinksChanged
        self.company = company
        self.worksheetNames = worksheetNames
    }

    /// Maps Swift property names to the snake_case keys used in JSON.
    private enum CodingKeys: String, CodingKey {
        case application = "application"
        case appVersion = "app_version"
        case docSecurity = "doc_security"
        case scaleCrop = "scale_crop"
        case linksUpToDate = "links_up_to_date"
        case sharedDoc = "shared_doc"
        case hyperlinksChanged = "hyperlinks_changed"
        case company = "company"
        case worksheetNames = "worksheet_names"
    }

    /// Decodes the value; missing or `null` keys become `nil`, and a missing
    /// `worksheet_names` key becomes an empty array.
    public init(from decoder: any Decoder) throws {
        let container = try decoder.container(keyedBy: CodingKeys.self)
        // `decodeIfPresent` already yields an optional, so no `?? nil` is needed.
        self.application = try container.decodeIfPresent(String.self, forKey: .application)
        self.appVersion = try container.decodeIfPresent(String.self, forKey: .appVersion)
        self.docSecurity = try container.decodeIfPresent(Int32.self, forKey: .docSecurity)
        self.scaleCrop = try container.decodeIfPresent(Bool.self, forKey: .scaleCrop)
        self.linksUpToDate = try container.decodeIfPresent(Bool.self, forKey: .linksUpToDate)
        self.sharedDoc = try container.decodeIfPresent(Bool.self, forKey: .sharedDoc)
        self.hyperlinksChanged = try container.decodeIfPresent(Bool.self, forKey: .hyperlinksChanged)
        self.company = try container.decodeIfPresent(String.self, forKey: .company)
        self.worksheetNames = try container.decodeIfPresent([String].self, forKey: .worksheetNames) ?? []
    }
}

// MARK: - Internal FFI conversions for XlsxAppProperties
internal extension XlsxAppProperties {
    /// Copies each property out of a bridge-side `XlsxAppPropertiesRef`.
    ///
    /// - Parameter rb: Bridge reference whose accessors supply the field values.
    init(_ rb: RustBridge.XlsxAppPropertiesRef) throws {
        application = rb.application()?.toString()
        appVersion = rb.appVersion()?.toString()
        docSecurity = rb.docSecurity()
        scaleCrop = rb.scaleCrop()
        linksUpToDate = rb.linksUpToDate()
        sharedDoc = rb.sharedDoc()
        hyperlinksChanged = rb.hyperlinksChanged()
        company = rb.company()?.toString()
        worksheetNames = rb.worksheetNames().map { sheet in sheet.as_str().toString() }
    }

    /// Produces a bridge-side `XlsxAppProperties` carrying the same field values.
    ///
    /// - Returns: A new `RustBridge.XlsxAppProperties`; arguments are passed positionally.
    func intoRust() throws -> RustBridge.XlsxAppProperties {
        // The bridge constructor takes a `RustVec<RustString>`, so copy the names element by element.
        let sheetNames = RustVec<RustString>()
        worksheetNames.forEach { sheetNames.push(value: RustString($0)) }
        return RustBridge.XlsxAppProperties(
            application.map(RustString.init),
            appVersion.map(RustString.init),
            docSecurity,
            scaleCrop,
            linksUpToDate,
            sharedDoc,
            hyperlinksChanged,
            company.map(RustString.init),
            sheetNames
        )
    }
}

/// Application properties from docProps/app.xml for PPTX
///
/// Contains PowerPoint-specific document metadata. All scalar fields are optional;
/// `slideTitles` is always present and is empty when no titles are supplied.
public struct PptxAppProperties: Codable, Sendable, Hashable {
    /// Application name (e.g., "Microsoft Office PowerPoint")
    public let application: String?
    /// Application version
    public let appVersion: String?
    /// Total editing time in minutes
    public let totalTime: Int32?
    /// Company name
    public let company: String?
    /// Document security level
    public let docSecurity: Int32?
    /// Scale crop flag
    public let scaleCrop: Bool?
    /// Links up to date flag
    public let linksUpToDate: Bool?
    /// Shared document flag
    public let sharedDoc: Bool?
    /// Hyperlinks changed flag
    public let hyperlinksChanged: Bool?
    /// Number of slides
    public let slides: Int32?
    /// Number of notes
    public let notes: Int32?
    /// Number of hidden slides
    public let hiddenSlides: Int32?
    /// Number of multimedia clips
    public let multimediaClips: Int32?
    /// Presentation format (e.g., "Widescreen", "Standard")
    public let presentationFormat: String?
    /// Slide titles
    public let slideTitles: [String]

    /// Memberwise initializer.
    ///
    /// Optional fields default to `nil`. `slideTitles` defaults to `[]`, the same
    /// value `init(from:)` uses when the key is absent, so `PptxAppProperties()`
    /// equals the result of decoding `{}`.
    public init(
        application: String? = nil,
        appVersion: String? = nil,
        totalTime: Int32? = nil,
        company: String? = nil,
        docSecurity: Int32? = nil,
        scaleCrop: Bool? = nil,
        linksUpToDate: Bool? = nil,
        sharedDoc: Bool? = nil,
        hyperlinksChanged: Bool? = nil,
        slides: Int32? = nil,
        notes: Int32? = nil,
        hiddenSlides: Int32? = nil,
        multimediaClips: Int32? = nil,
        presentationFormat: String? = nil,
        slideTitles: [String] = []
    ) {
        self.application = application
        self.appVersion = appVersion
        self.totalTime = totalTime
        self.company = company
        self.docSecurity = docSecurity
        self.scaleCrop = scaleCrop
        self.linksUpToDate = linksUpToDate
        self.sharedDoc = sharedDoc
        self.hyperlinksChanged = hyperlinksChanged
        self.slides = slides
        self.notes = notes
        self.hiddenSlides = hiddenSlides
        self.multimediaClips = multimediaClips
        self.presentationFormat = presentationFormat
        self.slideTitles = slideTitles
    }

    /// Maps Swift property names to the snake_case keys used in JSON.
    private enum CodingKeys: String, CodingKey {
        case application = "application"
        case appVersion = "app_version"
        case totalTime = "total_time"
        case company = "company"
        case docSecurity = "doc_security"
        case scaleCrop = "scale_crop"
        case linksUpToDate = "links_up_to_date"
        case sharedDoc = "shared_doc"
        case hyperlinksChanged = "hyperlinks_changed"
        case slides = "slides"
        case notes = "notes"
        case hiddenSlides = "hidden_slides"
        case multimediaClips = "multimedia_clips"
        case presentationFormat = "presentation_format"
        case slideTitles = "slide_titles"
    }

    /// Decodes the value; missing or `null` keys become `nil`, and a missing
    /// `slide_titles` key becomes an empty array.
    public init(from decoder: any Decoder) throws {
        let container = try decoder.container(keyedBy: CodingKeys.self)
        // `decodeIfPresent` already yields an optional, so no `?? nil` is needed.
        self.application = try container.decodeIfPresent(String.self, forKey: .application)
        self.appVersion = try container.decodeIfPresent(String.self, forKey: .appVersion)
        self.totalTime = try container.decodeIfPresent(Int32.self, forKey: .totalTime)
        self.company = try container.decodeIfPresent(String.self, forKey: .company)
        self.docSecurity = try container.decodeIfPresent(Int32.self, forKey: .docSecurity)
        self.scaleCrop = try container.decodeIfPresent(Bool.self, forKey: .scaleCrop)
        self.linksUpToDate = try container.decodeIfPresent(Bool.self, forKey: .linksUpToDate)
        self.sharedDoc = try container.decodeIfPresent(Bool.self, forKey: .sharedDoc)
        self.hyperlinksChanged = try container.decodeIfPresent(Bool.self, forKey: .hyperlinksChanged)
        self.slides = try container.decodeIfPresent(Int32.self, forKey: .slides)
        self.notes = try container.decodeIfPresent(Int32.self, forKey: .notes)
        self.hiddenSlides = try container.decodeIfPresent(Int32.self, forKey: .hiddenSlides)
        self.multimediaClips = try container.decodeIfPresent(Int32.self, forKey: .multimediaClips)
        self.presentationFormat = try container.decodeIfPresent(String.self, forKey: .presentationFormat)
        self.slideTitles = try container.decodeIfPresent([String].self, forKey: .slideTitles) ?? []
    }
}

// MARK: - Internal FFI conversions for PptxAppProperties
internal extension PptxAppProperties {
    /// Copies each property out of a bridge-side `PptxAppPropertiesRef`.
    ///
    /// - Parameter rb: Bridge reference whose accessors supply the field values.
    init(_ rb: RustBridge.PptxAppPropertiesRef) throws {
        application = rb.application()?.toString()
        appVersion = rb.appVersion()?.toString()
        totalTime = rb.totalTime()
        company = rb.company()?.toString()
        docSecurity = rb.docSecurity()
        scaleCrop = rb.scaleCrop()
        linksUpToDate = rb.linksUpToDate()
        sharedDoc = rb.sharedDoc()
        hyperlinksChanged = rb.hyperlinksChanged()
        slides = rb.slides()
        notes = rb.notes()
        hiddenSlides = rb.hiddenSlides()
        multimediaClips = rb.multimediaClips()
        presentationFormat = rb.presentationFormat()?.toString()
        slideTitles = rb.slideTitles().map { title in title.as_str().toString() }
    }

    /// Produces a bridge-side `PptxAppProperties` carrying the same field values.
    ///
    /// - Returns: A new `RustBridge.PptxAppProperties`; arguments are passed positionally.
    func intoRust() throws -> RustBridge.PptxAppProperties {
        // The bridge constructor takes a `RustVec<RustString>`, so copy the titles element by element.
        let titles = RustVec<RustString>()
        slideTitles.forEach { titles.push(value: RustString($0)) }
        return RustBridge.PptxAppProperties(
            application.map(RustString.init),
            appVersion.map(RustString.init),
            totalTime,
            company.map(RustString.init),
            docSecurity,
            scaleCrop,
            linksUpToDate,
            sharedDoc,
            hyperlinksChanged,
            slides,
            notes,
            hiddenSlides,
            multimediaClips,
            presentationFormat.map(RustString.init),
            titles
        )
    }
}

/// Core document metadata read from `docProps/core.xml`.
///
/// Holds the Dublin Core fields plus Office-specific extensions. Every field is an
/// optional string; entries missing from the source are represented as `nil`.
public struct CoreProperties: Codable, Sendable, Hashable {
    /// Document title
    public let title: String?
    /// Document subject/topic
    public let subject: String?
    /// Document creator/author
    public let creator: String?
    /// Keywords or tags
    public let keywords: String?
    /// Document description/abstract
    public let description: String?
    /// User who last modified the document
    public let lastModifiedBy: String?
    /// Revision number
    public let revision: String?
    /// Creation timestamp (ISO 8601)
    public let created: String?
    /// Last modification timestamp (ISO 8601)
    public let modified: String?
    /// Document category
    public let category: String?
    /// Content status (Draft, Final, etc.)
    public let contentStatus: String?
    /// Document language
    public let language: String?
    /// Unique identifier
    public let identifier: String?
    /// Document version
    public let version: String?
    /// Last print timestamp (ISO 8601)
    public let lastPrinted: String?

    /// Memberwise initializer; every argument defaults to `nil`.
    public init(
        title: String? = nil,
        subject: String? = nil,
        creator: String? = nil,
        keywords: String? = nil,
        description: String? = nil,
        lastModifiedBy: String? = nil,
        revision: String? = nil,
        created: String? = nil,
        modified: String? = nil,
        category: String? = nil,
        contentStatus: String? = nil,
        language: String? = nil,
        identifier: String? = nil,
        version: String? = nil,
        lastPrinted: String? = nil
    ) {
        self.title = title
        self.subject = subject
        self.creator = creator
        self.keywords = keywords
        self.description = description
        self.lastModifiedBy = lastModifiedBy
        self.revision = revision
        self.created = created
        self.modified = modified
        self.category = category
        self.contentStatus = contentStatus
        self.language = language
        self.identifier = identifier
        self.version = version
        self.lastPrinted = lastPrinted
    }

    /// Maps Swift property names to the snake_case keys used in JSON.
    private enum CodingKeys: String, CodingKey {
        // Single-word names already match their JSON key.
        case title, subject, creator, keywords, description, revision
        case created, modified, category, language, identifier, version
        case lastModifiedBy = "last_modified_by"
        case contentStatus = "content_status"
        case lastPrinted = "last_printed"
    }

    /// Decodes the value, leaving any missing or `null` key as `nil`.
    public init(from decoder: any Decoder) throws {
        let fields = try decoder.container(keyedBy: CodingKeys.self)
        title = try fields.decodeIfPresent(String.self, forKey: .title)
        subject = try fields.decodeIfPresent(String.self, forKey: .subject)
        creator = try fields.decodeIfPresent(String.self, forKey: .creator)
        keywords = try fields.decodeIfPresent(String.self, forKey: .keywords)
        description = try fields.decodeIfPresent(String.self, forKey: .description)
        lastModifiedBy = try fields.decodeIfPresent(String.self, forKey: .lastModifiedBy)
        revision = try fields.decodeIfPresent(String.self, forKey: .revision)
        created = try fields.decodeIfPresent(String.self, forKey: .created)
        modified = try fields.decodeIfPresent(String.self, forKey: .modified)
        category = try fields.decodeIfPresent(String.self, forKey: .category)
        contentStatus = try fields.decodeIfPresent(String.self, forKey: .contentStatus)
        language = try fields.decodeIfPresent(String.self, forKey: .language)
        identifier = try fields.decodeIfPresent(String.self, forKey: .identifier)
        version = try fields.decodeIfPresent(String.self, forKey: .version)
        lastPrinted = try fields.decodeIfPresent(String.self, forKey: .lastPrinted)
    }
}

// MARK: - Internal FFI conversions for CoreProperties
internal extension CoreProperties {
    /// Copies each property out of a bridge-side `CorePropertiesRef`.
    ///
    /// - Parameter rb: Bridge reference; each accessor returns an optional bridge
    ///   string that is converted with `toString()`.
    init(_ rb: RustBridge.CorePropertiesRef) throws {
        title = rb.title()?.toString()
        subject = rb.subject()?.toString()
        creator = rb.creator()?.toString()
        keywords = rb.keywords()?.toString()
        description = rb.description()?.toString()
        lastModifiedBy = rb.lastModifiedBy()?.toString()
        revision = rb.revision()?.toString()
        created = rb.created()?.toString()
        modified = rb.modified()?.toString()
        category = rb.category()?.toString()
        contentStatus = rb.contentStatus()?.toString()
        language = rb.language()?.toString()
        identifier = rb.identifier()?.toString()
        version = rb.version()?.toString()
        lastPrinted = rb.lastPrinted()?.toString()
    }

    /// Produces a bridge-side `CoreProperties` carrying the same field values.
    ///
    /// - Returns: A new `RustBridge.CoreProperties`; arguments are passed positionally.
    func intoRust() throws -> RustBridge.CoreProperties {
        RustBridge.CoreProperties(
            title.map(RustString.init),
            subject.map(RustString.init),
            creator.map(RustString.init),
            keywords.map(RustString.init),
            description.map(RustString.init),
            lastModifiedBy.map(RustString.init),
            revision.map(RustString.init),
            created.map(RustString.init),
            modified.map(RustString.init),
            category.map(RustString.init),
            contentStatus.map(RustString.init),
            language.map(RustString.init),
            identifier.map(RustString.init),
            version.map(RustString.init),
            lastPrinted.map(RustString.init)
        )
    }
}

/// Configuration for security limits across extractors.
///
/// All limits are intentionally conservative to prevent DoS attacks
/// while still supporting legitimate documents.
///
/// The defaults listed on each field are the values used both by the memberwise
/// initializer and by `init(from:)` when a key is absent.
public struct SecurityLimits: Codable, Sendable, Hashable {
    /// Maximum uncompressed size for archives (default 524288000, i.e. 500 MiB)
    public let maxArchiveSize: UInt
    /// Maximum compression ratio before flagging as potential bomb (default 100, i.e. 100:1)
    public let maxCompressionRatio: UInt
    /// Maximum number of files in archive (default 10,000)
    public let maxFilesInArchive: UInt
    /// Maximum nesting depth for structures (default 1024)
    public let maxNestingDepth: UInt
    /// Maximum length of any single XML entity / attribute / token (default 1048576, i.e. 1 MiB).
    /// This is a per-token cap, NOT a total cap — billion-laughs class
    /// attacks where a single entity expands to hundreds of MB are caught
    /// here, while normal long text content (a paragraph, a CDATA block) is
    /// caught by `max_content_size` instead.
    public let maxEntityLength: UInt
    /// Maximum string growth per document (default 104857600, i.e. 100 MiB)
    public let maxContentSize: UInt
    /// Maximum iterations per operation (default 10,000,000)
    public let maxIterations: UInt
    /// Maximum XML depth (default 1024 levels)
    public let maxXmlDepth: UInt
    /// Maximum cells per table (default 100,000)
    public let maxTableCells: UInt

    /// Memberwise initializer.
    ///
    /// Every argument defaults to the same value `init(from:)` falls back to, so
    /// `SecurityLimits()` equals the result of decoding `{}` and callers can
    /// override individual limits without restating the rest.
    public init(
        maxArchiveSize: UInt = 524_288_000,
        maxCompressionRatio: UInt = 100,
        maxFilesInArchive: UInt = 10_000,
        maxNestingDepth: UInt = 1_024,
        maxEntityLength: UInt = 1_048_576,
        maxContentSize: UInt = 104_857_600,
        maxIterations: UInt = 10_000_000,
        maxXmlDepth: UInt = 1_024,
        maxTableCells: UInt = 100_000
    ) {
        self.maxArchiveSize = maxArchiveSize
        self.maxCompressionRatio = maxCompressionRatio
        self.maxFilesInArchive = maxFilesInArchive
        self.maxNestingDepth = maxNestingDepth
        self.maxEntityLength = maxEntityLength
        self.maxContentSize = maxContentSize
        self.maxIterations = maxIterations
        self.maxXmlDepth = maxXmlDepth
        self.maxTableCells = maxTableCells
    }

    /// Maps Swift property names to the snake_case keys used in JSON.
    private enum CodingKeys: String, CodingKey {
        case maxArchiveSize = "max_archive_size"
        case maxCompressionRatio = "max_compression_ratio"
        case maxFilesInArchive = "max_files_in_archive"
        case maxNestingDepth = "max_nesting_depth"
        case maxEntityLength = "max_entity_length"
        case maxContentSize = "max_content_size"
        case maxIterations = "max_iterations"
        case maxXmlDepth = "max_xml_depth"
        case maxTableCells = "max_table_cells"
    }

    /// Decodes the limits, substituting the default for any key that is absent or `null`.
    ///
    /// - Throws: Any error raised by the decoder, e.g. when a present key holds a non-integer.
    public init(from decoder: any Decoder) throws {
        let container = try decoder.container(keyedBy: CodingKeys.self)
        // Single source of truth for fallbacks: the memberwise initializer's defaults.
        let fallback = SecurityLimits()
        self.maxArchiveSize = try container.decodeIfPresent(UInt.self, forKey: .maxArchiveSize) ?? fallback.maxArchiveSize
        self.maxCompressionRatio = try container.decodeIfPresent(UInt.self, forKey: .maxCompressionRatio) ?? fallback.maxCompressionRatio
        self.maxFilesInArchive = try container.decodeIfPresent(UInt.self, forKey: .maxFilesInArchive) ?? fallback.maxFilesInArchive
        self.maxNestingDepth = try container.decodeIfPresent(UInt.self, forKey: .maxNestingDepth) ?? fallback.maxNestingDepth
        self.maxEntityLength = try container.decodeIfPresent(UInt.self, forKey: .maxEntityLength) ?? fallback.maxEntityLength
        self.maxContentSize = try container.decodeIfPresent(UInt.self, forKey: .maxContentSize) ?? fallback.maxContentSize
        self.maxIterations = try container.decodeIfPresent(UInt.self, forKey: .maxIterations) ?? fallback.maxIterations
        self.maxXmlDepth = try container.decodeIfPresent(UInt.self, forKey: .maxXmlDepth) ?? fallback.maxXmlDepth
        self.maxTableCells = try container.decodeIfPresent(UInt.self, forKey: .maxTableCells) ?? fallback.maxTableCells
    }
}

// MARK: - Internal FFI conversions for SecurityLimits
internal extension SecurityLimits {
    /// Copies each limit out of a bridge-side `SecurityLimitsRef`.
    ///
    /// - Parameter rb: Bridge reference whose accessors supply the limit values.
    init(_ rb: RustBridge.SecurityLimitsRef) throws {
        maxArchiveSize = rb.maxArchiveSize()
        maxCompressionRatio = rb.maxCompressionRatio()
        maxFilesInArchive = rb.maxFilesInArchive()
        maxNestingDepth = rb.maxNestingDepth()
        maxEntityLength = rb.maxEntityLength()
        maxContentSize = rb.maxContentSize()
        maxIterations = rb.maxIterations()
        maxXmlDepth = rb.maxXmlDepth()
        maxTableCells = rb.maxTableCells()
    }

    /// Produces a bridge-side `SecurityLimits` carrying the same limit values.
    ///
    /// - Returns: A new `RustBridge.SecurityLimits`; arguments are passed positionally.
    func intoRust() throws -> RustBridge.SecurityLimits {
        RustBridge.SecurityLimits(
            maxArchiveSize,
            maxCompressionRatio,
            maxFilesInArchive,
            maxNestingDepth,
            maxEntityLength,
            maxContentSize,
            maxIterations,
            maxXmlDepth,
            maxTableCells
        )
    }
}

/// Public name for the bridge type `RustBridge.TokenReductionConfig`.
///
/// This is a plain alias; no separate Swift struct is declared for it here.
public typealias TokenReductionConfig = RustBridge.TokenReductionConfig

/// One annotation found on a page of a PDF document.
///
/// JSON keys use snake_case (see `CodingKeys`); decoding and encoding are compiler-synthesized.
public struct PdfAnnotation: Codable, Sendable, Hashable {
    /// The type of annotation.
    public let annotationType: PdfAnnotationType
    /// Text content of the annotation (e.g., comment text, link URL).
    public let content: String?
    /// Page number where the annotation appears (1-indexed).
    public let pageNumber: UInt32
    /// Bounding box of the annotation on the page.
    public let boundingBox: BoundingBox?

    /// Memberwise initializer; `content` and `boundingBox` default to `nil`.
    public init(
        annotationType: PdfAnnotationType,
        content: String? = nil,
        pageNumber: UInt32,
        boundingBox: BoundingBox? = nil
    ) {
        self.annotationType = annotationType
        self.content = content
        self.pageNumber = pageNumber
        self.boundingBox = boundingBox
    }

    /// Maps Swift property names to the snake_case keys used in JSON.
    private enum CodingKeys: String, CodingKey {
        case annotationType = "annotation_type"
        case content  // already matches its JSON key
        case pageNumber = "page_number"
        case boundingBox = "bounding_box"
    }
}

// MARK: - Internal FFI conversions for PdfAnnotation
internal extension PdfAnnotation {
    /// Builds the Swift value by reading each field off a bridge-side `PdfAnnotationRef`.
    ///
    /// - Parameter rb: Bridge reference whose accessors supply the field values.
    /// - Throws: `DecodingError.dataCorrupted` when the bridge reports an annotation type
    ///   string that has no matching `PdfAnnotationType` case, or any error thrown while
    ///   converting the bounding box.
    init(_ rb: RustBridge.PdfAnnotationRef) throws {
        // Read the raw string once; it is needed both for the lookup and for the error message.
        let rawType = rb.annotationType().toString()
        guard let parsedType = PdfAnnotationType(rawValue: rawType) else {
            // This initializer already throws, so report the mismatch instead of terminating the process.
            throw DecodingError.dataCorrupted(
                DecodingError.Context(codingPath: [], debugDescription: "Unknown PdfAnnotationType: \(rawType)")
            )
        }
        self.annotationType = parsedType
        self.content = rb.content()?.toString()
        self.pageNumber = rb.pageNumber()
        self.boundingBox = try rb.boundingBox().map { try BoundingBox($0) }
    }

    /// Converts to the bridge-side type by JSON-encoding this value and parsing it on the other side.
    ///
    /// - Returns: The `RustBridge.PdfAnnotation` produced by `pdfAnnotationFromJson`.
    /// - Throws: Encoding errors from `JSONEncoder`, or errors thrown by the bridge parser.
    func intoRust() throws -> RustBridge.PdfAnnotation {
        let data = try JSONEncoder().encode(self)
        // `JSONEncoder` emits UTF-8, so decode directly rather than falling back to an empty "{}" payload.
        let json = String(decoding: data, as: UTF8.self)
        return try RustBridge.pdfAnnotationFromJson(json)
    }
}

/// Comprehensive Djot document structure with semantic preservation.
///
/// This type captures the full richness of Djot markup, including:
/// - Block-level structures (headings, lists, blockquotes, code blocks, etc.)
/// - Inline formatting (emphasis, strong, highlight, subscript, superscript, etc.)
/// - Attributes (classes, IDs, key-value pairs)
/// - Links, images, footnotes
/// - Math expressions (inline and display)
/// - Tables with full structure
///
/// Available when the `djot` feature is enabled.
///
/// This is a plain alias for the bridge type `RustBridge.DjotContent`.
public typealias DjotContent = RustBridge.DjotContent

/// Block-level element in a Djot document.
///
/// Represents structural elements like headings, paragraphs, lists, code blocks, etc.
///
/// This is a plain alias for the bridge type `RustBridge.FormattedBlock`.
public typealias FormattedBlock = RustBridge.FormattedBlock

/// Inline element within a block.
///
/// Represents text with formatting, links, images, etc.
///
/// This is a plain alias for the bridge type `RustBridge.InlineElement`.
public typealias InlineElement = RustBridge.InlineElement

/// An image element of a Djot document.
///
/// `Codable` conformance is compiler-synthesized, so JSON keys equal the property names.
public struct DjotImage: Codable, Sendable, Hashable {
    /// Image source URL or path
    public let src: String
    /// Alternative text
    public let alt: String
    /// Optional title
    public let title: String?
    /// Element attributes
    public let attributes: String?

    /// Memberwise initializer; `title` and `attributes` default to `nil`.
    public init(
        src: String,
        alt: String,
        title: String? = nil,
        attributes: String? = nil
    ) {
        self.src = src
        self.alt = alt
        self.title = title
        self.attributes = attributes
    }
}

// MARK: - Internal FFI conversions for DjotImage
internal extension DjotImage {
    /// Copies each property out of a bridge-side `DjotImageRef`.
    ///
    /// - Parameter rb: Bridge reference whose accessors supply the field values.
    init(_ rb: RustBridge.DjotImageRef) throws {
        src = rb.src().toString()
        alt = rb.alt().toString()
        title = rb.title()?.toString()
        attributes = rb.attributes()?.toString()
    }

    /// Converts to the bridge-side type by JSON-encoding this value and handing the text to `djotImageFromJson`.
    ///
    /// - Throws: Encoding errors from `JSONEncoder`, or errors thrown by the bridge parser.
    func intoRust() throws -> RustBridge.DjotImage {
        let encoded = try JSONEncoder().encode(self)
        // Falls back to an empty JSON object if the bytes cannot be read as UTF-8.
        let payload = String(data: encoded, encoding: .utf8) ?? "{}"
        return try RustBridge.djotImageFromJson(payload)
    }
}

/// A link element of a Djot document.
///
/// `Codable` conformance is compiler-synthesized, so JSON keys equal the property names.
public struct DjotLink: Codable, Sendable, Hashable {
    /// Link URL
    public let url: String
    /// Link text content
    public let text: String
    /// Optional title
    public let title: String?
    /// Element attributes
    public let attributes: String?

    /// Memberwise initializer; `title` and `attributes` default to `nil`.
    public init(
        url: String,
        text: String,
        title: String? = nil,
        attributes: String? = nil
    ) {
        self.url = url
        self.text = text
        self.title = title
        self.attributes = attributes
    }
}

// MARK: - Internal FFI conversions for DjotLink
internal extension DjotLink {
    /// Copies each property out of a bridge-side `DjotLinkRef`.
    ///
    /// - Parameter rb: Bridge reference whose accessors supply the field values.
    init(_ rb: RustBridge.DjotLinkRef) throws {
        url = rb.url().toString()
        text = rb.text().toString()
        title = rb.title()?.toString()
        attributes = rb.attributes()?.toString()
    }

    /// Converts to the bridge-side type by JSON-encoding this value and handing the text to `djotLinkFromJson`.
    ///
    /// - Throws: Encoding errors from `JSONEncoder`, or errors thrown by the bridge parser.
    func intoRust() throws -> RustBridge.DjotLink {
        let encoded = try JSONEncoder().encode(self)
        // Falls back to an empty JSON object if the bytes cannot be read as UTF-8.
        let payload = String(data: encoded, encoding: .utf8) ?? "{}"
        return try RustBridge.djotLinkFromJson(payload)
    }
}

/// Footnote in Djot.
///
/// This is a plain alias for the bridge type `RustBridge.Footnote`.
public typealias Footnote = RustBridge.Footnote

/// Top-level structured document representation.
///
/// A flat array of nodes with index-based parent/child references forming a tree.
/// Root-level nodes have `parent: None`. Use `body_roots()` and `furniture_roots()`
/// to iterate over top-level content by layer.
///
/// # Validation
///
/// Call `validate()` after construction to verify all node indices are in bounds
/// and parent-child relationships are bidirectionally consistent.
///
/// This is a plain alias for the bridge type `RustBridge.DocumentStructure`.
/// NOTE(review): the member names above (`parent: None`, `body_roots()`,
/// `furniture_roots()`) use Rust spelling; the Swift spellings exposed by the
/// bridge are not visible here — confirm before relying on them.
public typealias DocumentStructure = RustBridge.DocumentStructure

/// A resolved link from one node of the document tree to another.
///
/// `Codable` conformance is compiler-synthesized, so JSON keys equal the property names.
public struct DocumentRelationship: Codable, Sendable, Hashable {
    /// Source node index (the referencing node).
    public let source: UInt32
    /// Target node index (the referenced node).
    public let target: UInt32
    /// Semantic kind of the relationship.
    public let kind: RelationshipKind

    /// Memberwise initializer; all three values are required.
    public init(
        source: UInt32,
        target: UInt32,
        kind: RelationshipKind
    ) {
        self.source = source
        self.target = target
        self.kind = kind
    }
}

// MARK: - Internal FFI conversions for DocumentRelationship
internal extension DocumentRelationship {
    /// Copies a bridge reference into a Swift `DocumentRelationship`.
    ///
    /// - Parameter rb: Bridge reference providing `source`, `target` and `kind`.
    /// - Throws: `DecodingError.dataCorrupted` when the kind string reported by
    ///   the bridge matches no `RelationshipKind` raw value. This initializer is
    ///   already `throws`, so an unknown kind is surfaced to the caller instead
    ///   of terminating the process.
    init(_ rb: RustBridge.DocumentRelationshipRef) throws {
        self.source = rb.source()
        self.target = rb.target()
        // Read the kind string once; it is needed for both the lookup and the error text.
        let rawKind = rb.kind().toString()
        guard let resolvedKind = RelationshipKind(rawValue: rawKind) else {
            throw DecodingError.dataCorrupted(
                DecodingError.Context(
                    codingPath: [],
                    debugDescription: "Unknown RelationshipKind: \(rawKind)"
                )
            )
        }
        self.kind = resolvedKind
    }

    /// Converts this value to its bridge counterpart by JSON-encoding `self`
    /// and passing the text to `RustBridge.documentRelationshipFromJson`.
    ///
    /// - Throws: Any error raised by `JSONEncoder` or by the bridge function.
    func intoRust() throws -> RustBridge.DocumentRelationship {
        let data = try JSONEncoder().encode(self)
        let json = String(data: data, encoding: .utf8) ?? "{}"
        return try RustBridge.documentRelationshipFromJson(json)
    }
}

/// A single node in the document tree.
///
/// Each node has deterministic `id`, typed `content`, optional `parent`/`children`
/// for tree structure, and metadata like page number, bounding box, and content layer.
///
/// Alias for the bridge-generated `RustBridge.DocumentNode` type.
public typealias DocumentNode = RustBridge.DocumentNode

/// Table grid carrying cell-level metadata.
///
/// Holds the row/column dimensions plus a flat list of positioned cells.
public struct TableGrid: Codable, Sendable, Hashable {
    /// Number of rows in the table.
    public let rows: UInt32
    /// Number of columns in the table.
    public let cols: UInt32
    /// All cells in row-major order.
    public let cells: [GridCell]

    /// Creates a grid from explicit dimensions and cells.
    public init(
        rows: UInt32,
        cols: UInt32,
        cells: [GridCell]
    ) {
        self.rows = rows
        self.cols = cols
        self.cells = cells
    }

    // Raw values equal the case names, so the JSON keys are "rows", "cols", "cells".
    private enum CodingKeys: String, CodingKey {
        case rows
        case cols
        case cells
    }

    /// Decodes a grid; an absent or `null` key falls back to `0` for the
    /// dimensions and to an empty array for `cells`.
    public init(from decoder: any Decoder) throws {
        let values = try decoder.container(keyedBy: CodingKeys.self)
        let decodedRows = try values.decodeIfPresent(UInt32.self, forKey: .rows)
        let decodedCols = try values.decodeIfPresent(UInt32.self, forKey: .cols)
        let decodedCells = try values.decodeIfPresent([GridCell].self, forKey: .cells)
        self.rows = decodedRows ?? 0
        self.cols = decodedCols ?? 0
        self.cells = decodedCells ?? []
    }
}

// MARK: - Internal FFI conversions for TableGrid
internal extension TableGrid {
    /// Copies the dimensions from the bridge reference and converts each
    /// bridged cell into a `GridCell`.
    ///
    /// - Throws: Any error thrown while converting a cell.
    init(_ rb: RustBridge.TableGridRef) throws {
        self.rows = rb.rows()
        self.cols = rb.cols()
        self.cells = try rb.cells().map { cellRef in try GridCell(cellRef) }
    }

    /// Rebuilds the bridge value, converting the cells one by one into a `RustVec`.
    ///
    /// - Throws: Any error thrown while converting a cell.
    func intoRust() throws -> RustBridge.TableGrid {
        let bridgedCells = RustVec<RustBridge.GridCell>()
        try self.cells.forEach { cell in
            bridgedCells.push(value: try cell.intoRust())
        }
        return RustBridge.TableGrid(self.rows, self.cols, bridgedCells)
    }
}

/// One cell of a table grid, with its position and span metadata.
public struct GridCell: Codable, Sendable, Hashable {
    /// Text content of the cell.
    public let content: String
    /// Row position (zero-indexed).
    public let row: UInt32
    /// Column position (zero-indexed).
    public let col: UInt32
    /// How many rows the cell spans.
    public let rowSpan: UInt32
    /// How many columns the cell spans.
    public let colSpan: UInt32
    /// `true` when this is a header cell.
    public let isHeader: Bool
    /// Bounding box of the cell, when available.
    public let bbox: BoundingBox?

    /// Creates a cell; `bbox` defaults to `nil`.
    public init(
        content: String,
        row: UInt32,
        col: UInt32,
        rowSpan: UInt32,
        colSpan: UInt32,
        isHeader: Bool,
        bbox: BoundingBox? = nil
    ) {
        self.content = content
        self.row = row
        self.col = col
        self.rowSpan = rowSpan
        self.colSpan = colSpan
        self.isHeader = isHeader
        self.bbox = bbox
    }

    // Only the multi-word properties need an explicit snake_case JSON key.
    private enum CodingKeys: String, CodingKey {
        case content, row, col
        case rowSpan = "row_span"
        case colSpan = "col_span"
        case isHeader = "is_header"
        case bbox
    }
}

// MARK: - Internal FFI conversions for GridCell
internal extension GridCell {
    /// Copies every field out of the bridge reference; `bbox` is converted
    /// only when the bridge returns one.
    ///
    /// - Throws: Any error thrown while converting the bounding box.
    init(_ rb: RustBridge.GridCellRef) throws {
        self.content = rb.content().toString()
        self.row = rb.row()
        self.col = rb.col()
        self.rowSpan = rb.rowSpan()
        self.colSpan = rb.colSpan()
        self.isHeader = rb.isHeader()
        if let boxRef = rb.bbox() {
            self.bbox = try BoundingBox(boxRef)
        } else {
            self.bbox = nil
        }
    }

    /// Converts this value to its bridge counterpart by JSON-encoding `self`
    /// and passing the text to `RustBridge.gridCellFromJson`.
    ///
    /// - Throws: Any error raised by `JSONEncoder` or by the bridge function.
    func intoRust() throws -> RustBridge.GridCell {
        let payload = try String(data: JSONEncoder().encode(self), encoding: .utf8) ?? "{}"
        return try RustBridge.gridCellFromJson(payload)
    }
}

/// Inline text annotation: formatting and links expressed as byte ranges.
///
/// The offsets refer to bytes of the owning node's text content, which lets a
/// formatted region be identified precisely.
public struct TextAnnotation: Codable, Sendable, Hashable {
    /// Inclusive start byte offset into the node's text content.
    public let start: UInt32
    /// Exclusive end byte offset into the node's text content.
    public let end: UInt32
    /// Annotation type.
    public let kind: AnnotationKind

    /// Creates an annotation covering `start..<end` with the given kind.
    public init(
        start: UInt32,
        end: UInt32,
        kind: AnnotationKind
    ) {
        self.kind = kind
        self.end = end
        self.start = start
    }
}

// MARK: - Internal FFI conversions for TextAnnotation
internal extension TextAnnotation {
    /// Copies the offsets from the bridge reference and JSON-decodes the kind
    /// string it reports into an `AnnotationKind`.
    ///
    /// - Throws: Any `JSONDecoder` error raised while decoding the kind.
    init(_ rb: RustBridge.TextAnnotationRef) throws {
        self.start = rb.start()
        self.end = rb.end()
        let kindJSON = rb.kind().toString()
        let kindData = kindJSON.data(using: .utf8) ?? Data("null".utf8)
        self.kind = try JSONDecoder().decode(AnnotationKind.self, from: kindData)
    }

    /// Converts this value to its bridge counterpart by JSON-encoding `self`
    /// and passing the text to `RustBridge.textAnnotationFromJson`.
    ///
    /// - Throws: Any error raised by `JSONEncoder` or by the bridge function.
    func intoRust() throws -> RustBridge.TextAnnotation {
        let payload = try String(data: JSONEncoder().encode(self), encoding: .utf8) ?? "{}"
        return try RustBridge.textAnnotationFromJson(payload)
    }
}

/// General extraction result used by the core extraction API.
///
/// This is the main result type returned by all extraction functions.
///
/// Alias for the bridge-generated `RustBridge.ExtractionResult` type.
public typealias ExtractionResult = RustBridge.ExtractionResult

/// A single file extracted from an archive.
///
/// When archives (ZIP, TAR, 7Z, GZIP) are extracted with recursive extraction
/// enabled, each processable file produces its own full `ExtractionResult`.
///
/// Alias for the bridge-generated `RustBridge.ArchiveEntry` type.
public typealias ArchiveEntry = RustBridge.ArchiveEntry

/// A non-fatal warning raised by one stage of the processing pipeline.
///
/// Records errors from optional features that do not stop extraction but may
/// mean the results are degraded.
public struct ProcessingWarning: Codable, Sendable, Hashable {
    /// Pipeline stage or feature that raised the warning
    /// (e.g., "embedding", "chunking", "language_detection", "output_format").
    public let source: String
    /// Human-readable description of the problem.
    public let message: String

    /// Creates a warning for the given stage and message.
    public init(
        source: String,
        message: String
    ) {
        self.message = message
        self.source = source
    }
}

// MARK: - Internal FFI conversions for ProcessingWarning
internal extension ProcessingWarning {
    /// Copies `source` and `message` out of the bridge reference.
    init(_ rb: RustBridge.ProcessingWarningRef) throws {
        let stage = rb.source().toString()
        let text = rb.message().toString()
        self.source = stage
        self.message = text
    }

    /// Converts this value to its bridge counterpart by JSON-encoding `self`
    /// and passing the text to `RustBridge.processingWarningFromJson`.
    ///
    /// - Throws: Any error raised by `JSONEncoder` or by the bridge function.
    func intoRust() throws -> RustBridge.ProcessingWarning {
        let payload = try String(data: JSONEncoder().encode(self), encoding: .utf8) ?? "{}"
        return try RustBridge.processingWarningFromJson(payload)
    }
}

/// Token usage and cost figures for one LLM call made during extraction.
///
/// Filled in when VLM OCR, structured extraction, or LLM-based embeddings are
/// used. A single extraction may yield several entries when it makes several
/// LLM calls (e.g. VLM OCR + structured extraction).
public struct LlmUsage: Codable, Sendable, Hashable {
    /// LLM model identifier (e.g. "openai/gpt-4o", "anthropic/claude-sonnet-4-20250514").
    public let model: String
    /// Pipeline stage that triggered the call
    /// (e.g. "vlm_ocr", "structured_extraction", "embeddings").
    public let source: String
    /// Input/prompt tokens consumed.
    public let inputTokens: UInt64?
    /// Output/completion tokens generated.
    public let outputTokens: UInt64?
    /// Total tokens (input + output).
    public let totalTokens: UInt64?
    /// Estimated cost in USD, based on the provider's published pricing.
    public let estimatedCost: Double?
    /// Reason the model stopped generating (e.g. "stop", "length", "content_filter").
    public let finishReason: String?

    /// Creates a usage record; every optional figure defaults to `nil`.
    public init(
        model: String,
        source: String,
        inputTokens: UInt64? = nil,
        outputTokens: UInt64? = nil,
        totalTokens: UInt64? = nil,
        estimatedCost: Double? = nil,
        finishReason: String? = nil
    ) {
        self.model = model
        self.source = source
        self.inputTokens = inputTokens
        self.outputTokens = outputTokens
        self.totalTokens = totalTokens
        self.estimatedCost = estimatedCost
        self.finishReason = finishReason
    }

    // Multi-word properties map to snake_case JSON keys.
    private enum CodingKeys: String, CodingKey {
        case model, source
        case inputTokens = "input_tokens"
        case outputTokens = "output_tokens"
        case totalTokens = "total_tokens"
        case estimatedCost = "estimated_cost"
        case finishReason = "finish_reason"
    }

    /// Decodes a usage record; an absent or `null` `model`/`source` becomes the
    /// empty string, and absent or `null` optional figures stay `nil`.
    public init(from decoder: any Decoder) throws {
        let values = try decoder.container(keyedBy: CodingKeys.self)
        self.model = try values.decodeIfPresent(String.self, forKey: .model) ?? ""
        self.source = try values.decodeIfPresent(String.self, forKey: .source) ?? ""
        // `decodeIfPresent` already yields an optional, so no fallback is needed here.
        self.inputTokens = try values.decodeIfPresent(UInt64.self, forKey: .inputTokens)
        self.outputTokens = try values.decodeIfPresent(UInt64.self, forKey: .outputTokens)
        self.totalTokens = try values.decodeIfPresent(UInt64.self, forKey: .totalTokens)
        self.estimatedCost = try values.decodeIfPresent(Double.self, forKey: .estimatedCost)
        self.finishReason = try values.decodeIfPresent(String.self, forKey: .finishReason)
    }
}

// MARK: - Internal FFI conversions for LlmUsage
internal extension LlmUsage {
    /// Copies every field out of the bridge reference; `finishReason` stays
    /// `nil` when the bridge accessor returns `nil`.
    init(_ rb: RustBridge.LlmUsageRef) throws {
        let modelName = rb.model().toString()
        let stage = rb.source().toString()
        self.model = modelName
        self.source = stage
        self.inputTokens = rb.inputTokens()
        self.outputTokens = rb.outputTokens()
        self.totalTokens = rb.totalTokens()
        self.estimatedCost = rb.estimatedCost()
        self.finishReason = rb.finishReason()?.toString()
    }

    /// Builds the bridge value directly from the stored fields, wrapping the
    /// strings in `RustString`.
    func intoRust() throws -> RustBridge.LlmUsage {
        let bridgedModel = RustString(self.model)
        let bridgedSource = RustString(self.source)
        let bridgedFinishReason = self.finishReason.map { RustString($0) }
        return RustBridge.LlmUsage(
            bridgedModel,
            bridgedSource,
            self.inputTokens,
            self.outputTokens,
            self.totalTokens,
            self.estimatedCost,
            bridgedFinishReason
        )
    }
}

/// A chunk of text with metadata and, optionally, an embedding.
///
/// Chunks exist when chunking is enabled in `ExtractionConfig`. Each one holds
/// its text, an optional embedding vector (if embedding generation is
/// configured), and metadata describing where it sits in the document.
public struct Chunk: Codable, Sendable, Hashable {
    /// Text content of the chunk.
    public let content: String
    /// Semantic structural classification of the chunk.
    ///
    /// Assigned by the heuristic classifier based on content patterns and
    /// heading context. Defaults to `ChunkType::Unknown` when no rule matches.
    public let chunkType: ChunkType
    /// Embedding vector for the chunk, if any.
    ///
    /// Only populated when `EmbeddingConfig` is provided in chunking configuration.
    /// The dimensionality depends on the chosen embedding model.
    public let embedding: [Float]?
    /// Position and property metadata for the chunk.
    public let metadata: ChunkMetadata

    /// Creates a chunk; `embedding` defaults to `nil`.
    public init(
        content: String,
        chunkType: ChunkType,
        embedding: [Float]? = nil,
        metadata: ChunkMetadata
    ) {
        self.content = content
        self.chunkType = chunkType
        self.embedding = embedding
        self.metadata = metadata
    }

    // Only `chunkType` needs an explicit snake_case JSON key.
    private enum CodingKeys: String, CodingKey {
        case content
        case chunkType = "chunk_type"
        case embedding
        case metadata
    }
}

// MARK: - Internal FFI conversions for Chunk
internal extension Chunk {
    /// Copies a bridge reference into a Swift `Chunk`.
    ///
    /// - Parameter rb: Bridge reference providing the content, chunk type,
    ///   optional embedding and metadata.
    /// - Throws: `DecodingError.dataCorrupted` when the chunk-type string
    ///   reported by the bridge matches no `ChunkType` raw value, plus any
    ///   error thrown while converting the metadata. This initializer is
    ///   already `throws`, so an unknown type is surfaced to the caller instead
    ///   of terminating the process.
    init(_ rb: RustBridge.ChunkRef) throws {
        self.content = rb.content().toString()
        // Read the type string once; it is needed for both the lookup and the error text.
        let rawChunkType = rb.chunkType().toString()
        guard let resolvedType = ChunkType(rawValue: rawChunkType) else {
            throw DecodingError.dataCorrupted(
                DecodingError.Context(
                    codingPath: [],
                    debugDescription: "Unknown ChunkType: \(rawChunkType)"
                )
            )
        }
        self.chunkType = resolvedType
        self.embedding = rb.embedding().map { Array($0) }
        self.metadata = try ChunkMetadata(rb.metadata())
    }

    /// Converts this value to its bridge counterpart by JSON-encoding `self`
    /// and passing the text to `RustBridge.chunkFromJson`.
    ///
    /// - Throws: Any error raised by `JSONEncoder` or by the bridge function.
    func intoRust() throws -> RustBridge.Chunk {
        let data = try JSONEncoder().encode(self)
        let json = String(data: data, encoding: .utf8) ?? "{}"
        return try RustBridge.chunkFromJson(json)
    }
}

/// Heading context of a chunk inside a Markdown document.
///
/// Carries the heading hierarchy from the document root down to the chunk's section.
public struct HeadingContext: Codable, Sendable, Hashable {
    /// Heading hierarchy from the document root to this chunk's section.
    /// Index 0 is the outermost (h1), last element is the most specific.
    public let headings: [HeadingLevel]

    /// Creates a context from an ordered heading hierarchy.
    public init(
        headings: [HeadingLevel]
    ) {
        self.headings = headings
    }
}

// MARK: - Internal FFI conversions for HeadingContext
internal extension HeadingContext {
    /// Converts each bridged heading into a `HeadingLevel`.
    ///
    /// - Throws: Any error thrown while converting a heading.
    init(_ rb: RustBridge.HeadingContextRef) throws {
        self.headings = try rb.headings().map { headingRef in try HeadingLevel(headingRef) }
    }

    /// Converts this value to its bridge counterpart by JSON-encoding `self`
    /// and passing the text to `RustBridge.headingContextFromJson`.
    ///
    /// - Throws: Any error raised by `JSONEncoder` or by the bridge function.
    func intoRust() throws -> RustBridge.HeadingContext {
        let payload = try String(data: JSONEncoder().encode(self), encoding: .utf8) ?? "{}"
        return try RustBridge.headingContextFromJson(payload)
    }
}

/// One heading within a heading hierarchy.
public struct HeadingLevel: Codable, Sendable, Hashable {
    /// Depth of the heading (1 = h1, 2 = h2, etc.)
    public let level: UInt8
    /// Text content of the heading.
    public let text: String

    /// Creates a heading with the given depth and text.
    public init(
        level: UInt8,
        text: String
    ) {
        self.text = text
        self.level = level
    }
}

// MARK: - Internal FFI conversions for HeadingLevel
internal extension HeadingLevel {
    /// Copies `level` and `text` out of the bridge reference.
    init(_ rb: RustBridge.HeadingLevelRef) throws {
        let depth = rb.level()
        let title = rb.text().toString()
        self.level = depth
        self.text = title
    }

    /// Converts this value to its bridge counterpart by JSON-encoding `self`
    /// and passing the text to `RustBridge.headingLevelFromJson`.
    ///
    /// - Throws: Any error raised by `JSONEncoder` or by the bridge function.
    func intoRust() throws -> RustBridge.HeadingLevel {
        let payload = try String(data: JSONEncoder().encode(self), encoding: .utf8) ?? "{}"
        return try RustBridge.headingLevelFromJson(payload)
    }
}

/// Positional metadata describing where a chunk sits in the original document.
public struct ChunkMetadata: Codable, Sendable, Hashable {
    /// Byte offset at which the chunk starts in the original text (UTF-8 valid boundary).
    public let byteStart: UInt
    /// Byte offset at which the chunk ends in the original text (UTF-8 valid boundary).
    public let byteEnd: UInt
    /// Token count of the chunk (if available).
    ///
    /// This is calculated by the embedding model's tokenizer if embeddings are enabled.
    public let tokenCount: UInt?
    /// Zero-based index of the chunk within the document.
    public let chunkIndex: UInt
    /// Total number of chunks in the document.
    public let totalChunks: UInt
    /// First page number the chunk spans (1-indexed).
    ///
    /// Only populated when page tracking is enabled in extraction configuration.
    public let firstPage: UInt32?
    /// Last page number the chunk spans (1-indexed, equal to first_page for single-page chunks).
    ///
    /// Only populated when page tracking is enabled in extraction configuration.
    public let lastPage: UInt32?
    /// Heading context when using Markdown chunker.
    ///
    /// Holds the heading hierarchy the chunk falls under.
    /// Only populated when `ChunkerType::Markdown` is used.
    public let headingContext: HeadingContext?
    /// Indices into `ExtractionResult.images` for images on pages covered by this chunk.
    ///
    /// Holds zero-based indices into the top-level `images` collection for every
    /// image whose `page_number` falls within `[first_page, last_page]`.
    /// Empty when image extraction is disabled or the chunk spans no pages with images.
    public let imageIndices: [UInt32]

    /// Creates chunk metadata; the optional fields default to `nil`.
    public init(
        byteStart: UInt,
        byteEnd: UInt,
        tokenCount: UInt? = nil,
        chunkIndex: UInt,
        totalChunks: UInt,
        firstPage: UInt32? = nil,
        lastPage: UInt32? = nil,
        headingContext: HeadingContext? = nil,
        imageIndices: [UInt32]
    ) {
        self.byteStart = byteStart
        self.byteEnd = byteEnd
        self.tokenCount = tokenCount
        self.chunkIndex = chunkIndex
        self.totalChunks = totalChunks
        self.firstPage = firstPage
        self.lastPage = lastPage
        self.headingContext = headingContext
        self.imageIndices = imageIndices
    }

    // Every property is multi-word, so each maps to a snake_case JSON key.
    private enum CodingKeys: String, CodingKey {
        case byteStart = "byte_start", byteEnd = "byte_end"
        case tokenCount = "token_count"
        case chunkIndex = "chunk_index", totalChunks = "total_chunks"
        case firstPage = "first_page", lastPage = "last_page"
        case headingContext = "heading_context"
        case imageIndices = "image_indices"
    }
}

// MARK: - Internal FFI conversions for ChunkMetadata
internal extension ChunkMetadata {
    /// Copies every field out of the bridge reference; the heading context is
    /// converted only when the bridge returns one.
    ///
    /// - Throws: Any error thrown while converting the heading context.
    init(_ rb: RustBridge.ChunkMetadataRef) throws {
        self.byteStart = rb.byteStart()
        self.byteEnd = rb.byteEnd()
        self.tokenCount = rb.tokenCount()
        self.chunkIndex = rb.chunkIndex()
        self.totalChunks = rb.totalChunks()
        self.firstPage = rb.firstPage()
        self.lastPage = rb.lastPage()
        if let contextRef = rb.headingContext() {
            self.headingContext = try HeadingContext(contextRef)
        } else {
            self.headingContext = nil
        }
        let bridgedIndices = rb.imageIndices()
        self.imageIndices = Array(bridgedIndices)
    }

    /// Converts this value to its bridge counterpart by JSON-encoding `self`
    /// and passing the text to `RustBridge.chunkMetadataFromJson`.
    ///
    /// - Throws: Any error raised by `JSONEncoder` or by the bridge function.
    func intoRust() throws -> RustBridge.ChunkMetadata {
        let payload = try String(data: JSONEncoder().encode(self), encoding: .utf8) ?? "{}"
        return try RustBridge.chunkMetadataFromJson(payload)
    }
}

/// Extracted image from a document.
///
/// Contains raw image data, metadata, and optional nested OCR results.
/// Raw bytes allow cross-language compatibility - users can convert to
/// PIL.Image (Python), Sharp (Node.js), or other formats as needed.
///
/// Alias for the bridge-generated `RustBridge.ExtractedImage` type.
public typealias ExtractedImage = RustBridge.ExtractedImage

/// Bounding-box coordinates used to position an element.
public struct BoundingBox: Codable, Sendable, Hashable {
    /// Left x-coordinate
    public let x0: Double
    /// Bottom y-coordinate
    public let y0: Double
    /// Right x-coordinate
    public let x1: Double
    /// Top y-coordinate
    public let y1: Double

    /// Creates a box from its four coordinates.
    public init(
        x0: Double,
        y0: Double,
        x1: Double,
        y1: Double
    ) {
        self.x0 = x0
        self.y0 = y0
        self.x1 = x1
        self.y1 = y1
    }

    // Raw values equal the case names, so the JSON keys are "x0", "y0", "x1", "y1".
    private enum CodingKeys: String, CodingKey {
        case x0, y0, x1, y1
    }

    /// Decodes a box; any absent or `null` coordinate falls back to `0`.
    public init(from decoder: any Decoder) throws {
        let values = try decoder.container(keyedBy: CodingKeys.self)
        let left = try values.decodeIfPresent(Double.self, forKey: .x0)
        let bottom = try values.decodeIfPresent(Double.self, forKey: .y0)
        let right = try values.decodeIfPresent(Double.self, forKey: .x1)
        let top = try values.decodeIfPresent(Double.self, forKey: .y1)
        self.x0 = left ?? 0
        self.y0 = bottom ?? 0
        self.x1 = right ?? 0
        self.y1 = top ?? 0
    }
}

// MARK: - Internal FFI conversions for BoundingBox
internal extension BoundingBox {
    /// Copies the four coordinates out of the bridge reference.
    init(_ rb: RustBridge.BoundingBoxRef) throws {
        let left = rb.x0()
        let bottom = rb.y0()
        let right = rb.x1()
        let top = rb.y1()
        self.x0 = left
        self.y0 = bottom
        self.x1 = right
        self.y1 = top
    }

    /// Builds the bridge value directly from the four stored coordinates.
    func intoRust() throws -> RustBridge.BoundingBox {
        let bridged = RustBridge.BoundingBox(self.x0, self.y0, self.x1, self.y1)
        return bridged
    }
}

/// Metadata for a semantic element.
///
/// Alias for the bridge-generated `RustBridge.ElementMetadata` type.
public typealias ElementMetadata = RustBridge.ElementMetadata

/// Semantic element extracted from document.
///
/// Represents a logical unit of content with semantic classification,
/// unique identifier, and metadata for tracking origin and position.
///
/// Alias for the bridge-generated `RustBridge.Element` type.
public typealias Element = RustBridge.Element

/// Excel workbook representation.
///
/// Contains all sheets from an Excel file (.xlsx, .xls, etc.) with
/// extracted content and metadata.
///
/// Alias for the bridge-generated `RustBridge.ExcelWorkbook` type.
public typealias ExcelWorkbook = RustBridge.ExcelWorkbook

/// A single Excel worksheet.
///
/// One sheet of an Excel workbook, with its content converted to Markdown
/// and its dimensional statistics.
public struct ExcelSheet: Codable, Sendable, Hashable {
    /// Name of the sheet as shown in Excel
    public let name: String
    /// Content of the sheet converted to Markdown tables
    public let markdown: String
    /// Number of rows
    public let rowCount: UInt
    /// Number of columns
    public let colCount: UInt
    /// Total number of non-empty cells
    public let cellCount: UInt
    /// Pre-extracted table cells (2D vector of cell values)
    /// Populated during markdown generation to avoid re-parsing markdown.
    /// None for empty sheets.
    public let tableCells: [[String]]?

    /// Creates a sheet; `tableCells` defaults to `nil`.
    public init(
        name: String,
        markdown: String,
        rowCount: UInt,
        colCount: UInt,
        cellCount: UInt,
        tableCells: [[String]]? = nil
    ) {
        self.name = name
        self.markdown = markdown
        self.rowCount = rowCount
        self.colCount = colCount
        self.cellCount = cellCount
        self.tableCells = tableCells
    }

    // Multi-word properties map to snake_case JSON keys.
    private enum CodingKeys: String, CodingKey {
        case name, markdown
        case rowCount = "row_count"
        case colCount = "col_count"
        case cellCount = "cell_count"
        case tableCells = "table_cells"
    }
}

// MARK: - Internal FFI conversions for ExcelSheet
internal extension ExcelSheet {
    /// Copies the scalar fields from the bridge reference and JSON-decodes the
    /// table-cell string it reports (the literal `null` is decoded when the
    /// bridge returns no string).
    ///
    /// - Throws: Any `JSONDecoder` error raised while decoding the table cells.
    init(_ rb: RustBridge.ExcelSheetRef) throws {
        self.name = rb.name().toString()
        self.markdown = rb.markdown().toString()
        self.rowCount = rb.rowCount()
        self.colCount = rb.colCount()
        self.cellCount = rb.cellCount()
        let cellsJSON = rb.tableCells()?.toString() ?? "null"
        let cellsData = cellsJSON.data(using: .utf8) ?? Data("null".utf8)
        self.tableCells = try JSONDecoder().decode([[String]]?.self, from: cellsData)
    }

    /// Converts this value to its bridge counterpart by JSON-encoding `self`
    /// and passing the text to `RustBridge.excelSheetFromJson`.
    ///
    /// - Throws: Any error raised by `JSONEncoder` or by the bridge function.
    func intoRust() throws -> RustBridge.ExcelSheet {
        let payload = try String(data: JSONEncoder().encode(self), encoding: .utf8) ?? "{}"
        return try RustBridge.excelSheetFromJson(payload)
    }
}

/// Result of extracting an XML file.
///
/// Holds the text pulled out of the XML together with structural statistics
/// about the document.
public struct XmlExtractionResult: Codable, Sendable, Hashable {
    /// Extracted text content (XML structure filtered out)
    public let content: String
    /// Total number of XML elements processed
    public let elementCount: UInt
    /// List of unique element names found (sorted)
    public let uniqueElements: [String]

    /// Creates a result from its three stored fields.
    public init(
        content: String,
        elementCount: UInt,
        uniqueElements: [String]
    ) {
        self.content = content
        self.elementCount = elementCount
        self.uniqueElements = uniqueElements
    }

    // Multi-word properties map to snake_case JSON keys.
    private enum CodingKeys: String, CodingKey {
        case content
        case elementCount = "element_count"
        case uniqueElements = "unique_elements"
    }
}

// MARK: - Internal FFI conversions for XmlExtractionResult
internal extension XmlExtractionResult {
    /// Copies the fields out of the bridge reference, turning each bridged
    /// element name into a Swift `String`.
    init(_ rb: RustBridge.XmlExtractionResultRef) throws {
        self.content = rb.content().toString()
        self.elementCount = rb.elementCount()
        self.uniqueElements = rb.uniqueElements().map { nameRef in
            nameRef.as_str().toString()
        }
    }

    /// Converts this value to its bridge counterpart by JSON-encoding `self`
    /// and passing the text to `RustBridge.xmlExtractionResultFromJson`.
    ///
    /// - Throws: Any error raised by `JSONEncoder` or by the bridge function.
    func intoRust() throws -> RustBridge.XmlExtractionResult {
        let payload = try String(data: JSONEncoder().encode(self), encoding: .utf8) ?? "{}"
        return try RustBridge.xmlExtractionResultFromJson(payload)
    }
}

/// Result of extracting a plain-text or Markdown file.
///
/// Holds the extracted text with its statistics and, for Markdown files,
/// structural elements such as headers and links.
public struct TextExtractionResult: Codable, Sendable, Hashable {
    /// Extracted text content
    public let content: String
    /// Number of lines
    public let lineCount: UInt
    /// Number of words
    public let wordCount: UInt
    /// Number of characters
    public let characterCount: UInt
    /// Markdown headers (text only, Markdown files only)
    public let headers: [String]?
    /// Markdown links as (text, URL) tuples (Markdown files only)
    public let links: [[String]]?
    /// Code blocks as (language, code) tuples (Markdown files only)
    public let codeBlocks: [[String]]?

    /// Creates a result; the Markdown-only fields default to `nil`.
    public init(
        content: String,
        lineCount: UInt,
        wordCount: UInt,
        characterCount: UInt,
        headers: [String]? = nil,
        links: [[String]]? = nil,
        codeBlocks: [[String]]? = nil
    ) {
        self.content = content
        self.lineCount = lineCount
        self.wordCount = wordCount
        self.characterCount = characterCount
        self.headers = headers
        self.links = links
        self.codeBlocks = codeBlocks
    }

    // Multi-word properties map to snake_case JSON keys.
    private enum CodingKeys: String, CodingKey {
        case content
        case lineCount = "line_count"
        case wordCount = "word_count"
        case characterCount = "character_count"
        case headers, links
        case codeBlocks = "code_blocks"
    }
}

// MARK: - Internal FFI conversions for TextExtractionResult
internal extension TextExtractionResult {
    /// Copies the content, counts and headers out of the bridge reference.
    ///
    /// `links` and `codeBlocks` are always set to `nil` by this initializer.
    /// NOTE(review): if the bridge reference carries link or code-block data,
    /// it is not copied here — confirm whether that is intended.
    init(_ rb: RustBridge.TextExtractionResultRef) throws {
        self.content = rb.content().toString()
        self.lineCount = rb.lineCount()
        self.wordCount = rb.wordCount()
        self.characterCount = rb.characterCount()
        if let headerRefs = rb.headers() {
            self.headers = headerRefs.map { headerRef in headerRef.as_str().toString() }
        } else {
            self.headers = nil
        }
        self.links = nil
        self.codeBlocks = nil
    }

    /// Converts this value to its bridge counterpart by JSON-encoding `self`
    /// and passing the text to `RustBridge.textExtractionResultFromJson`.
    ///
    /// - Throws: Any error raised by `JSONEncoder` or by the bridge function.
    func intoRust() throws -> RustBridge.TextExtractionResult {
        let payload = try String(data: JSONEncoder().encode(self), encoding: .utf8) ?? "{}"
        return try RustBridge.textExtractionResultFromJson(payload)
    }
}

/// PowerPoint (PPTX) extraction result.
///
/// Contains extracted slide content, metadata, and embedded images/tables.
///
/// Alias for the bridge-generated `RustBridge.PptxExtractionResult` type.
public typealias PptxExtractionResult = RustBridge.PptxExtractionResult

/// Email extraction result.
///
/// Complete representation of an extracted email message (.eml or .msg)
/// including headers, body content, and attachments.
///
/// Alias for the bridge-generated `RustBridge.EmailExtractionResult` type.
public typealias EmailExtractionResult = RustBridge.EmailExtractionResult

/// Email attachment representation.
///
/// Contains metadata and optionally the content of an email attachment.
///
/// Alias for the bridge-generated `RustBridge.EmailAttachment` type.
public typealias EmailAttachment = RustBridge.EmailAttachment

/// OCR extraction result.
///
/// Result of performing OCR on an image or scanned document,
/// including recognized text and detected tables.
///
/// Alias for the bridge-generated `RustBridge.OcrExtractionResult` type.
public typealias OcrExtractionResult = RustBridge.OcrExtractionResult

/// A table detected through OCR.
///
/// Describes a table structure recognized during OCR processing.
public struct OcrTable: Codable, Sendable, Hashable {
    /// Table cells as a 2D vector (rows × columns)
    public let cells: [[String]]
    /// Markdown representation of the table
    public let markdown: String
    /// Page number where the table was found (1-indexed)
    public let pageNumber: UInt32
    /// Bounding box of the table in pixel coordinates (from OCR word positions).
    public let boundingBox: OcrTableBoundingBox?

    /// Creates a table; `boundingBox` defaults to `nil`.
    public init(
        cells: [[String]],
        markdown: String,
        pageNumber: UInt32,
        boundingBox: OcrTableBoundingBox? = nil
    ) {
        self.cells = cells
        self.markdown = markdown
        self.pageNumber = pageNumber
        self.boundingBox = boundingBox
    }

    // Multi-word properties map to snake_case JSON keys.
    private enum CodingKeys: String, CodingKey {
        case cells, markdown
        case pageNumber = "page_number"
        case boundingBox = "bounding_box"
    }
}

// MARK: - Internal FFI conversions for OcrTable
internal extension OcrTable {
    /// JSON-decodes the cell string reported by the bridge, copies the scalar
    /// fields, and converts the bounding box when the bridge returns one.
    ///
    /// - Throws: Any `JSONDecoder` error raised while decoding the cells, or
    ///   any error thrown while converting the bounding box.
    init(_ rb: RustBridge.OcrTableRef) throws {
        let cellsJSON = rb.cells().toString()
        let cellsData = cellsJSON.data(using: .utf8) ?? Data("null".utf8)
        self.cells = try JSONDecoder().decode([[String]].self, from: cellsData)
        self.markdown = rb.markdown().toString()
        self.pageNumber = rb.pageNumber()
        if let boxRef = rb.boundingBox() {
            self.boundingBox = try OcrTableBoundingBox(boxRef)
        } else {
            self.boundingBox = nil
        }
    }

    /// Converts this value to its bridge counterpart by JSON-encoding `self`
    /// and passing the text to `RustBridge.ocrTableFromJson`.
    ///
    /// - Throws: Any error raised by `JSONEncoder` or by the bridge function.
    func intoRust() throws -> RustBridge.OcrTable {
        let payload = try String(data: JSONEncoder().encode(self), encoding: .utf8) ?? "{}"
        return try RustBridge.ocrTableFromJson(payload)
    }
}

/// Pixel-coordinate bounding box of a table detected through OCR.
public struct OcrTableBoundingBox: Codable, Sendable, Hashable {
    /// Left x-coordinate (pixels)
    public let left: UInt32
    /// Top y-coordinate (pixels)
    public let top: UInt32
    /// Right x-coordinate (pixels)
    public let right: UInt32
    /// Bottom y-coordinate (pixels)
    public let bottom: UInt32

    /// Creates a box from its four pixel edges.
    public init(
        left: UInt32,
        top: UInt32,
        right: UInt32,
        bottom: UInt32
    ) {
        self.left = left
        self.top = top
        self.right = right
        self.bottom = bottom
    }
}

// MARK: - Internal FFI conversions for OcrTableBoundingBox
internal extension OcrTableBoundingBox {
    /// Copies the four edges out of the bridge reference.
    init(_ rb: RustBridge.OcrTableBoundingBoxRef) throws {
        let leftEdge = rb.left()
        let topEdge = rb.top()
        let rightEdge = rb.right()
        let bottomEdge = rb.bottom()
        self.left = leftEdge
        self.top = topEdge
        self.right = rightEdge
        self.bottom = bottomEdge
    }

    /// Builds the bridge value directly from the four stored edges.
    func intoRust() throws -> RustBridge.OcrTableBoundingBox {
        let bridged = RustBridge.OcrTableBoundingBox(self.left, self.top, self.right, self.bottom)
        return bridged
    }
}

/// Configuration for preprocessing images before OCR.
///
/// These settings govern how an image is prepared before OCR to improve text
/// recognition quality. Different preprocessing strategies suit different
/// document types.
public struct ImagePreprocessingConfig: Codable, Sendable, Hashable {
    /// Target DPI for the image (300 is standard, 600 for small text).
    public let targetDpi: Int32
    /// Auto-detect and correct image rotation.
    public let autoRotate: Bool
    /// Correct skew (tilted images).
    public let deskew: Bool
    /// Remove noise from the image.
    public let denoise: Bool
    /// Enhance contrast for better text visibility.
    public let contrastEnhance: Bool
    /// Binarization method: "otsu", "sauvola", "adaptive".
    public let binarizationMethod: String
    /// Invert colors (white text on black → black on white).
    public let invertColors: Bool

    /// Creates a configuration with every setting given explicitly.
    public init(
        targetDpi: Int32,
        autoRotate: Bool,
        deskew: Bool,
        denoise: Bool,
        contrastEnhance: Bool,
        binarizationMethod: String,
        invertColors: Bool
    ) {
        self.targetDpi = targetDpi
        self.autoRotate = autoRotate
        self.deskew = deskew
        self.denoise = denoise
        self.contrastEnhance = contrastEnhance
        self.binarizationMethod = binarizationMethod
        self.invertColors = invertColors
    }

    // Multi-word properties map to snake_case JSON keys.
    private enum CodingKeys: String, CodingKey {
        case targetDpi = "target_dpi"
        case autoRotate = "auto_rotate"
        case deskew, denoise
        case contrastEnhance = "contrast_enhance"
        case binarizationMethod = "binarization_method"
        case invertColors = "invert_colors"
    }

    /// Decodes a configuration, substituting a default for any absent or
    /// `null` key: `targetDpi` 300, `autoRotate` and `deskew` `true`,
    /// `binarizationMethod` "otsu", and `false` for the remaining flags.
    public init(from decoder: any Decoder) throws {
        let values = try decoder.container(keyedBy: CodingKeys.self)
        let dpi = try values.decodeIfPresent(Int32.self, forKey: .targetDpi)
        let rotate = try values.decodeIfPresent(Bool.self, forKey: .autoRotate)
        let straighten = try values.decodeIfPresent(Bool.self, forKey: .deskew)
        let cleanNoise = try values.decodeIfPresent(Bool.self, forKey: .denoise)
        let boostContrast = try values.decodeIfPresent(Bool.self, forKey: .contrastEnhance)
        let method = try values.decodeIfPresent(String.self, forKey: .binarizationMethod)
        let invert = try values.decodeIfPresent(Bool.self, forKey: .invertColors)
        self.targetDpi = dpi ?? 300
        self.autoRotate = rotate ?? true
        self.deskew = straighten ?? true
        self.denoise = cleanNoise ?? false
        self.contrastEnhance = boostContrast ?? false
        self.binarizationMethod = method ?? "otsu"
        self.invertColors = invert ?? false
    }
}

// MARK: - Internal FFI conversions for ImagePreprocessingConfig
internal extension ImagePreprocessingConfig {
    /// Creates a Swift value by reading every field from the bridge reference `ref`.
    init(_ ref: RustBridge.ImagePreprocessingConfigRef) throws {
        self.init(
            targetDpi: ref.targetDpi(),
            autoRotate: ref.autoRotate(),
            deskew: ref.deskew(),
            denoise: ref.denoise(),
            contrastEnhance: ref.contrastEnhance(),
            binarizationMethod: ref.binarizationMethod().toString(),
            invertColors: ref.invertColors()
        )
    }

    /// Builds the owned bridge value, passing the fields positionally.
    func intoRust() throws -> RustBridge.ImagePreprocessingConfig {
        RustBridge.ImagePreprocessingConfig(
            targetDpi,
            autoRotate,
            deskew,
            denoise,
            contrastEnhance,
            RustString(binarizationMethod),
            invertColors
        )
    }
}

/// Tesseract OCR configuration.
///
/// Provides fine-grained control over Tesseract OCR engine parameters.
/// Most users can use the defaults, but these settings allow optimization
/// for specific document types (invoices, handwriting, etc.).
public struct TesseractConfig: Codable, Sendable, Hashable {
    /// Language code (e.g., "eng", "deu", "fra")
    public let language: String
    /// Page Segmentation Mode (0-13).
    ///
    /// Common values:
    /// - 3: Fully automatic page segmentation (native default)
    /// - 6: Assume a single uniform block of text (WASM default — avoids layout-analysis hang)
    /// - 11: Sparse text with no particular order
    public let psm: Int32
    /// Output format ("text" or "markdown")
    public let outputFormat: String
    /// OCR Engine Mode (0-3).
    ///
    /// - 0: Legacy engine only
    /// - 1: Neural nets (LSTM) only (usually best)
    /// - 2: Legacy + LSTM
    /// - 3: Default (based on what's available)
    public let oem: Int32
    /// Minimum confidence threshold (0.0-100.0).
    ///
    /// Words with confidence below this threshold may be rejected or flagged.
    public let minConfidence: Double
    /// Image preprocessing configuration.
    ///
    /// Controls how images are preprocessed before OCR. Can significantly
    /// improve quality for scanned documents or low-quality images.
    public let preprocessing: ImagePreprocessingConfig?
    /// Enable automatic table detection and reconstruction
    public let enableTableDetection: Bool
    /// Minimum confidence threshold for table detection (0.0-1.0)
    public let tableMinConfidence: Double
    /// Column threshold for table detection (pixels)
    public let tableColumnThreshold: Int32
    /// Row threshold ratio for table detection (0.0-1.0)
    public let tableRowThresholdRatio: Double
    /// Enable OCR result caching
    public let useCache: Bool
    /// Use pre-adapted templates for character classification
    public let classifyUsePreAdaptedTemplates: Bool
    /// Enable N-gram language model
    public let languageModelNgramOn: Bool
    /// Don't reject good words during block-level processing
    public let tesseditDontBlkrejGoodWds: Bool
    /// Don't reject good words during row-level processing
    public let tesseditDontRowrejGoodWds: Bool
    /// Enable dictionary correction
    public let tesseditEnableDictCorrection: Bool
    /// Whitelist of allowed characters (empty = all allowed)
    public let tesseditCharWhitelist: String
    /// Blacklist of forbidden characters (empty = none forbidden)
    public let tesseditCharBlacklist: String
    /// Use primary language params model
    public let tesseditUsePrimaryParamsModel: Bool
    /// Variable-width space detection
    public let textordSpaceSizeIsVariable: Bool
    /// Use adaptive thresholding method
    public let thresholdingMethod: Bool

    /// Memberwise initializer; `preprocessing` is the only parameter with a default (`nil`).
    public init(
        language: String,
        psm: Int32,
        outputFormat: String,
        oem: Int32,
        minConfidence: Double,
        preprocessing: ImagePreprocessingConfig? = nil,
        enableTableDetection: Bool,
        tableMinConfidence: Double,
        tableColumnThreshold: Int32,
        tableRowThresholdRatio: Double,
        useCache: Bool,
        classifyUsePreAdaptedTemplates: Bool,
        languageModelNgramOn: Bool,
        tesseditDontBlkrejGoodWds: Bool,
        tesseditDontRowrejGoodWds: Bool,
        tesseditEnableDictCorrection: Bool,
        tesseditCharWhitelist: String,
        tesseditCharBlacklist: String,
        tesseditUsePrimaryParamsModel: Bool,
        textordSpaceSizeIsVariable: Bool,
        thresholdingMethod: Bool
    ) {
        self.language = language
        self.psm = psm
        self.outputFormat = outputFormat
        self.oem = oem
        self.minConfidence = minConfidence
        self.preprocessing = preprocessing
        self.enableTableDetection = enableTableDetection
        self.tableMinConfidence = tableMinConfidence
        self.tableColumnThreshold = tableColumnThreshold
        self.tableRowThresholdRatio = tableRowThresholdRatio
        self.useCache = useCache
        self.classifyUsePreAdaptedTemplates = classifyUsePreAdaptedTemplates
        self.languageModelNgramOn = languageModelNgramOn
        self.tesseditDontBlkrejGoodWds = tesseditDontBlkrejGoodWds
        self.tesseditDontRowrejGoodWds = tesseditDontRowrejGoodWds
        self.tesseditEnableDictCorrection = tesseditEnableDictCorrection
        self.tesseditCharWhitelist = tesseditCharWhitelist
        self.tesseditCharBlacklist = tesseditCharBlacklist
        self.tesseditUsePrimaryParamsModel = tesseditUsePrimaryParamsModel
        self.textordSpaceSizeIsVariable = textordSpaceSizeIsVariable
        self.thresholdingMethod = thresholdingMethod
    }

    /// JSON keys are snake_case. Cases without an explicit raw value use the case name as the key.
    private enum CodingKeys: String, CodingKey {
        case language
        case psm
        case outputFormat = "output_format"
        case oem
        case minConfidence = "min_confidence"
        case preprocessing
        case enableTableDetection = "enable_table_detection"
        case tableMinConfidence = "table_min_confidence"
        case tableColumnThreshold = "table_column_threshold"
        case tableRowThresholdRatio = "table_row_threshold_ratio"
        case useCache = "use_cache"
        case classifyUsePreAdaptedTemplates = "classify_use_pre_adapted_templates"
        case languageModelNgramOn = "language_model_ngram_on"
        case tesseditDontBlkrejGoodWds = "tessedit_dont_blkrej_good_wds"
        case tesseditDontRowrejGoodWds = "tessedit_dont_rowrej_good_wds"
        case tesseditEnableDictCorrection = "tessedit_enable_dict_correction"
        case tesseditCharWhitelist = "tessedit_char_whitelist"
        case tesseditCharBlacklist = "tessedit_char_blacklist"
        case tesseditUsePrimaryParamsModel = "tessedit_use_primary_params_model"
        case textordSpaceSizeIsVariable = "textord_space_size_is_variable"
        case thresholdingMethod = "thresholding_method"
    }

    /// Decodes a configuration in which every key is optional.
    ///
    /// When a key is missing or null, the literal default on the corresponding
    /// line is used (`preprocessing` stays `nil`).
    public init(from decoder: any Decoder) throws {
        let values = try decoder.container(keyedBy: CodingKeys.self)
        language = try values.decodeIfPresent(String.self, forKey: .language) ?? "eng"
        psm = try values.decodeIfPresent(Int32.self, forKey: .psm) ?? 3
        outputFormat = try values.decodeIfPresent(String.self, forKey: .outputFormat) ?? "markdown"
        oem = try values.decodeIfPresent(Int32.self, forKey: .oem) ?? 3
        minConfidence = try values.decodeIfPresent(Double.self, forKey: .minConfidence) ?? 0.0
        preprocessing = try values.decodeIfPresent(ImagePreprocessingConfig.self, forKey: .preprocessing)
        enableTableDetection = try values.decodeIfPresent(Bool.self, forKey: .enableTableDetection) ?? true
        tableMinConfidence = try values.decodeIfPresent(Double.self, forKey: .tableMinConfidence) ?? 0.0
        tableColumnThreshold = try values.decodeIfPresent(Int32.self, forKey: .tableColumnThreshold) ?? 50
        tableRowThresholdRatio = try values.decodeIfPresent(Double.self, forKey: .tableRowThresholdRatio) ?? 0.5
        useCache = try values.decodeIfPresent(Bool.self, forKey: .useCache) ?? true
        classifyUsePreAdaptedTemplates = try values.decodeIfPresent(Bool.self, forKey: .classifyUsePreAdaptedTemplates) ?? true
        languageModelNgramOn = try values.decodeIfPresent(Bool.self, forKey: .languageModelNgramOn) ?? false
        tesseditDontBlkrejGoodWds = try values.decodeIfPresent(Bool.self, forKey: .tesseditDontBlkrejGoodWds) ?? true
        tesseditDontRowrejGoodWds = try values.decodeIfPresent(Bool.self, forKey: .tesseditDontRowrejGoodWds) ?? true
        tesseditEnableDictCorrection = try values.decodeIfPresent(Bool.self, forKey: .tesseditEnableDictCorrection) ?? true
        tesseditCharWhitelist = try values.decodeIfPresent(String.self, forKey: .tesseditCharWhitelist) ?? ""
        tesseditCharBlacklist = try values.decodeIfPresent(String.self, forKey: .tesseditCharBlacklist) ?? ""
        tesseditUsePrimaryParamsModel = try values.decodeIfPresent(Bool.self, forKey: .tesseditUsePrimaryParamsModel) ?? true
        textordSpaceSizeIsVariable = try values.decodeIfPresent(Bool.self, forKey: .textordSpaceSizeIsVariable) ?? true
        thresholdingMethod = try values.decodeIfPresent(Bool.self, forKey: .thresholdingMethod) ?? false
    }
}

// MARK: - Internal FFI conversions for TesseractConfig
internal extension TesseractConfig {
    /// Creates a Swift value by reading every field from the bridge reference `ref`.
    ///
    /// - Throws: Whatever `ImagePreprocessingConfig.init(_:)` throws while converting
    ///   a present `preprocessing` value.
    init(_ ref: RustBridge.TesseractConfigRef) throws {
        try self.init(
            language: ref.language().toString(),
            psm: ref.psm(),
            outputFormat: ref.outputFormat().toString(),
            oem: ref.oem(),
            minConfidence: ref.minConfidence(),
            preprocessing: ref.preprocessing().map { try ImagePreprocessingConfig($0) },
            enableTableDetection: ref.enableTableDetection(),
            tableMinConfidence: ref.tableMinConfidence(),
            tableColumnThreshold: ref.tableColumnThreshold(),
            tableRowThresholdRatio: ref.tableRowThresholdRatio(),
            useCache: ref.useCache(),
            classifyUsePreAdaptedTemplates: ref.classifyUsePreAdaptedTemplates(),
            languageModelNgramOn: ref.languageModelNgramOn(),
            tesseditDontBlkrejGoodWds: ref.tesseditDontBlkrejGoodWds(),
            tesseditDontRowrejGoodWds: ref.tesseditDontRowrejGoodWds(),
            tesseditEnableDictCorrection: ref.tesseditEnableDictCorrection(),
            tesseditCharWhitelist: ref.tesseditCharWhitelist().toString(),
            tesseditCharBlacklist: ref.tesseditCharBlacklist().toString(),
            tesseditUsePrimaryParamsModel: ref.tesseditUsePrimaryParamsModel(),
            textordSpaceSizeIsVariable: ref.textordSpaceSizeIsVariable(),
            thresholdingMethod: ref.thresholdingMethod()
        )
    }

    /// Converts to the bridge type by JSON-encoding `self` and handing the text to
    /// `RustBridge.tesseractConfigFromJson`.
    ///
    /// - Throws: Encoding errors from `JSONEncoder`, or errors from the bridge call.
    func intoRust() throws -> RustBridge.TesseractConfig {
        let encoded = try JSONEncoder().encode(self)
        // If the encoded bytes cannot be read as UTF-8, an empty JSON object is sent instead.
        guard let payload = String(data: encoded, encoding: .utf8) else {
            return try RustBridge.tesseractConfigFromJson("{}")
        }
        return try RustBridge.tesseractConfigFromJson(payload)
    }
}

/// Image preprocessing metadata.
///
/// Tracks the transformations applied to an image during OCR preprocessing,
/// including DPI normalization, resizing, and resampling.
public struct ImagePreprocessingMetadata: Codable, Sendable, Hashable {
    /// Original image dimensions (width, height) in pixels
    public let originalDimensions: [UInt]
    /// Original image DPI (horizontal, vertical)
    public let originalDpi: [Double]
    /// Target DPI from configuration
    public let targetDpi: Int32
    /// Scaling factor applied to the image
    public let scaleFactor: Double
    /// Whether DPI was auto-adjusted based on content
    public let autoAdjusted: Bool
    /// Final DPI after processing
    public let finalDpi: Int32
    /// New dimensions after resizing (if resized)
    public let newDimensions: [UInt]?
    /// Resampling algorithm used ("LANCZOS3", "CATMULLROM", etc.)
    public let resampleMethod: String
    /// Whether dimensions were clamped to max_image_dimension
    public let dimensionClamped: Bool
    /// Calculated optimal DPI (if auto_adjust_dpi enabled)
    public let calculatedDpi: Int32?
    /// Whether resize was skipped (dimensions already optimal)
    public let skippedResize: Bool
    /// Error message if resize failed
    public let resizeError: String?

    /// Memberwise initializer; the three optional fields default to `nil`.
    public init(
        originalDimensions: [UInt],
        originalDpi: [Double],
        targetDpi: Int32,
        scaleFactor: Double,
        autoAdjusted: Bool,
        finalDpi: Int32,
        newDimensions: [UInt]? = nil,
        resampleMethod: String,
        dimensionClamped: Bool,
        calculatedDpi: Int32? = nil,
        skippedResize: Bool,
        resizeError: String? = nil
    ) {
        self.originalDimensions = originalDimensions
        self.originalDpi = originalDpi
        self.targetDpi = targetDpi
        self.scaleFactor = scaleFactor
        self.autoAdjusted = autoAdjusted
        self.finalDpi = finalDpi
        self.newDimensions = newDimensions
        self.resampleMethod = resampleMethod
        self.dimensionClamped = dimensionClamped
        self.calculatedDpi = calculatedDpi
        self.skippedResize = skippedResize
        self.resizeError = resizeError
    }

    /// Maps each property to its snake_case JSON key.
    private enum CodingKeys: String, CodingKey {
        case originalDimensions = "original_dimensions"
        case originalDpi = "original_dpi"
        case targetDpi = "target_dpi"
        case scaleFactor = "scale_factor"
        case autoAdjusted = "auto_adjusted"
        case finalDpi = "final_dpi"
        case newDimensions = "new_dimensions"
        case resampleMethod = "resample_method"
        case dimensionClamped = "dimension_clamped"
        case calculatedDpi = "calculated_dpi"
        case skippedResize = "skipped_resize"
        case resizeError = "resize_error"
    }
}

// MARK: - Internal FFI conversions for ImagePreprocessingMetadata
internal extension ImagePreprocessingMetadata {
    /// Creates a Swift value by reading every field from the bridge reference `ref`.
    init(_ ref: RustBridge.ImagePreprocessingMetadataRef) throws {
        self.init(
            originalDimensions: Array(ref.originalDimensions()),
            originalDpi: Array(ref.originalDpi()),
            targetDpi: ref.targetDpi(),
            scaleFactor: ref.scaleFactor(),
            autoAdjusted: ref.autoAdjusted(),
            finalDpi: ref.finalDpi(),
            newDimensions: ref.newDimensions().map { Array($0) },
            resampleMethod: ref.resampleMethod().toString(),
            dimensionClamped: ref.dimensionClamped(),
            calculatedDpi: ref.calculatedDpi(),
            skippedResize: ref.skippedResize(),
            resizeError: ref.resizeError()?.toString()
        )
    }

    /// Converts to the bridge type by JSON-encoding `self` and handing the text to
    /// `RustBridge.imagePreprocessingMetadataFromJson`.
    ///
    /// - Throws: Encoding errors from `JSONEncoder`, or errors from the bridge call.
    func intoRust() throws -> RustBridge.ImagePreprocessingMetadata {
        let encoded = try JSONEncoder().encode(self)
        // If the encoded bytes cannot be read as UTF-8, an empty JSON object is sent instead.
        guard let payload = String(data: encoded, encoding: .utf8) else {
            return try RustBridge.imagePreprocessingMetadataFromJson("{}")
        }
        return try RustBridge.imagePreprocessingMetadataFromJson(payload)
    }
}

/// Extraction result metadata.
///
/// Contains common fields applicable to all formats, format-specific metadata
/// via a discriminated union, and additional custom fields from postprocessors.
public typealias Metadata = RustBridge.Metadata

/// Excel/spreadsheet format metadata.
///
/// Identifies the document as a spreadsheet source via the `FormatMetadata::Excel`
/// discriminant. Sheet count and sheet names are stored inside this struct.
public struct ExcelMetadata: Codable, Sendable, Hashable {
    /// Number of sheets in the workbook.
    public let sheetCount: UInt32?
    /// Names of all sheets in the workbook.
    public let sheetNames: [String]?

    /// Memberwise initializer; both fields default to `nil`.
    public init(sheetCount: UInt32? = nil, sheetNames: [String]? = nil) {
        self.sheetCount = sheetCount
        self.sheetNames = sheetNames
    }

    /// Maps each property to its snake_case JSON key.
    private enum CodingKeys: String, CodingKey {
        case sheetCount = "sheet_count"
        case sheetNames = "sheet_names"
    }

    /// Decodes both fields as optional; a missing or null key yields `nil`.
    public init(from decoder: any Decoder) throws {
        let values = try decoder.container(keyedBy: CodingKeys.self)
        sheetCount = try values.decodeIfPresent(UInt32.self, forKey: .sheetCount)
        sheetNames = try values.decodeIfPresent([String].self, forKey: .sheetNames)
    }
}

// MARK: - Internal FFI conversions for ExcelMetadata
internal extension ExcelMetadata {
    /// Creates a Swift value by reading both fields from the bridge reference `ref`.
    init(_ ref: RustBridge.ExcelMetadataRef) throws {
        self.init(
            sheetCount: ref.sheetCount(),
            sheetNames: ref.sheetNames()?.map { $0.as_str().toString() }
        )
    }

    /// Converts to the bridge type by JSON-encoding `self` and handing the text to
    /// `RustBridge.excelMetadataFromJson`.
    ///
    /// - Throws: Encoding errors from `JSONEncoder`, or errors from the bridge call.
    func intoRust() throws -> RustBridge.ExcelMetadata {
        let encoded = try JSONEncoder().encode(self)
        // If the encoded bytes cannot be read as UTF-8, an empty JSON object is sent instead.
        guard let payload = String(data: encoded, encoding: .utf8) else {
            return try RustBridge.excelMetadataFromJson("{}")
        }
        return try RustBridge.excelMetadataFromJson(payload)
    }
}

/// Email metadata extracted from .eml and .msg files.
///
/// Includes sender/recipient information, message ID, and attachment list.
public struct EmailMetadata: Codable, Sendable, Hashable {
    /// Sender's email address
    public let fromEmail: String?
    /// Sender's display name
    public let fromName: String?
    /// Primary recipients
    public let toEmails: [String]
    /// CC recipients
    public let ccEmails: [String]
    /// BCC recipients
    public let bccEmails: [String]
    /// Message-ID header value
    public let messageId: String?
    /// List of attachment filenames
    public let attachments: [String]

    /// Memberwise initializer; the optional string fields default to `nil`.
    public init(
        fromEmail: String? = nil,
        fromName: String? = nil,
        toEmails: [String],
        ccEmails: [String],
        bccEmails: [String],
        messageId: String? = nil,
        attachments: [String]
    ) {
        self.fromEmail = fromEmail
        self.fromName = fromName
        self.toEmails = toEmails
        self.ccEmails = ccEmails
        self.bccEmails = bccEmails
        self.messageId = messageId
        self.attachments = attachments
    }

    /// JSON keys are snake_case. Cases without an explicit raw value use the case name as the key.
    private enum CodingKeys: String, CodingKey {
        case fromEmail = "from_email"
        case fromName = "from_name"
        case toEmails = "to_emails"
        case ccEmails = "cc_emails"
        case bccEmails = "bcc_emails"
        case messageId = "message_id"
        case attachments
    }

    /// Decodes with every key optional: missing or null lists become `[]`,
    /// missing or null strings become `nil`.
    public init(from decoder: any Decoder) throws {
        let values = try decoder.container(keyedBy: CodingKeys.self)
        fromEmail = try values.decodeIfPresent(String.self, forKey: .fromEmail)
        fromName = try values.decodeIfPresent(String.self, forKey: .fromName)
        toEmails = try values.decodeIfPresent([String].self, forKey: .toEmails) ?? []
        ccEmails = try values.decodeIfPresent([String].self, forKey: .ccEmails) ?? []
        bccEmails = try values.decodeIfPresent([String].self, forKey: .bccEmails) ?? []
        messageId = try values.decodeIfPresent(String.self, forKey: .messageId)
        attachments = try values.decodeIfPresent([String].self, forKey: .attachments) ?? []
    }
}

// MARK: - Internal FFI conversions for EmailMetadata
internal extension EmailMetadata {
    /// Creates a Swift value by reading every field from the bridge reference `ref`.
    init(_ ref: RustBridge.EmailMetadataRef) throws {
        self.init(
            fromEmail: ref.fromEmail()?.toString(),
            fromName: ref.fromName()?.toString(),
            toEmails: ref.toEmails().map { $0.as_str().toString() },
            ccEmails: ref.ccEmails().map { $0.as_str().toString() },
            bccEmails: ref.bccEmails().map { $0.as_str().toString() },
            messageId: ref.messageId()?.toString(),
            attachments: ref.attachments().map { $0.as_str().toString() }
        )
    }

    /// Builds the owned bridge value, passing the fields positionally.
    func intoRust() throws -> RustBridge.EmailMetadata {
        // Copies a Swift string array into a new bridge vector, element by element.
        func bridgeVector(_ strings: [String]) -> RustVec<RustString> {
            let vector = RustVec<RustString>()
            for item in strings {
                vector.push(value: RustString(item))
            }
            return vector
        }

        // All four vectors are built before the optional strings are converted.
        let recipients = bridgeVector(toEmails)
        let copied = bridgeVector(ccEmails)
        let blindCopied = bridgeVector(bccEmails)
        let attached = bridgeVector(attachments)
        return RustBridge.EmailMetadata(
            fromEmail.map(RustString.init),
            fromName.map(RustString.init),
            recipients,
            copied,
            blindCopied,
            messageId.map(RustString.init),
            attached
        )
    }
}

/// Archive (ZIP/TAR/7Z) metadata.
///
/// Extracted from compressed archive files containing file lists and size information.
public struct ArchiveMetadata: Codable, Sendable, Hashable {
    /// Archive format ("ZIP", "TAR", "7Z", etc.)
    public let format: String
    /// Total number of files in the archive
    public let fileCount: UInt32
    /// List of file paths within the archive
    public let fileList: [String]
    /// Total uncompressed size in bytes
    public let totalSize: UInt64
    /// Compressed size in bytes (if available)
    public let compressedSize: UInt64?

    /// Memberwise initializer; `compressedSize` defaults to `nil`.
    public init(
        format: String,
        fileCount: UInt32,
        fileList: [String],
        totalSize: UInt64,
        compressedSize: UInt64? = nil
    ) {
        self.format = format
        self.fileCount = fileCount
        self.fileList = fileList
        self.totalSize = totalSize
        self.compressedSize = compressedSize
    }

    /// JSON keys are snake_case. Cases without an explicit raw value use the case name as the key.
    private enum CodingKeys: String, CodingKey {
        case format
        case fileCount = "file_count"
        case fileList = "file_list"
        case totalSize = "total_size"
        case compressedSize = "compressed_size"
    }

    /// Decodes with every key optional: a missing or null key falls back to
    /// `""`, `0`, `[]`, `0`, and `nil` respectively.
    public init(from decoder: any Decoder) throws {
        let values = try decoder.container(keyedBy: CodingKeys.self)
        format = try values.decodeIfPresent(String.self, forKey: .format) ?? ""
        fileCount = try values.decodeIfPresent(UInt32.self, forKey: .fileCount) ?? 0
        fileList = try values.decodeIfPresent([String].self, forKey: .fileList) ?? []
        totalSize = try values.decodeIfPresent(UInt64.self, forKey: .totalSize) ?? 0
        compressedSize = try values.decodeIfPresent(UInt64.self, forKey: .compressedSize)
    }
}

// MARK: - Internal FFI conversions for ArchiveMetadata
internal extension ArchiveMetadata {
    /// Creates a Swift value by reading every field from the bridge reference `ref`.
    init(_ ref: RustBridge.ArchiveMetadataRef) throws {
        self.init(
            format: ref.format().toString(),
            fileCount: ref.fileCount(),
            fileList: ref.fileList().map { $0.as_str().toString() },
            totalSize: ref.totalSize(),
            compressedSize: ref.compressedSize()
        )
    }

    /// Builds the owned bridge value, passing the fields positionally.
    func intoRust() throws -> RustBridge.ArchiveMetadata {
        // Copy the file paths into a bridge vector before constructing the value.
        let paths = RustVec<RustString>()
        for path in fileList {
            paths.push(value: RustString(path))
        }
        return RustBridge.ArchiveMetadata(
            RustString(format),
            fileCount,
            paths,
            totalSize,
            compressedSize
        )
    }
}

/// Image metadata extracted from image files.
///
/// Includes dimensions, format, and EXIF data.
public typealias ImageMetadata = RustBridge.ImageMetadata

/// XML metadata extracted during XML parsing.
///
/// Provides statistics about XML document structure.
public struct XmlMetadata: Codable, Sendable, Hashable {
    /// Total number of XML elements processed
    public let elementCount: UInt32
    /// List of unique element tag names (sorted)
    public let uniqueElements: [String]

    /// Memberwise initializer.
    public init(elementCount: UInt32, uniqueElements: [String]) {
        self.elementCount = elementCount
        self.uniqueElements = uniqueElements
    }

    /// Maps each property to its snake_case JSON key.
    private enum CodingKeys: String, CodingKey {
        case elementCount = "element_count"
        case uniqueElements = "unique_elements"
    }

    /// Decodes with both keys optional: a missing or null key falls back to `0` and `[]`.
    public init(from decoder: any Decoder) throws {
        let values = try decoder.container(keyedBy: CodingKeys.self)
        elementCount = try values.decodeIfPresent(UInt32.self, forKey: .elementCount) ?? 0
        uniqueElements = try values.decodeIfPresent([String].self, forKey: .uniqueElements) ?? []
    }
}

// MARK: - Internal FFI conversions for XmlMetadata
internal extension XmlMetadata {
    /// Creates a Swift value by reading both fields from the bridge reference `ref`.
    init(_ ref: RustBridge.XmlMetadataRef) throws {
        self.init(
            elementCount: ref.elementCount(),
            uniqueElements: ref.uniqueElements().map { $0.as_str().toString() }
        )
    }

    /// Builds the owned bridge value, passing the fields positionally.
    func intoRust() throws -> RustBridge.XmlMetadata {
        // Copy the tag names into a bridge vector before constructing the value.
        let tags = RustVec<RustString>()
        for tag in uniqueElements {
            tags.push(value: RustString(tag))
        }
        return RustBridge.XmlMetadata(elementCount, tags)
    }
}

/// Text/Markdown metadata.
///
/// Extracted from plain text and Markdown files. Includes word counts and,
/// for Markdown, structural elements like headers and links.
public struct TextMetadata: Codable, Sendable, Hashable {
    /// Number of lines in the document
    public let lineCount: UInt32
    /// Number of words
    public let wordCount: UInt32
    /// Number of characters
    public let characterCount: UInt32
    /// Markdown headers (headings text only, for Markdown files)
    public let headers: [String]?
    /// Markdown links as (text, url) tuples (for Markdown files)
    public let links: [[String]]?
    /// Code blocks as (language, code) tuples (for Markdown files)
    public let codeBlocks: [[String]]?

    /// Memberwise initializer; the three optional lists default to `nil`.
    public init(
        lineCount: UInt32,
        wordCount: UInt32,
        characterCount: UInt32,
        headers: [String]? = nil,
        links: [[String]]? = nil,
        codeBlocks: [[String]]? = nil
    ) {
        self.lineCount = lineCount
        self.wordCount = wordCount
        self.characterCount = characterCount
        self.headers = headers
        self.links = links
        self.codeBlocks = codeBlocks
    }

    /// JSON keys are snake_case. Cases without an explicit raw value use the case name as the key.
    private enum CodingKeys: String, CodingKey {
        case lineCount = "line_count"
        case wordCount = "word_count"
        case characterCount = "character_count"
        case headers
        case links
        case codeBlocks = "code_blocks"
    }

    /// Decodes with every key optional: missing or null counts become `0`,
    /// missing or null lists become `nil`.
    public init(from decoder: any Decoder) throws {
        let values = try decoder.container(keyedBy: CodingKeys.self)
        lineCount = try values.decodeIfPresent(UInt32.self, forKey: .lineCount) ?? 0
        wordCount = try values.decodeIfPresent(UInt32.self, forKey: .wordCount) ?? 0
        characterCount = try values.decodeIfPresent(UInt32.self, forKey: .characterCount) ?? 0
        headers = try values.decodeIfPresent([String].self, forKey: .headers)
        links = try values.decodeIfPresent([[String]].self, forKey: .links)
        codeBlocks = try values.decodeIfPresent([[String]].self, forKey: .codeBlocks)
    }
}

// MARK: - Internal FFI conversions for TextMetadata
internal extension TextMetadata {
    /// Creates a Swift value from the bridge reference `ref`.
    ///
    /// Only the counts and `headers` are read from `ref`; `links` and `codeBlocks`
    /// are always `nil` on values built through this initializer.
    init(_ ref: RustBridge.TextMetadataRef) throws {
        self.init(
            lineCount: ref.lineCount(),
            wordCount: ref.wordCount(),
            characterCount: ref.characterCount(),
            headers: ref.headers()?.map { $0.as_str().toString() },
            links: nil,
            codeBlocks: nil
        )
    }

    /// Converts to the bridge type by JSON-encoding `self` and handing the text to
    /// `RustBridge.textMetadataFromJson`.
    ///
    /// - Throws: Encoding errors from `JSONEncoder`, or errors from the bridge call.
    func intoRust() throws -> RustBridge.TextMetadata {
        let encoded = try JSONEncoder().encode(self)
        // If the encoded bytes cannot be read as UTF-8, an empty JSON object is sent instead.
        guard let payload = String(data: encoded, encoding: .utf8) else {
            return try RustBridge.textMetadataFromJson("{}")
        }
        return try RustBridge.textMetadataFromJson(payload)
    }
}

/// Header/heading element metadata.
public struct HeaderMetadata: Codable, Sendable, Hashable {
    /// Header level: 1 (h1) through 6 (h6)
    public let level: UInt8
    /// Normalized text content of the header
    public let text: String
    /// HTML id attribute if present
    public let id: String?
    /// Document tree depth at the header element
    public let depth: UInt32
    /// Byte offset in original HTML document
    public let htmlOffset: UInt32

    /// Memberwise initializer; `id` defaults to `nil`.
    public init(
        level: UInt8,
        text: String,
        id: String? = nil,
        depth: UInt32,
        htmlOffset: UInt32
    ) {
        self.level = level
        self.text = text
        self.id = id
        self.depth = depth
        self.htmlOffset = htmlOffset
    }

    /// Only `htmlOffset` is renamed (to `html_offset`); the other cases use the case name as the key.
    private enum CodingKeys: String, CodingKey {
        case level
        case text
        case id
        case depth
        case htmlOffset = "html_offset"
    }
}

// MARK: - Internal FFI conversions for HeaderMetadata
internal extension HeaderMetadata {
    /// Creates a Swift value by reading every field from the bridge reference `ref`.
    init(_ ref: RustBridge.HeaderMetadataRef) throws {
        self.init(
            level: ref.level(),
            text: ref.text().toString(),
            id: ref.id()?.toString(),
            depth: ref.depth(),
            htmlOffset: ref.htmlOffset()
        )
    }

    /// Converts to the bridge type by JSON-encoding `self` and handing the text to
    /// `RustBridge.headerMetadataFromJson`.
    ///
    /// - Throws: Encoding errors from `JSONEncoder`, or errors from the bridge call.
    func intoRust() throws -> RustBridge.HeaderMetadata {
        let encoded = try JSONEncoder().encode(self)
        // If the encoded bytes cannot be read as UTF-8, an empty JSON object is sent instead.
        guard let payload = String(data: encoded, encoding: .utf8) else {
            return try RustBridge.headerMetadataFromJson("{}")
        }
        return try RustBridge.headerMetadataFromJson(payload)
    }
}

/// Link element metadata.
public struct LinkMetadata: Codable, Sendable, Hashable {
    /// The href URL value
    public let href: String
    /// Link text content (normalized)
    public let text: String
    /// Optional title attribute
    public let title: String?
    /// Link type classification
    public let linkType: LinkType
    /// Rel attribute values
    public let rel: [String]
    /// Additional attributes as key-value pairs
    public let attributes: [[String]]

    /// Memberwise initializer; `title` defaults to `nil`.
    public init(
        href: String,
        text: String,
        title: String? = nil,
        linkType: LinkType,
        rel: [String],
        attributes: [[String]]
    ) {
        self.href = href
        self.text = text
        self.title = title
        self.linkType = linkType
        self.rel = rel
        self.attributes = attributes
    }

    /// Only `linkType` is renamed (to `link_type`); the other cases use the case name as the key.
    private enum CodingKeys: String, CodingKey {
        case href
        case text
        case title
        case linkType = "link_type"
        case rel
        case attributes
    }
}

// MARK: - Internal FFI conversions for LinkMetadata
internal extension LinkMetadata {
    /// Creates a Swift value from the bridge reference `rb`.
    ///
    /// `attributes` is not read from `rb` in this initializer and is set to an
    /// empty list. The previous implementation decoded the JSON literal `null`
    /// as `[[String]]`, which always throws, so this initializer could never succeed.
    ///
    /// - Throws: `DecodingError.dataCorrupted` when the link type string reported
    ///   by `rb` does not match any `LinkType` raw value.
    init(_ rb: RustBridge.LinkMetadataRef) throws {
        self.href = rb.href().toString()
        self.text = rb.text().toString()
        self.title = rb.title()?.toString()

        // Surface an unrecognized value as an error instead of aborting the process;
        // callers already have to handle `throws` here.
        let rawLinkType = rb.linkType().toString()
        guard let parsedLinkType = LinkType(rawValue: rawLinkType) else {
            throw DecodingError.dataCorrupted(
                DecodingError.Context(codingPath: [], debugDescription: "Unknown LinkType: \(rawLinkType)")
            )
        }
        self.linkType = parsedLinkType

        self.rel = rb.rel().map { $0.as_str().toString() }
        self.attributes = []
    }

    /// Converts to the bridge type by JSON-encoding `self` and handing the text to
    /// `RustBridge.linkMetadataFromJson`.
    ///
    /// - Throws: Encoding errors from `JSONEncoder`, or errors from the bridge call.
    func intoRust() throws -> RustBridge.LinkMetadata {
        let data = try JSONEncoder().encode(self)
        // If the encoded bytes cannot be read as UTF-8, an empty JSON object is sent instead.
        let json = String(data: data, encoding: .utf8) ?? "{}"
        return try RustBridge.linkMetadataFromJson(json)
    }
}

/// Image element metadata.
public struct ImageMetadataType: Codable, Sendable, Hashable {
    /// Image source (URL, data URI, or SVG content)
    public let src: String
    /// Alternative text from alt attribute
    public let alt: String?
    /// Title attribute
    public let title: String?
    /// Image dimensions as (width, height) if available
    public let dimensions: [UInt32]?
    /// Image type classification
    public let imageType: ImageType
    /// Additional attributes as key-value pairs
    public let attributes: [[String]]

    /// Memberwise initializer; `alt`, `title` and `dimensions` default to `nil`.
    public init(
        src: String,
        alt: String? = nil,
        title: String? = nil,
        dimensions: [UInt32]? = nil,
        imageType: ImageType,
        attributes: [[String]]
    ) {
        self.src = src
        self.alt = alt
        self.title = title
        self.dimensions = dimensions
        self.imageType = imageType
        self.attributes = attributes
    }

    /// Only `imageType` is renamed (to `image_type`); the other cases use the case name as the key.
    private enum CodingKeys: String, CodingKey {
        case src
        case alt
        case title
        case dimensions
        case imageType = "image_type"
        case attributes
    }
}

// MARK: - Internal FFI conversions for ImageMetadataType
internal extension ImageMetadataType {
    /// Creates a Swift value from the bridge reference `rb`.
    ///
    /// `attributes` is not read from `rb` in this initializer and is set to an
    /// empty list. The previous implementation decoded the JSON literal `null`
    /// as `[[String]]`, which always throws, so this initializer could never succeed.
    ///
    /// - Throws: `DecodingError.dataCorrupted` when the image type string reported
    ///   by `rb` does not match any `ImageType` raw value.
    init(_ rb: RustBridge.ImageMetadataTypeRef) throws {
        self.src = rb.src().toString()
        self.alt = rb.alt()?.toString()
        self.title = rb.title()?.toString()
        self.dimensions = rb.dimensions().map { Array($0) }

        // Surface an unrecognized value as an error instead of aborting the process;
        // callers already have to handle `throws` here.
        let rawImageType = rb.imageType().toString()
        guard let parsedImageType = ImageType(rawValue: rawImageType) else {
            throw DecodingError.dataCorrupted(
                DecodingError.Context(codingPath: [], debugDescription: "Unknown ImageType: \(rawImageType)")
            )
        }
        self.imageType = parsedImageType

        self.attributes = []
    }

    /// Converts to the bridge type by JSON-encoding `self` and handing the text to
    /// `RustBridge.imageMetadataTypeFromJson`.
    ///
    /// - Throws: Encoding errors from `JSONEncoder`, or errors from the bridge call.
    func intoRust() throws -> RustBridge.ImageMetadataType {
        let data = try JSONEncoder().encode(self)
        // If the encoded bytes cannot be read as UTF-8, an empty JSON object is sent instead.
        let json = String(data: data, encoding: .utf8) ?? "{}"
        return try RustBridge.imageMetadataTypeFromJson(json)
    }
}

/// A block of structured data (Schema.org, microdata, RDFa).
public struct StructuredData: Codable, Sendable, Hashable {
    /// Kind of structured data.
    public let dataType: StructuredDataType
    /// Raw JSON string representation.
    public let rawJson: String
    /// Schema type if detectable (e.g., "Article", "Event", "Product").
    public let schemaType: String?

    /// Creates a value from its fields; `schemaType` defaults to `nil`.
    public init(
        dataType: StructuredDataType,
        rawJson: String,
        schemaType: String? = nil
    ) {
        self.dataType = dataType
        self.rawJson = rawJson
        self.schemaType = schemaType
    }

    /// Snake-case JSON keys for each property.
    private enum CodingKeys: String, CodingKey {
        case dataType = "data_type"
        case rawJson = "raw_json"
        case schemaType = "schema_type"
    }
}

// MARK: - Internal FFI conversions for StructuredData
internal extension StructuredData {
    /// Builds a Swift value from the bridged reference.
    ///
    /// - Parameter rb: Bridged reference whose accessors are read.
    /// - Throws: `DecodingError.dataCorrupted` when the bridged data type string
    ///   does not match any `StructuredDataType` raw value.
    init(_ rb: RustBridge.StructuredDataRef) throws {
        // An unrecognised enum string is reported as a thrown error rather than
        // crashing the process, since this initializer is already `throws`.
        let rawDataType = rb.dataType().toString()
        guard let resolvedDataType = StructuredDataType(rawValue: rawDataType) else {
            throw DecodingError.dataCorrupted(
                DecodingError.Context(codingPath: [], debugDescription: "Unknown StructuredDataType: \(rawDataType)")
            )
        }
        self.dataType = resolvedDataType
        self.rawJson = rb.rawJson().toString()
        self.schemaType = rb.schemaType()?.toString()
    }

    /// Encodes `self` as JSON and hands the text to `RustBridge.structuredDataFromJson`.
    ///
    /// - Throws: Any error raised by `JSONEncoder` or by the bridge call.
    func intoRust() throws -> RustBridge.StructuredData {
        let data = try JSONEncoder().encode(self)
        let json = String(data: data, encoding: .utf8) ?? "{}"
        return try RustBridge.structuredDataFromJson(json)
    }
}

/// HTML metadata extracted from HTML documents.
///
/// Includes document-level metadata, Open Graph data, Twitter Card metadata,
/// and extracted structural elements (headers, links, images, structured data).
///
/// This name is a direct alias of the `RustBridge` type; no separate Swift
/// value type is declared at this point.
public typealias HtmlMetadata = RustBridge.HtmlMetadata

/// Metadata about an OCR run.
///
/// Records the OCR processing configuration together with its results.
public struct OcrMetadata: Codable, Sendable, Hashable {
    /// OCR language code(s) used.
    public let language: String
    /// Tesseract Page Segmentation Mode (PSM).
    public let psm: Int32
    /// Output format (e.g., "text", "hocr").
    public let outputFormat: String
    /// Number of tables detected.
    public let tableCount: UInt32
    /// Optional value stored under the `table_rows` key.
    public let tableRows: UInt32?
    /// Optional value stored under the `table_cols` key.
    public let tableCols: UInt32?

    /// Creates a value from its fields; `tableRows` and `tableCols` default to `nil`.
    public init(
        language: String,
        psm: Int32,
        outputFormat: String,
        tableCount: UInt32,
        tableRows: UInt32? = nil,
        tableCols: UInt32? = nil
    ) {
        self.language = language
        self.psm = psm
        self.outputFormat = outputFormat
        self.tableCount = tableCount
        self.tableRows = tableRows
        self.tableCols = tableCols
    }

    /// JSON keys; multi-word properties use snake case.
    private enum CodingKeys: String, CodingKey {
        case language, psm
        case outputFormat = "output_format"
        case tableCount = "table_count"
        case tableRows = "table_rows"
        case tableCols = "table_cols"
    }

    /// Lenient decoder: absent or null keys fall back to `""`, `0`, or `nil`.
    public init(from decoder: any Decoder) throws {
        let values = try decoder.container(keyedBy: CodingKeys.self)
        language = try values.decodeIfPresent(String.self, forKey: .language) ?? ""
        psm = try values.decodeIfPresent(Int32.self, forKey: .psm) ?? 0
        outputFormat = try values.decodeIfPresent(String.self, forKey: .outputFormat) ?? ""
        tableCount = try values.decodeIfPresent(UInt32.self, forKey: .tableCount) ?? 0
        tableRows = try values.decodeIfPresent(UInt32.self, forKey: .tableRows)
        tableCols = try values.decodeIfPresent(UInt32.self, forKey: .tableCols)
    }
}

// MARK: - Internal FFI conversions for OcrMetadata
internal extension OcrMetadata {
    /// Copies each field out of the bridged reference into a Swift value.
    init(_ rb: RustBridge.OcrMetadataRef) throws {
        language = rb.language().toString()
        psm = rb.psm()
        outputFormat = rb.outputFormat().toString()
        tableCount = rb.tableCount()
        tableRows = rb.tableRows()
        tableCols = rb.tableCols()
    }

    /// Constructs the bridged counterpart directly from the stored fields.
    func intoRust() throws -> RustBridge.OcrMetadata {
        let rustLanguage = RustString(language)
        let rustOutputFormat = RustString(outputFormat)
        return RustBridge.OcrMetadata(rustLanguage, psm, rustOutputFormat, tableCount, tableRows, tableCols)
    }
}

/// Metadata describing an error (used for batch operations).
public struct ErrorMetadata: Codable, Sendable, Hashable {
    /// Error type string, stored under the `error_type` key.
    public let errorType: String
    /// Error message text.
    public let message: String

    /// Creates a value from its two fields.
    public init(
        errorType: String,
        message: String
    ) {
        self.errorType = errorType
        self.message = message
    }

    /// JSON keys; only `errorType` is renamed.
    private enum CodingKeys: String, CodingKey {
        case errorType = "error_type"
        case message
    }
}

// MARK: - Internal FFI conversions for ErrorMetadata
internal extension ErrorMetadata {
    /// Copies both strings out of the bridged reference.
    init(_ rb: RustBridge.ErrorMetadataRef) throws {
        errorType = rb.errorType().toString()
        message = rb.message().toString()
    }

    /// Serialises `self` to JSON text and passes it to `RustBridge.errorMetadataFromJson`.
    func intoRust() throws -> RustBridge.ErrorMetadata {
        let encoded = try JSONEncoder().encode(self)
        // Falls back to an empty JSON object if the bytes cannot be read as UTF-8.
        let payload = String(data: encoded, encoding: .utf8) ?? "{}"
        return try RustBridge.errorMetadataFromJson(payload)
    }
}

/// Metadata for a PowerPoint presentation.
///
/// Extracted from PPTX files; holds slide counts and presentation details.
public struct PptxMetadata: Codable, Sendable, Hashable {
    /// Total number of slides in the presentation.
    public let slideCount: UInt32
    /// Names of slides (if available).
    public let slideNames: [String]
    /// Number of embedded images.
    public let imageCount: UInt32?
    /// Number of tables.
    public let tableCount: UInt32?

    /// Creates a value from its fields; the two optional counts default to `nil`.
    public init(
        slideCount: UInt32,
        slideNames: [String],
        imageCount: UInt32? = nil,
        tableCount: UInt32? = nil
    ) {
        self.slideCount = slideCount
        self.slideNames = slideNames
        self.imageCount = imageCount
        self.tableCount = tableCount
    }

    /// Snake-case JSON keys for each property.
    private enum CodingKeys: String, CodingKey {
        case slideCount = "slide_count"
        case slideNames = "slide_names"
        case imageCount = "image_count"
        case tableCount = "table_count"
    }

    /// Lenient decoder: absent or null keys fall back to `0`, `[]`, or `nil`.
    public init(from decoder: any Decoder) throws {
        let values = try decoder.container(keyedBy: CodingKeys.self)
        slideCount = try values.decodeIfPresent(UInt32.self, forKey: .slideCount) ?? 0
        slideNames = try values.decodeIfPresent([String].self, forKey: .slideNames) ?? []
        imageCount = try values.decodeIfPresent(UInt32.self, forKey: .imageCount)
        tableCount = try values.decodeIfPresent(UInt32.self, forKey: .tableCount)
    }
}

// MARK: - Internal FFI conversions for PptxMetadata
internal extension PptxMetadata {
    /// Copies each field out of the bridged reference, converting slide names to Swift strings.
    init(_ rb: RustBridge.PptxMetadataRef) throws {
        slideCount = rb.slideCount()
        slideNames = rb.slideNames().map { $0.as_str().toString() }
        imageCount = rb.imageCount()
        tableCount = rb.tableCount()
    }

    /// Constructs the bridged counterpart, copying slide names into a `RustVec<RustString>`.
    func intoRust() throws -> RustBridge.PptxMetadata {
        let rustSlideNames = RustVec<RustString>()
        for slideName in slideNames {
            rustSlideNames.push(value: RustString(slideName))
        }
        return RustBridge.PptxMetadata(slideCount, rustSlideNames, imageCount, tableCount)
    }
}

/// Word document metadata.
///
/// Extracted from DOCX files using shared Office Open XML metadata extraction.
/// Integrates with `office_metadata` module for core/app/custom properties.
///
/// This name is a direct alias of the `RustBridge` type; no separate Swift
/// value type is declared at this point.
public typealias DocxMetadata = RustBridge.DocxMetadata

/// Metadata for a CSV/TSV file.
public struct CsvMetadata: Codable, Sendable, Hashable {
    /// Value stored under the `row_count` key.
    public let rowCount: UInt32
    /// Value stored under the `column_count` key.
    public let columnCount: UInt32
    /// Optional delimiter string.
    public let delimiter: String?
    /// Value stored under the `has_header` key.
    public let hasHeader: Bool
    /// Optional list of strings stored under the `column_types` key.
    public let columnTypes: [String]?

    /// Creates a value from its fields; `delimiter` and `columnTypes` default to `nil`.
    public init(
        rowCount: UInt32,
        columnCount: UInt32,
        delimiter: String? = nil,
        hasHeader: Bool,
        columnTypes: [String]? = nil
    ) {
        self.rowCount = rowCount
        self.columnCount = columnCount
        self.delimiter = delimiter
        self.hasHeader = hasHeader
        self.columnTypes = columnTypes
    }

    /// JSON keys; multi-word properties use snake case.
    private enum CodingKeys: String, CodingKey {
        case rowCount = "row_count"
        case columnCount = "column_count"
        case delimiter
        case hasHeader = "has_header"
        case columnTypes = "column_types"
    }

    /// Lenient decoder: absent or null keys fall back to `0`, `false`, or `nil`.
    public init(from decoder: any Decoder) throws {
        let values = try decoder.container(keyedBy: CodingKeys.self)
        rowCount = try values.decodeIfPresent(UInt32.self, forKey: .rowCount) ?? 0
        columnCount = try values.decodeIfPresent(UInt32.self, forKey: .columnCount) ?? 0
        delimiter = try values.decodeIfPresent(String.self, forKey: .delimiter)
        hasHeader = try values.decodeIfPresent(Bool.self, forKey: .hasHeader) ?? false
        columnTypes = try values.decodeIfPresent([String].self, forKey: .columnTypes)
    }
}

// MARK: - Internal FFI conversions for CsvMetadata
internal extension CsvMetadata {
    /// Copies each field out of the bridged reference, converting bridged strings to Swift strings.
    init(_ rb: RustBridge.CsvMetadataRef) throws {
        rowCount = rb.rowCount()
        columnCount = rb.columnCount()
        delimiter = rb.delimiter()?.toString()
        hasHeader = rb.hasHeader()
        columnTypes = rb.columnTypes()?.map { $0.as_str().toString() }
    }

    /// Serialises `self` to JSON text and passes it to `RustBridge.csvMetadataFromJson`.
    func intoRust() throws -> RustBridge.CsvMetadata {
        let encoded = try JSONEncoder().encode(self)
        // Falls back to an empty JSON object if the bytes cannot be read as UTF-8.
        let payload = String(data: encoded, encoding: .utf8) ?? "{}"
        return try RustBridge.csvMetadataFromJson(payload)
    }
}

/// BibTeX bibliography metadata.
///
/// This name is a direct alias of the `RustBridge` type; no separate Swift
/// value type is declared at this point.
public typealias BibtexMetadata = RustBridge.BibtexMetadata

/// Metadata for a citation file (RIS, PubMed, EndNote).
public struct CitationMetadata: Codable, Sendable, Hashable {
    /// Value stored under the `citation_count` key.
    public let citationCount: UInt
    /// Optional format string.
    public let format: String?
    /// List of author strings.
    public let authors: [String]
    /// Optional year range, stored under the `year_range` key.
    public let yearRange: YearRange?
    /// List of DOI strings.
    public let dois: [String]
    /// List of keyword strings.
    public let keywords: [String]

    /// Creates a value from its fields; `format` and `yearRange` default to `nil`.
    public init(
        citationCount: UInt,
        format: String? = nil,
        authors: [String],
        yearRange: YearRange? = nil,
        dois: [String],
        keywords: [String]
    ) {
        self.citationCount = citationCount
        self.format = format
        self.authors = authors
        self.yearRange = yearRange
        self.dois = dois
        self.keywords = keywords
    }

    /// JSON keys; multi-word properties use snake case.
    private enum CodingKeys: String, CodingKey {
        case citationCount = "citation_count"
        case format, authors
        case yearRange = "year_range"
        case dois, keywords
    }

    /// Lenient decoder: absent or null keys fall back to `0`, `[]`, or `nil`.
    public init(from decoder: any Decoder) throws {
        let values = try decoder.container(keyedBy: CodingKeys.self)
        citationCount = try values.decodeIfPresent(UInt.self, forKey: .citationCount) ?? 0
        format = try values.decodeIfPresent(String.self, forKey: .format)
        authors = try values.decodeIfPresent([String].self, forKey: .authors) ?? []
        yearRange = try values.decodeIfPresent(YearRange.self, forKey: .yearRange)
        dois = try values.decodeIfPresent([String].self, forKey: .dois) ?? []
        keywords = try values.decodeIfPresent([String].self, forKey: .keywords) ?? []
    }
}

// MARK: - Internal FFI conversions for CitationMetadata
internal extension CitationMetadata {
    /// Copies each field out of the bridged reference; a present year range is converted via `YearRange.init(_:)`.
    init(_ rb: RustBridge.CitationMetadataRef) throws {
        citationCount = rb.citationCount()
        format = rb.format()?.toString()
        authors = rb.authors().map { $0.as_str().toString() }
        yearRange = try rb.yearRange().map { bridgedRange in try YearRange(bridgedRange) }
        dois = rb.dois().map { $0.as_str().toString() }
        keywords = rb.keywords().map { $0.as_str().toString() }
    }

    /// Serialises `self` to JSON text and passes it to `RustBridge.citationMetadataFromJson`.
    func intoRust() throws -> RustBridge.CitationMetadata {
        let encoded = try JSONEncoder().encode(self)
        // Falls back to an empty JSON object if the bytes cannot be read as UTF-8.
        let payload = String(data: encoded, encoding: .utf8) ?? "{}"
        return try RustBridge.citationMetadataFromJson(payload)
    }
}

/// A range of years used in bibliographic metadata.
public struct YearRange: Codable, Sendable, Hashable {
    /// Optional `min` value.
    public let min: UInt32?
    /// Optional `max` value.
    public let max: UInt32?
    /// List of year values.
    public let years: [UInt32]

    /// Creates a value from its fields; `min` and `max` default to `nil`.
    public init(
        min: UInt32? = nil,
        max: UInt32? = nil,
        years: [UInt32]
    ) {
        self.min = min
        self.max = max
        self.years = years
    }
}

// MARK: - Internal FFI conversions for YearRange
internal extension YearRange {
    /// Copies each field out of the bridged reference; the bridged year list becomes a Swift array.
    init(_ rb: RustBridge.YearRangeRef) throws {
        let bridgedYears = rb.years()
        self.min = rb.min()
        self.max = rb.max()
        self.years = Array(bridgedYears)
    }

    /// Serialises `self` to JSON text and passes it to `RustBridge.yearRangeFromJson`.
    func intoRust() throws -> RustBridge.YearRange {
        let encoded = try JSONEncoder().encode(self)
        // Falls back to an empty JSON object if the bytes cannot be read as UTF-8.
        let payload = String(data: encoded, encoding: .utf8) ?? "{}"
        return try RustBridge.yearRangeFromJson(payload)
    }
}

/// Metadata for a FictionBook (FB2) file.
public struct FictionBookMetadata: Codable, Sendable, Hashable {
    /// List of genre strings.
    public let genres: [String]
    /// List of sequence strings.
    public let sequences: [String]
    /// Optional annotation text.
    public let annotation: String?

    /// Creates a value from its fields; `annotation` defaults to `nil`.
    public init(
        genres: [String],
        sequences: [String],
        annotation: String? = nil
    ) {
        self.genres = genres
        self.sequences = sequences
        self.annotation = annotation
    }

    /// JSON keys; each matches its property name.
    private enum CodingKeys: String, CodingKey {
        case genres, sequences, annotation
    }

    /// Lenient decoder: absent or null keys fall back to `[]` or `nil`.
    public init(from decoder: any Decoder) throws {
        let values = try decoder.container(keyedBy: CodingKeys.self)
        genres = try values.decodeIfPresent([String].self, forKey: .genres) ?? []
        sequences = try values.decodeIfPresent([String].self, forKey: .sequences) ?? []
        annotation = try values.decodeIfPresent(String.self, forKey: .annotation)
    }
}

// MARK: - Internal FFI conversions for FictionBookMetadata
internal extension FictionBookMetadata {
    /// Copies each field out of the bridged reference, converting bridged strings to Swift strings.
    init(_ rb: RustBridge.FictionBookMetadataRef) throws {
        genres = rb.genres().map { $0.as_str().toString() }
        sequences = rb.sequences().map { $0.as_str().toString() }
        annotation = rb.annotation()?.toString()
    }

    /// Constructs the bridged counterpart, copying both string lists into `RustVec<RustString>` values.
    func intoRust() throws -> RustBridge.FictionBookMetadata {
        let rustGenres = RustVec<RustString>()
        for genre in genres {
            rustGenres.push(value: RustString(genre))
        }
        let rustSequences = RustVec<RustString>()
        for sequence in sequences {
            rustSequences.push(value: RustString(sequence))
        }
        let rustAnnotation = annotation.map(RustString.init)
        return RustBridge.FictionBookMetadata(rustGenres, rustSequences, rustAnnotation)
    }
}

/// Metadata for a dBASE (DBF) file.
public struct DbfMetadata: Codable, Sendable, Hashable {
    /// Value stored under the `record_count` key.
    public let recordCount: UInt
    /// Value stored under the `field_count` key.
    public let fieldCount: UInt
    /// Field descriptions.
    public let fields: [DbfFieldInfo]

    /// Creates a value from its three fields.
    public init(
        recordCount: UInt,
        fieldCount: UInt,
        fields: [DbfFieldInfo]
    ) {
        self.recordCount = recordCount
        self.fieldCount = fieldCount
        self.fields = fields
    }

    /// JSON keys; the two counts use snake case.
    private enum CodingKeys: String, CodingKey {
        case recordCount = "record_count"
        case fieldCount = "field_count"
        case fields
    }

    /// Lenient decoder: absent or null keys fall back to `0` or `[]`.
    public init(from decoder: any Decoder) throws {
        let values = try decoder.container(keyedBy: CodingKeys.self)
        recordCount = try values.decodeIfPresent(UInt.self, forKey: .recordCount) ?? 0
        fieldCount = try values.decodeIfPresent(UInt.self, forKey: .fieldCount) ?? 0
        fields = try values.decodeIfPresent([DbfFieldInfo].self, forKey: .fields) ?? []
    }
}

// MARK: - Internal FFI conversions for DbfMetadata
internal extension DbfMetadata {
    /// Copies each field out of the bridged reference; every bridged field entry is converted via `DbfFieldInfo.init(_:)`.
    init(_ rb: RustBridge.DbfMetadataRef) throws {
        recordCount = rb.recordCount()
        fieldCount = rb.fieldCount()
        fields = try rb.fields().map { bridgedField in try DbfFieldInfo(bridgedField) }
    }

    /// Constructs the bridged counterpart, converting each field with its own `intoRust()`.
    func intoRust() throws -> RustBridge.DbfMetadata {
        let rustFields = RustVec<RustBridge.DbfFieldInfo>()
        for field in fields {
            rustFields.push(value: try field.intoRust())
        }
        return RustBridge.DbfMetadata(recordCount, fieldCount, rustFields)
    }
}

/// Information about one dBASE field.
public struct DbfFieldInfo: Codable, Sendable, Hashable {
    /// Field name.
    public let name: String
    /// Field type string, stored under the `field_type` key.
    public let fieldType: String

    /// Creates a value from its two fields.
    public init(
        name: String,
        fieldType: String
    ) {
        self.name = name
        self.fieldType = fieldType
    }

    /// JSON keys; only `fieldType` is renamed.
    private enum CodingKeys: String, CodingKey {
        case name
        case fieldType = "field_type"
    }
}

// MARK: - Internal FFI conversions for DbfFieldInfo
internal extension DbfFieldInfo {
    /// Copies both strings out of the bridged reference.
    init(_ rb: RustBridge.DbfFieldInfoRef) throws {
        name = rb.name().toString()
        fieldType = rb.fieldType().toString()
    }

    /// Serialises `self` to JSON text and passes it to `RustBridge.dbfFieldInfoFromJson`.
    func intoRust() throws -> RustBridge.DbfFieldInfo {
        let encoded = try JSONEncoder().encode(self)
        // Falls back to an empty JSON object if the bytes cannot be read as UTF-8.
        let payload = String(data: encoded, encoding: .utf8) ?? "{}"
        return try RustBridge.dbfFieldInfoFromJson(payload)
    }
}

/// JATS (Journal Article Tag Suite) metadata.
///
/// This name is a direct alias of the `RustBridge` type; no separate Swift
/// value type is declared at this point.
public typealias JatsMetadata = RustBridge.JatsMetadata

/// A JATS contributor together with an optional role.
public struct ContributorRole: Codable, Sendable, Hashable {
    /// Contributor name.
    public let name: String
    /// Optional role string.
    public let role: String?

    /// Creates a value from its fields; `role` defaults to `nil`.
    public init(
        name: String,
        role: String? = nil
    ) {
        self.name = name
        self.role = role
    }
}

// MARK: - Internal FFI conversions for ContributorRole
internal extension ContributorRole {
    /// Copies the name and the optional role out of the bridged reference.
    init(_ rb: RustBridge.ContributorRoleRef) throws {
        name = rb.name().toString()
        role = rb.role()?.toString()
    }

    /// Serialises `self` to JSON text and passes it to `RustBridge.contributorRoleFromJson`.
    func intoRust() throws -> RustBridge.ContributorRole {
        let encoded = try JSONEncoder().encode(self)
        // Falls back to an empty JSON object if the bytes cannot be read as UTF-8.
        let payload = String(data: encoded, encoding: .utf8) ?? "{}"
        return try RustBridge.contributorRoleFromJson(payload)
    }
}

/// EPUB metadata (Dublin Core extensions).
public struct EpubMetadata: Codable, Sendable, Hashable {
    /// Optional `coverage` string.
    public let coverage: String?
    /// Optional string stored under the `dc_format` key.
    public let dcFormat: String?
    /// Optional `relation` string.
    public let relation: String?
    /// Optional `source` string.
    public let source: String?
    /// Optional string stored under the `dc_type` key.
    public let dcType: String?
    /// Optional string stored under the `cover_image` key.
    public let coverImage: String?

    /// Creates a value from its fields; every field defaults to `nil`.
    public init(
        coverage: String? = nil,
        dcFormat: String? = nil,
        relation: String? = nil,
        source: String? = nil,
        dcType: String? = nil,
        coverImage: String? = nil
    ) {
        self.coverage = coverage
        self.dcFormat = dcFormat
        self.relation = relation
        self.source = source
        self.dcType = dcType
        self.coverImage = coverImage
    }

    /// JSON keys; multi-word properties use snake case.
    private enum CodingKeys: String, CodingKey {
        case coverage
        case dcFormat = "dc_format"
        case relation, source
        case dcType = "dc_type"
        case coverImage = "cover_image"
    }

    /// Lenient decoder: absent or null keys decode as `nil`.
    public init(from decoder: any Decoder) throws {
        let values = try decoder.container(keyedBy: CodingKeys.self)
        coverage = try values.decodeIfPresent(String.self, forKey: .coverage)
        dcFormat = try values.decodeIfPresent(String.self, forKey: .dcFormat)
        relation = try values.decodeIfPresent(String.self, forKey: .relation)
        source = try values.decodeIfPresent(String.self, forKey: .source)
        dcType = try values.decodeIfPresent(String.self, forKey: .dcType)
        coverImage = try values.decodeIfPresent(String.self, forKey: .coverImage)
    }
}

// MARK: - Internal FFI conversions for EpubMetadata
internal extension EpubMetadata {
    /// Copies each optional string out of the bridged reference.
    init(_ rb: RustBridge.EpubMetadataRef) throws {
        coverage = rb.coverage()?.toString()
        dcFormat = rb.dcFormat()?.toString()
        relation = rb.relation()?.toString()
        source = rb.source()?.toString()
        dcType = rb.dcType()?.toString()
        coverImage = rb.coverImage()?.toString()
    }

    /// Constructs the bridged counterpart, wrapping each present string in a `RustString`.
    func intoRust() throws -> RustBridge.EpubMetadata {
        let rustCoverage = coverage.map(RustString.init)
        let rustDcFormat = dcFormat.map(RustString.init)
        let rustRelation = relation.map(RustString.init)
        let rustSource = source.map(RustString.init)
        let rustDcType = dcType.map(RustString.init)
        let rustCoverImage = coverImage.map(RustString.init)
        return RustBridge.EpubMetadata(rustCoverage, rustDcFormat, rustRelation, rustSource, rustDcType, rustCoverImage)
    }
}

/// Metadata for an Outlook PST archive.
public struct PstMetadata: Codable, Sendable, Hashable {
    /// Value stored under the `message_count` key.
    public let messageCount: UInt

    /// Creates a value from its single field.
    public init(messageCount: UInt) {
        self.messageCount = messageCount
    }

    /// Snake-case JSON key for the single property.
    private enum CodingKeys: String, CodingKey {
        case messageCount = "message_count"
    }

    /// Lenient decoder: an absent or null `message_count` decodes as `0`.
    public init(from decoder: any Decoder) throws {
        let values = try decoder.container(keyedBy: CodingKeys.self)
        let decodedCount = try values.decodeIfPresent(UInt.self, forKey: .messageCount)
        messageCount = decodedCount ?? 0
    }
}

// MARK: - Internal FFI conversions for PstMetadata
internal extension PstMetadata {
    /// Copies the message count out of the bridged reference.
    init(_ rb: RustBridge.PstMetadataRef) throws {
        messageCount = rb.messageCount()
    }

    /// Constructs the bridged counterpart from the stored message count.
    func intoRust() throws -> RustBridge.PstMetadata {
        let bridged = RustBridge.PstMetadata(messageCount)
        return bridged
    }
}

/// Confidence scores attached to an OCR element.
///
/// Detection confidence (how sure the engine is that text exists at this location)
/// is kept apart from recognition confidence (how sure it is about the text content).
public struct OcrConfidence: Codable, Sendable, Hashable {
    /// Detection confidence: how confident the OCR engine is that text exists here.
    ///
    /// PaddleOCR provides this as `box_score`; Tesseract has no direct equivalent.
    /// Range: 0.0 to 1.0, or `nil` if not available.
    public let detection: Double?
    /// Recognition confidence: how confident the engine is about the text content.
    ///
    /// Range: 0.0 to 1.0.
    public let recognition: Double

    /// Creates a value from its fields; `detection` defaults to `nil`.
    public init(
        detection: Double? = nil,
        recognition: Double
    ) {
        self.detection = detection
        self.recognition = recognition
    }

    /// JSON keys; each matches its property name.
    private enum CodingKeys: String, CodingKey {
        case detection, recognition
    }

    /// Lenient decoder: an absent or null `recognition` decodes as `0`, `detection` as `nil`.
    public init(from decoder: any Decoder) throws {
        let values = try decoder.container(keyedBy: CodingKeys.self)
        detection = try values.decodeIfPresent(Double.self, forKey: .detection)
        recognition = try values.decodeIfPresent(Double.self, forKey: .recognition) ?? 0
    }
}

// MARK: - Internal FFI conversions for OcrConfidence
internal extension OcrConfidence {
    /// Copies both confidence values out of the bridged reference.
    init(_ rb: RustBridge.OcrConfidenceRef) throws {
        detection = rb.detection()
        recognition = rb.recognition()
    }

    /// Constructs the bridged counterpart from the stored confidence values.
    func intoRust() throws -> RustBridge.OcrConfidence {
        let bridged = RustBridge.OcrConfidence(detection, recognition)
        return bridged
    }
}

/// Rotation details for an OCR element.
public struct OcrRotation: Codable, Sendable, Hashable {
    /// Rotation angle in degrees (0, 90, 180, 270 for PaddleOCR).
    public let angleDegrees: Double
    /// Confidence score for the rotation detection.
    public let confidence: Double?

    /// Creates a value from its fields; `confidence` defaults to `nil`.
    public init(
        angleDegrees: Double,
        confidence: Double? = nil
    ) {
        self.angleDegrees = angleDegrees
        self.confidence = confidence
    }

    /// JSON keys; only `angleDegrees` is renamed.
    private enum CodingKeys: String, CodingKey {
        case angleDegrees = "angle_degrees"
        case confidence
    }
}

// MARK: - Internal FFI conversions for OcrRotation
internal extension OcrRotation {
    /// Copies the angle and optional confidence out of the bridged reference.
    init(_ rb: RustBridge.OcrRotationRef) throws {
        angleDegrees = rb.angleDegrees()
        confidence = rb.confidence()
    }

    /// Constructs the bridged counterpart from the stored angle and confidence.
    func intoRust() throws -> RustBridge.OcrRotation {
        let bridged = RustBridge.OcrRotation(angleDegrees, confidence)
        return bridged
    }
}

/// A unified OCR element representing detected text with full metadata.
///
/// This is the primary type for structured OCR output, preserving all information
/// from both Tesseract and PaddleOCR backends.
///
/// This name is a direct alias of the `RustBridge` type; no separate Swift
/// value type is declared at this point.
public typealias OcrElement = RustBridge.OcrElement

/// Settings for OCR element extraction.
///
/// Governs how OCR elements are extracted and filtered.
public struct OcrElementConfig: Codable, Sendable, Hashable {
    /// Whether to include OCR elements in the extraction result.
    ///
    /// When true, the `ocr_elements` field in `ExtractionResult` will be populated.
    public let includeElements: Bool
    /// Minimum hierarchical level to include.
    ///
    /// Elements below this level (e.g., words when min_level is Line) will be excluded.
    public let minLevel: OcrElementLevel
    /// Minimum recognition confidence threshold (0.0-1.0).
    ///
    /// Elements with confidence below this threshold will be filtered out.
    public let minConfidence: Double
    /// Whether to build hierarchical relationships between elements.
    ///
    /// When true, `parent_id` fields will be populated based on spatial containment.
    /// Only meaningful for Tesseract output.
    public let buildHierarchy: Bool

    /// Creates a value from its four fields.
    public init(
        includeElements: Bool,
        minLevel: OcrElementLevel,
        minConfidence: Double,
        buildHierarchy: Bool
    ) {
        self.includeElements = includeElements
        self.minLevel = minLevel
        self.minConfidence = minConfidence
        self.buildHierarchy = buildHierarchy
    }

    /// Snake-case JSON keys for each property.
    private enum CodingKeys: String, CodingKey {
        case includeElements = "include_elements"
        case minLevel = "min_level"
        case minConfidence = "min_confidence"
        case buildHierarchy = "build_hierarchy"
    }

    /// Decoder in which `min_level` is required; the other keys fall back to `false` or `0` when absent or null.
    public init(from decoder: any Decoder) throws {
        let values = try decoder.container(keyedBy: CodingKeys.self)
        includeElements = try values.decodeIfPresent(Bool.self, forKey: .includeElements) ?? false
        minLevel = try values.decode(OcrElementLevel.self, forKey: .minLevel)
        minConfidence = try values.decodeIfPresent(Double.self, forKey: .minConfidence) ?? 0
        buildHierarchy = try values.decodeIfPresent(Bool.self, forKey: .buildHierarchy) ?? false
    }
}

// MARK: - Internal FFI conversions for OcrElementConfig
internal extension OcrElementConfig {
    /// Builds a Swift value from the bridged reference.
    ///
    /// - Parameter rb: Bridged reference whose accessors are read.
    /// - Throws: `DecodingError.dataCorrupted` when the bridged level string
    ///   does not match any `OcrElementLevel` raw value.
    init(_ rb: RustBridge.OcrElementConfigRef) throws {
        // An unrecognised enum string is reported as a thrown error rather than
        // crashing the process, since this initializer is already `throws`.
        let rawMinLevel = rb.minLevel().toString()
        guard let resolvedMinLevel = OcrElementLevel(rawValue: rawMinLevel) else {
            throw DecodingError.dataCorrupted(
                DecodingError.Context(codingPath: [], debugDescription: "Unknown OcrElementLevel: \(rawMinLevel)")
            )
        }
        self.includeElements = rb.includeElements()
        self.minLevel = resolvedMinLevel
        self.minConfidence = rb.minConfidence()
        self.buildHierarchy = rb.buildHierarchy()
    }

    /// Constructs the bridged counterpart; `minLevel` is converted with its own `intoRust()`.
    ///
    /// - Throws: Any error raised while converting `minLevel`.
    func intoRust() throws -> RustBridge.OcrElementConfig {
        return RustBridge.OcrElementConfig(self.includeElements, try self.minLevel.intoRust(), self.minConfidence, self.buildHierarchy)
    }
}

/// Unified page structure for documents.
///
/// Covers different page types (PDF pages, PPTX slides, Excel sheets)
/// and carries character offset boundaries for chunk-to-page mapping.
public struct PageStructure: Codable, Sendable, Hashable {
    /// Total number of pages/slides/sheets.
    public let totalCount: UInt32
    /// Type of paginated unit.
    public let unitType: PageUnitType
    /// Character offset boundaries for each page.
    ///
    /// Maps character ranges in the extracted content to page numbers.
    /// Used for chunk page range calculation.
    public let boundaries: [PageBoundary]?
    /// Detailed per-page metadata (optional, only when needed).
    public let pages: [PageInfo]?

    /// Creates a value from its fields; `boundaries` and `pages` default to `nil`.
    public init(
        totalCount: UInt32,
        unitType: PageUnitType,
        boundaries: [PageBoundary]? = nil,
        pages: [PageInfo]? = nil
    ) {
        self.totalCount = totalCount
        self.unitType = unitType
        self.boundaries = boundaries
        self.pages = pages
    }

    /// JSON keys; the two multi-word properties use snake case.
    private enum CodingKeys: String, CodingKey {
        case totalCount = "total_count"
        case unitType = "unit_type"
        case boundaries, pages
    }
}

// MARK: - Internal FFI conversions for PageStructure
internal extension PageStructure {
    /// Builds a Swift value from the bridged reference.
    ///
    /// - Parameter rb: Bridged reference whose accessors are read.
    /// - Throws: `DecodingError.dataCorrupted` when the bridged unit type string
    ///   does not match any `PageUnitType` raw value, or any error raised while
    ///   converting the nested boundaries and pages.
    init(_ rb: RustBridge.PageStructureRef) throws {
        // An unrecognised enum string is reported as a thrown error rather than
        // crashing the process, since this initializer is already `throws`.
        let rawUnitType = rb.unitType().toString()
        guard let resolvedUnitType = PageUnitType(rawValue: rawUnitType) else {
            throw DecodingError.dataCorrupted(
                DecodingError.Context(codingPath: [], debugDescription: "Unknown PageUnitType: \(rawUnitType)")
            )
        }
        self.totalCount = rb.totalCount()
        self.unitType = resolvedUnitType
        self.boundaries = try rb.boundaries()?.map { try PageBoundary($0) }
        self.pages = try rb.pages()?.map { try PageInfo($0) }
    }

    /// Encodes `self` as JSON and hands the text to `RustBridge.pageStructureFromJson`.
    ///
    /// - Throws: Any error raised by `JSONEncoder` or by the bridge call.
    func intoRust() throws -> RustBridge.PageStructure {
        let data = try JSONEncoder().encode(self)
        let json = String(data: data, encoding: .utf8) ?? "{}"
        return try RustBridge.pageStructureFromJson(json)
    }
}

/// Byte offset boundary for one page.
///
/// Records where a page's content starts and ends in the main content string,
/// so byte positions can be mapped to page numbers. Offsets are guaranteed to be
/// at valid UTF-8 character boundaries when using standard String methods (push_str, push, etc.).
public struct PageBoundary: Codable, Sendable, Hashable {
    /// Byte offset where this page starts in the content string (UTF-8 valid boundary, inclusive).
    public let byteStart: UInt
    /// Byte offset where this page ends in the content string (UTF-8 valid boundary, exclusive).
    public let byteEnd: UInt
    /// Page number (1-indexed).
    public let pageNumber: UInt32

    /// Creates a value from its three fields.
    public init(
        byteStart: UInt,
        byteEnd: UInt,
        pageNumber: UInt32
    ) {
        self.byteStart = byteStart
        self.byteEnd = byteEnd
        self.pageNumber = pageNumber
    }

    /// Snake-case JSON keys for each property.
    private enum CodingKeys: String, CodingKey {
        case byteStart = "byte_start"
        case byteEnd = "byte_end"
        case pageNumber = "page_number"
    }
}

// MARK: - Internal FFI conversions for PageBoundary
internal extension PageBoundary {
    /// Copies the two byte offsets and the page number out of the bridged reference.
    init(_ rb: RustBridge.PageBoundaryRef) throws {
        byteStart = rb.byteStart()
        byteEnd = rb.byteEnd()
        pageNumber = rb.pageNumber()
    }

    /// Constructs the bridged counterpart from the stored offsets and page number.
    func intoRust() throws -> RustBridge.PageBoundary {
        let bridged = RustBridge.PageBoundary(byteStart, byteEnd, pageNumber)
        return bridged
    }
}

/// Metadata for a single page/slide/sheet.
///
/// Holds per-page information including dimensions, content counts,
/// and visibility state (for presentations).
public struct PageInfo: Codable, Sendable, Hashable {
    /// Page number (1-indexed).
    public let number: UInt32
    /// Page title (usually for presentations).
    public let title: String?
    /// Dimensions in points (PDF) or pixels (images): (width, height).
    public let dimensions: [Double]?
    /// Number of images on this page.
    public let imageCount: UInt32?
    /// Number of tables on this page.
    public let tableCount: UInt32?
    /// Whether this page is hidden (e.g., in presentations).
    public let hidden: Bool?
    /// Whether this page is blank (no meaningful text, no images, no tables).
    ///
    /// A page is considered blank if it has fewer than 3 non-whitespace characters
    /// and contains no tables or images. This is useful for filtering out empty pages
    /// in scanned documents or PDFs with blank separator pages.
    public let isBlank: Bool?
    /// Whether this page contains non-trivial vector graphics (paths, shapes, curves).
    ///
    /// Indicates the presence of vector-drawn content such as charts, diagrams,
    /// or geometric shapes (e.g., from Adobe InDesign, LaTeX TikZ). These are
    /// invisible to `ExtractionResult.images` since they are not embedded as raster
    /// XObjects. Set to `true` when path count exceeds a heuristic threshold,
    /// signaling that downstream consumers may want to rasterize the page to
    /// capture this content.
    ///
    /// NOTE(review): the upstream description says this is only populated for PDFs and
    /// `None` otherwise, but the property here is a non-optional `Bool` — confirm how
    /// the absent case is represented.
    public let hasVectorGraphics: Bool

    /// Creates a value from its fields; every optional field defaults to `nil`.
    public init(
        number: UInt32,
        title: String? = nil,
        dimensions: [Double]? = nil,
        imageCount: UInt32? = nil,
        tableCount: UInt32? = nil,
        hidden: Bool? = nil,
        isBlank: Bool? = nil,
        hasVectorGraphics: Bool
    ) {
        self.number = number
        self.title = title
        self.dimensions = dimensions
        self.imageCount = imageCount
        self.tableCount = tableCount
        self.hidden = hidden
        self.isBlank = isBlank
        self.hasVectorGraphics = hasVectorGraphics
    }

    /// JSON keys; multi-word properties use snake case.
    private enum CodingKeys: String, CodingKey {
        case number, title, dimensions
        case imageCount = "image_count"
        case tableCount = "table_count"
        case hidden
        case isBlank = "is_blank"
        case hasVectorGraphics = "has_vector_graphics"
    }
}

// MARK: - Internal FFI conversions for PageInfo
internal extension PageInfo {
    /// Creates a `PageInfo` by reading every field from a bridged Rust reference.
    ///
    /// - Parameter rb: Bridge reference whose accessors supply the field values.
    init(_ rb: RustBridge.PageInfoRef) throws {
        number = rb.number()
        title = rb.title()?.toString()
        dimensions = rb.dimensions().map { Array($0) }
        imageCount = rb.imageCount()
        tableCount = rb.tableCount()
        hidden = rb.hidden()
        isBlank = rb.isBlank()
        hasVectorGraphics = rb.hasVectorGraphics()
    }

    /// Converts this value into its Rust counterpart by way of JSON.
    ///
    /// - Returns: The value produced by `RustBridge.pageInfoFromJson`.
    /// - Throws: Any encoding error, or any error raised by the bridge call.
    func intoRust() throws -> RustBridge.PageInfo {
        let encoded = try JSONEncoder().encode(self)
        // Falls back to an empty JSON object if the bytes cannot be read as UTF-8.
        let payload = String(data: encoded, encoding: .utf8) ?? "{}"
        return try RustBridge.pageInfoFromJson(payload)
    }
}

/// The extracted content of one page or slide.
///
/// With page extraction enabled, a document is broken into per-page entries,
/// each carrying the tables and image references that belong to that page.
///
/// # Performance
///
/// The upstream Rust type keeps tables and images behind `Arc`:
/// - `Vec<Arc<Table>>` shares table data without copying
/// - `Vec<Arc<ExtractedImage>>` shares image data without copying
/// - custom Serialize/Deserialize keeps the JSON shape unchanged
///
/// That sharing lowers memory overhead for documents with shared
/// tables/images by avoiding redundant copies during serialization.
public struct PageContent: Codable, Sendable, Hashable {
    /// 1-indexed page number.
    public let pageNumber: UInt32
    /// Text extracted from this page.
    public let content: String
    /// Tables located on this page.
    ///
    /// Upstream this is Arc-backed for zero-copy sharing, yet it serializes as
    /// a plain `Vec<Table>` so the JSON stays compatible.
    public let tables: [Table]
    /// Zero-based indices into the top-level `ExtractionResult.images`
    /// collection for the images found on this page.
    ///
    /// Only populated when `extract_images = true` in the extraction config.
    public let imageIndices: [UInt32]
    /// Text hierarchy levels (H1-H6) extracted from the page content, present
    /// when hierarchy extraction is enabled.
    public let hierarchy: PageHierarchy?
    /// Whether the page is blank (no meaningful text content).
    ///
    /// Decided during extraction: a page is blank when it has fewer than
    /// 3 non-whitespace characters and contains no tables or images.
    public let isBlank: Bool?
    /// Layout regions detected on this page, each with class, confidence,
    /// bounding box, and area fraction.
    ///
    /// Only populated when layout detection is configured.
    public let layoutRegions: [LayoutRegion]?
    /// Speaker notes for this slide (PPTX only).
    ///
    /// Holds the text of the slide's notes pane (`ppt/notesSlides/notesSlide{N}.xml`).
    /// Only populated when the source is a PPTX file and notes are present.
    public let speakerNotes: String?
    /// Name of the section this slide belongs to (PPTX only).
    ///
    /// PowerPoint sections group slides into logical chapters (`<p:sectionLst>` in
    /// `ppt/presentation.xml`). Only populated when the source is a PPTX file and
    /// the slide belongs to a named section.
    public let sectionName: String?
    /// Sheet name for this page (XLSX/ODS only).
    ///
    /// Every spreadsheet sheet maps to one `PageContent` entry, and this field
    /// carries the sheet's display name as shown in the workbook. `nil` for all
    /// non-spreadsheet formats and for sheets with an empty name.
    public let sheetName: String?

    /// Creates a page entry from its individual fields; optional fields default to `nil`.
    public init(
        pageNumber: UInt32,
        content: String,
        tables: [Table],
        imageIndices: [UInt32],
        hierarchy: PageHierarchy? = nil,
        isBlank: Bool? = nil,
        layoutRegions: [LayoutRegion]? = nil,
        speakerNotes: String? = nil,
        sectionName: String? = nil,
        sheetName: String? = nil
    ) {
        self.pageNumber = pageNumber
        self.content = content
        self.tables = tables
        self.imageIndices = imageIndices
        self.hierarchy = hierarchy
        self.isBlank = isBlank
        self.layoutRegions = layoutRegions
        self.speakerNotes = speakerNotes
        self.sectionName = sectionName
        self.sheetName = sheetName
    }

    /// Maps Swift property names onto the snake_case JSON keys.
    private enum CodingKeys: String, CodingKey {
        case pageNumber = "page_number"
        case content, tables
        case imageIndices = "image_indices"
        case hierarchy
        case isBlank = "is_blank"
        case layoutRegions = "layout_regions"
        case speakerNotes = "speaker_notes"
        case sectionName = "section_name"
        case sheetName = "sheet_name"
    }
}

// MARK: - Internal FFI conversions for PageContent
internal extension PageContent {
    /// Creates a `PageContent` by reading every field from a bridged Rust reference.
    ///
    /// - Parameter rb: Bridge reference whose accessors supply the field values.
    /// - Throws: Any error raised while converting the nested tables, hierarchy,
    ///   or layout regions.
    init(_ rb: RustBridge.PageContentRef) throws {
        pageNumber = rb.pageNumber()
        content = rb.content().toString()
        tables = try rb.tables().map { try Table($0) }
        imageIndices = Array(rb.imageIndices())
        hierarchy = try rb.hierarchy().map { try PageHierarchy($0) }
        isBlank = rb.isBlank()
        layoutRegions = try rb.layoutRegions()?.map { try LayoutRegion($0) }
        speakerNotes = rb.speakerNotes()?.toString()
        sectionName = rb.sectionName()?.toString()
        sheetName = rb.sheetName()?.toString()
    }

    /// Converts this value into its Rust counterpart by way of JSON.
    ///
    /// - Returns: The value produced by `RustBridge.pageContentFromJson`.
    /// - Throws: Any encoding error, or any error raised by the bridge call.
    func intoRust() throws -> RustBridge.PageContent {
        let encoded = try JSONEncoder().encode(self)
        // Falls back to an empty JSON object if the bytes cannot be read as UTF-8.
        let payload = String(data: encoded, encoding: .utf8) ?? "{}"
        return try RustBridge.pageContentFromJson(payload)
    }
}

/// One layout region detected on a page.
///
/// With layout detection enabled, a page may carry several regions that
/// identify different content types (text, pictures, tables, etc.), each with
/// a confidence score and a spatial position.
public struct LayoutRegion: Codable, Sendable, Hashable {
    /// Name of the layout class (e.g. "picture", "table", "text", "section_header").
    public let className: String
    /// Layout detection model confidence, from 0.0 to 1.0.
    public let confidence: Double
    /// Region bounds in document coordinate space.
    public let boundingBox: BoundingBox
    /// Share of the page area this region covers, from 0.0 to 1.0.
    public let areaFraction: Double

    /// Creates a region from its individual fields.
    public init(
        className: String,
        confidence: Double,
        boundingBox: BoundingBox,
        areaFraction: Double
    ) {
        self.className = className
        self.confidence = confidence
        self.boundingBox = boundingBox
        self.areaFraction = areaFraction
    }

    /// Maps Swift property names onto the snake_case JSON keys.
    private enum CodingKeys: String, CodingKey {
        case className = "class_name"
        case confidence
        case boundingBox = "bounding_box"
        case areaFraction = "area_fraction"
    }

    /// Decodes a region, tolerating absent scalar keys.
    ///
    /// A missing `class_name` becomes `""` and missing `confidence` /
    /// `area_fraction` become `0`; `bounding_box` is required.
    public init(from decoder: any Decoder) throws {
        let values = try decoder.container(keyedBy: CodingKeys.self)
        className = try values.decodeIfPresent(String.self, forKey: .className) ?? ""
        confidence = try values.decodeIfPresent(Double.self, forKey: .confidence) ?? 0
        boundingBox = try values.decode(BoundingBox.self, forKey: .boundingBox)
        areaFraction = try values.decodeIfPresent(Double.self, forKey: .areaFraction) ?? 0
    }
}

// MARK: - Internal FFI conversions for LayoutRegion
internal extension LayoutRegion {
    /// Creates a `LayoutRegion` by reading every field from a bridged Rust reference.
    ///
    /// - Parameter rb: Bridge reference whose accessors supply the field values.
    /// - Throws: Any error raised while converting the bounding box.
    init(_ rb: RustBridge.LayoutRegionRef) throws {
        className = rb.className().toString()
        confidence = rb.confidence()
        boundingBox = try BoundingBox(rb.boundingBox())
        areaFraction = rb.areaFraction()
    }

    /// Builds the Rust counterpart directly from this value's fields.
    ///
    /// - Throws: Any error raised while converting the bounding box.
    func intoRust() throws -> RustBridge.LayoutRegion {
        let rustName = RustString(className)
        let rustBox = try boundingBox.intoRust()
        return RustBridge.LayoutRegion(rustName, confidence, rustBox, areaFraction)
    }
}

/// Heading-level structure of a page.
///
/// Produced when PDF text hierarchy extraction is enabled. Holds hierarchical
/// blocks tagged with heading levels (H1-H6) that describe the semantic
/// structure of the document.
public struct PageHierarchy: Codable, Sendable, Hashable {
    /// How many hierarchy blocks this page has.
    public let blockCount: UInt32
    /// The hierarchical blocks, each with a heading level.
    public let blocks: [HierarchicalBlock]

    /// Creates a hierarchy from a block count and the blocks themselves.
    public init(
        blockCount: UInt32,
        blocks: [HierarchicalBlock]
    ) {
        self.blockCount = blockCount
        self.blocks = blocks
    }

    /// Maps Swift property names onto the snake_case JSON keys.
    private enum CodingKeys: String, CodingKey {
        case blockCount = "block_count"
        case blocks
    }
}

// MARK: - Internal FFI conversions for PageHierarchy
internal extension PageHierarchy {
    /// Creates a `PageHierarchy` by reading every field from a bridged Rust reference.
    ///
    /// - Parameter rb: Bridge reference whose accessors supply the field values.
    /// - Throws: Any error raised while converting a nested block.
    init(_ rb: RustBridge.PageHierarchyRef) throws {
        blockCount = rb.blockCount()
        blocks = try rb.blocks().map { try HierarchicalBlock($0) }
    }

    /// Converts this value into its Rust counterpart by way of JSON.
    ///
    /// - Returns: The value produced by `RustBridge.pageHierarchyFromJson`.
    /// - Throws: Any encoding error, or any error raised by the bridge call.
    func intoRust() throws -> RustBridge.PageHierarchy {
        let encoded = try JSONEncoder().encode(self)
        // Falls back to an empty JSON object if the bytes cannot be read as UTF-8.
        let payload = String(data: encoded, encoding: .utf8) ?? "{}"
        return try RustBridge.pageHierarchyFromJson(payload)
    }
}

/// A block of text together with its assigned hierarchy level.
///
/// The heading information comes from font size clustering and hierarchical
/// analysis.
public struct HierarchicalBlock: Codable, Sendable, Hashable {
    /// Text held by this block.
    public let text: String
    /// Font size of the block's text.
    public let fontSize: Float
    /// Hierarchy level of the block (H1-H6 or Body).
    ///
    /// The values mirror HTML heading tags:
    /// - "h1": Top-level heading
    /// - "h2": Secondary heading
    /// - "h3": Tertiary heading
    /// - "h4": Quaternary heading
    /// - "h5": Quinary heading
    /// - "h6": Senary heading
    /// - "body": Body text (no heading level)
    public let level: String
    /// Bounding box of the block.
    ///
    /// Coordinates are (left, top, right, bottom) in PDF units.
    public let bbox: [Float]?

    /// Creates a block from its individual fields; `bbox` defaults to `nil`.
    public init(
        text: String,
        fontSize: Float,
        level: String,
        bbox: [Float]? = nil
    ) {
        self.text = text
        self.fontSize = fontSize
        self.level = level
        self.bbox = bbox
    }

    /// Maps Swift property names onto the snake_case JSON keys.
    private enum CodingKeys: String, CodingKey {
        case text
        case fontSize = "font_size"
        case level, bbox
    }
}

// MARK: - Internal FFI conversions for HierarchicalBlock
internal extension HierarchicalBlock {
    /// Creates a `HierarchicalBlock` by reading every field from a bridged Rust reference.
    ///
    /// - Parameter rb: Bridge reference whose accessors supply the field values.
    init(_ rb: RustBridge.HierarchicalBlockRef) throws {
        text = rb.text().toString()
        fontSize = rb.fontSize()
        level = rb.level().toString()
        bbox = rb.bbox().map { Array($0) }
    }

    /// Converts this value into its Rust counterpart by way of JSON.
    ///
    /// - Returns: The value produced by `RustBridge.hierarchicalBlockFromJson`.
    /// - Throws: Any encoding error, or any error raised by the bridge call.
    func intoRust() throws -> RustBridge.HierarchicalBlock {
        let encoded = try JSONEncoder().encode(self)
        // Falls back to an empty JSON object if the bytes cannot be read as UTF-8.
        let payload = String(data: encoded, encoding: .utf8) ?? "{}"
        return try RustBridge.hierarchicalBlockFromJson(payload)
    }
}

/// One changed cell inside a table.
///
/// Upstream this lives outside `crate::diff` so that `RevisionDelta` can refer
/// to it unconditionally, without requiring the `diff` Cargo feature;
/// `crate::diff` re-exports the type verbatim.
public struct CellChange: Codable, Sendable, Hashable {
    /// Row index, zero-based.
    public let row: UInt
    /// Column index, zero-based.
    public let col: UInt
    /// Cell value before the change.
    public let from: String
    /// Cell value after the change.
    public let to: String

    /// Creates a cell change from its position and its old and new values.
    public init(
        row: UInt,
        col: UInt,
        from: String,
        to: String
    ) {
        self.row = row
        self.col = col
        self.from = from
        self.to = to
    }
}

// MARK: - Internal FFI conversions for CellChange
internal extension CellChange {
    /// Creates a `CellChange` by reading every field from a bridged Rust reference.
    ///
    /// - Parameter rb: Bridge reference whose accessors supply the field values.
    init(_ rb: RustBridge.CellChangeRef) throws {
        self.init(
            row: rb.row(),
            col: rb.col(),
            from: rb.from().toString(),
            to: rb.to().toString()
        )
    }

    /// Converts this value into its Rust counterpart by way of JSON.
    ///
    /// - Returns: The value produced by `RustBridge.cellChangeFromJson`.
    /// - Throws: Any encoding error, or any error raised by the bridge call.
    func intoRust() throws -> RustBridge.CellChange {
        let encoded = try JSONEncoder().encode(self)
        // Falls back to an empty JSON object if the bytes cannot be read as UTF-8.
        let payload = String(data: encoded, encoding: .utf8) ?? "{}"
        return try RustBridge.cellChangeFromJson(payload)
    }
}

/// One tracked change embedded in a document.
///
/// Filled in by per-format extractors that understand change-tracking metadata
/// (DOCX `w:ins`/`w:del`/`w:rPrChange`, ODT `text:change-*`, …). Until a
/// format-specific implementation exists, every extractor leaves
/// `ExtractionResult.revisions` unset.
public struct DocumentRevision: Codable, Sendable, Hashable {
    /// Revision identifier, whose form depends on the format.
    ///
    /// For DOCX it is the `w:id` attribute value of the change element
    /// (e.g. `"42"`). If the attribute is absent, a synthetic fallback is
    /// generated (`"docx-ins-0"`, `"docx-del-3"`, …).
    public let revisionId: String
    /// Display name of the change's author, when available.
    public let author: String?
    /// ISO-8601 timestamp of the change, when available.
    ///
    /// Kept as a plain string so the type stays FFI-friendly and is available
    /// without the optional `chrono` dependency. DOCX fills this from the
    /// `w:date` attribute (e.g. `"2024-03-15T10:30:00Z"`).
    public let timestamp: String?
    /// Semantic kind of the revision.
    public let kind: RevisionKind
    /// Best-effort location of the revision within the document.
    ///
    /// How it is resolved depends on the format; it may be `nil` when the
    /// location cannot be determined (e.g. changes inside table cells before
    /// table-cell anchor support is added).
    public let anchor: RevisionAnchor?
    /// Content changes that make up the revision.
    public let delta: RevisionDelta

    /// Creates a revision from its individual fields; optional fields default to `nil`.
    public init(
        revisionId: String,
        author: String? = nil,
        timestamp: String? = nil,
        kind: RevisionKind,
        anchor: RevisionAnchor? = nil,
        delta: RevisionDelta
    ) {
        self.revisionId = revisionId
        self.author = author
        self.timestamp = timestamp
        self.kind = kind
        self.anchor = anchor
        self.delta = delta
    }

    /// Maps Swift property names onto the snake_case JSON keys.
    private enum CodingKeys: String, CodingKey {
        case revisionId = "revision_id"
        case author, timestamp, kind, anchor, delta
    }
}

// MARK: - Internal FFI conversions for DocumentRevision
internal extension DocumentRevision {
    /// Creates a `DocumentRevision` by reading every field from a bridged Rust reference.
    ///
    /// - Parameter rb: Bridge reference whose accessors supply the field values.
    /// - Throws: `DecodingError.dataCorrupted` when the bridged kind string matches no
    ///   `RevisionKind` raw value; any error from decoding the anchor JSON; any error
    ///   from converting the delta.
    init(_ rb: RustBridge.DocumentRevisionRef) throws {
        self.revisionId = rb.revisionId().toString()
        self.author = rb.author()?.toString()
        self.timestamp = rb.timestamp()?.toString()

        // This initializer already throws, so report an unrecognized kind as an
        // error the caller can handle rather than terminating the process.
        let kindName = rb.kind().toString()
        guard let kind = RevisionKind(rawValue: kindName) else {
            throw DecodingError.dataCorrupted(
                DecodingError.Context(
                    codingPath: [],
                    debugDescription: "Unknown RevisionKind: \(kindName)"
                )
            )
        }
        self.kind = kind

        // The anchor is read as JSON text; an absent anchor is decoded from the literal "null".
        let anchorJSON = rb.anchor()?.toString() ?? "null"
        self.anchor = try JSONDecoder().decode(RevisionAnchor?.self, from: Data(anchorJSON.utf8))
        self.delta = try RevisionDelta(rb.delta())
    }

    /// Converts this value into its Rust counterpart by way of JSON.
    ///
    /// - Returns: The value produced by `RustBridge.documentRevisionFromJson`.
    /// - Throws: Any encoding error, or any error raised by the bridge call.
    func intoRust() throws -> RustBridge.DocumentRevision {
        let data = try JSONEncoder().encode(self)
        // Falls back to an empty JSON object if the bytes cannot be read as UTF-8.
        let json = String(data: data, encoding: .utf8) ?? "{}"
        return try RustBridge.documentRevisionFromJson(json)
    }
}

/// Content changes belonging to a single revision.
///
/// Insertions and deletions put the added/removed lines into `content` as
/// `DiffLine::Added` / `DiffLine::Removed` entries. Format changes leave
/// `content` empty — the property diff is left as a TODO for a later
/// enrichment pass.
public struct RevisionDelta: Codable, Sendable, Hashable {
    /// Line-level content changes of the revision.
    public let content: [DiffLine]
    /// Cell-level table changes of the revision.
    public let tableChanges: [CellChange]

    /// Creates a delta from its line changes and table cell changes.
    public init(
        content: [DiffLine],
        tableChanges: [CellChange]
    ) {
        self.content = content
        self.tableChanges = tableChanges
    }

    /// Maps Swift property names onto the snake_case JSON keys.
    private enum CodingKeys: String, CodingKey {
        case content
        case tableChanges = "table_changes"
    }

    /// Decodes a delta, treating either missing key as an empty array.
    public init(from decoder: any Decoder) throws {
        let values = try decoder.container(keyedBy: CodingKeys.self)
        content = try values.decodeIfPresent([DiffLine].self, forKey: .content) ?? []
        tableChanges = try values.decodeIfPresent([CellChange].self, forKey: .tableChanges) ?? []
    }
}

// MARK: - Internal FFI conversions for RevisionDelta
internal extension RevisionDelta {
    /// Creates a `RevisionDelta` by reading every field from a bridged Rust reference.
    ///
    /// Each element of `rb.content()` is treated as JSON text and decoded into a `DiffLine`.
    ///
    /// - Parameter rb: Bridge reference whose accessors supply the field values.
    /// - Throws: Any error from decoding a diff line or converting a cell change.
    init(_ rb: RustBridge.RevisionDeltaRef) throws {
        // A single decoder is reused for all lines instead of allocating one per element.
        let decoder = JSONDecoder()
        self.content = try rb.content().map { (s: RustStringRef) -> DiffLine in
            try decoder.decode(DiffLine.self, from: Data(s.as_str().toString().utf8))
        }
        self.tableChanges = try rb.tableChanges().map { try CellChange($0) }
    }

    /// Builds the Rust counterpart by converting each element and pushing it into a `RustVec`.
    ///
    /// - Throws: Any error raised while converting an element.
    func intoRust() throws -> RustBridge.RevisionDelta {
        let __content = RustVec<RustBridge.DiffLine>()
        for __elem in self.content { __content.push(value: try __elem.intoRust()) }
        let __tableChanges = RustVec<RustBridge.CellChange>()
        for __elem in self.tableChanges { __tableChanges.push(value: try __elem.intoRust()) }
        return RustBridge.RevisionDelta(__content, __tableChanges)
    }
}

/// The structure of an extracted table.
///
/// Describes a table detected in and extracted from a document (PDF, image,
/// etc.). Each table is available both as structured cell data and as Markdown.
public struct Table: Codable, Sendable, Hashable {
    /// Cells as a 2D array (rows × columns).
    public let cells: [[String]]
    /// The table rendered as Markdown.
    public let markdown: String
    /// 1-indexed number of the page the table was found on.
    public let pageNumber: UInt32
    /// Table bounds on the page (PDF coordinates: x0=left, y0=bottom, x1=right, y1=top).
    /// Only populated for PDF-extracted tables when position data is available.
    public let boundingBox: BoundingBox?

    /// Creates a table from its individual fields; `boundingBox` defaults to `nil`.
    public init(
        cells: [[String]],
        markdown: String,
        pageNumber: UInt32,
        boundingBox: BoundingBox? = nil
    ) {
        self.cells = cells
        self.markdown = markdown
        self.pageNumber = pageNumber
        self.boundingBox = boundingBox
    }

    /// Maps Swift property names onto the snake_case JSON keys.
    private enum CodingKeys: String, CodingKey {
        case cells, markdown
        case pageNumber = "page_number"
        case boundingBox = "bounding_box"
    }

    /// Decodes a table, tolerating absent keys.
    ///
    /// Missing keys fall back to an empty cell grid, empty Markdown,
    /// page number `0`, and no bounding box.
    public init(from decoder: any Decoder) throws {
        let values = try decoder.container(keyedBy: CodingKeys.self)
        cells = try values.decodeIfPresent([[String]].self, forKey: .cells) ?? []
        markdown = try values.decodeIfPresent(String.self, forKey: .markdown) ?? ""
        pageNumber = try values.decodeIfPresent(UInt32.self, forKey: .pageNumber) ?? 0
        boundingBox = try values.decodeIfPresent(BoundingBox.self, forKey: .boundingBox)
    }
}

// MARK: - Internal FFI conversions for Table
internal extension Table {
    /// Creates a `Table` by reading every field from a bridged Rust reference.
    ///
    /// The cell grid is read as JSON text and decoded into `[[String]]`.
    ///
    /// - Parameter rb: Bridge reference whose accessors supply the field values.
    /// - Throws: Any error from decoding the cells or converting the bounding box.
    init(_ rb: RustBridge.TableRef) throws {
        let cellsJSON = rb.cells().toString()
        // If the text cannot be turned into UTF-8 bytes, the literal "null" is decoded instead.
        let cellsData = cellsJSON.data(using: .utf8) ?? Data("null".utf8)
        cells = try JSONDecoder().decode([[String]].self, from: cellsData)
        markdown = rb.markdown().toString()
        pageNumber = rb.pageNumber()
        boundingBox = try rb.boundingBox().map { try BoundingBox($0) }
    }

    /// Converts this value into its Rust counterpart by way of JSON.
    ///
    /// - Returns: The value produced by `RustBridge.tableFromJson`.
    /// - Throws: Any encoding error, or any error raised by the bridge call.
    func intoRust() throws -> RustBridge.Table {
        let encoded = try JSONEncoder().encode(self)
        // Falls back to an empty JSON object if the bytes cannot be read as UTF-8.
        let payload = String(data: encoded, encoding: .utf8) ?? "{}"
        return try RustBridge.tableFromJson(payload)
    }
}

/// A single table cell with its content and optional styling.
///
/// Serves as a future extension point for rich tables with cell-level metadata.
public struct TableCell: Codable, Sendable, Hashable {
    /// Text content of the cell.
    public let content: String
    /// Number of rows the cell spans.
    public let rowSpan: UInt32
    /// Number of columns the cell spans.
    public let colSpan: UInt32
    /// Whether the cell is a header cell.
    public let isHeader: Bool

    /// Creates a cell from its individual fields.
    public init(
        content: String,
        rowSpan: UInt32,
        colSpan: UInt32,
        isHeader: Bool
    ) {
        self.content = content
        self.rowSpan = rowSpan
        self.colSpan = colSpan
        self.isHeader = isHeader
    }

    /// Maps Swift property names onto the snake_case JSON keys.
    private enum CodingKeys: String, CodingKey {
        case content
        case rowSpan = "row_span"
        case colSpan = "col_span"
        case isHeader = "is_header"
    }

    /// Decodes a cell, tolerating absent keys.
    ///
    /// Missing keys fall back to empty content, spans of `0`, and `false` for `isHeader`.
    public init(from decoder: any Decoder) throws {
        let values = try decoder.container(keyedBy: CodingKeys.self)
        content = try values.decodeIfPresent(String.self, forKey: .content) ?? ""
        rowSpan = try values.decodeIfPresent(UInt32.self, forKey: .rowSpan) ?? 0
        colSpan = try values.decodeIfPresent(UInt32.self, forKey: .colSpan) ?? 0
        isHeader = try values.decodeIfPresent(Bool.self, forKey: .isHeader) ?? false
    }
}

// MARK: - Internal FFI conversions for TableCell
internal extension TableCell {
    /// Creates a `TableCell` by reading every field from a bridged Rust reference.
    ///
    /// - Parameter rb: Bridge reference whose accessors supply the field values.
    init(_ rb: RustBridge.TableCellRef) throws {
        self.init(
            content: rb.content().toString(),
            rowSpan: rb.rowSpan(),
            colSpan: rb.colSpan(),
            isHeader: rb.isHeader()
        )
    }

    /// Builds the Rust counterpart directly from this value's fields.
    func intoRust() throws -> RustBridge.TableCell {
        let rustContent = RustString(content)
        return RustBridge.TableCell(rustContent, rowSpan, colSpan, isHeader)
    }
}

/// A URI found in a document.
///
/// Covers any link, reference, or resource pointer discovered during
/// extraction. `kind` classifies the URI semantically, and `label` holds
/// optional human-readable display text.
public struct ExtractedUri: Codable, Sendable, Hashable {
    /// URL or path string.
    public let url: String
    /// Display text / label of the link, if any.
    public let label: String?
    /// 1-indexed page number where the URI was found, if any.
    public let page: UInt32?
    /// Semantic classification of the URI.
    public let kind: UriKind

    /// Creates a URI entry from its individual fields; `label` and `page` default to `nil`.
    public init(
        url: String,
        label: String? = nil,
        page: UInt32? = nil,
        kind: UriKind
    ) {
        self.url = url
        self.label = label
        self.page = page
        self.kind = kind
    }
}

// MARK: - Internal FFI conversions for ExtractedUri
internal extension ExtractedUri {
    /// Creates an `ExtractedUri` by reading every field from a bridged Rust reference.
    ///
    /// - Parameter rb: Bridge reference whose accessors supply the field values.
    /// - Throws: `DecodingError.dataCorrupted` when the bridged kind string matches no
    ///   `UriKind` raw value.
    init(_ rb: RustBridge.ExtractedUriRef) throws {
        self.url = rb.url().toString()
        self.label = rb.label()?.toString()
        self.page = rb.page()

        // This initializer already throws, so report an unrecognized kind as an
        // error the caller can handle rather than terminating the process.
        let kindName = rb.kind().toString()
        guard let kind = UriKind(rawValue: kindName) else {
            throw DecodingError.dataCorrupted(
                DecodingError.Context(
                    codingPath: [],
                    debugDescription: "Unknown UriKind: \(kindName)"
                )
            )
        }
        self.kind = kind
    }

    /// Converts this value into its Rust counterpart by way of JSON.
    ///
    /// - Returns: The value produced by `RustBridge.extractedUriFromJson`.
    /// - Throws: Any encoding error, or any error raised by the bridge call.
    func intoRust() throws -> RustBridge.ExtractedUri {
        let data = try JSONEncoder().encode(self)
        // Falls back to an empty JSON object if the bytes cannot be read as UTF-8.
        let json = String(data: data, encoding: .utf8) ?? "{}"
        return try RustBridge.extractedUriFromJson(json)
    }
}

/// Response returned by MIME type detection.
public struct DetectResponse: Codable, Sendable, Hashable {
    /// The MIME type that was detected.
    public let mimeType: String
    /// The original filename, if one was provided.
    public let filename: String?

    /// Creates a response from a MIME type and an optional filename.
    public init(
        mimeType: String,
        filename: String? = nil
    ) {
        self.mimeType = mimeType
        self.filename = filename
    }

    /// Maps Swift property names onto the snake_case JSON keys.
    private enum CodingKeys: String, CodingKey {
        case mimeType = "mime_type"
        case filename
    }
}

// MARK: - Internal FFI conversions for DetectResponse
internal extension DetectResponse {
    /// Creates a `DetectResponse` by reading every field from a bridged Rust reference.
    ///
    /// - Parameter rb: Bridge reference whose accessors supply the field values.
    init(_ rb: RustBridge.DetectResponseRef) throws {
        self.init(
            mimeType: rb.mimeType().toString(),
            filename: rb.filename()?.toString()
        )
    }

    /// Converts this value into its Rust counterpart by way of JSON.
    ///
    /// - Returns: The value produced by `RustBridge.detectResponseFromJson`.
    /// - Throws: Any encoding error, or any error raised by the bridge call.
    func intoRust() throws -> RustBridge.DetectResponse {
        let encoded = try JSONEncoder().encode(self)
        // Falls back to an empty JSON object if the bytes cannot be read as UTF-8.
        let payload = String(data: encoded, encoding: .utf8) ?? "{}"
        return try RustBridge.detectResponseFromJson(payload)
    }
}

/// Options that control the comparison of two `ExtractionResult` values.
public struct DiffOptions: Codable, Sendable, Hashable {
    /// Whether metadata changes are part of the diff. Default: `true`.
    public let includeMetadata: Bool
    /// Whether embedded-children changes are part of the diff. Default: `true`.
    public let includeEmbedded: Bool
    /// Character limit applied to content before diffing.
    ///
    /// Handy for very large documents where only the first N characters
    /// matter. `nil` means no truncation.
    public let maxContentChars: UInt?

    /// Creates options from explicit values; `maxContentChars` defaults to `nil`.
    public init(
        includeMetadata: Bool,
        includeEmbedded: Bool,
        maxContentChars: UInt? = nil
    ) {
        self.includeMetadata = includeMetadata
        self.includeEmbedded = includeEmbedded
        self.maxContentChars = maxContentChars
    }

    /// Maps Swift property names onto the snake_case JSON keys.
    private enum CodingKeys: String, CodingKey {
        case includeMetadata = "include_metadata"
        case includeEmbedded = "include_embedded"
        case maxContentChars = "max_content_chars"
    }

    /// Decodes options, applying the defaults for absent keys.
    ///
    /// Both include flags fall back to `true`; `maxContentChars` falls back to `nil`.
    public init(from decoder: any Decoder) throws {
        let values = try decoder.container(keyedBy: CodingKeys.self)
        includeMetadata = try values.decodeIfPresent(Bool.self, forKey: .includeMetadata) ?? true
        includeEmbedded = try values.decodeIfPresent(Bool.self, forKey: .includeEmbedded) ?? true
        maxContentChars = try values.decodeIfPresent(UInt.self, forKey: .maxContentChars)
    }
}

// MARK: - Internal FFI conversions for DiffOptions
internal extension DiffOptions {
    /// Creates `DiffOptions` by reading every field from a bridged Rust reference.
    ///
    /// - Parameter rb: Bridge reference whose accessors supply the field values.
    init(_ rb: RustBridge.DiffOptionsRef) throws {
        self.init(
            includeMetadata: rb.includeMetadata(),
            includeEmbedded: rb.includeEmbedded(),
            maxContentChars: rb.maxContentChars()
        )
    }

    /// Builds the Rust counterpart directly from this value's fields.
    func intoRust() throws -> RustBridge.DiffOptions {
        RustBridge.DiffOptions(includeMetadata, includeEmbedded, maxContentChars)
    }
}

/// The complete diff between two `ExtractionResult` values.
///
/// Declared as a direct alias of the bridged `RustBridge.ExtractionDiff` type
/// rather than as a separate Swift struct.
public typealias ExtractionDiff = RustBridge.ExtractionDiff

/// One contiguous hunk of a unified diff.
public struct DiffHunk: Codable, Sendable, Hashable {
    /// 0-indexed starting line number in the old content.
    public let fromLine: UInt
    /// How many lines of the old content the hunk contains.
    public let fromCount: UInt
    /// 0-indexed starting line number in the new content.
    public let toLine: UInt
    /// How many lines of the new content the hunk contains.
    public let toCount: UInt
    /// The lines making up the hunk.
    public let lines: [DiffLine]

    /// Creates a hunk from its line ranges and its lines.
    public init(
        fromLine: UInt,
        fromCount: UInt,
        toLine: UInt,
        toCount: UInt,
        lines: [DiffLine]
    ) {
        self.fromLine = fromLine
        self.fromCount = fromCount
        self.toLine = toLine
        self.toCount = toCount
        self.lines = lines
    }

    /// Maps Swift property names onto the snake_case JSON keys.
    private enum CodingKeys: String, CodingKey {
        case fromLine = "from_line"
        case fromCount = "from_count"
        case toLine = "to_line"
        case toCount = "to_count"
        case lines
    }
}

// MARK: - Internal FFI conversions for DiffHunk
internal extension DiffHunk {
    /// Creates a `DiffHunk` by reading every field from a bridged Rust reference.
    ///
    /// Each element of `rb.lines()` is treated as JSON text and decoded into a `DiffLine`.
    ///
    /// - Parameter rb: Bridge reference whose accessors supply the field values.
    /// - Throws: Any error from decoding a diff line.
    init(_ rb: RustBridge.DiffHunkRef) throws {
        self.fromLine = rb.fromLine()
        self.fromCount = rb.fromCount()
        self.toLine = rb.toLine()
        self.toCount = rb.toCount()
        // A single decoder is reused for all lines instead of allocating one per element.
        let decoder = JSONDecoder()
        self.lines = try rb.lines().map { (s: RustStringRef) -> DiffLine in
            try decoder.decode(DiffLine.self, from: Data(s.as_str().toString().utf8))
        }
    }

    /// Converts this value into its Rust counterpart by way of JSON.
    ///
    /// - Returns: The value produced by `RustBridge.diffHunkFromJson`.
    /// - Throws: Any encoding error, or any error raised by the bridge call.
    func intoRust() throws -> RustBridge.DiffHunk {
        let data = try JSONEncoder().encode(self)
        // Falls back to an empty JSON object if the bytes cannot be read as UTF-8.
        let json = String(data: data, encoding: .utf8) ?? "{}"
        return try RustBridge.diffHunkFromJson(json)
    }
}

/// Cell-level changes between two tables that share the same index.
public struct TableDiff: Codable, Sendable, Hashable {
    /// Zero-based index of the table in both `a.tables` and `b.tables`.
    public let fromIndex: UInt
    /// Zero-based index in `b.tables` (the same as `from_index` for same-dimension tables).
    public let toIndex: UInt
    /// The changed cells within the table.
    public let cellChanges: [CellChange]

    /// Creates a table diff from its two indices and its cell changes.
    public init(
        fromIndex: UInt,
        toIndex: UInt,
        cellChanges: [CellChange]
    ) {
        self.fromIndex = fromIndex
        self.toIndex = toIndex
        self.cellChanges = cellChanges
    }

    /// Maps Swift property names onto the snake_case JSON keys.
    private enum CodingKeys: String, CodingKey {
        case fromIndex = "from_index"
        case toIndex = "to_index"
        case cellChanges = "cell_changes"
    }
}

// MARK: - Internal FFI conversions for TableDiff
internal extension TableDiff {
    /// Creates a `TableDiff` by reading every field from a bridged Rust reference.
    ///
    /// - Parameter rb: Bridge reference whose accessors supply the field values.
    /// - Throws: Any error raised while converting a cell change.
    init(_ rb: RustBridge.TableDiffRef) throws {
        fromIndex = rb.fromIndex()
        toIndex = rb.toIndex()
        cellChanges = try rb.cellChanges().map { try CellChange($0) }
    }

    /// Converts this value into its Rust counterpart by way of JSON.
    ///
    /// - Returns: The value produced by `RustBridge.tableDiffFromJson`.
    /// - Throws: Any encoding error, or any error raised by the bridge call.
    func intoRust() throws -> RustBridge.TableDiff {
        let encoded = try JSONEncoder().encode(self)
        // Falls back to an empty JSON object if the bytes cannot be read as UTF-8.
        let payload = String(data: encoded, encoding: .utf8) ?? "{}"
        return try RustBridge.tableDiffFromJson(payload)
    }
}

/// Changes to embedded archive children between two results.
///
/// Declared as a direct alias of the bridged `RustBridge.EmbeddedChanges` type
/// rather than as a separate Swift struct.
public typealias EmbeddedChanges = RustBridge.EmbeddedChanges

/// Diff for a single embedded archive entry that appears in both results.
///
/// Declared as a direct alias of the bridged `RustBridge.EmbeddedDiff` type
/// rather than as a separate Swift struct.
public typealias EmbeddedDiff = RustBridge.EmbeddedDiff

/// A preset configuration for a common RAG use case.
///
/// A preset bundles chunk size, overlap, and embedding model into a
/// configuration tuned for a particular scenario.
///
/// Upstream, all string fields are owned `String` values for FFI
/// compatibility — instances are safe to clone and pass across language
/// boundaries.
public struct EmbeddingPreset: Codable, Sendable, Hashable {
    /// Name of the preset.
    public let name: String
    /// Chunk size of the preset.
    public let chunkSize: UInt
    /// Overlap of the preset.
    public let overlap: UInt
    /// HuggingFace repository name of the model.
    public let modelRepo: String
    /// Pooling strategy: "cls" or "mean".
    public let pooling: String
    /// Path of the ONNX model file inside the repo.
    public let modelFile: String
    /// Dimensions value of the preset (presumably the embedding size — confirm upstream).
    public let dimensions: UInt
    /// Description of the preset.
    public let description: String

    /// Creates a preset from its individual fields.
    public init(
        name: String,
        chunkSize: UInt,
        overlap: UInt,
        modelRepo: String,
        pooling: String,
        modelFile: String,
        dimensions: UInt,
        description: String
    ) {
        self.name = name
        self.chunkSize = chunkSize
        self.overlap = overlap
        self.modelRepo = modelRepo
        self.pooling = pooling
        self.modelFile = modelFile
        self.dimensions = dimensions
        self.description = description
    }

    /// Maps Swift property names onto the snake_case JSON keys.
    private enum CodingKeys: String, CodingKey {
        case name
        case chunkSize = "chunk_size"
        case overlap
        case modelRepo = "model_repo"
        case pooling
        case modelFile = "model_file"
        case dimensions, description
    }
}

// MARK: - Internal FFI conversions for EmbeddingPreset
internal extension EmbeddingPreset {
    /// Creates an `EmbeddingPreset` by reading every field from a bridged Rust reference.
    ///
    /// - Parameter rb: Bridge reference whose accessors supply the field values.
    init(_ rb: RustBridge.EmbeddingPresetRef) throws {
        name = rb.name().toString()
        chunkSize = rb.chunkSize()
        overlap = rb.overlap()
        modelRepo = rb.modelRepo().toString()
        pooling = rb.pooling().toString()
        modelFile = rb.modelFile().toString()
        dimensions = rb.dimensions()
        description = rb.description().toString()
    }

    /// Converts this value into its Rust counterpart by way of JSON.
    ///
    /// - Returns: The value produced by `RustBridge.embeddingPresetFromJson`.
    /// - Throws: Any encoding error, or any error raised by the bridge call.
    func intoRust() throws -> RustBridge.EmbeddingPreset {
        let encoded = try JSONEncoder().encode(self)
        // Falls back to an empty JSON object if the bytes cannot be read as UTF-8.
        let payload = String(data: encoded, encoding: .utf8) ?? "{}"
        return try RustBridge.embeddingPresetFromJson(payload)
    }
}

/// Parameters specific to YAKE.
public struct YakeParams: Codable, Sendable, Hashable {
    /// Co-occurrence analysis window size (default: 2).
    ///
    /// Sets the context window used when computing co-occurrence statistics.
    public let windowSize: UInt

    /// Creates parameters with an explicit window size.
    public init(
        windowSize: UInt
    ) {
        self.windowSize = windowSize
    }

    /// Maps Swift property names onto the snake_case JSON keys.
    private enum CodingKeys: String, CodingKey {
        case windowSize = "window_size"
    }

    /// Decodes parameters, using a window size of `2` when the key is absent.
    public init(from decoder: any Decoder) throws {
        let values = try decoder.container(keyedBy: CodingKeys.self)
        windowSize = try values.decodeIfPresent(UInt.self, forKey: .windowSize) ?? 2
    }
}

// MARK: - Internal FFI conversions for YakeParams
internal extension YakeParams {
    /// Copies the window size out of a bridge reference.
    init(_ rb: RustBridge.YakeParamsRef) throws {
        windowSize = rb.windowSize()
    }

    /// Builds the bridge value directly from the stored window size.
    func intoRust() throws -> RustBridge.YakeParams {
        RustBridge.YakeParams(windowSize)
    }
}

/// RAKE-specific parameters.
public struct RakeParams: Codable, Sendable, Hashable {
    /// Minimum word length to consider (default: 1).
    public let minWordLength: UInt
    /// Maximum words in a keyword phrase (default: 3).
    public let maxWordsPerPhrase: UInt

    /// Creates RAKE parameters with both limits given explicitly.
    public init(minWordLength: UInt, maxWordsPerPhrase: UInt) {
        self.minWordLength = minWordLength
        self.maxWordsPerPhrase = maxWordsPerPhrase
    }

    /// Maps Swift property names to their snake_case wire keys.
    private enum CodingKeys: String, CodingKey {
        case minWordLength = "min_word_length"
        case maxWordsPerPhrase = "max_words_per_phrase"
    }

    /// Decodes the parameters; a missing or null key falls back to 1 (minimum word
    /// length) or 3 (maximum words per phrase).
    public init(from decoder: any Decoder) throws {
        let fields = try decoder.container(keyedBy: CodingKeys.self)
        let decodedMinLength = try fields.decodeIfPresent(UInt.self, forKey: .minWordLength)
        minWordLength = decodedMinLength ?? 1
        let decodedMaxWords = try fields.decodeIfPresent(UInt.self, forKey: .maxWordsPerPhrase)
        maxWordsPerPhrase = decodedMaxWords ?? 3
    }
}

// MARK: - Internal FFI conversions for RakeParams
internal extension RakeParams {
    /// Copies both limits out of a bridge reference.
    init(_ rb: RustBridge.RakeParamsRef) throws {
        minWordLength = rb.minWordLength()
        maxWordsPerPhrase = rb.maxWordsPerPhrase()
    }

    /// Builds the bridge value directly from the two stored limits.
    func intoRust() throws -> RustBridge.RakeParams {
        RustBridge.RakeParams(minWordLength, maxWordsPerPhrase)
    }
}

/// Keyword extraction configuration.
///
/// Decoding is lenient: only `algorithm` is required. Any other key may be omitted or
/// null, in which case the default documented on the corresponding property is used.
public struct KeywordConfig: Codable, Sendable, Hashable {
    /// Algorithm to use for extraction.
    public let algorithm: KeywordAlgorithm
    /// Maximum number of keywords to extract (default: 10).
    public let maxKeywords: UInt
    /// Minimum score threshold (0.0-1.0, default: 0.0).
    ///
    /// Keywords with scores below this threshold are filtered out.
    /// Note: Score ranges differ between algorithms.
    public let minScore: Float
    /// N-gram range for keyword extraction (min, max).
    ///
    /// (1, 1) = unigrams only
    /// (1, 2) = unigrams and bigrams
    /// (1, 3) = unigrams, bigrams, and trigrams (default)
    public let ngramRange: [UInt]
    /// Language code for stopword filtering (e.g., "en", "de", "fr").
    ///
    /// If None, no stopword filtering is applied.
    public let language: String?
    /// YAKE-specific tuning parameters.
    public let yakeParams: YakeParams?
    /// RAKE-specific tuning parameters.
    public let rakeParams: RakeParams?
    /// Creates a configuration from explicit values; the three optional settings default to `nil`.
    public init(algorithm: KeywordAlgorithm, maxKeywords: UInt, minScore: Float, ngramRange: [UInt], language: String? = nil, yakeParams: YakeParams? = nil, rakeParams: RakeParams? = nil) {
        self.algorithm = algorithm
        self.maxKeywords = maxKeywords
        self.minScore = minScore
        self.ngramRange = ngramRange
        self.language = language
        self.yakeParams = yakeParams
        self.rakeParams = rakeParams
    }
    /// Maps Swift property names to their snake_case wire keys.
    private enum CodingKeys: String, CodingKey {
        case algorithm = "algorithm"
        case maxKeywords = "max_keywords"
        case minScore = "min_score"
        case ngramRange = "ngram_range"
        case language = "language"
        case yakeParams = "yake_params"
        case rakeParams = "rake_params"
    }
    /// Decodes a configuration, filling in documented defaults for missing or null keys.
    ///
    /// - Throws: `DecodingError` when `algorithm` is missing or any present value has the wrong type.
    public init(from decoder: any Decoder) throws {
        let container = try decoder.container(keyedBy: CodingKeys.self)
        self.algorithm = try container.decode(KeywordAlgorithm.self, forKey: .algorithm)
        self.maxKeywords = try container.decodeIfPresent(UInt.self, forKey: .maxKeywords) ?? 10
        self.minScore = try container.decodeIfPresent(Float.self, forKey: .minScore) ?? 0.0
        // A missing range falls back to the documented (1, 3) default. An empty array is
        // not a (min, max) pair, so it must not be used as the fallback.
        self.ngramRange = try container.decodeIfPresent([UInt].self, forKey: .ngramRange) ?? [1, 3]
        // `decodeIfPresent` already yields `nil` for absent keys, so no further fallback is needed.
        self.language = try container.decodeIfPresent(String.self, forKey: .language)
        self.yakeParams = try container.decodeIfPresent(YakeParams.self, forKey: .yakeParams)
        self.rakeParams = try container.decodeIfPresent(RakeParams.self, forKey: .rakeParams)
    }
}

// MARK: - Internal FFI conversions for KeywordConfig
internal extension KeywordConfig {
    /// Builds a Swift `KeywordConfig` from a bridge reference.
    ///
    /// - Throws: `DecodingError.dataCorrupted` when the bridge reports an algorithm name with
    ///   no matching `KeywordAlgorithm` case, plus any error thrown while converting the
    ///   nested YAKE/RAKE parameters.
    init(_ rb: RustBridge.KeywordConfigRef) throws {
        // This initializer already throws, so an unknown name is reported as an error the
        // caller can handle rather than terminating the process. The name is read once.
        let algorithmName = rb.algorithm().toString()
        guard let algorithm = KeywordAlgorithm(rawValue: algorithmName) else {
            throw DecodingError.dataCorrupted(
                DecodingError.Context(
                    codingPath: [],
                    debugDescription: "Unknown KeywordAlgorithm: \(algorithmName)"
                )
            )
        }
        self.algorithm = algorithm
        self.maxKeywords = rb.maxKeywords()
        self.minScore = rb.minScore()
        self.ngramRange = Array(rb.ngramRange())
        self.language = rb.language()?.toString()
        self.yakeParams = try rb.yakeParams().map { try YakeParams($0) }
        self.rakeParams = try rb.rakeParams().map { try RakeParams($0) }
    }
    /// Converts this configuration to its bridge type by JSON-encoding it and passing the
    /// text to `RustBridge.keywordConfigFromJson`.
    ///
    /// If the encoded bytes cannot be read as UTF-8, the literal `{}` is passed instead.
    ///
    /// - Throws: Errors from `JSONEncoder` or from the bridge call.
    func intoRust() throws -> RustBridge.KeywordConfig {
        let data = try JSONEncoder().encode(self)
        let json = String(data: data, encoding: .utf8) ?? "{}"
        return try RustBridge.keywordConfigFromJson(json)
    }
}

/// Extracted keyword with metadata.
public struct Keyword: Codable, Sendable, Hashable {
    /// The keyword text.
    public let text: String
    /// Relevance score (higher is better, algorithm-specific range).
    public let score: Float
    /// Algorithm that extracted this keyword.
    public let algorithm: KeywordAlgorithm
    /// Optional positions where keyword appears in text (character offsets).
    public let positions: [UInt]?

    /// Creates a keyword; `positions` defaults to `nil`.
    public init(text: String, score: Float, algorithm: KeywordAlgorithm, positions: [UInt]? = nil) {
        self.text = text
        self.score = score
        self.algorithm = algorithm
        self.positions = positions
    }

    /// Wire keys, spelled out explicitly; each one equals the property name.
    private enum CodingKeys: String, CodingKey {
        case text = "text"
        case score = "score"
        case algorithm = "algorithm"
        case positions = "positions"
    }
}

// MARK: - Internal FFI conversions for Keyword
internal extension Keyword {
    /// Builds a Swift `Keyword` from a bridge reference.
    ///
    /// - Throws: `DecodingError.dataCorrupted` when the bridge reports an algorithm name with
    ///   no matching `KeywordAlgorithm` case.
    init(_ rb: RustBridge.KeywordRef) throws {
        self.text = rb.text().toString()
        self.score = rb.score()
        // This initializer already throws, so an unknown name is reported as an error the
        // caller can handle rather than terminating the process. The name is read once.
        let algorithmName = rb.algorithm().toString()
        guard let algorithm = KeywordAlgorithm(rawValue: algorithmName) else {
            throw DecodingError.dataCorrupted(
                DecodingError.Context(
                    codingPath: [],
                    debugDescription: "Unknown KeywordAlgorithm: \(algorithmName)"
                )
            )
        }
        self.algorithm = algorithm
        self.positions = rb.positions().map { Array($0) }
    }
    /// Converts this keyword to its bridge type by JSON-encoding it and passing the text
    /// to `RustBridge.keywordFromJson`.
    ///
    /// If the encoded bytes cannot be read as UTF-8, the literal `{}` is passed instead.
    ///
    /// - Throws: Errors from `JSONEncoder` or from the bridge call.
    func intoRust() throws -> RustBridge.Keyword {
        let data = try JSONEncoder().encode(self)
        let json = String(data: data, encoding: .utf8) ?? "{}"
        return try RustBridge.keywordFromJson(json)
    }
}

/// Configuration for PaddleOCR backend.
///
/// Configures PaddleOCR text detection and recognition with multi-language support.
/// Uses a builder pattern for convenient configuration.
///
/// In Swift this name is an alias for `RustBridge.PaddleOcrConfig`; the type itself is
/// declared by the bridge module, not in this file.
///
/// # Examples
///
/// The snippet below is written against the Rust API (note the `use` statement and the
/// `::` paths); it is not Swift code.
///
/// ```no_run
/// use kreuzberg::PaddleOcrConfig;
///
/// // Create with default English configuration
/// let config = PaddleOcrConfig::new("en");
///
/// // Create with custom cache directory
/// let config = PaddleOcrConfig::new("ch")
///     .with_cache_dir("/path/to/cache".into());
///
/// // Enable table detection
/// let config = PaddleOcrConfig::new("en")
///     .with_table_detection(true);
/// ```
public typealias PaddleOcrConfig = RustBridge.PaddleOcrConfig

/// Combined paths to all models needed for OCR (backward compatibility).
///
/// Alias for `RustBridge.ModelPaths`; the type itself is declared by the bridge module.
public typealias ModelPaths = RustBridge.ModelPaths

/// Document orientation detection result.
public struct OrientationResult: Codable, Sendable, Hashable {
    /// Detected orientation in degrees (0, 90, 180, or 270).
    public let degrees: UInt32
    /// Confidence score (0.0-1.0).
    public let confidence: Float

    /// Creates a result from an orientation and its confidence.
    public init(degrees: UInt32, confidence: Float) {
        self.degrees = degrees
        self.confidence = confidence
    }

    /// Wire keys, spelled out explicitly; each one equals the property name.
    private enum CodingKeys: String, CodingKey {
        case degrees = "degrees"
        case confidence = "confidence"
    }
}

// MARK: - Internal FFI conversions for OrientationResult
internal extension OrientationResult {
    /// Copies orientation and confidence out of a bridge reference.
    init(_ rb: RustBridge.OrientationResultRef) throws {
        degrees = rb.degrees()
        confidence = rb.confidence()
    }

    /// Builds the bridge value directly from the two stored fields.
    func intoRust() throws -> RustBridge.OrientationResult {
        RustBridge.OrientationResult(degrees, confidence)
    }
}

/// Bounding box in original image coordinates (x1, y1) top-left, (x2, y2) bottom-right.
public struct BBox: Codable, Sendable, Hashable {
    /// X coordinate of the top-left corner.
    public let x1: Float
    /// Y coordinate of the top-left corner.
    public let y1: Float
    /// X coordinate of the bottom-right corner.
    public let x2: Float
    /// Y coordinate of the bottom-right corner.
    public let y2: Float

    /// Creates a box from its top-left and bottom-right corners.
    public init(x1: Float, y1: Float, x2: Float, y2: Float) {
        self.x1 = x1
        self.y1 = y1
        self.x2 = x2
        self.y2 = y2
    }

    /// Wire keys, spelled out explicitly; each one equals the property name.
    private enum CodingKeys: String, CodingKey {
        case x1 = "x1"
        case y1 = "y1"
        case x2 = "x2"
        case y2 = "y2"
    }
}

// MARK: - Internal FFI conversions for BBox
internal extension BBox {
    /// Copies the four coordinates out of a bridge reference.
    init(_ rb: RustBridge.BBoxRef) throws {
        x1 = rb.x1()
        y1 = rb.y1()
        x2 = rb.x2()
        y2 = rb.y2()
    }

    /// Builds the bridge value directly from the four stored coordinates.
    func intoRust() throws -> RustBridge.BBox {
        RustBridge.BBox(x1, y1, x2, y2)
    }
}

/// A single layout detection result.
///
/// Serialized with the keys `class_name`, `confidence`, and `bbox` (see `CodingKeys`).
public struct LayoutDetection: Codable, Sendable, Hashable {
    /// Layout class assigned to this detection.
    public let className: LayoutClass
    /// Confidence value attached to this detection.
    /// NOTE(review): the valid range is not visible in this file; confirm against the Rust source.
    public let confidence: Float
    /// Bounding box of the detected region.
    public let bbox: BBox
    /// Creates a detection from its class, confidence, and bounding box.
    public init(className: LayoutClass, confidence: Float, bbox: BBox) {
        self.className = className
        self.confidence = confidence
        self.bbox = bbox
    }
    /// Maps Swift property names to wire keys; only `className` differs (`class_name`).
    private enum CodingKeys: String, CodingKey {
        case className = "class_name"
        case confidence = "confidence"
        case bbox = "bbox"
    }
}

// MARK: - Internal FFI conversions for LayoutDetection
internal extension LayoutDetection {
    /// Builds a Swift `LayoutDetection` from a bridge reference.
    ///
    /// - Throws: `DecodingError.dataCorrupted` when the bridge reports a class name with no
    ///   matching `LayoutClass` case, plus any error thrown while converting the bounding box.
    init(_ rb: RustBridge.LayoutDetectionRef) throws {
        // This initializer already throws, so an unknown name is reported as an error the
        // caller can handle rather than terminating the process. The name is read once.
        let rawClassName = rb.className().toString()
        guard let className = LayoutClass(rawValue: rawClassName) else {
            throw DecodingError.dataCorrupted(
                DecodingError.Context(
                    codingPath: [],
                    debugDescription: "Unknown LayoutClass: \(rawClassName)"
                )
            )
        }
        self.className = className
        self.confidence = rb.confidence()
        self.bbox = try BBox(rb.bbox())
    }
    /// Converts this detection to its bridge type by JSON-encoding it and passing the text
    /// to `RustBridge.layoutDetectionFromJson`.
    ///
    /// If the encoded bytes cannot be read as UTF-8, the literal `{}` is passed instead.
    ///
    /// - Throws: Errors from `JSONEncoder` or from the bridge call.
    func intoRust() throws -> RustBridge.LayoutDetection {
        let data = try JSONEncoder().encode(self)
        let json = String(data: data, encoding: .utf8) ?? "{}"
        return try RustBridge.layoutDetectionFromJson(json)
    }
}

/// Pre-computed table markdown for a table detection region.
///
/// Produced by the TATR-based table structure recognizer and surfaced as part of
/// layout-aware OCR results. The struct lives here (under `layout-types`, pure-Rust)
/// so that consumers who do not enable `layout-detection` (ORT) can still reference
/// the type in their own code.
///
/// Serialized with the keys `detection_bbox`, `cells`, and `markdown` (see `CodingKeys`).
public struct RecognizedTable: Codable, Sendable, Hashable {
    /// Detection bbox that this table corresponds to (for matching).
    public let detectionBbox: BBox
    /// Table cells as a 2D vector (rows × columns).
    public let cells: [[String]]
    /// Rendered markdown table.
    public let markdown: String
    /// Creates a recognized table from its detection box, cell grid, and rendered markdown.
    public init(detectionBbox: BBox, cells: [[String]], markdown: String) {
        self.detectionBbox = detectionBbox
        self.cells = cells
        self.markdown = markdown
    }
    /// Maps Swift property names to wire keys; only `detectionBbox` differs (`detection_bbox`).
    private enum CodingKeys: String, CodingKey {
        case detectionBbox = "detection_bbox"
        case cells = "cells"
        case markdown = "markdown"
    }
}

// MARK: - Internal FFI conversions for RecognizedTable
internal extension RecognizedTable {
    /// Builds a Swift `RecognizedTable` from a bridge reference.
    ///
    /// The bridge supplies the cell grid as JSON text, which is decoded into `[[String]]`.
    ///
    /// - Throws: Errors from converting the bounding box or from decoding the cell JSON.
    init(_ rb: RustBridge.RecognizedTableRef) throws {
        detectionBbox = try BBox(rb.detectionBbox())
        let cellsJson = rb.cells().toString()
        // If the text cannot be turned into UTF-8 bytes, decode the literal `null` instead.
        let cellsData = cellsJson.data(using: .utf8) ?? Data("null".utf8)
        cells = try JSONDecoder().decode([[String]].self, from: cellsData)
        markdown = rb.markdown().toString()
    }

    /// Converts this table to its bridge type by JSON-encoding it and passing the text
    /// to `RustBridge.recognizedTableFromJson`.
    ///
    /// If the encoded bytes cannot be read as UTF-8, the literal `{}` is passed instead.
    ///
    /// - Throws: Errors from `JSONEncoder` or from the bridge call.
    func intoRust() throws -> RustBridge.RecognizedTable {
        let encoded = try JSONEncoder().encode(self)
        let payload = String(data: encoded, encoding: .utf8)
        return try RustBridge.recognizedTableFromJson(payload ?? "{}")
    }
}

/// Page-level detection result containing all detections and page metadata.
///
/// Serialized with the keys `page_width`, `page_height`, and `detections` (see `CodingKeys`).
public struct DetectionResult: Codable, Sendable, Hashable {
    /// Width of the page. NOTE(review): the unit is not visible in this file; confirm against the Rust source.
    public let pageWidth: UInt32
    /// Height of the page, in the same unit as `pageWidth`.
    public let pageHeight: UInt32
    /// All layout detections reported for the page.
    public let detections: [LayoutDetection]
    /// Creates a result from the page dimensions and its detections.
    public init(pageWidth: UInt32, pageHeight: UInt32, detections: [LayoutDetection]) {
        self.pageWidth = pageWidth
        self.pageHeight = pageHeight
        self.detections = detections
    }
    /// Maps Swift property names to wire keys; the two page dimensions use snake_case.
    private enum CodingKeys: String, CodingKey {
        case pageWidth = "page_width"
        case pageHeight = "page_height"
        case detections = "detections"
    }
}

// MARK: - Internal FFI conversions for DetectionResult
internal extension DetectionResult {
    /// Builds a Swift `DetectionResult` from a bridge reference, converting each detection in turn.
    ///
    /// - Throws: Any error thrown while converting an individual `LayoutDetection`.
    init(_ rb: RustBridge.DetectionResultRef) throws {
        pageWidth = rb.pageWidth()
        pageHeight = rb.pageHeight()
        detections = try rb.detections().map { bridged in
            try LayoutDetection(bridged)
        }
    }

    /// Converts this result to its bridge type by JSON-encoding it and passing the text
    /// to `RustBridge.detectionResultFromJson`.
    ///
    /// If the encoded bytes cannot be read as UTF-8, the literal `{}` is passed instead.
    ///
    /// - Throws: Errors from `JSONEncoder` or from the bridge call.
    func intoRust() throws -> RustBridge.DetectionResult {
        let encoded = try JSONEncoder().encode(self)
        let payload = String(data: encoded, encoding: .utf8)
        return try RustBridge.detectionResultFromJson(payload ?? "{}")
    }
}

/// Embedded file descriptor extracted from the PDF name tree.
///
/// Alias for `RustBridge.EmbeddedFile`; the type itself is declared by the bridge module.
public typealias EmbeddedFile = RustBridge.EmbeddedFile

/// PDF-specific metadata.
///
/// Contains metadata fields specific to PDF documents that are not in the common
/// `Metadata` structure. Common fields like title, authors, keywords, and dates
/// are at the `Metadata` level.
public struct PdfMetadata: Codable, Sendable, Hashable {
    /// PDF version (e.g., "1.7", "2.0")
    public let pdfVersion: String?
    /// PDF producer (application that created the PDF)
    public let producer: String?
    /// Whether the PDF is encrypted/password-protected
    public let isEncrypted: Bool?
    /// First page width in points (1/72 inch)
    public let width: Int64?
    /// First page height in points (1/72 inch)
    public let height: Int64?
    /// Total number of pages in the PDF document
    public let pageCount: UInt32?

    /// Creates PDF metadata; every field is optional and defaults to `nil`.
    public init(pdfVersion: String? = nil, producer: String? = nil, isEncrypted: Bool? = nil, width: Int64? = nil, height: Int64? = nil, pageCount: UInt32? = nil) {
        self.pdfVersion = pdfVersion
        self.producer = producer
        self.isEncrypted = isEncrypted
        self.width = width
        self.height = height
        self.pageCount = pageCount
    }

    /// Maps Swift property names to their snake_case wire keys.
    private enum CodingKeys: String, CodingKey {
        case pdfVersion = "pdf_version"
        case producer = "producer"
        case isEncrypted = "is_encrypted"
        case width = "width"
        case height = "height"
        case pageCount = "page_count"
    }

    /// Decodes the metadata; any key that is absent or null leaves its field `nil`.
    public init(from decoder: any Decoder) throws {
        let fields = try decoder.container(keyedBy: CodingKeys.self)
        pdfVersion = try fields.decodeIfPresent(String.self, forKey: .pdfVersion)
        producer = try fields.decodeIfPresent(String.self, forKey: .producer)
        isEncrypted = try fields.decodeIfPresent(Bool.self, forKey: .isEncrypted)
        width = try fields.decodeIfPresent(Int64.self, forKey: .width)
        height = try fields.decodeIfPresent(Int64.self, forKey: .height)
        pageCount = try fields.decodeIfPresent(UInt32.self, forKey: .pageCount)
    }
}

// MARK: - Internal FFI conversions for PdfMetadata
internal extension PdfMetadata {
    /// Copies every field out of a bridge reference, converting the optional text fields with `toString()`.
    init(_ rb: RustBridge.PdfMetadataRef) throws {
        pdfVersion = rb.pdfVersion()?.toString()
        producer = rb.producer()?.toString()
        isEncrypted = rb.isEncrypted()
        width = rb.width()
        height = rb.height()
        pageCount = rb.pageCount()
    }

    /// Builds the bridge value directly from the stored fields, wrapping the optional
    /// strings in `RustString`.
    func intoRust() throws -> RustBridge.PdfMetadata {
        let bridgedVersion = pdfVersion.map(RustString.init)
        let bridgedProducer = producer.map(RustString.init)
        return RustBridge.PdfMetadata(bridgedVersion, bridgedProducer, isEncrypted, width, height, pageCount)
    }
}

/// ONNX Runtime execution provider type.
///
/// Determines which hardware backend is used for model inference.
/// `Auto` (default) selects the best available provider per platform.
///
/// Every case states its wire value explicitly.
public enum ExecutionProviderType: String, Codable, Sendable, Hashable {
    /// Auto-select: CoreML on macOS, CUDA on Linux, CPU elsewhere.
    case auto = "auto"
    /// CPU execution provider (always available).
    case cpu = "cpu"
    /// Apple CoreML (macOS/iOS Neural Engine + GPU).
    case coreMl = "coreml"
    /// NVIDIA CUDA GPU acceleration.
    case cuda = "cuda"
    /// NVIDIA TensorRT (optimized CUDA inference).
    case tensorRt = "tensorrt"
}
extension ExecutionProviderType {
    /// Converts this value to its bridge type by JSON-encoding it and passing the text
    /// to `RustBridge.executionProviderTypeFromJson`.
    ///
    /// If the encoded bytes cannot be read as UTF-8, the literal `null` is passed instead.
    ///
    /// - Throws: Errors from `JSONEncoder` or from the bridge call.
    func intoRust() throws -> RustBridge.ExecutionProviderType {
        let encoded = try JSONEncoder().encode(self)
        let payload = String(data: encoded, encoding: .utf8)
        return try RustBridge.executionProviderTypeFromJson(payload ?? "null")
    }
}

/// Output format for extraction results.
///
/// Controls the format of the `content` field in `ExtractionResult`.
/// When set to `Markdown`, `Djot`, or `Html`, the output uses that format.
/// `Plain` returns the raw extracted text.
/// `Structured` returns JSON with full OCR element data including bounding
/// boxes and confidence scores.
///
/// No custom `Codable` implementation is declared for this enum, so encoding and
/// decoding use the compiler-synthesized form.
/// NOTE(review): confirm that the synthesized JSON shape is what
/// `RustBridge.outputFormatFromJson` expects; the Rust-side wire format is not visible here.
public enum OutputFormat: Codable, Sendable, Hashable {
    /// Plain text content only (default)
    case plain
    /// Markdown format
    case markdown
    /// Djot markup format
    case djot
    /// HTML format
    case html
    /// JSON tree format with heading-driven sections.
    case json
    /// Structured JSON format with full OCR element metadata.
    case structured
    /// Custom renderer registered via the RendererRegistry.
    /// The string is the renderer name (e.g., "docx", "latex").
    case custom(field0: String)
}
extension OutputFormat {
    /// Converts this value to its bridge type by JSON-encoding it and passing the text
    /// to `RustBridge.outputFormatFromJson`.
    ///
    /// If the encoded bytes cannot be read as UTF-8, the literal `null` is passed instead.
    ///
    /// - Throws: Errors from `JSONEncoder` or from the bridge call.
    func intoRust() throws -> RustBridge.OutputFormat {
        let encoded = try JSONEncoder().encode(self)
        let payload = String(data: encoded, encoding: .utf8)
        return try RustBridge.outputFormatFromJson(payload ?? "null")
    }
}

/// Built-in HTML theme selection.
///
/// Every case states its wire value explicitly.
public enum HtmlTheme: String, Codable, Sendable, Hashable {
    /// Sensible defaults: system font stack, neutral colours, readable line
    /// measure. CSS custom properties (`--kb-*`) are all defined so user CSS
    /// can override individual values.
    case `default` = "default"
    /// GitHub Markdown-inspired palette and spacing.
    case gitHub = "github"
    /// Dark background, light text.
    case dark = "dark"
    /// Minimal light theme with generous whitespace.
    case light = "light"
    /// No built-in stylesheet emitted. CSS custom properties are still defined
    /// on `:root` so user stylesheets can reference `var(--kb-*)` tokens.
    case unstyled = "unstyled"
}
extension HtmlTheme {
    /// Converts this value to its bridge type by JSON-encoding it and passing the text
    /// to `RustBridge.htmlThemeFromJson`.
    ///
    /// If the encoded bytes cannot be read as UTF-8, the literal `null` is passed instead.
    ///
    /// - Throws: Errors from `JSONEncoder` or from the bridge call.
    func intoRust() throws -> RustBridge.HtmlTheme {
        let encoded = try JSONEncoder().encode(self)
        let payload = String(data: encoded, encoding: .utf8)
        return try RustBridge.htmlThemeFromJson(payload ?? "null")
    }
}

/// Which table structure recognition model to use.
///
/// Controls the model used for table cell detection within layout-detected
/// table regions. Wire format is snake_case in all serializers (JSON, TOML,
/// YAML).
///
/// Every case states its wire value explicitly.
public enum TableModel: String, Codable, Sendable, Hashable {
    /// TATR (Table Transformer) -- default, 30MB, DETR-based row/column detection.
    case tatr = "tatr"
    /// SLANeXT wired variant -- 365MB, optimized for bordered tables.
    case slanetWired = "slanet_wired"
    /// SLANeXT wireless variant -- 365MB, optimized for borderless tables.
    case slanetWireless = "slanet_wireless"
    /// SLANet-plus -- 7.78MB, lightweight general-purpose.
    case slanetPlus = "slanet_plus"
    /// Classifier-routed SLANeXT: auto-select wired/wireless per table.
    /// Uses PP-LCNet classifier (6.78MB) + both SLANeXT variants (730MB total).
    case slanetAuto = "slanet_auto"
    /// Disable table structure model inference entirely; use heuristic path only.
    case disabled = "disabled"
}
extension TableModel {
    /// Converts this value to its bridge type by JSON-encoding it and passing the text
    /// to `RustBridge.tableModelFromJson`.
    ///
    /// If the encoded bytes cannot be read as UTF-8, the literal `null` is passed instead.
    ///
    /// - Throws: Errors from `JSONEncoder` or from the bridge call.
    func intoRust() throws -> RustBridge.TableModel {
        let encoded = try JSONEncoder().encode(self)
        let payload = String(data: encoded, encoding: .utf8)
        return try RustBridge.tableModelFromJson(payload ?? "null")
    }
}

/// Type of text chunker to use.
///
/// # Variants
///
/// * `Text` - Generic text splitter, splits on whitespace and punctuation
/// * `Markdown` - Markdown-aware splitter, preserves formatting and structure
/// * `Yaml` - YAML-aware splitter, creates one chunk per top-level key
/// * `Semantic` - Topic-aware chunker. With an `EmbeddingConfig`, splits at
///   embedding-based topic shifts tuned by `topic_threshold` (default 0.75,
///   lower = more splits). Without an embedding, falls back to a
///   structural-boundary heuristic (ALL-CAPS headers, numbered sections,
///   blank-line paragraphs) and merges groups into chunks capped at
///   `max_characters` (default 1000). `topic_threshold` has no effect in the
///   fallback path. For best results, pair with an embedding model.
///
/// Every case states its wire value explicitly.
public enum ChunkerType: String, Codable, Sendable, Hashable {
    case text = "text"
    case markdown = "markdown"
    case yaml = "yaml"
    case semantic = "semantic"
}
extension ChunkerType {
    /// Converts this value to its bridge type by JSON-encoding it and passing the text
    /// to `RustBridge.chunkerTypeFromJson`.
    ///
    /// If the encoded bytes cannot be read as UTF-8, the literal `null` is passed instead.
    ///
    /// - Throws: Errors from `JSONEncoder` or from the bridge call.
    func intoRust() throws -> RustBridge.ChunkerType {
        let encoded = try JSONEncoder().encode(self)
        let payload = String(data: encoded, encoding: .utf8)
        return try RustBridge.chunkerTypeFromJson(payload ?? "null")
    }
}

/// How chunk size is measured.
///
/// Defaults to `Characters` (Unicode character count). When using token-based sizing,
/// chunks are sized by token count according to the specified tokenizer.
///
/// Token-based sizing uses HuggingFace tokenizers loaded at runtime. Any tokenizer
/// available on HuggingFace Hub can be used, including OpenAI-compatible tokenizers
/// (e.g., `Xenova/gpt-4o`, `Xenova/cl100k_base`).
///
/// The wire form is an object tagged by a `type` key (`"characters"` or `"tokenizer"`);
/// the tokenizer variant adds `model` and, when set, `cache_dir`.
public enum ChunkSizing: Codable, Sendable, Hashable {
    /// Size measured in Unicode characters (default).
    case characters
    /// Size measured in tokens from a HuggingFace tokenizer.
    case tokenizer(model: String, cacheDir: URL?)

    /// Keys of the tagged wire object.
    private enum CodingKeys: String, CodingKey {
        case type
        case cacheDir = "cache_dir"
        case model
    }

    /// Decodes a value by reading the `type` tag and then the fields of that variant.
    ///
    /// - Throws: `DecodingError.dataCorrupted` for an unrecognized tag, or the usual
    ///   decoding errors when `type` or `model` is missing or has the wrong type.
    public init(from decoder: Decoder) throws {
        let fields = try decoder.container(keyedBy: CodingKeys.self)
        let tag = try fields.decode(String.self, forKey: .type)
        if tag == "characters" {
            self = .characters
            return
        }
        guard tag == "tokenizer" else {
            throw DecodingError.dataCorruptedError(
                forKey: .type,
                in: fields,
                debugDescription: "Unknown ChunkSizing type: \(tag)"
            )
        }
        let model = try fields.decode(String.self, forKey: .model)
        let cacheDir = try fields.decodeIfPresent(URL.self, forKey: .cacheDir)
        self = .tokenizer(model: model, cacheDir: cacheDir)
    }

    /// Encodes the `type` tag followed by the variant's fields; a `nil` cache directory is omitted.
    public func encode(to encoder: Encoder) throws {
        var fields = encoder.container(keyedBy: CodingKeys.self)
        switch self {
        case .characters:
            try fields.encode("characters", forKey: .type)
        case let .tokenizer(model, cacheDir):
            try fields.encode("tokenizer", forKey: .type)
            try fields.encode(model, forKey: .model)
            try fields.encodeIfPresent(cacheDir, forKey: .cacheDir)
        }
    }
}
extension ChunkSizing {
    /// Converts this value to its bridge type by JSON-encoding it and passing the text
    /// to `RustBridge.chunkSizingFromJson`.
    ///
    /// If the encoded bytes cannot be read as UTF-8, the literal `null` is passed instead.
    ///
    /// - Throws: Errors from `JSONEncoder` or from the bridge call.
    func intoRust() throws -> RustBridge.ChunkSizing {
        let encoded = try JSONEncoder().encode(self)
        let payload = String(data: encoded, encoding: .utf8)
        return try RustBridge.chunkSizingFromJson(payload ?? "null")
    }
}

/// Embedding model types supported by Kreuzberg.
///
/// The wire form is an object tagged by a `type` key (`"preset"`, `"custom"`, `"llm"`,
/// or `"plugin"`) followed by the fields of that variant.
public enum EmbeddingModelType: Codable, Sendable, Hashable {
    /// Use a preset model configuration (recommended)
    case preset(name: String)
    /// Use a custom ONNX model from HuggingFace
    case custom(modelId: String, dimensions: UInt)
    /// Provider-hosted embedding model via liter-llm.
    ///
    /// Uses the model specified in the nested `LlmConfig` (e.g.,
    /// `"openai/text-embedding-3-small"`).
    case llm(llm: LlmConfig)
    /// In-process embedding backend registered via the plugin system.
    ///
    /// The caller registers an [`EmbeddingBackend`](crate::plugins::EmbeddingBackend) once
    /// (e.g. a wrapper around an already-loaded `llama-cpp-python`, `sentence-transformers`,
    /// or tuned ONNX model), then references it by name in config. Kreuzberg calls back
    /// into the registered backend during chunking and standalone embed requests —
    /// no HuggingFace download, no ONNX Runtime requirement, no HTTP sidecar.
    ///
    /// When this variant is selected, only the following [`EmbeddingConfig`] fields
    /// apply: `normalize` (post-call L2 normalization) and `max_embed_duration_secs`
    /// (dispatcher timeout). Model-loading fields (`batch_size`, `cache_dir`,
    /// `show_download_progress`, `acceleration`) are ignored — the host owns the
    /// model lifecycle.
    ///
    /// Semantic chunking falls back to [`ChunkingConfig::max_characters`] when this variant
    /// is used, since there is no preset to look a chunk-size ceiling up against — size your
    /// context window via `max_characters` directly.
    ///
    /// See `register_embedding_backend`.
    case plugin(name: String)

    /// Keys of the tagged wire object.
    private enum CodingKeys: String, CodingKey {
        case type
        case dimensions
        case llm
        case modelId = "model_id"
        case name
    }

    /// Decodes a value by reading the `type` tag and then the fields of that variant.
    ///
    /// - Throws: `DecodingError.dataCorrupted` for an unrecognized tag, or the usual
    ///   decoding errors when a required field is missing or has the wrong type.
    public init(from decoder: Decoder) throws {
        let fields = try decoder.container(keyedBy: CodingKeys.self)
        let tag = try fields.decode(String.self, forKey: .type)
        switch tag {
        case "preset":
            let name = try fields.decode(String.self, forKey: .name)
            self = .preset(name: name)
        case "custom":
            let modelId = try fields.decode(String.self, forKey: .modelId)
            let dimensions = try fields.decode(UInt.self, forKey: .dimensions)
            self = .custom(modelId: modelId, dimensions: dimensions)
        case "llm":
            let llm = try fields.decode(LlmConfig.self, forKey: .llm)
            self = .llm(llm: llm)
        case "plugin":
            let name = try fields.decode(String.self, forKey: .name)
            self = .plugin(name: name)
        default:
            throw DecodingError.dataCorruptedError(
                forKey: .type,
                in: fields,
                debugDescription: "Unknown EmbeddingModelType type: \(tag)"
            )
        }
    }

    /// Encodes the `type` tag followed by the variant's fields.
    public func encode(to encoder: Encoder) throws {
        var fields = encoder.container(keyedBy: CodingKeys.self)
        switch self {
        case let .preset(name):
            try fields.encode("preset", forKey: .type)
            try fields.encode(name, forKey: .name)
        case let .custom(modelId, dimensions):
            try fields.encode("custom", forKey: .type)
            try fields.encode(modelId, forKey: .modelId)
            try fields.encode(dimensions, forKey: .dimensions)
        case let .llm(llm):
            try fields.encode("llm", forKey: .type)
            try fields.encode(llm, forKey: .llm)
        case let .plugin(name):
            try fields.encode("plugin", forKey: .type)
            try fields.encode(name, forKey: .name)
        }
    }
}
extension EmbeddingModelType {
    /// Converts this value to its bridge type by JSON-encoding it and passing the text
    /// to `RustBridge.embeddingModelTypeFromJson`.
    ///
    /// If the encoded bytes cannot be read as UTF-8, the literal `null` is passed instead.
    ///
    /// - Throws: Errors from `JSONEncoder` or from the bridge call.
    func intoRust() throws -> RustBridge.EmbeddingModelType {
        let encoded = try JSONEncoder().encode(self)
        let payload = String(data: encoded, encoding: .utf8)
        return try RustBridge.embeddingModelTypeFromJson(payload ?? "null")
    }
}

/// Content rendering mode for code extraction.
///
/// Controls how extracted code content is represented in the `content` field
/// of `ExtractionResult`.
///
/// Every case states its wire value explicitly.
public enum CodeContentMode: String, Codable, Sendable, Hashable {
    /// Use TSLP semantic chunks as content (default).
    case chunks = "chunks"
    /// Use raw source code as content.
    case raw = "raw"
    /// Emit function/class headings + docstrings (no code bodies).
    case structure = "structure"
}
extension CodeContentMode {
    /// Converts this value to its bridge type by JSON-encoding it and passing the text
    /// to `RustBridge.codeContentModeFromJson`.
    ///
    /// If the encoded bytes cannot be read as UTF-8, the literal `null` is passed instead.
    ///
    /// - Throws: Errors from `JSONEncoder` or from the bridge call.
    func intoRust() throws -> RustBridge.CodeContentMode {
        let encoded = try JSONEncoder().encode(self)
        let payload = String(data: encoded, encoding: .utf8)
        return try RustBridge.codeContentModeFromJson(payload ?? "null")
    }
}

/// Type of list detection.
///
/// Alias for `RustBridge.ListType`; the type itself is declared by the bridge module.
public typealias ListType = RustBridge.ListType

/// OCR backend types.
///
/// The raw values below are the strings `Codable` reads and writes. Unlike the
/// lowercase values used by several other enums in this file, they are capitalized.
public enum OcrBackendType: String, Codable, Sendable, Hashable {
    /// Tesseract OCR (native Rust binding)
    case tesseract = "Tesseract"
    /// EasyOCR (Python-based, via FFI)
    case easyOcr = "EasyOCR"
    /// PaddleOCR (Python-based, via FFI)
    case paddleOcr = "PaddleOCR"
    /// Custom/third-party OCR backend
    case custom = "Custom"
}
extension OcrBackendType {
    /// Converts this value to its bridge type by JSON-encoding it and passing the text
    /// to `RustBridge.ocrBackendTypeFromJson`.
    ///
    /// If the encoded bytes cannot be read as UTF-8, the literal `null` is passed instead.
    ///
    /// - Throws: Errors from `JSONEncoder` or from the bridge call.
    func intoRust() throws -> RustBridge.OcrBackendType {
        let encoded = try JSONEncoder().encode(self)
        let payload = String(data: encoded, encoding: .utf8)
        return try RustBridge.ocrBackendTypeFromJson(payload ?? "null")
    }
}

/// Processing stages for post-processors.
///
/// Post-processors are executed in stage order (Early → Middle → Late).
/// Use stages to control the order of post-processing operations.
///
/// The raw values (`"Early"`, `"Middle"`, `"Late"`) are the strings `Codable` reads
/// and writes; note that they are capitalized.
public enum ProcessingStage: String, Codable, Sendable, Hashable {
    /// Early stage - foundational processing.
    ///
    /// Use for:
    /// - Language detection
    /// - Character encoding normalization
    /// - Entity extraction (NER)
    /// - Text quality scoring
    case early = "Early"
    /// Middle stage - content transformation.
    ///
    /// Use for:
    /// - Keyword extraction
    /// - Token reduction
    /// - Text summarization
    /// - Semantic analysis
    case middle = "Middle"
    /// Late stage - final enrichment.
    ///
    /// Use for:
    /// - Custom user hooks
    /// - Analytics/logging
    /// - Final validation
    /// - Output formatting
    case late = "Late"
}
extension ProcessingStage {
    /// Converts this value to its bridge type by JSON-encoding it and passing the text
    /// to `RustBridge.processingStageFromJson`.
    ///
    /// If the encoded bytes cannot be read as UTF-8, the literal `null` is passed instead.
    ///
    /// - Throws: Errors from `JSONEncoder` or from the bridge call.
    func intoRust() throws -> RustBridge.ProcessingStage {
        let encoded = try JSONEncoder().encode(self)
        let payload = String(data: encoded, encoding: .utf8)
        return try RustBridge.processingStageFromJson(payload ?? "null")
    }
}

/// Named reduction levels, serialized by their capitalized raw values (`"Off"` … `"Maximum"`).
///
/// NOTE(review): presumably these select how aggressively token reduction is applied,
/// listed from least to most; this file does not show what each level does — confirm
/// against the Rust source.
public enum ReductionLevel: String, Codable, Sendable, Hashable {
    case off = "Off"
    case light = "Light"
    case moderate = "Moderate"
    case aggressive = "Aggressive"
    case maximum = "Maximum"
}
extension ReductionLevel {
    /// Converts this value to its bridge type by JSON-encoding it and passing the text
    /// to `RustBridge.reductionLevelFromJson`.
    ///
    /// If the encoded bytes cannot be read as UTF-8, the literal `null` is passed instead.
    ///
    /// - Throws: Errors from `JSONEncoder` or from the bridge call.
    func intoRust() throws -> RustBridge.ReductionLevel {
        let encoded = try JSONEncoder().encode(self)
        let payload = String(data: encoded, encoding: .utf8)
        return try RustBridge.reductionLevelFromJson(payload ?? "null")
    }
}

/// Type of PDF annotation.
///
/// Every case states its wire value explicitly.
public enum PdfAnnotationType: String, Codable, Sendable, Hashable {
    /// Sticky note / text annotation
    case text = "text"
    /// Highlighted text region
    case highlight = "highlight"
    /// Hyperlink annotation
    case link = "link"
    /// Rubber stamp annotation
    case stamp = "stamp"
    /// Underline text markup
    case underline = "underline"
    /// Strikeout text markup
    case strikeOut = "strike_out"
    /// Any other annotation type
    case other = "other"
}
extension PdfAnnotationType {
    /// Converts this value to its bridge type by JSON-encoding it and passing the text
    /// to `RustBridge.pdfAnnotationTypeFromJson`.
    ///
    /// If the encoded bytes cannot be read as UTF-8, the literal `null` is passed instead.
    ///
    /// - Throws: Errors from `JSONEncoder` or from the bridge call.
    func intoRust() throws -> RustBridge.PdfAnnotationType {
        let encoded = try JSONEncoder().encode(self)
        let payload = String(data: encoded, encoding: .utf8)
        return try RustBridge.pdfAnnotationTypeFromJson(payload ?? "null")
    }
}

/// Types of block-level elements in Djot.
///
/// Every case states its wire value explicitly; multi-word names are snake_case on the wire.
public enum BlockType: String, Codable, Sendable, Hashable {
    case paragraph = "paragraph"
    case heading = "heading"
    case blockquote = "blockquote"
    case codeBlock = "code_block"
    case listItem = "list_item"
    case orderedList = "ordered_list"
    case bulletList = "bullet_list"
    case taskList = "task_list"
    case definitionList = "definition_list"
    case definitionTerm = "definition_term"
    case definitionDescription = "definition_description"
    case div = "div"
    case section = "section"
    case thematicBreak = "thematic_break"
    case rawBlock = "raw_block"
    case mathDisplay = "math_display"
}
extension BlockType {
    /// Converts this value to its bridge type by JSON-encoding it and passing the text
    /// to `RustBridge.blockTypeFromJson`.
    ///
    /// If the encoded bytes cannot be read as UTF-8, the literal `null` is passed instead.
    ///
    /// - Throws: Errors from `JSONEncoder` or from the bridge call.
    func intoRust() throws -> RustBridge.BlockType {
        let encoded = try JSONEncoder().encode(self)
        let payload = String(data: encoded, encoding: .utf8)
        return try RustBridge.blockTypeFromJson(payload ?? "null")
    }
}

/// Types of inline elements in Djot.
///
/// Every case states its wire value explicitly; multi-word names are snake_case on the wire.
public enum InlineType: String, Codable, Sendable, Hashable {
    case text = "text"
    case strong = "strong"
    case emphasis = "emphasis"
    case highlight = "highlight"
    case `subscript` = "subscript"
    case superscript = "superscript"
    case insert = "insert"
    case delete = "delete"
    case code = "code"
    case link = "link"
    case image = "image"
    case span = "span"
    case math = "math"
    case rawInline = "raw_inline"
    case footnoteRef = "footnote_ref"
    case symbol = "symbol"
}
extension InlineType {
    /// Converts this value to `RustBridge.InlineType` by encoding it as JSON
    /// and passing the text to `RustBridge.inlineTypeFromJson`.
    ///
    /// - Throws: Errors from `JSONEncoder.encode` or from the bridge call.
    func intoRust() throws -> RustBridge.InlineType {
        let payload = try String(data: JSONEncoder().encode(self), encoding: .utf8)
        // "null" stands in when the encoded bytes cannot be read as UTF-8.
        return try RustBridge.inlineTypeFromJson(payload ?? "null")
    }
}

/// Semantic meaning of a relationship linking two document elements.
///
/// Each case is backed by the snake_case string used for it in serialized form.
public enum RelationshipKind: String, Codable, Sendable, Hashable {
    /// Footnote marker -> footnote definition.
    case footnoteReference = "footnote_reference"
    /// Citation marker -> bibliography entry.
    case citationReference = "citation_reference"
    /// Internal anchor link (`#id`) -> target heading/element.
    case internalLink = "internal_link"
    /// Caption paragraph -> the figure/table it describes.
    case caption = "caption"
    /// Label -> labeled element (HTML `<label for>`, LaTeX `\label{}`).
    case label = "label"
    /// TOC entry -> target section.
    case tocEntry = "toc_entry"
    /// Cross-reference (LaTeX `\ref{}`, DOCX cross-reference field).
    case crossReference = "cross_reference"
}
extension RelationshipKind {
    /// Converts this value to `RustBridge.RelationshipKind` by encoding it as JSON
    /// and passing the text to `RustBridge.relationshipKindFromJson`.
    ///
    /// - Throws: Errors from `JSONEncoder.encode` or from the bridge call.
    func intoRust() throws -> RustBridge.RelationshipKind {
        let payload = try String(data: JSONEncoder().encode(self), encoding: .utf8)
        // "null" stands in when the encoded bytes cannot be read as UTF-8.
        return try RustBridge.relationshipKindFromJson(payload ?? "null")
    }
}

/// Layer a document node belongs to.
///
/// Replaces separate body/furniture arrays with per-node granularity.
/// Each case is backed by the string used for it in serialized form.
public enum ContentLayer: String, Codable, Sendable, Hashable {
    /// Main document body content.
    case body = "body"
    /// Page/section header (running header).
    case header = "header"
    /// Page/section footer (running footer).
    case footer = "footer"
    /// Footnote content.
    case footnote = "footnote"
}
extension ContentLayer {
    /// Converts this value to `RustBridge.ContentLayer` by encoding it as JSON
    /// and passing the text to `RustBridge.contentLayerFromJson`.
    ///
    /// - Throws: Errors from `JSONEncoder.encode` or from the bridge call.
    func intoRust() throws -> RustBridge.ContentLayer {
        let payload = try String(data: JSONEncoder().encode(self), encoding: .utf8)
        // "null" stands in when the encoded bytes cannot be read as UTF-8.
        return try RustBridge.contentLayerFromJson(payload ?? "null")
    }
}

/// Payload of a document node, modelled as a tagged union.
///
/// Every case holds only the data specific to that node kind. In serialized
/// form the case is identified by a `node_type` string stored next to the
/// payload fields. This mirrors `#[serde(tag = "node_type")]`, which was chosen
/// to avoid the "type" keyword collision in Go/Java/TypeScript bindings.
public enum NodeContent: Codable, Sendable, Hashable {
    /// Document title.
    case title(text: String)
    /// Section heading with level (1-6).
    case heading(level: UInt8, text: String)
    /// Body text paragraph.
    case paragraph(text: String)
    /// List container — children are `ListItem` nodes.
    case list(ordered: Bool)
    /// Individual list item.
    case listItem(text: String)
    /// Table with structured cell grid.
    case table(grid: TableGrid)
    /// Image reference.
    case image(description: String?, imageIndex: UInt32?, src: String?)
    /// Code block.
    case code(text: String, language: String?)
    /// Block quote — container, children carry the quoted content.
    case quote
    /// Mathematical formula / equation.
    case formula(text: String)
    /// Footnote reference content.
    case footnote(text: String)
    /// Logical grouping container (section, key-value area).
    ///
    /// `heading_level` + `heading_text` capture the section heading directly
    /// rather than relying on a first-child positional convention.
    case group(label: String?, headingLevel: UInt8?, headingText: String?)
    /// Page break marker.
    case pageBreak
    /// Presentation slide container — children are the slide's content nodes.
    case slide(number: UInt32, title: String?)
    /// Definition list container — children are `DefinitionItem` nodes.
    case definitionList
    /// Individual definition list entry with term and definition.
    case definitionItem(term: String, definition: String)
    /// Citation or bibliographic reference.
    case citation(key: String, text: String)
    /// Admonition / callout container (note, warning, tip, etc.).
    ///
    /// Children carry the admonition body content.
    case admonition(kind: String, title: String?)
    /// Raw block preserved verbatim from the source format.
    ///
    /// Used for content that cannot be mapped to a semantic node type
    /// (e.g. JSX in MDX, raw LaTeX in markdown, embedded HTML).
    case rawBlock(format: String, content: String)
    /// Structured metadata block (email headers, YAML frontmatter, etc.).
    case metadataBlock(entries: [[String]])

    /// Keys of the flat keyed container: the `node_type` tag plus every payload field.
    private enum CodingKeys: String, CodingKey {
        case node_type
        case content
        case definition
        case description
        case entries
        case format
        case grid
        case headingLevel = "heading_level"
        case headingText = "heading_text"
        case imageIndex = "image_index"
        case key
        case kind
        case label
        case language
        case level
        case number
        case ordered
        case src
        case term
        case text
        case title
    }

    /// Decodes a node by reading the `node_type` tag and then the fields that tag calls for.
    ///
    /// Optional payload fields are read with `decodeIfPresent`, so they may be absent.
    ///
    /// - Throws: `DecodingError.dataCorrupted` when the tag is not recognised, or any
    ///   error raised while decoding the tag or a payload field.
    public init(from decoder: Decoder) throws {
        let fields = try decoder.container(keyedBy: CodingKeys.self)
        let tag = try fields.decode(String.self, forKey: .node_type)
        switch tag {
        case "title":
            self = try .title(text: fields.decode(String.self, forKey: .text))
        case "heading":
            self = try .heading(
                level: fields.decode(UInt8.self, forKey: .level),
                text: fields.decode(String.self, forKey: .text)
            )
        case "paragraph":
            self = try .paragraph(text: fields.decode(String.self, forKey: .text))
        case "list":
            self = try .list(ordered: fields.decode(Bool.self, forKey: .ordered))
        case "list_item":
            self = try .listItem(text: fields.decode(String.self, forKey: .text))
        case "table":
            self = try .table(grid: fields.decode(TableGrid.self, forKey: .grid))
        case "image":
            self = try .image(
                description: fields.decodeIfPresent(String.self, forKey: .description),
                imageIndex: fields.decodeIfPresent(UInt32.self, forKey: .imageIndex),
                src: fields.decodeIfPresent(String.self, forKey: .src)
            )
        case "code":
            self = try .code(
                text: fields.decode(String.self, forKey: .text),
                language: fields.decodeIfPresent(String.self, forKey: .language)
            )
        case "quote":
            self = .quote
        case "formula":
            self = try .formula(text: fields.decode(String.self, forKey: .text))
        case "footnote":
            self = try .footnote(text: fields.decode(String.self, forKey: .text))
        case "group":
            self = try .group(
                label: fields.decodeIfPresent(String.self, forKey: .label),
                headingLevel: fields.decodeIfPresent(UInt8.self, forKey: .headingLevel),
                headingText: fields.decodeIfPresent(String.self, forKey: .headingText)
            )
        case "page_break":
            self = .pageBreak
        case "slide":
            self = try .slide(
                number: fields.decode(UInt32.self, forKey: .number),
                title: fields.decodeIfPresent(String.self, forKey: .title)
            )
        case "definition_list":
            self = .definitionList
        case "definition_item":
            self = try .definitionItem(
                term: fields.decode(String.self, forKey: .term),
                definition: fields.decode(String.self, forKey: .definition)
            )
        case "citation":
            self = try .citation(
                key: fields.decode(String.self, forKey: .key),
                text: fields.decode(String.self, forKey: .text)
            )
        case "admonition":
            self = try .admonition(
                kind: fields.decode(String.self, forKey: .kind),
                title: fields.decodeIfPresent(String.self, forKey: .title)
            )
        case "raw_block":
            self = try .rawBlock(
                format: fields.decode(String.self, forKey: .format),
                content: fields.decode(String.self, forKey: .content)
            )
        case "metadata_block":
            self = try .metadataBlock(entries: fields.decode([[String]].self, forKey: .entries))
        default:
            throw DecodingError.dataCorruptedError(
                forKey: .node_type,
                in: fields,
                debugDescription: "Unknown NodeContent type: \(tag)"
            )
        }
    }

    /// Encodes the node as a flat keyed container: the `node_type` tag first,
    /// then the case's payload fields.
    ///
    /// Optional payload values are written with `encodeIfPresent`, so `nil`
    /// values produce no key.
    ///
    /// - Throws: Any error raised by the encoder while writing a value.
    public func encode(to encoder: Encoder) throws {
        var fields = encoder.container(keyedBy: CodingKeys.self)
        switch self {
        case let .title(text):
            try fields.encode("title", forKey: .node_type)
            try fields.encode(text, forKey: .text)
        case let .heading(level, text):
            try fields.encode("heading", forKey: .node_type)
            try fields.encode(level, forKey: .level)
            try fields.encode(text, forKey: .text)
        case let .paragraph(text):
            try fields.encode("paragraph", forKey: .node_type)
            try fields.encode(text, forKey: .text)
        case let .list(ordered):
            try fields.encode("list", forKey: .node_type)
            try fields.encode(ordered, forKey: .ordered)
        case let .listItem(text):
            try fields.encode("list_item", forKey: .node_type)
            try fields.encode(text, forKey: .text)
        case let .table(grid):
            try fields.encode("table", forKey: .node_type)
            try fields.encode(grid, forKey: .grid)
        case let .image(description, imageIndex, src):
            try fields.encode("image", forKey: .node_type)
            try fields.encodeIfPresent(description, forKey: .description)
            try fields.encodeIfPresent(imageIndex, forKey: .imageIndex)
            try fields.encodeIfPresent(src, forKey: .src)
        case let .code(text, language):
            try fields.encode("code", forKey: .node_type)
            try fields.encode(text, forKey: .text)
            try fields.encodeIfPresent(language, forKey: .language)
        case .quote:
            try fields.encode("quote", forKey: .node_type)
        case let .formula(text):
            try fields.encode("formula", forKey: .node_type)
            try fields.encode(text, forKey: .text)
        case let .footnote(text):
            try fields.encode("footnote", forKey: .node_type)
            try fields.encode(text, forKey: .text)
        case let .group(label, headingLevel, headingText):
            try fields.encode("group", forKey: .node_type)
            try fields.encodeIfPresent(label, forKey: .label)
            try fields.encodeIfPresent(headingLevel, forKey: .headingLevel)
            try fields.encodeIfPresent(headingText, forKey: .headingText)
        case .pageBreak:
            try fields.encode("page_break", forKey: .node_type)
        case let .slide(number, title):
            try fields.encode("slide", forKey: .node_type)
            try fields.encode(number, forKey: .number)
            try fields.encodeIfPresent(title, forKey: .title)
        case .definitionList:
            try fields.encode("definition_list", forKey: .node_type)
        case let .definitionItem(term, definition):
            try fields.encode("definition_item", forKey: .node_type)
            try fields.encode(term, forKey: .term)
            try fields.encode(definition, forKey: .definition)
        case let .citation(key, text):
            try fields.encode("citation", forKey: .node_type)
            try fields.encode(key, forKey: .key)
            try fields.encode(text, forKey: .text)
        case let .admonition(kind, title):
            try fields.encode("admonition", forKey: .node_type)
            try fields.encode(kind, forKey: .kind)
            try fields.encodeIfPresent(title, forKey: .title)
        case let .rawBlock(format, content):
            try fields.encode("raw_block", forKey: .node_type)
            try fields.encode(format, forKey: .format)
            try fields.encode(content, forKey: .content)
        case let .metadataBlock(entries):
            try fields.encode("metadata_block", forKey: .node_type)
            try fields.encode(entries, forKey: .entries)
        }
    }
}
extension NodeContent {
    /// Converts this value to `RustBridge.NodeContent` by encoding it as JSON
    /// and passing the text to `RustBridge.nodeContentFromJson`.
    ///
    /// - Throws: Errors from `JSONEncoder.encode` or from the bridge call.
    func intoRust() throws -> RustBridge.NodeContent {
        let payload = try String(data: JSONEncoder().encode(self), encoding: .utf8)
        // "null" stands in when the encoded bytes cannot be read as UTF-8.
        return try RustBridge.nodeContentFromJson(payload ?? "null")
    }
}

/// Kinds of inline text annotation.
///
/// In serialized form the case is identified by an `annotation_type` string
/// stored next to any payload fields.
public enum AnnotationKind: Codable, Sendable, Hashable {
    case bold
    case italic
    case underline
    case strikethrough
    case code
    case `subscript`
    case superscript
    case link(url: String, title: String?)
    /// Highlighted text (PDF highlights, HTML `<mark>`).
    case highlight
    /// Text color (CSS-compatible value, e.g. "#ff0000", "red").
    case color(value: String)
    /// Font size with units (e.g. "12pt", "1.2em", "16px").
    case fontSize(value: String)
    /// Extensible annotation for format-specific styling.
    case custom(name: String, value: String?)

    /// Keys of the flat keyed container: the `annotation_type` tag plus every payload field.
    private enum CodingKeys: String, CodingKey {
        case annotation_type
        case name
        case title
        case url
        case value
    }

    /// Decodes an annotation by reading the `annotation_type` tag and then the
    /// fields that tag calls for.
    ///
    /// - Throws: `DecodingError.dataCorrupted` when the tag is not recognised, or any
    ///   error raised while decoding the tag or a payload field.
    public init(from decoder: Decoder) throws {
        let fields = try decoder.container(keyedBy: CodingKeys.self)
        let tag = try fields.decode(String.self, forKey: .annotation_type)
        switch tag {
        case "bold": self = .bold
        case "italic": self = .italic
        case "underline": self = .underline
        case "strikethrough": self = .strikethrough
        case "code": self = .code
        case "subscript": self = .`subscript`
        case "superscript": self = .superscript
        case "link":
            self = try .link(
                url: fields.decode(String.self, forKey: .url),
                title: fields.decodeIfPresent(String.self, forKey: .title)
            )
        case "highlight": self = .highlight
        case "color":
            self = try .color(value: fields.decode(String.self, forKey: .value))
        case "font_size":
            self = try .fontSize(value: fields.decode(String.self, forKey: .value))
        case "custom":
            self = try .custom(
                name: fields.decode(String.self, forKey: .name),
                value: fields.decodeIfPresent(String.self, forKey: .value)
            )
        default:
            throw DecodingError.dataCorruptedError(
                forKey: .annotation_type,
                in: fields,
                debugDescription: "Unknown AnnotationKind type: \(tag)"
            )
        }
    }

    /// Encodes the annotation as a flat keyed container: the `annotation_type`
    /// tag first, then the case's payload fields.
    ///
    /// Optional payload values are written with `encodeIfPresent`, so `nil`
    /// values produce no key.
    ///
    /// - Throws: Any error raised by the encoder while writing a value.
    public func encode(to encoder: Encoder) throws {
        var fields = encoder.container(keyedBy: CodingKeys.self)
        switch self {
        case .bold:
            try fields.encode("bold", forKey: .annotation_type)
        case .italic:
            try fields.encode("italic", forKey: .annotation_type)
        case .underline:
            try fields.encode("underline", forKey: .annotation_type)
        case .strikethrough:
            try fields.encode("strikethrough", forKey: .annotation_type)
        case .code:
            try fields.encode("code", forKey: .annotation_type)
        case .`subscript`:
            try fields.encode("subscript", forKey: .annotation_type)
        case .superscript:
            try fields.encode("superscript", forKey: .annotation_type)
        case let .link(url, title):
            try fields.encode("link", forKey: .annotation_type)
            try fields.encode(url, forKey: .url)
            try fields.encodeIfPresent(title, forKey: .title)
        case .highlight:
            try fields.encode("highlight", forKey: .annotation_type)
        case let .color(value):
            try fields.encode("color", forKey: .annotation_type)
            try fields.encode(value, forKey: .value)
        case let .fontSize(value):
            try fields.encode("font_size", forKey: .annotation_type)
            try fields.encode(value, forKey: .value)
        case let .custom(name, value):
            try fields.encode("custom", forKey: .annotation_type)
            try fields.encode(name, forKey: .name)
            try fields.encodeIfPresent(value, forKey: .value)
        }
    }
}
extension AnnotationKind {
    /// Converts this value to `RustBridge.AnnotationKind` by encoding it as JSON
    /// and passing the text to `RustBridge.annotationKindFromJson`.
    ///
    /// - Throws: Errors from `JSONEncoder.encode` or from the bridge call.
    func intoRust() throws -> RustBridge.AnnotationKind {
        let payload = try String(data: JSONEncoder().encode(self), encoding: .utf8)
        // "null" stands in when the encoded bytes cannot be read as UTF-8.
        return try RustBridge.annotationKindFromJson(payload ?? "null")
    }
}

/// How the extracted text was obtained.
///
/// Each case is backed by the string used for it in serialized form.
public enum ExtractionMethod: String, Codable, Sendable, Hashable {
    case native = "native"
    case ocr = "ocr"
    case mixed = "mixed"
}
extension ExtractionMethod {
    /// Converts this value to `RustBridge.ExtractionMethod` by encoding it as JSON
    /// and passing the text to `RustBridge.extractionMethodFromJson`.
    ///
    /// - Throws: Errors from `JSONEncoder.encode` or from the bridge call.
    func intoRust() throws -> RustBridge.ExtractionMethod {
        let payload = try String(data: JSONEncoder().encode(self), encoding: .utf8)
        // "null" stands in when the encoded bytes cannot be read as UTF-8.
        return try RustBridge.extractionMethodFromJson(payload ?? "null")
    }
}

/// Structural role assigned to a text chunk.
///
/// Assigned by the heuristic classifier in `chunking::classifier`.
/// Defaults to `Unknown` when no rule matches.
/// Designed to be extended in future versions without breaking changes.
/// Each case is backed by the snake_case string used for it in serialized form.
public enum ChunkType: String, Codable, Sendable, Hashable {
    /// Section heading or document title.
    case heading = "heading"
    /// Party list: names, addresses, and signatories.
    case partyList = "party_list"
    /// Definition clause ("X means…", "X shall mean…").
    case definitions = "definitions"
    /// Operative clause containing legal/contractual action verbs.
    case operativeClause = "operative_clause"
    /// Signature block with signatures, names, and dates.
    case signatureBlock = "signature_block"
    /// Schedule, annex, appendix, or exhibit section.
    case schedule = "schedule"
    /// Table-like content with aligned columns or repeated patterns.
    case tableLike = "table_like"
    /// Mathematical formula or equation.
    case formula = "formula"
    /// Code block or preformatted content.
    case codeBlock = "code_block"
    /// Embedded or referenced image content.
    case image = "image"
    /// Organizational chart or hierarchy diagram.
    case orgChart = "org_chart"
    /// Diagram, figure, or visual illustration.
    case diagram = "diagram"
    /// Unclassified or mixed content.
    case unknown = "unknown"
}
extension ChunkType {
    /// Converts this value to `RustBridge.ChunkType` by encoding it as JSON
    /// and passing the text to `RustBridge.chunkTypeFromJson`.
    ///
    /// - Throws: Errors from `JSONEncoder.encode` or from the bridge call.
    func intoRust() throws -> RustBridge.ChunkType {
        let payload = try String(data: JSONEncoder().encode(self), encoding: .utf8)
        // "null" stands in when the encoded bytes cannot be read as UTF-8.
        return try RustBridge.chunkTypeFromJson(payload ?? "null")
    }
}

/// Heuristic guess at what an image depicts.
///
/// Each case is backed by the snake_case string used for it in serialized form.
public enum ImageKind: String, Codable, Sendable, Hashable {
    /// Photographic image (natural scene, photograph).
    case photograph = "photograph"
    /// Technical or schematic diagram.
    case diagram = "diagram"
    /// Chart, graph, or plot.
    case chart = "chart"
    /// Freehand or technical drawing.
    case drawing = "drawing"
    /// Text-heavy image (scanned text, document).
    case textBlock = "text_block"
    /// Decorative element or border.
    case decoration = "decoration"
    /// Logo or brand mark.
    case logo = "logo"
    /// Small icon.
    case icon = "icon"
    /// Fragment of a larger tiled image (tile of a technical drawing).
    case tileFragment = "tile_fragment"
    /// Mask or transparency map.
    case mask = "mask"
    /// Full-page render produced during OCR preprocessing; used as a citation thumbnail.
    case pageRaster = "page_raster"
    /// Could not classify with reasonable confidence.
    case unknown = "unknown"
}
extension ImageKind {
    /// Converts this value to `RustBridge.ImageKind` by encoding it as JSON
    /// and passing the text to `RustBridge.imageKindFromJson`.
    ///
    /// - Throws: Errors from `JSONEncoder.encode` or from the bridge call.
    func intoRust() throws -> RustBridge.ImageKind {
        let payload = try String(data: JSONEncoder().encode(self), encoding: .utf8)
        // "null" stands in when the encoded bytes cannot be read as UTF-8.
        return try RustBridge.imageKindFromJson(payload ?? "null")
    }
}

/// Selects the shape of an extraction result.
///
/// Distinct from `OutputFormat`, which controls rendering (Plain, Markdown,
/// HTML, etc.). `ResultFormat` controls the *shape* of the result: a unified
/// content blob vs. an element-based decomposition.
/// Each case is backed by the snake_case string used for it in serialized form.
public enum ResultFormat: String, Codable, Sendable, Hashable {
    /// Unified format with all content in the `content` field.
    case unified = "unified"
    /// Element-based format with semantic element extraction.
    case elementBased = "element_based"
}
extension ResultFormat {
    /// Converts this value to `RustBridge.ResultFormat` by encoding it as JSON
    /// and passing the text to `RustBridge.resultFormatFromJson`.
    ///
    /// - Throws: Errors from `JSONEncoder.encode` or from the bridge call.
    func intoRust() throws -> RustBridge.ResultFormat {
        let payload = try String(data: JSONEncoder().encode(self), encoding: .utf8)
        // "null" stands in when the encoded bytes cannot be read as UTF-8.
        return try RustBridge.resultFormatFromJson(payload ?? "null")
    }
}

/// Semantic classification of an extracted element.
///
/// Categorizes text content into semantic units for downstream processing.
/// Supports the element types commonly found in Unstructured documents.
/// Each case is backed by the snake_case string used for it in serialized form.
public enum ElementType: String, Codable, Sendable, Hashable {
    /// Document title.
    case title = "title"
    /// Main narrative text body.
    case narrativeText = "narrative_text"
    /// Section heading.
    case heading = "heading"
    /// List item (bullet, numbered, etc.).
    case listItem = "list_item"
    /// Table element.
    case table = "table"
    /// Image element.
    case image = "image"
    /// Page break marker.
    case pageBreak = "page_break"
    /// Code block.
    case codeBlock = "code_block"
    /// Block quote.
    case blockQuote = "block_quote"
    /// Footer text.
    case footer = "footer"
    /// Header text.
    case header = "header"
}
extension ElementType {
    /// Converts this value to `RustBridge.ElementType` by encoding it as JSON
    /// and passing the text to `RustBridge.elementTypeFromJson`.
    ///
    /// - Throws: Errors from `JSONEncoder.encode` or from the bridge call.
    func intoRust() throws -> RustBridge.ElementType {
        let payload = try String(data: JSONEncoder().encode(self), encoding: .utf8)
        // "null" stands in when the encoded bytes cannot be read as UTF-8.
        return try RustBridge.elementTypeFromJson(payload ?? "null")
    }
}

/// Format-specific metadata (discriminated union).
///
/// Only one format type can exist per extraction result. This provides
/// type-safe, clean metadata without nested optionals.
///
/// This is a plain alias of `RustBridge.FormatMetadata`: no separate Swift-side
/// type is declared here, so values are the bridge type itself.
public typealias FormatMetadata = RustBridge.FormatMetadata

/// Text direction of an HTML document.
///
/// Each case is backed by the string used for it in serialized form.
public enum TextDirection: String, Codable, Sendable, Hashable {
    /// Left-to-right text direction.
    case leftToRight = "ltr"
    /// Right-to-left text direction.
    case rightToLeft = "rtl"
    /// Automatic text direction detection.
    case auto = "auto"
}
extension TextDirection {
    /// Converts this value to `RustBridge.TextDirection` by encoding it as JSON
    /// and passing the text to `RustBridge.textDirectionFromJson`.
    ///
    /// - Throws: Errors from `JSONEncoder.encode` or from the bridge call.
    func intoRust() throws -> RustBridge.TextDirection {
        let payload = try String(data: JSONEncoder().encode(self), encoding: .utf8)
        // "null" stands in when the encoded bytes cannot be read as UTF-8.
        return try RustBridge.textDirectionFromJson(payload ?? "null")
    }
}

/// Classification of a link.
///
/// Each case is backed by the string used for it in serialized form.
public enum LinkType: String, Codable, Sendable, Hashable {
    /// Anchor link (#section).
    case anchor = "anchor"
    /// Internal link (same domain).
    case `internal` = "internal"
    /// External link (different domain).
    case external = "external"
    /// Email link (mailto:).
    case email = "email"
    /// Phone link (tel:).
    case phone = "phone"
    /// Other link type.
    case other = "other"
}
extension LinkType {
    /// Converts this value to `RustBridge.LinkType` by encoding it as JSON
    /// and passing the text to `RustBridge.linkTypeFromJson`.
    ///
    /// - Throws: Errors from `JSONEncoder.encode` or from the bridge call.
    func intoRust() throws -> RustBridge.LinkType {
        let payload = try String(data: JSONEncoder().encode(self), encoding: .utf8)
        // "null" stands in when the encoded bytes cannot be read as UTF-8.
        return try RustBridge.linkTypeFromJson(payload ?? "null")
    }
}

/// Classification of an image by how it is referenced.
///
/// Each case is backed by the string used for it in serialized form; the
/// multi-word values are hyphenated.
public enum ImageType: String, Codable, Sendable, Hashable {
    /// Data URI image.
    case dataUri = "data-uri"
    /// Inline SVG.
    case inlineSvg = "inline-svg"
    /// External image URL.
    case external = "external"
    /// Relative path image.
    case relative = "relative"
}
extension ImageType {
    /// Converts this value to `RustBridge.ImageType` by encoding it as JSON
    /// and passing the text to `RustBridge.imageTypeFromJson`.
    ///
    /// - Throws: Errors from `JSONEncoder.encode` or from the bridge call.
    func intoRust() throws -> RustBridge.ImageType {
        let payload = try String(data: JSONEncoder().encode(self), encoding: .utf8)
        // "null" stands in when the encoded bytes cannot be read as UTF-8.
        return try RustBridge.imageTypeFromJson(payload ?? "null")
    }
}

/// Classification of structured data.
///
/// Each case is backed by the string used for it in serialized form.
public enum StructuredDataType: String, Codable, Sendable, Hashable {
    /// JSON-LD structured data.
    case jsonLd = "json-ld"
    /// Microdata.
    case microdata = "microdata"
    /// RDFa.
    case rdFa = "rdfa"
}
extension StructuredDataType {
    /// Converts this value to `RustBridge.StructuredDataType` by encoding it as JSON
    /// and passing the text to `RustBridge.structuredDataTypeFromJson`.
    ///
    /// - Throws: Errors from `JSONEncoder.encode` or from the bridge call.
    func intoRust() throws -> RustBridge.StructuredDataType {
        let payload = try String(data: JSONEncoder().encode(self), encoding: .utf8)
        // "null" stands in when the encoded bytes cannot be read as UTF-8.
        return try RustBridge.structuredDataTypeFromJson(payload ?? "null")
    }
}

/// Geometry bounding an OCR element.
///
/// Supports both axis-aligned rectangles (from Tesseract) and 4-point quadrilaterals
/// (from PaddleOCR and rotated text detection). In serialized form the case is
/// identified by a `type` string stored next to the payload fields.
public enum OcrBoundingGeometry: Codable, Sendable, Hashable {
    /// Axis-aligned bounding box (typical for Tesseract output).
    case rectangle(left: UInt32, top: UInt32, width: UInt32, height: UInt32)
    /// 4-point quadrilateral for rotated/skewed text (PaddleOCR).
    ///
    /// Points are in clockwise order starting from top-left:
    /// `[top_left, top_right, bottom_right, bottom_left]`
    ///
    /// NOTE(review): `points` is carried as a `String` here although the
    /// description above is a four-point list — confirm the expected wire
    /// representation against the producer of this JSON.
    case quadrilateral(points: String)

    /// Keys of the flat keyed container: the `type` tag plus every payload field.
    private enum CodingKeys: String, CodingKey {
        case type
        case height
        case left
        case points
        case top
        case width
    }

    /// Decodes a geometry by reading the `type` tag and then the fields that tag calls for.
    ///
    /// - Throws: `DecodingError.dataCorrupted` when the tag is not recognised, or any
    ///   error raised while decoding the tag or a payload field.
    public init(from decoder: Decoder) throws {
        let fields = try decoder.container(keyedBy: CodingKeys.self)
        let tag = try fields.decode(String.self, forKey: .type)
        switch tag {
        case "rectangle":
            self = try .rectangle(
                left: fields.decode(UInt32.self, forKey: .left),
                top: fields.decode(UInt32.self, forKey: .top),
                width: fields.decode(UInt32.self, forKey: .width),
                height: fields.decode(UInt32.self, forKey: .height)
            )
        case "quadrilateral":
            self = try .quadrilateral(points: fields.decode(String.self, forKey: .points))
        default:
            throw DecodingError.dataCorruptedError(
                forKey: .type,
                in: fields,
                debugDescription: "Unknown OcrBoundingGeometry type: \(tag)"
            )
        }
    }

    /// Encodes the geometry as a flat keyed container: the `type` tag first,
    /// then the case's payload fields.
    ///
    /// - Throws: Any error raised by the encoder while writing a value.
    public func encode(to encoder: Encoder) throws {
        var fields = encoder.container(keyedBy: CodingKeys.self)
        switch self {
        case let .rectangle(left, top, width, height):
            try fields.encode("rectangle", forKey: .type)
            try fields.encode(left, forKey: .left)
            try fields.encode(top, forKey: .top)
            try fields.encode(width, forKey: .width)
            try fields.encode(height, forKey: .height)
        case let .quadrilateral(points):
            try fields.encode("quadrilateral", forKey: .type)
            try fields.encode(points, forKey: .points)
        }
    }
}
extension OcrBoundingGeometry {
    /// Converts this value to `RustBridge.OcrBoundingGeometry` by encoding it as JSON
    /// and passing the text to `RustBridge.ocrBoundingGeometryFromJson`.
    ///
    /// - Throws: Errors from `JSONEncoder.encode` or from the bridge call.
    func intoRust() throws -> RustBridge.OcrBoundingGeometry {
        let payload = try String(data: JSONEncoder().encode(self), encoding: .utf8)
        // "null" stands in when the encoded bytes cannot be read as UTF-8.
        return try RustBridge.ocrBoundingGeometryFromJson(payload ?? "null")
    }
}

/// Level of an OCR element within the page hierarchy.
///
/// Maps to Tesseract's page segmentation hierarchy and provides
/// equivalent semantics for PaddleOCR.
/// Each case is backed by the string used for it in serialized form.
public enum OcrElementLevel: String, Codable, Sendable, Hashable {
    /// Individual word.
    case word = "word"
    /// Line of text (default for PaddleOCR).
    case line = "line"
    /// Paragraph or text block.
    case block = "block"
    /// Page-level element.
    case page = "page"
}
extension OcrElementLevel {
    /// Converts this value to `RustBridge.OcrElementLevel` by encoding it as JSON
    /// and passing the text to `RustBridge.ocrElementLevelFromJson`.
    ///
    /// - Throws: Errors from `JSONEncoder.encode` or from the bridge call.
    func intoRust() throws -> RustBridge.OcrElementLevel {
        let payload = try String(data: JSONEncoder().encode(self), encoding: .utf8)
        // "null" stands in when the encoded bytes cannot be read as UTF-8.
        return try RustBridge.ocrElementLevelFromJson(payload ?? "null")
    }
}

/// Kind of paginated unit a document is divided into.
///
/// Distinguishes between different types of "pages" (PDF pages, presentation
/// slides, spreadsheet sheets).
/// Each case is backed by the string used for it in serialized form.
public enum PageUnitType: String, Codable, Sendable, Hashable {
    /// Standard document pages (PDF, DOCX, images).
    case page = "page"
    /// Presentation slides (PPTX, ODP).
    case slide = "slide"
    /// Spreadsheet sheets (XLSX, ODS).
    case sheet = "sheet"
}
extension PageUnitType {
    /// Converts this value to `RustBridge.PageUnitType` by encoding it as JSON
    /// and passing the text to `RustBridge.pageUnitTypeFromJson`.
    ///
    /// - Throws: Errors from `JSONEncoder.encode` or from the bridge call.
    func intoRust() throws -> RustBridge.PageUnitType {
        let payload = try String(data: JSONEncoder().encode(self), encoding: .utf8)
        // "null" stands in when the encoded bytes cannot be read as UTF-8.
        return try RustBridge.pageUnitTypeFromJson(payload ?? "null")
    }
}

/// One line of a unified-diff hunk.
///
/// Defined here (rather than only in `crate::diff`) so `RevisionDelta` can
/// reference it unconditionally, without requiring the `diff` Cargo feature.
/// `crate::diff` re-exports this type verbatim.
///
/// In serialized form the case is identified by a `kind` string and the line
/// text is stored under the `_0` key.
public enum DiffLine: Codable, Sendable, Hashable {
    /// Unchanged context line.
    case context(field0: String)
    /// Line added in the "after" version.
    case added(field0: String)
    /// Line removed from the "before" version.
    case removed(field0: String)

    /// Keys of the keyed container: the `kind` tag and the `_0` payload.
    private enum CodingKeys: String, CodingKey {
        case kind
        case field0 = "_0"
    }

    /// Decodes a diff line by reading the `kind` tag and then the `_0` text.
    ///
    /// - Throws: `DecodingError.dataCorrupted` when the tag is not recognised, or any
    ///   error raised while decoding the tag or the text.
    public init(from decoder: Decoder) throws {
        let fields = try decoder.container(keyedBy: CodingKeys.self)
        let tag = try fields.decode(String.self, forKey: .kind)
        // Read on demand so an unrecognised tag is reported before `_0` is touched.
        func text() throws -> String {
            try fields.decode(String.self, forKey: .field0)
        }
        switch tag {
        case "context":
            self = .context(field0: try text())
        case "added":
            self = .added(field0: try text())
        case "removed":
            self = .removed(field0: try text())
        default:
            throw DecodingError.dataCorruptedError(
                forKey: .kind,
                in: fields,
                debugDescription: "Unknown DiffLine type: \(tag)"
            )
        }
    }

    /// Encodes the diff line as a keyed container holding the `kind` tag
    /// followed by the line text under `_0`.
    ///
    /// - Throws: Any error raised by the encoder while writing a value.
    public func encode(to encoder: Encoder) throws {
        var fields = encoder.container(keyedBy: CodingKeys.self)
        let tag: String
        let line: String
        switch self {
        case let .context(text):
            tag = "context"
            line = text
        case let .added(text):
            tag = "added"
            line = text
        case let .removed(text):
            tag = "removed"
            line = text
        }
        try fields.encode(tag, forKey: .kind)
        try fields.encode(line, forKey: .field0)
    }
}
extension DiffLine {
    /// Converts this value to `RustBridge.DiffLine` by encoding it as JSON
    /// and passing the text to `RustBridge.diffLineFromJson`.
    ///
    /// - Throws: Errors from `JSONEncoder.encode` or from the bridge call.
    func intoRust() throws -> RustBridge.DiffLine {
        let payload = try String(data: JSONEncoder().encode(self), encoding: .utf8)
        // "null" stands in when the encoded bytes cannot be read as UTF-8.
        return try RustBridge.diffLineFromJson(payload ?? "null")
    }
}

/// Semantic category of a tracked change.
///
/// Each case is backed by the snake_case string used for it in serialized form.
public enum RevisionKind: String, Codable, Sendable, Hashable {
    /// Text or content was inserted.
    case insertion = "insertion"
    /// Text or content was deleted.
    case deletion = "deletion"
    /// Run-level formatting (font, size, colour, …) was changed.
    case formatChange = "format_change"
    /// A reviewer comment or annotation.
    case comment = "comment"
}
extension RevisionKind {
    /// Converts this value to `RustBridge.RevisionKind` by encoding it as JSON
    /// and passing the text to `RustBridge.revisionKindFromJson`.
    ///
    /// - Throws: Errors from `JSONEncoder.encode` or from the bridge call.
    func intoRust() throws -> RustBridge.RevisionKind {
        let payload = try String(data: JSONEncoder().encode(self), encoding: .utf8)
        // "null" stands in when the encoded bytes cannot be read as UTF-8.
        return try RustBridge.revisionKindFromJson(payload ?? "null")
    }
}

/// Best-effort document location for a revision.
///
/// Serialized as a keyed object whose `type` field selects the case
/// (`paragraph`, `table_cell`, `page`, `slide` or `sheet`); the remaining
/// fields carry that case's associated values.
public enum RevisionAnchor: Codable, Sendable, Hashable {
    /// Body paragraph, identified by its zero-based index in the document flow.
    case paragraph(index: UInt)
    /// Cell inside a table.
    case tableCell(row: UInt, col: UInt, tableIndex: UInt)
    /// Page, identified by its zero-based index.
    case page(index: UInt)
    /// Presentation slide, identified by its zero-based index.
    case slide(index: UInt)
    /// Spreadsheet cell or range, identified by sheet index and optional name.
    case sheet(index: UInt, name: String?)

    /// Keys of the serialized object; only `tableIndex` differs from its Swift name.
    private enum CodingKeys: String, CodingKey {
        case type, col, index, name, row
        case tableIndex = "table_index"
    }

    /// Decodes an anchor by reading the `type` discriminator and then the
    /// fields belonging to the selected case.
    ///
    /// - Throws: `DecodingError` when a required field is missing or malformed,
    ///   or when `type` names no known case.
    public init(from decoder: Decoder) throws {
        let fields = try decoder.container(keyedBy: CodingKeys.self)
        let discriminator = try fields.decode(String.self, forKey: .type)
        switch discriminator {
        case "paragraph":
            let index = try fields.decode(UInt.self, forKey: .index)
            self = .paragraph(index: index)
        case "table_cell":
            let row = try fields.decode(UInt.self, forKey: .row)
            let col = try fields.decode(UInt.self, forKey: .col)
            let tableIndex = try fields.decode(UInt.self, forKey: .tableIndex)
            self = .tableCell(row: row, col: col, tableIndex: tableIndex)
        case "page":
            let index = try fields.decode(UInt.self, forKey: .index)
            self = .page(index: index)
        case "slide":
            let index = try fields.decode(UInt.self, forKey: .index)
            self = .slide(index: index)
        case "sheet":
            let index = try fields.decode(UInt.self, forKey: .index)
            // `name` is optional on the wire: a missing key or null yields nil.
            let name = try fields.decodeIfPresent(String.self, forKey: .name)
            self = .sheet(index: index, name: name)
        default:
            throw DecodingError.dataCorruptedError(
                forKey: .type,
                in: fields,
                debugDescription: "Unknown RevisionAnchor type: \(discriminator)"
            )
        }
    }

    /// Encodes the anchor as a keyed object: the `type` discriminator first,
    /// then the associated values of the current case.
    ///
    /// - Throws: Any error raised while writing to the encoder's container.
    public func encode(to encoder: Encoder) throws {
        var fields = encoder.container(keyedBy: CodingKeys.self)
        switch self {
        case let .paragraph(index):
            try fields.encode("paragraph", forKey: .type)
            try fields.encode(index, forKey: .index)
        case let .tableCell(row, col, tableIndex):
            try fields.encode("table_cell", forKey: .type)
            try fields.encode(row, forKey: .row)
            try fields.encode(col, forKey: .col)
            try fields.encode(tableIndex, forKey: .tableIndex)
        case let .page(index):
            try fields.encode("page", forKey: .type)
            try fields.encode(index, forKey: .index)
        case let .slide(index):
            try fields.encode("slide", forKey: .type)
            try fields.encode(index, forKey: .index)
        case let .sheet(index, name):
            try fields.encode("sheet", forKey: .type)
            try fields.encode(index, forKey: .index)
            // A nil name is omitted from the output rather than written as null.
            try fields.encodeIfPresent(name, forKey: .name)
        }
    }
}
extension RevisionAnchor {
    /// Converts this value into its `RustBridge.RevisionAnchor` counterpart.
    ///
    /// The value is serialized with `JSONEncoder` and the resulting JSON text is
    /// handed to `RustBridge.revisionAnchorFromJson`.
    ///
    /// - Throws: Errors from `JSONEncoder.encode(_:)` or `RustBridge.revisionAnchorFromJson`.
    func intoRust() throws -> RustBridge.RevisionAnchor {
        let encoded = try JSONEncoder().encode(self)
        // Use the JSON literal `null` if the encoded bytes cannot be read as UTF-8.
        guard let payload = String(data: encoded, encoding: .utf8) else {
            return try RustBridge.revisionAnchorFromJson("null")
        }
        return try RustBridge.revisionAnchorFromJson(payload)
    }
}

/// Semantic classification of an extracted URI.
///
/// Every case spells out its serialized string explicitly, so the wire value
/// stays fixed even if a Swift case name is later renamed.
public enum UriKind: String, Codable, Sendable, Hashable {
    /// A clickable hyperlink (web URL, file link).
    case hyperlink = "hyperlink"
    /// An image or media resource reference.
    case image = "image"
    /// An internal anchor or cross-reference target.
    case anchor = "anchor"
    /// A citation or bibliographic reference (DOI, academic ref).
    case citation = "citation"
    /// A general reference (e.g. `\ref{}` in LaTeX, `:ref:` in RST).
    case reference = "reference"
    /// An email address (`mailto:` link or bare email).
    case email = "email"
}
extension UriKind {
    /// Converts this value into its `RustBridge.UriKind` counterpart.
    ///
    /// The value is serialized with `JSONEncoder` and the resulting JSON text is
    /// handed to `RustBridge.uriKindFromJson`.
    ///
    /// - Throws: Errors from `JSONEncoder.encode(_:)` or `RustBridge.uriKindFromJson`.
    func intoRust() throws -> RustBridge.UriKind {
        let encoded = try JSONEncoder().encode(self)
        // Use the JSON literal `null` if the encoded bytes cannot be read as UTF-8.
        guard let payload = String(data: encoded, encoding: .utf8) else {
            return try RustBridge.uriKindFromJson("null")
        }
        return try RustBridge.uriKindFromJson(payload)
    }
}

/// Keyword algorithm selection.
///
/// Every case spells out its serialized string explicitly, so the wire value
/// stays fixed even if a Swift case name is later renamed.
public enum KeywordAlgorithm: String, Codable, Sendable, Hashable {
    /// YAKE (Yet Another Keyword Extractor) - statistical approach
    case yake = "yake"
    /// RAKE (Rapid Automatic Keyword Extraction) - co-occurrence based
    case rake = "rake"
}
extension KeywordAlgorithm {
    /// Converts this value into its `RustBridge.KeywordAlgorithm` counterpart.
    ///
    /// The value is serialized with `JSONEncoder` and the resulting JSON text is
    /// handed to `RustBridge.keywordAlgorithmFromJson`.
    ///
    /// - Throws: Errors from `JSONEncoder.encode(_:)` or `RustBridge.keywordAlgorithmFromJson`.
    func intoRust() throws -> RustBridge.KeywordAlgorithm {
        let encoded = try JSONEncoder().encode(self)
        // Use the JSON literal `null` if the encoded bytes cannot be read as UTF-8.
        guard let payload = String(data: encoded, encoding: .utf8) else {
            return try RustBridge.keywordAlgorithmFromJson("null")
        }
        return try RustBridge.keywordAlgorithmFromJson(payload)
    }
}

/// Page Segmentation Mode for Tesseract OCR
///
/// Each case's raw value is the PascalCase string used as its serialized
/// (`Codable`) form, e.g. `.autoOsd` is encoded as `"AutoOsd"`.
///
/// NOTE(review): the cases appear to mirror Tesseract's `--psm` modes 0–10 in
/// declaration order — confirm against the Rust definition.
public enum PSMMode: String, Codable, Sendable, Hashable {
    case osdOnly = "OsdOnly"
    case autoOsd = "AutoOsd"
    case autoOnly = "AutoOnly"
    case auto = "Auto"
    case singleColumn = "SingleColumn"
    case singleBlockVertical = "SingleBlockVertical"
    case singleBlock = "SingleBlock"
    case singleLine = "SingleLine"
    case singleWord = "SingleWord"
    case circleWord = "CircleWord"
    case singleChar = "SingleChar"
}
extension PSMMode {
    /// Converts this value into its `RustBridge.PSMMode` counterpart.
    ///
    /// The value is serialized with `JSONEncoder` and the resulting JSON text is
    /// handed to `RustBridge.psmModeFromJson`.
    ///
    /// - Throws: Errors from `JSONEncoder.encode(_:)` or `RustBridge.psmModeFromJson`.
    func intoRust() throws -> RustBridge.PSMMode {
        let encoded = try JSONEncoder().encode(self)
        // Use the JSON literal `null` if the encoded bytes cannot be read as UTF-8.
        guard let payload = String(data: encoded, encoding: .utf8) else {
            return try RustBridge.psmModeFromJson("null")
        }
        return try RustBridge.psmModeFromJson(payload)
    }
}

/// Supported languages in PaddleOCR.
///
/// Maps user-friendly language codes to paddle-ocr-rs language identifiers.
///
/// Each case's raw value is the PascalCase string used as its serialized
/// (`Codable`) form, e.g. `.traditionalChinese` is encoded as
/// `"TraditionalChinese"`.
public enum PaddleLanguage: String, Codable, Sendable, Hashable {
    /// English
    case english = "English"
    /// Simplified Chinese
    case chinese = "Chinese"
    /// Japanese
    case japanese = "Japanese"
    /// Korean
    case korean = "Korean"
    /// German
    case german = "German"
    /// French
    case french = "French"
    /// Latin script (covers most European languages)
    case latin = "Latin"
    /// Cyrillic (Russian and related)
    case cyrillic = "Cyrillic"
    /// Traditional Chinese
    case traditionalChinese = "TraditionalChinese"
    /// Thai
    case thai = "Thai"
    /// Greek
    case greek = "Greek"
    /// East Slavic (Russian, Ukrainian, Belarusian)
    case eastSlavic = "EastSlavic"
    /// Arabic (Arabic, Persian, Urdu)
    case arabic = "Arabic"
    /// Devanagari (Hindi, Marathi, Sanskrit, Nepali)
    case devanagari = "Devanagari"
    /// Tamil
    case tamil = "Tamil"
    /// Telugu
    case telugu = "Telugu"
}
extension PaddleLanguage {
    /// Converts this value into its `RustBridge.PaddleLanguage` counterpart.
    ///
    /// The value is serialized with `JSONEncoder` and the resulting JSON text is
    /// handed to `RustBridge.paddleLanguageFromJson`.
    ///
    /// - Throws: Errors from `JSONEncoder.encode(_:)` or `RustBridge.paddleLanguageFromJson`.
    func intoRust() throws -> RustBridge.PaddleLanguage {
        let encoded = try JSONEncoder().encode(self)
        // Use the JSON literal `null` if the encoded bytes cannot be read as UTF-8.
        guard let payload = String(data: encoded, encoding: .utf8) else {
            return try RustBridge.paddleLanguageFromJson("null")
        }
        return try RustBridge.paddleLanguageFromJson(payload)
    }
}

/// The 17 canonical document layout classes.
///
/// All model backends (RT-DETR, YOLO, etc.) map their native class IDs
/// to this shared set. Models with fewer classes (DocLayNet: 11, PubLayNet: 5)
/// map to the closest equivalent.
///
/// Wire format is snake_case in all serializers (JSON, TOML, YAML). Every case
/// spells out its wire string explicitly so the serialized form is visible at a
/// glance and independent of the Swift case name.
public enum LayoutClass: String, Codable, Sendable, Hashable {
    case caption = "caption"
    case footnote = "footnote"
    case formula = "formula"
    case listItem = "list_item"
    case pageFooter = "page_footer"
    case pageHeader = "page_header"
    case picture = "picture"
    case sectionHeader = "section_header"
    case table = "table"
    case text = "text"
    case title = "title"
    case documentIndex = "document_index"
    case code = "code"
    case checkboxSelected = "checkbox_selected"
    case checkboxUnselected = "checkbox_unselected"
    case form = "form"
    case keyValueRegion = "key_value_region"
}
extension LayoutClass {
    /// Converts this value into its `RustBridge.LayoutClass` counterpart.
    ///
    /// The value is serialized with `JSONEncoder` and the resulting JSON text is
    /// handed to `RustBridge.layoutClassFromJson`.
    ///
    /// - Throws: Errors from `JSONEncoder.encode(_:)` or `RustBridge.layoutClassFromJson`.
    func intoRust() throws -> RustBridge.LayoutClass {
        let encoded = try JSONEncoder().encode(self)
        // Use the JSON literal `null` if the encoded bytes cannot be read as UTF-8.
        guard let payload = String(data: encoded, encoding: .utf8) else {
            return try RustBridge.layoutClassFromJson("null")
        }
        return try RustBridge.layoutClassFromJson(payload)
    }
}

/// Main error type for all Kreuzberg operations.
///
/// All errors in Kreuzberg use this enum, which preserves error chains
/// and provides context for debugging.
///
/// # Variants
///
/// Listed by Swift case name. Every case except `cancelled` carries a
/// `message` string; some carry extra payload as noted.
///
/// - `io` - File system and I/O errors (always bubble up)
/// - `parsing` - Document parsing errors (corrupt files, unsupported features)
/// - `ocr` - OCR processing errors
/// - `validation` - Input validation errors (invalid paths, config, parameters)
/// - `cache` - Cache operation errors (non-fatal, can be ignored)
/// - `imageProcessing` - Image manipulation errors
/// - `serialization` - JSON/MessagePack serialization errors
/// - `missingDependency` - Missing optional dependencies (tesseract, etc.)
/// - `plugin` - Plugin-specific errors; also carries `pluginName`
/// - `lockPoisoned` - Mutex/RwLock poisoning (should not happen in normal operation)
/// - `unsupportedFormat` - Unsupported MIME type or file format
/// - `embedding` - NOTE(review): presumably embedding-generation errors — confirm against the Rust definition
/// - `timeout` - Also carries `elapsedMs` and `limitMs` (presumably milliseconds — confirm)
/// - `cancelled` - Carries no payload
/// - `security` - NOTE(review): presumably security-limit violations — confirm against the Rust definition
/// - `other` - Catch-all for uncommon errors
public enum KreuzbergError: Swift.Error {
    case io(message: String, field0: String)
    case parsing(message: String)
    case ocr(message: String)
    case validation(message: String)
    case cache(message: String)
    case imageProcessing(message: String)
    case serialization(message: String)
    case missingDependency(message: String, field0: String)
    case plugin(message: String, pluginName: String)
    case lockPoisoned(message: String, field0: String)
    case unsupportedFormat(message: String, field0: String)
    case embedding(message: String)
    case timeout(message: String, elapsedMs: UInt64, limitMs: UInt64)
    case cancelled
    case security(message: String)
    case other(message: String, field0: String)
}

// MARK: - Convenience Wrapper Functions
// These wrappers bridge String / [UInt8] inputs to RustBridge's
// RustVec<UInt8> requirement. The config parameter must be a fully
// constructed opaque value (built via the generated initializer).
// The wrappers in this section do not decode JSON configs themselves,
// because swift-bridge opaque proxy classes are not Codable Swift structs;
// JSON for such types is parsed on the Rust side via the `...FromJson`
// helpers further down in this file.

/// Copies a Swift `[UInt8]` array into a freshly created `RustVec<UInt8>`.
///
/// swift-bridge's `RustVec<T>` runtime only exposes `init()` and `push(value:)`,
/// with no array-initializer shorthand, so the bytes are appended one at a time
/// in their original order.
///
/// - Parameter bytes: The bytes to copy.
/// - Returns: A new vector containing `bytes` in the same order.
private func makeByteVec(_ bytes: [UInt8]) -> RustVec<UInt8> {
    let rustBytes = RustVec<UInt8>()
    bytes.forEach { byte in
        rustBytes.push(value: byte)
    }
    return rustBytes
}

/// Convenience overload: accepts a `String`, converts it to its UTF-8 bytes
/// and forwards them to `extractBytesSync`.
///
/// - Parameters:
///   - content: Text whose UTF-8 encoding is passed on as the document bytes.
///   - mimeType: MIME type forwarded unchanged.
///   - config: Extraction configuration forwarded unchanged.
/// - Throws: Any error raised by `extractBytesSync`.
public func extractBytes(
    content: String,
    mimeType: String,
    config: ExtractionConfig
) throws -> ExtractionResult {
    let utf8Bytes = Array(content.utf8)
    let payload = makeByteVec(utf8Bytes)
    return try extractBytesSync(payload, mimeType, config)
}
/// Convenience overload: accepts a `[UInt8]` byte array, copies it into a
/// `RustVec<UInt8>` and forwards it to `extractBytesSync`.
///
/// - Parameters:
///   - content: Document bytes.
///   - mimeType: MIME type forwarded unchanged.
///   - config: Extraction configuration forwarded unchanged.
/// - Throws: Any error raised by `extractBytesSync`.
public func extractBytes(
    content: [UInt8],
    mimeType: String,
    config: ExtractionConfig
) throws -> ExtractionResult {
    let payload = makeByteVec(content)
    return try extractBytesSync(payload, mimeType, config)
}
/// Convenience overload: accepts a file path as a `String` and forwards it to
/// `extractFileSync`.
///
/// - Parameters:
///   - path: Path of the file to extract.
///   - mimeType: Optional MIME type, forwarded unchanged; defaults to `nil`.
///   - config: Extraction configuration forwarded unchanged.
/// - Throws: Any error raised by `extractFileSync`.
public func extractFile(
    path: String,
    mimeType: String? = nil,
    config: ExtractionConfig
) throws -> ExtractionResult {
    return try extractFileSync(path, mimeType, config)
}
// MARK: - JSON-String Convenience Overloads
// These overloads accept JSON-encoded config parameters and decode them automatically.
// Enables e2e tests to pass JSON strings directly without typed config construction.

/// Interprets `pathOrContent` as a file path if a readable file can be found,
/// otherwise as literal text.
///
/// Candidate locations are tried in this order: the string as given, then the
/// string joined onto the current working directory, onto the
/// `ALEF_TEST_DOCUMENTS_DIR` environment variable (when set), and onto the
/// `test_documents/` and `fixtures/` directories of the working directory and
/// of up to 15 of its ancestors. The first candidate that exists and can be
/// read wins.
///
/// - Parameter pathOrContent: A file path or literal content.
/// - Returns: The file's bytes, or the UTF-8 bytes of `pathOrContent` when no
///   candidate could be read.
private func _loadBytesFromPathOrUtf8(_ pathOrContent: String) throws -> [UInt8] {
    let fileManager = FileManager.default
    let workingDirectory = fileManager.currentDirectoryPath

    var searchRoots = [workingDirectory]
    if let documentsDir = ProcessInfo.processInfo.environment["ALEF_TEST_DOCUMENTS_DIR"] {
        searchRoots.append(documentsDir)
    }

    // Climb from the working directory towards the filesystem root, at most 16 levels.
    var ancestor = URL(fileURLWithPath: workingDirectory)
    for _ in 1...16 {
        for directoryName in ["test_documents", "fixtures"] {
            searchRoots.append(ancestor.appendingPathComponent(directoryName).path)
        }
        let parent = ancestor.deletingLastPathComponent()
        // Stop once going up no longer changes the path.
        guard parent.path != ancestor.path else { break }
        ancestor = parent
    }

    var candidatePaths = [pathOrContent]
    for root in searchRoots {
        candidatePaths.append((root as NSString).appendingPathComponent(pathOrContent))
    }

    for candidate in candidatePaths where fileManager.fileExists(atPath: candidate) {
        // Unreadable entries (e.g. directories) are skipped rather than reported.
        if let contents = try? Data(contentsOf: URL(fileURLWithPath: candidate)) {
            return [UInt8](contents)
        }
    }
    return [UInt8](pathOrContent.utf8)
}

/// Decodes `configJson` with `extractionConfigFromJson` and forwards to
/// `extractBytes(content:mimeType:config:)`.
///
/// - Throws: Errors from `extractionConfigFromJson` or from the forwarded call.
public func extractBytes(_ content: [UInt8], _ mimeType: String, _ configJson: String) async throws -> ExtractionResult {
    let decodedConfig = try extractionConfigFromJson(configJson)
    return try await extractBytes(
        content: content,
        mimeType: mimeType,
        config: decodedConfig
    )
}

/// Decodes `configJson` with `extractionConfigFromJson` and forwards to
/// `extractFile(path:mimeType:config:)`.
///
/// - Throws: Errors from `extractionConfigFromJson` or from the forwarded call.
public func extractFile(_ path: String, _ mimeType: String?, _ configJson: String) async throws -> ExtractionResult {
    let decodedConfig = try extractionConfigFromJson(configJson)
    return try await extractFile(
        path: path,
        mimeType: mimeType,
        config: decodedConfig
    )
}

/// Decodes `configJson` with `extractionConfigFromJson` and forwards to
/// `extractFileSync(path:mimeType:config:)`.
///
/// - Throws: Errors from `extractionConfigFromJson` or from the forwarded call.
public func extractFileSync(_ path: String, _ mimeType: String?, _ configJson: String) throws -> ExtractionResult {
    let decodedConfig = try extractionConfigFromJson(configJson)
    return try extractFileSync(
        path: path,
        mimeType: mimeType,
        config: decodedConfig
    )
}

/// Decodes `configJson` with `extractionConfigFromJson` and forwards to
/// `extractBytesSync(content:mimeType:config:)`.
///
/// - Throws: Errors from `extractionConfigFromJson` or from the forwarded call.
public func extractBytesSync(_ content: [UInt8], _ mimeType: String, _ configJson: String) throws -> ExtractionResult {
    let decodedConfig = try extractionConfigFromJson(configJson)
    return try extractBytesSync(
        content: content,
        mimeType: mimeType,
        config: decodedConfig
    )
}

/// Decodes `configJson` with `extractionConfigFromJson` and forwards to
/// `batchExtractFilesSync(items:config:)`.
///
/// - Throws: Errors from `extractionConfigFromJson` or from the forwarded call.
public func batchExtractFilesSync(_ items: [BatchFileItem], _ configJson: String) throws -> [ExtractionResult] {
    let decodedConfig = try extractionConfigFromJson(configJson)
    return try batchExtractFilesSync(
        items: items,
        config: decodedConfig
    )
}

/// Decodes `configJson` with `extractionConfigFromJson` and forwards to
/// `batchExtractBytesSync(items:config:)`.
///
/// - Throws: Errors from `extractionConfigFromJson` or from the forwarded call.
public func batchExtractBytesSync(_ items: [BatchBytesItem], _ configJson: String) throws -> [ExtractionResult] {
    let decodedConfig = try extractionConfigFromJson(configJson)
    return try batchExtractBytesSync(
        items: items,
        config: decodedConfig
    )
}

/// Decodes `configJson` with `extractionConfigFromJson` and forwards to
/// `batchExtractFiles(items:config:)`.
///
/// - Throws: Errors from `extractionConfigFromJson` or from the forwarded call.
public func batchExtractFiles(_ items: [BatchFileItem], _ configJson: String) async throws -> [ExtractionResult] {
    let decodedConfig = try extractionConfigFromJson(configJson)
    return try await batchExtractFiles(
        items: items,
        config: decodedConfig
    )
}

/// Decodes `configJson` with `extractionConfigFromJson` and forwards to
/// `batchExtractBytes(items:config:)`.
///
/// - Throws: Errors from `extractionConfigFromJson` or from the forwarded call.
public func batchExtractBytes(_ items: [BatchBytesItem], _ configJson: String) async throws -> [ExtractionResult] {
    let decodedConfig = try extractionConfigFromJson(configJson)
    return try await batchExtractBytes(
        items: items,
        config: decodedConfig
    )
}

/// Variant of `compare(a:b:opts:)` whose first operand is supplied as JSON.
///
/// Despite its name, `configJson` here holds the JSON form of the first
/// `ExtractionResult` (`a`); it is parsed with `extractionResultFromJson`.
///
/// - Throws: Errors from `extractionResultFromJson` or from `compare(a:b:opts:)`.
public func compare(_ configJson: String, _ b: ExtractionResult, _ opts: DiffOptions) throws -> ExtractionDiff {
    let firstResult = try extractionResultFromJson(configJson)
    return try compare(
        a: firstResult,
        b: b,
        opts: opts
    )
}

/// Variant of `compare(a:b:opts:)` whose second operand is supplied as JSON.
///
/// Despite its name, `configJson` here holds the JSON form of the second
/// `ExtractionResult` (`b`); it is parsed with `extractionResultFromJson`.
///
/// - Throws: Errors from `extractionResultFromJson` or from `compare(a:b:opts:)`.
public func compare(_ a: ExtractionResult, _ configJson: String, _ opts: DiffOptions) throws -> ExtractionDiff {
    let secondResult = try extractionResultFromJson(configJson)
    return try compare(
        a: a,
        b: secondResult,
        opts: opts
    )
}

/// Variant of `compare(a:b:opts:)` whose options are supplied as JSON.
///
/// `configJson` holds the JSON form of the `DiffOptions`; it is parsed with
/// `diffOptionsFromJson`.
///
/// - Throws: Errors from `diffOptionsFromJson` or from `compare(a:b:opts:)`.
public func compare(_ a: ExtractionResult, _ b: ExtractionResult, _ configJson: String) throws -> ExtractionDiff {
    let diffOptions = try diffOptionsFromJson(configJson)
    return try compare(
        a: a,
        b: b,
        opts: diffOptions
    )
}

/// Decodes `configJson` with `embeddingConfigFromJson` and forwards to
/// `embedTextsAsync(texts:config:)`.
///
/// - Throws: Errors from `embeddingConfigFromJson` or from the forwarded call.
public func embedTextsAsync(_ texts: [String], _ configJson: String) async throws -> [[Float]] {
    let embeddingConfig = try embeddingConfigFromJson(configJson)
    return try await embedTextsAsync(
        texts: texts,
        config: embeddingConfig
    )
}

/// Decodes `configJson` with `embeddingConfigFromJson` and forwards to
/// `embedTexts(texts:config:)`.
///
/// - Throws: Errors from `embeddingConfigFromJson` or from the forwarded call.
public func embedTexts(_ texts: [String], _ configJson: String) throws -> [[Float]] {
    let embeddingConfig = try embeddingConfigFromJson(configJson)
    return try embedTexts(
        texts: texts,
        config: embeddingConfig
    )
}

// MARK: - From-JSON Helpers
// Public helpers that decode JSON into first-class Swift types.
// First-class struct types (Codable) use JSONDecoder directly.
// Opaque RustBridge types forward to RustBridge.

/// Decodes a `CacheStats` from JSON text using `JSONDecoder`.
///
/// - Throws: Any error raised by `JSONDecoder.decode(_:from:)`.
public func cacheStatsFromJson(_ json: String) throws -> CacheStats {
    let decoder = JSONDecoder()
    return try decoder.decode(CacheStats.self, from: Data(json.utf8))
}

/// Decodes an `AccelerationConfig` from JSON text using `JSONDecoder`.
///
/// - Throws: Any error raised by `JSONDecoder.decode(_:from:)`.
public func accelerationConfigFromJson(_ json: String) throws -> AccelerationConfig {
    let decoder = JSONDecoder()
    return try decoder.decode(AccelerationConfig.self, from: Data(json.utf8))
}

/// Decodes a `ContentFilterConfig` from JSON text using `JSONDecoder`.
///
/// - Throws: Any error raised by `JSONDecoder.decode(_:from:)`.
public func contentFilterConfigFromJson(_ json: String) throws -> ContentFilterConfig {
    let decoder = JSONDecoder()
    return try decoder.decode(ContentFilterConfig.self, from: Data(json.utf8))
}

/// Decodes an `EmailConfig` from JSON text using `JSONDecoder`.
///
/// - Throws: Any error raised by `JSONDecoder.decode(_:from:)`.
public func emailConfigFromJson(_ json: String) throws -> EmailConfig {
    let decoder = JSONDecoder()
    return try decoder.decode(EmailConfig.self, from: Data(json.utf8))
}

/// Parses JSON text into an `ExtractionConfig` by delegating to
/// `RustBridge.extractionConfigFromJson`.
///
/// - Throws: Any error raised by `RustBridge.extractionConfigFromJson`.
public func extractionConfigFromJson(_ json: String) throws -> ExtractionConfig {
    try RustBridge.extractionConfigFromJson(json)
}

/// Parses JSON text into a `FileExtractionConfig` by delegating to
/// `RustBridge.fileExtractionConfigFromJson`.
///
/// - Throws: Any error raised by `RustBridge.fileExtractionConfigFromJson`.
public func fileExtractionConfigFromJson(_ json: String) throws -> FileExtractionConfig {
    try RustBridge.fileExtractionConfigFromJson(json)
}

/// Parses JSON text into a `BatchBytesItem` by delegating to
/// `RustBridge.batchBytesItemFromJson`.
///
/// - Throws: Any error raised by `RustBridge.batchBytesItemFromJson`.
public func batchBytesItemFromJson(_ json: String) throws -> BatchBytesItem {
    try RustBridge.batchBytesItemFromJson(json)
}

/// Parses JSON text into a `BatchFileItem` by delegating to
/// `RustBridge.batchFileItemFromJson`.
///
/// - Throws: Any error raised by `RustBridge.batchFileItemFromJson`.
public func batchFileItemFromJson(_ json: String) throws -> BatchFileItem {
    try RustBridge.batchFileItemFromJson(json)
}

/// Decodes an `ImageExtractionConfig` from JSON text using `JSONDecoder`.
///
/// - Throws: Any error raised by `JSONDecoder.decode(_:from:)`.
public func imageExtractionConfigFromJson(_ json: String) throws -> ImageExtractionConfig {
    let decoder = JSONDecoder()
    return try decoder.decode(ImageExtractionConfig.self, from: Data(json.utf8))
}

/// Decodes a `TokenReductionOptions` from JSON text using `JSONDecoder`.
///
/// - Throws: Any error raised by `JSONDecoder.decode(_:from:)`.
public func tokenReductionOptionsFromJson(_ json: String) throws -> TokenReductionOptions {
    let decoder = JSONDecoder()
    return try decoder.decode(TokenReductionOptions.self, from: Data(json.utf8))
}

/// Decodes a `LanguageDetectionConfig` from JSON text using `JSONDecoder`.
///
/// - Throws: Any error raised by `JSONDecoder.decode(_:from:)`.
public func languageDetectionConfigFromJson(_ json: String) throws -> LanguageDetectionConfig {
    let decoder = JSONDecoder()
    return try decoder.decode(LanguageDetectionConfig.self, from: Data(json.utf8))
}

/// Parses JSON text into an `HtmlOutputConfig` by delegating to
/// `RustBridge.htmlOutputConfigFromJson`.
///
/// - Throws: Any error raised by `RustBridge.htmlOutputConfigFromJson`.
public func htmlOutputConfigFromJson(_ json: String) throws -> HtmlOutputConfig {
    try RustBridge.htmlOutputConfigFromJson(json)
}

/// Decodes a `LayoutDetectionConfig` from JSON text using `JSONDecoder`.
///
/// - Throws: Any error raised by `JSONDecoder.decode(_:from:)`.
public func layoutDetectionConfigFromJson(_ json: String) throws -> LayoutDetectionConfig {
    let decoder = JSONDecoder()
    return try decoder.decode(LayoutDetectionConfig.self, from: Data(json.utf8))
}

/// Decodes an `LlmConfig` from JSON text using `JSONDecoder`.
///
/// - Throws: Any error raised by `JSONDecoder.decode(_:from:)`.
public func llmConfigFromJson(_ json: String) throws -> LlmConfig {
    let decoder = JSONDecoder()
    return try decoder.decode(LlmConfig.self, from: Data(json.utf8))
}

/// Parses JSON text into a `StructuredExtractionConfig` by delegating to
/// `RustBridge.structuredExtractionConfigFromJson`.
///
/// - Throws: Any error raised by `RustBridge.structuredExtractionConfigFromJson`.
public func structuredExtractionConfigFromJson(_ json: String) throws -> StructuredExtractionConfig {
    try RustBridge.structuredExtractionConfigFromJson(json)
}

/// Decodes an `OcrQualityThresholds` from JSON text using `JSONDecoder`.
///
/// - Throws: Any error raised by `JSONDecoder.decode(_:from:)`.
public func ocrQualityThresholdsFromJson(_ json: String) throws -> OcrQualityThresholds {
    let decoder = JSONDecoder()
    return try decoder.decode(OcrQualityThresholds.self, from: Data(json.utf8))
}

/// Parses JSON text into an `OcrPipelineStage` by delegating to
/// `RustBridge.ocrPipelineStageFromJson`.
///
/// - Throws: Any error raised by `RustBridge.ocrPipelineStageFromJson`.
public func ocrPipelineStageFromJson(_ json: String) throws -> OcrPipelineStage {
    try RustBridge.ocrPipelineStageFromJson(json)
}

/// Parses JSON text into an `OcrPipelineConfig` by delegating to
/// `RustBridge.ocrPipelineConfigFromJson`.
///
/// - Throws: Any error raised by `RustBridge.ocrPipelineConfigFromJson`.
public func ocrPipelineConfigFromJson(_ json: String) throws -> OcrPipelineConfig {
    try RustBridge.ocrPipelineConfigFromJson(json)
}

/// Parses JSON text into an `OcrConfig` by delegating to
/// `RustBridge.ocrConfigFromJson`.
///
/// - Throws: Any error raised by `RustBridge.ocrConfigFromJson`.
public func ocrConfigFromJson(_ json: String) throws -> OcrConfig {
    try RustBridge.ocrConfigFromJson(json)
}

/// Decodes a `PageConfig` from JSON text using `JSONDecoder`.
///
/// - Throws: Any error raised by `JSONDecoder.decode(_:from:)`.
public func pageConfigFromJson(_ json: String) throws -> PageConfig {
    let decoder = JSONDecoder()
    return try decoder.decode(PageConfig.self, from: Data(json.utf8))
}

/// Decodes a `PdfConfig` from JSON text using `JSONDecoder`.
///
/// - Throws: Any error raised by `JSONDecoder.decode(_:from:)`.
public func pdfConfigFromJson(_ json: String) throws -> PdfConfig {
    let decoder = JSONDecoder()
    return try decoder.decode(PdfConfig.self, from: Data(json.utf8))
}

/// Decodes a `HierarchyConfig` from JSON text using `JSONDecoder`.
///
/// - Throws: Any error raised by `JSONDecoder.decode(_:from:)`.
public func hierarchyConfigFromJson(_ json: String) throws -> HierarchyConfig {
    let decoder = JSONDecoder()
    return try decoder.decode(HierarchyConfig.self, from: Data(json.utf8))
}

/// Decodes a `PostProcessorConfig` from JSON text using `JSONDecoder`.
///
/// - Throws: Any error raised by `JSONDecoder.decode(_:from:)`.
public func postProcessorConfigFromJson(_ json: String) throws -> PostProcessorConfig {
    let decoder = JSONDecoder()
    return try decoder.decode(PostProcessorConfig.self, from: Data(json.utf8))
}

/// Parses JSON text into a `ChunkingConfig` by delegating to
/// `RustBridge.chunkingConfigFromJson`.
///
/// - Throws: Any error raised by `RustBridge.chunkingConfigFromJson`.
public func chunkingConfigFromJson(_ json: String) throws -> ChunkingConfig {
    try RustBridge.chunkingConfigFromJson(json)
}

/// Parses JSON text into an `EmbeddingConfig` by delegating to
/// `RustBridge.embeddingConfigFromJson`.
///
/// - Throws: Any error raised by `RustBridge.embeddingConfigFromJson`.
public func embeddingConfigFromJson(_ json: String) throws -> EmbeddingConfig {
    try RustBridge.embeddingConfigFromJson(json)
}

/// Parses JSON text into a `TreeSitterConfig` by delegating to
/// `RustBridge.treeSitterConfigFromJson`.
///
/// - Throws: Any error raised by `RustBridge.treeSitterConfigFromJson`.
public func treeSitterConfigFromJson(_ json: String) throws -> TreeSitterConfig {
    try RustBridge.treeSitterConfigFromJson(json)
}

/// Decodes a `TreeSitterProcessConfig` from JSON text using `JSONDecoder`.
///
/// - Throws: Any error raised by `JSONDecoder.decode(_:from:)`.
public func treeSitterProcessConfigFromJson(_ json: String) throws -> TreeSitterProcessConfig {
    let decoder = JSONDecoder()
    return try decoder.decode(TreeSitterProcessConfig.self, from: Data(json.utf8))
}

/// Decodes a `SupportedFormat` from JSON text using `JSONDecoder`.
///
/// - Throws: Any error raised by `JSONDecoder.decode(_:from:)`.
public func supportedFormatFromJson(_ json: String) throws -> SupportedFormat {
    let decoder = JSONDecoder()
    return try decoder.decode(SupportedFormat.self, from: Data(json.utf8))
}

/// Decodes a `ServerConfig` from JSON text using `JSONDecoder`.
///
/// - Throws: Any error raised by `JSONDecoder.decode(_:from:)`.
public func serverConfigFromJson(_ json: String) throws -> ServerConfig {
    let decoder = JSONDecoder()
    return try decoder.decode(ServerConfig.self, from: Data(json.utf8))
}

/// Parses JSON text into a `StructuredDataResult` by delegating to
/// `RustBridge.structuredDataResultFromJson`.
///
/// - Throws: Any error raised by `RustBridge.structuredDataResultFromJson`.
public func structuredDataResultFromJson(_ json: String) throws -> StructuredDataResult {
    try RustBridge.structuredDataResultFromJson(json)
}

/// Decodes a `DocxAppProperties` from JSON text using `JSONDecoder`.
///
/// - Throws: Any error raised by `JSONDecoder.decode(_:from:)`.
public func docxAppPropertiesFromJson(_ json: String) throws -> DocxAppProperties {
    let decoder = JSONDecoder()
    return try decoder.decode(DocxAppProperties.self, from: Data(json.utf8))
}

/// Decodes an `XlsxAppProperties` from JSON text using `JSONDecoder`.
///
/// - Throws: Any error raised by `JSONDecoder.decode(_:from:)`.
public func xlsxAppPropertiesFromJson(_ json: String) throws -> XlsxAppProperties {
    let decoder = JSONDecoder()
    return try decoder.decode(XlsxAppProperties.self, from: Data(json.utf8))
}

/// Decodes a `PptxAppProperties` from JSON text using `JSONDecoder`.
///
/// - Throws: Any error raised by `JSONDecoder.decode(_:from:)`.
public func pptxAppPropertiesFromJson(_ json: String) throws -> PptxAppProperties {
    let decoder = JSONDecoder()
    return try decoder.decode(PptxAppProperties.self, from: Data(json.utf8))
}

/// Decodes a `CoreProperties` from JSON text using `JSONDecoder`.
///
/// - Throws: Any error raised by `JSONDecoder.decode(_:from:)`.
public func corePropertiesFromJson(_ json: String) throws -> CoreProperties {
    let decoder = JSONDecoder()
    return try decoder.decode(CoreProperties.self, from: Data(json.utf8))
}

/// Decodes a `SecurityLimits` from JSON text using `JSONDecoder`.
///
/// - Throws: Any error raised by `JSONDecoder.decode(_:from:)`.
public func securityLimitsFromJson(_ json: String) throws -> SecurityLimits {
    let decoder = JSONDecoder()
    return try decoder.decode(SecurityLimits.self, from: Data(json.utf8))
}

/// Parses JSON text into a `TokenReductionConfig` by delegating to
/// `RustBridge.tokenReductionConfigFromJson`.
///
/// - Throws: Any error raised by `RustBridge.tokenReductionConfigFromJson`.
public func tokenReductionConfigFromJson(_ json: String) throws -> TokenReductionConfig {
    try RustBridge.tokenReductionConfigFromJson(json)
}

/// Decodes a `PdfAnnotation` from JSON text using `JSONDecoder`.
///
/// - Throws: Any error raised by `JSONDecoder.decode(_:from:)`.
public func pdfAnnotationFromJson(_ json: String) throws -> PdfAnnotation {
    let decoder = JSONDecoder()
    return try decoder.decode(PdfAnnotation.self, from: Data(json.utf8))
}

/// Parses JSON text into a `DjotContent` by delegating to
/// `RustBridge.djotContentFromJson`.
///
/// - Throws: Any error raised by `RustBridge.djotContentFromJson`.
public func djotContentFromJson(_ json: String) throws -> DjotContent {
    try RustBridge.djotContentFromJson(json)
}

/// Parses JSON text into a `FormattedBlock` by delegating to
/// `RustBridge.formattedBlockFromJson`.
///
/// - Throws: Any error raised by `RustBridge.formattedBlockFromJson`.
public func formattedBlockFromJson(_ json: String) throws -> FormattedBlock {
    try RustBridge.formattedBlockFromJson(json)
}

/// Parses JSON text into an `InlineElement` by delegating to
/// `RustBridge.inlineElementFromJson`.
///
/// - Throws: Any error raised by `RustBridge.inlineElementFromJson`.
public func inlineElementFromJson(_ json: String) throws -> InlineElement {
    try RustBridge.inlineElementFromJson(json)
}

/// Decodes a `DjotImage` from JSON text using `JSONDecoder`.
///
/// - Throws: Any error raised by `JSONDecoder.decode(_:from:)`.
public func djotImageFromJson(_ json: String) throws -> DjotImage {
    let decoder = JSONDecoder()
    return try decoder.decode(DjotImage.self, from: Data(json.utf8))
}

/// Decodes a `DjotLink` from JSON text using `JSONDecoder`.
///
/// - Throws: Any error raised by `JSONDecoder.decode(_:from:)`.
public func djotLinkFromJson(_ json: String) throws -> DjotLink {
    let decoder = JSONDecoder()
    return try decoder.decode(DjotLink.self, from: Data(json.utf8))
}

/// Parses JSON text into a `Footnote` by delegating to
/// `RustBridge.footnoteFromJson`.
///
/// - Throws: Any error raised by `RustBridge.footnoteFromJson`.
public func footnoteFromJson(_ json: String) throws -> Footnote {
    try RustBridge.footnoteFromJson(json)
}

/// Parses JSON text into a `DocumentStructure` by delegating to
/// `RustBridge.documentStructureFromJson`.
///
/// - Throws: Any error raised by `RustBridge.documentStructureFromJson`.
public func documentStructureFromJson(_ json: String) throws -> DocumentStructure {
    try RustBridge.documentStructureFromJson(json)
}

/// Decodes a `DocumentRelationship` from JSON text using `JSONDecoder`.
///
/// - Throws: Any error raised by `JSONDecoder.decode(_:from:)`.
public func documentRelationshipFromJson(_ json: String) throws -> DocumentRelationship {
    let decoder = JSONDecoder()
    return try decoder.decode(DocumentRelationship.self, from: Data(json.utf8))
}

/// Parses JSON text into a `DocumentNode` by delegating to
/// `RustBridge.documentNodeFromJson`.
///
/// - Throws: Any error raised by `RustBridge.documentNodeFromJson`.
public func documentNodeFromJson(_ json: String) throws -> DocumentNode {
    try RustBridge.documentNodeFromJson(json)
}

/// Decodes a `TableGrid` from JSON text using `JSONDecoder`.
///
/// - Throws: Any error raised by `JSONDecoder.decode(_:from:)`.
public func tableGridFromJson(_ json: String) throws -> TableGrid {
    let decoder = JSONDecoder()
    return try decoder.decode(TableGrid.self, from: Data(json.utf8))
}

/// Decodes a `GridCell` from JSON text using `JSONDecoder`.
///
/// - Throws: Any error raised by `JSONDecoder.decode(_:from:)`.
public func gridCellFromJson(_ json: String) throws -> GridCell {
    let decoder = JSONDecoder()
    return try decoder.decode(GridCell.self, from: Data(json.utf8))
}

/// Decodes a `TextAnnotation` from JSON text using `JSONDecoder`.
///
/// - Throws: Any error raised by `JSONDecoder.decode(_:from:)`.
public func textAnnotationFromJson(_ json: String) throws -> TextAnnotation {
    let decoder = JSONDecoder()
    return try decoder.decode(TextAnnotation.self, from: Data(json.utf8))
}

/// Parses JSON text into an `ExtractionResult` by delegating to
/// `RustBridge.extractionResultFromJson`.
///
/// - Throws: Any error raised by `RustBridge.extractionResultFromJson`.
public func extractionResultFromJson(_ json: String) throws -> ExtractionResult {
    try RustBridge.extractionResultFromJson(json)
}

/// Parses JSON text into an `ArchiveEntry` by delegating to
/// `RustBridge.archiveEntryFromJson`.
///
/// - Throws: Any error raised by `RustBridge.archiveEntryFromJson`.
public func archiveEntryFromJson(_ json: String) throws -> ArchiveEntry {
    try RustBridge.archiveEntryFromJson(json)
}

/// Decodes a `ProcessingWarning` from JSON text using `JSONDecoder`.
///
/// - Throws: Any error raised by `JSONDecoder.decode(_:from:)`.
public func processingWarningFromJson(_ json: String) throws -> ProcessingWarning {
    let decoder = JSONDecoder()
    return try decoder.decode(ProcessingWarning.self, from: Data(json.utf8))
}

/// Decodes an `LlmUsage` from JSON text using `JSONDecoder`.
///
/// - Throws: Any error raised by `JSONDecoder.decode(_:from:)`.
public func llmUsageFromJson(_ json: String) throws -> LlmUsage {
    let decoder = JSONDecoder()
    return try decoder.decode(LlmUsage.self, from: Data(json.utf8))
}

/// Decodes a `Chunk` from JSON text using `JSONDecoder`.
///
/// - Throws: Any error raised by `JSONDecoder.decode(_:from:)`.
public func chunkFromJson(_ json: String) throws -> Chunk {
    let decoder = JSONDecoder()
    return try decoder.decode(Chunk.self, from: Data(json.utf8))
}

/// Decodes a `HeadingContext` from JSON text using `JSONDecoder`.
///
/// - Throws: Any error raised by `JSONDecoder.decode(_:from:)`.
public func headingContextFromJson(_ json: String) throws -> HeadingContext {
    let decoder = JSONDecoder()
    return try decoder.decode(HeadingContext.self, from: Data(json.utf8))
}

/// Decodes a `HeadingLevel` from JSON text using `JSONDecoder`.
///
/// - Throws: Any error raised by `JSONDecoder.decode(_:from:)`.
public func headingLevelFromJson(_ json: String) throws -> HeadingLevel {
    let decoder = JSONDecoder()
    return try decoder.decode(HeadingLevel.self, from: Data(json.utf8))
}

/// Decodes a `ChunkMetadata` from JSON text using `JSONDecoder`.
///
/// - Throws: Any error raised by `JSONDecoder.decode(_:from:)`.
public func chunkMetadataFromJson(_ json: String) throws -> ChunkMetadata {
    let decoder = JSONDecoder()
    return try decoder.decode(ChunkMetadata.self, from: Data(json.utf8))
}

/// Parses JSON text into an `ExtractedImage` by delegating to
/// `RustBridge.extractedImageFromJson`.
///
/// - Throws: Any error raised by `RustBridge.extractedImageFromJson`.
public func extractedImageFromJson(_ json: String) throws -> ExtractedImage {
    try RustBridge.extractedImageFromJson(json)
}

/// Decodes a `BoundingBox` from JSON text using `JSONDecoder`.
///
/// - Throws: Any error raised by `JSONDecoder.decode(_:from:)`.
public func boundingBoxFromJson(_ json: String) throws -> BoundingBox {
    let decoder = JSONDecoder()
    return try decoder.decode(BoundingBox.self, from: Data(json.utf8))
}

/// Parses JSON text into an `ElementMetadata` by delegating to
/// `RustBridge.elementMetadataFromJson`.
///
/// - Throws: Any error raised by `RustBridge.elementMetadataFromJson`.
public func elementMetadataFromJson(_ json: String) throws -> ElementMetadata {
    try RustBridge.elementMetadataFromJson(json)
}

/// Parses JSON text into an `Element` by delegating to
/// `RustBridge.elementFromJson`.
///
/// - Throws: Any error raised by `RustBridge.elementFromJson`.
public func elementFromJson(_ json: String) throws -> Element {
    try RustBridge.elementFromJson(json)
}

/// Parses JSON text into an `ExcelWorkbook` by delegating to
/// `RustBridge.excelWorkbookFromJson`.
///
/// - Throws: Any error raised by `RustBridge.excelWorkbookFromJson`.
public func excelWorkbookFromJson(_ json: String) throws -> ExcelWorkbook {
    try RustBridge.excelWorkbookFromJson(json)
}

/// Decodes an `ExcelSheet` from JSON text using `JSONDecoder`.
///
/// - Throws: Any error raised by `JSONDecoder.decode(_:from:)`.
public func excelSheetFromJson(_ json: String) throws -> ExcelSheet {
    let decoder = JSONDecoder()
    return try decoder.decode(ExcelSheet.self, from: Data(json.utf8))
}

/// Decodes an `XmlExtractionResult` from JSON text using `JSONDecoder`.
///
/// - Throws: Any error raised by `JSONDecoder.decode(_:from:)`.
public func xmlExtractionResultFromJson(_ json: String) throws -> XmlExtractionResult {
    let decoder = JSONDecoder()
    return try decoder.decode(XmlExtractionResult.self, from: Data(json.utf8))
}

/// Decodes a `TextExtractionResult` from JSON text using `JSONDecoder`.
///
/// - Throws: Any error raised by `JSONDecoder.decode(_:from:)`.
public func textExtractionResultFromJson(_ json: String) throws -> TextExtractionResult {
    let decoder = JSONDecoder()
    return try decoder.decode(TextExtractionResult.self, from: Data(json.utf8))
}

/// Parses JSON text into a `PptxExtractionResult` by delegating to
/// `RustBridge.pptxExtractionResultFromJson`.
///
/// - Throws: Any error raised by `RustBridge.pptxExtractionResultFromJson`.
public func pptxExtractionResultFromJson(_ json: String) throws -> PptxExtractionResult {
    try RustBridge.pptxExtractionResultFromJson(json)
}

/// Parses JSON text into an `EmailExtractionResult` by delegating to
/// `RustBridge.emailExtractionResultFromJson`.
///
/// - Throws: Any error raised by `RustBridge.emailExtractionResultFromJson`.
public func emailExtractionResultFromJson(_ json: String) throws -> EmailExtractionResult {
    try RustBridge.emailExtractionResultFromJson(json)
}

/// Parses JSON text into an `EmailAttachment` by delegating to
/// `RustBridge.emailAttachmentFromJson`.
///
/// - Throws: Any error raised by `RustBridge.emailAttachmentFromJson`.
public func emailAttachmentFromJson(_ json: String) throws -> EmailAttachment {
    try RustBridge.emailAttachmentFromJson(json)
}

/// Parses JSON text into an `OcrExtractionResult` by delegating to
/// `RustBridge.ocrExtractionResultFromJson`.
///
/// - Throws: Any error raised by `RustBridge.ocrExtractionResultFromJson`.
public func ocrExtractionResultFromJson(_ json: String) throws -> OcrExtractionResult {
    try RustBridge.ocrExtractionResultFromJson(json)
}

/// Decodes an `OcrTable` from JSON text using `JSONDecoder`.
///
/// - Throws: Any error raised by `JSONDecoder.decode(_:from:)`.
public func ocrTableFromJson(_ json: String) throws -> OcrTable {
    let decoder = JSONDecoder()
    return try decoder.decode(OcrTable.self, from: Data(json.utf8))
}

/// Decodes an `OcrTableBoundingBox` from JSON text using `JSONDecoder`.
///
/// - Throws: Any error raised by `JSONDecoder.decode(_:from:)`.
public func ocrTableBoundingBoxFromJson(_ json: String) throws -> OcrTableBoundingBox {
    let decoder = JSONDecoder()
    return try decoder.decode(OcrTableBoundingBox.self, from: Data(json.utf8))
}

/// Decodes an `ImagePreprocessingConfig` from JSON text using `JSONDecoder`.
///
/// - Throws: Any error raised by `JSONDecoder.decode(_:from:)`.
public func imagePreprocessingConfigFromJson(_ json: String) throws -> ImagePreprocessingConfig {
    let decoder = JSONDecoder()
    return try decoder.decode(ImagePreprocessingConfig.self, from: Data(json.utf8))
}

/// Decodes a `TesseractConfig` from JSON text using `JSONDecoder`.
///
/// - Throws: Any error raised by `JSONDecoder.decode(_:from:)`.
public func tesseractConfigFromJson(_ json: String) throws -> TesseractConfig {
    let decoder = JSONDecoder()
    return try decoder.decode(TesseractConfig.self, from: Data(json.utf8))
}

/// Decodes an `ImagePreprocessingMetadata` from JSON text using `JSONDecoder`.
///
/// - Throws: Any error raised by `JSONDecoder.decode(_:from:)`.
public func imagePreprocessingMetadataFromJson(_ json: String) throws -> ImagePreprocessingMetadata {
    let decoder = JSONDecoder()
    return try decoder.decode(ImagePreprocessingMetadata.self, from: Data(json.utf8))
}

/// Parses JSON text into a `Metadata` by delegating to
/// `RustBridge.metadataFromJson`.
///
/// - Throws: Any error raised by `RustBridge.metadataFromJson`.
public func metadataFromJson(_ json: String) throws -> Metadata {
    try RustBridge.metadataFromJson(json)
}

/// Decodes an `ExcelMetadata` from JSON text using `JSONDecoder`.
///
/// - Throws: Any error raised by `JSONDecoder.decode(_:from:)`.
public func excelMetadataFromJson(_ json: String) throws -> ExcelMetadata {
    let decoder = JSONDecoder()
    return try decoder.decode(ExcelMetadata.self, from: Data(json.utf8))
}

/// Decodes an `EmailMetadata` from JSON text using `JSONDecoder`.
///
/// - Throws: Any error raised by `JSONDecoder.decode(_:from:)`.
public func emailMetadataFromJson(_ json: String) throws -> EmailMetadata {
    let decoder = JSONDecoder()
    return try decoder.decode(EmailMetadata.self, from: Data(json.utf8))
}

/// Decodes an `ArchiveMetadata` from JSON text using `JSONDecoder`.
///
/// - Throws: Any error raised by `JSONDecoder.decode(_:from:)`.
public func archiveMetadataFromJson(_ json: String) throws -> ArchiveMetadata {
    let decoder = JSONDecoder()
    return try decoder.decode(ArchiveMetadata.self, from: Data(json.utf8))
}

/// Parses JSON text into an `ImageMetadata` by delegating to
/// `RustBridge.imageMetadataFromJson`.
///
/// - Throws: Any error raised by `RustBridge.imageMetadataFromJson`.
public func imageMetadataFromJson(_ json: String) throws -> ImageMetadata {
    try RustBridge.imageMetadataFromJson(json)
}

/// Decodes an `XmlMetadata` from JSON text using `JSONDecoder`.
///
/// - Throws: Any error raised by `JSONDecoder.decode(_:from:)`.
public func xmlMetadataFromJson(_ json: String) throws -> XmlMetadata {
    let decoder = JSONDecoder()
    return try decoder.decode(XmlMetadata.self, from: Data(json.utf8))
}

/// Decodes a `TextMetadata` from JSON text using `JSONDecoder`.
///
/// - Throws: Any error raised by `JSONDecoder.decode(_:from:)`.
public func textMetadataFromJson(_ json: String) throws -> TextMetadata {
    let decoder = JSONDecoder()
    return try decoder.decode(TextMetadata.self, from: Data(json.utf8))
}

/// Decodes a `HeaderMetadata` from JSON text using `JSONDecoder`.
///
/// - Throws: Any error raised by `JSONDecoder.decode(_:from:)`.
public func headerMetadataFromJson(_ json: String) throws -> HeaderMetadata {
    let decoder = JSONDecoder()
    return try decoder.decode(HeaderMetadata.self, from: Data(json.utf8))
}

/// Decodes a `LinkMetadata` from JSON text using `JSONDecoder`.
///
/// - Throws: Any error raised by `JSONDecoder.decode(_:from:)`.
public func linkMetadataFromJson(_ json: String) throws -> LinkMetadata {
    let decoder = JSONDecoder()
    return try decoder.decode(LinkMetadata.self, from: Data(json.utf8))
}

/// Decodes an `ImageMetadataType` from JSON text using `JSONDecoder`.
///
/// - Throws: Any error raised by `JSONDecoder.decode(_:from:)`.
public func imageMetadataTypeFromJson(_ json: String) throws -> ImageMetadataType {
    let decoder = JSONDecoder()
    return try decoder.decode(ImageMetadataType.self, from: Data(json.utf8))
}

/// Decodes a `StructuredData` from JSON text using `JSONDecoder`.
///
/// - Throws: Any error raised by `JSONDecoder.decode(_:from:)`.
public func structuredDataFromJson(_ json: String) throws -> StructuredData {
    let decoder = JSONDecoder()
    return try decoder.decode(StructuredData.self, from: Data(json.utf8))
}

/// Parses JSON text into an `HtmlMetadata` by delegating to
/// `RustBridge.htmlMetadataFromJson`.
///
/// - Throws: Any error raised by `RustBridge.htmlMetadataFromJson`.
public func htmlMetadataFromJson(_ json: String) throws -> HtmlMetadata {
    try RustBridge.htmlMetadataFromJson(json)
}

/// Decodes an `OcrMetadata` from JSON text using `JSONDecoder`.
///
/// - Throws: Any error raised by `JSONDecoder.decode(_:from:)`.
public func ocrMetadataFromJson(_ json: String) throws -> OcrMetadata {
    let decoder = JSONDecoder()
    return try decoder.decode(OcrMetadata.self, from: Data(json.utf8))
}

/// Decodes an `ErrorMetadata` from JSON text using `JSONDecoder`.
///
/// - Throws: Any error raised by `JSONDecoder.decode(_:from:)`.
public func errorMetadataFromJson(_ json: String) throws -> ErrorMetadata {
    let decoder = JSONDecoder()
    return try decoder.decode(ErrorMetadata.self, from: Data(json.utf8))
}

/// Decodes a `PptxMetadata` from JSON text using `JSONDecoder`.
///
/// - Throws: Any error raised by `JSONDecoder.decode(_:from:)`.
public func pptxMetadataFromJson(_ json: String) throws -> PptxMetadata {
    let decoder = JSONDecoder()
    return try decoder.decode(PptxMetadata.self, from: Data(json.utf8))
}

/// Parses JSON text into a `DocxMetadata` by delegating to
/// `RustBridge.docxMetadataFromJson`.
///
/// - Throws: Any error raised by `RustBridge.docxMetadataFromJson`.
public func docxMetadataFromJson(_ json: String) throws -> DocxMetadata {
    try RustBridge.docxMetadataFromJson(json)
}

/// Decodes a `CsvMetadata` from JSON text using `JSONDecoder`.
///
/// - Throws: Any error raised by `JSONDecoder.decode(_:from:)`.
public func csvMetadataFromJson(_ json: String) throws -> CsvMetadata {
    let decoder = JSONDecoder()
    return try decoder.decode(CsvMetadata.self, from: Data(json.utf8))
}

/// Parses JSON text into `BibtexMetadata` by forwarding to `RustBridge.bibtexMetadataFromJson`.
///
/// - Parameter json: JSON text passed unchanged to the bridge.
/// - Returns: The `BibtexMetadata` produced by the bridge call.
/// - Throws: Any error thrown by the bridge call.
public func bibtexMetadataFromJson(_ json: String) throws -> BibtexMetadata {
    let parsed: BibtexMetadata = try RustBridge.bibtexMetadataFromJson(json)
    return parsed
}

/// Decodes JSON text into `CitationMetadata` via `JSONDecoder`.
///
/// - Parameter json: JSON text to decode.
/// - Returns: The decoded `CitationMetadata`.
/// - Throws: Any error raised by `JSONDecoder.decode`.
public func citationMetadataFromJson(_ json: String) throws -> CitationMetadata {
    // A nil UTF-8 encoding falls back to empty data, handed to the decoder as-is.
    let payload = json.data(using: .utf8) ?? Data()
    let decoder = JSONDecoder()
    return try decoder.decode(CitationMetadata.self, from: payload)
}

/// Decodes JSON text into `YearRange` via `JSONDecoder`.
///
/// - Parameter json: JSON text to decode.
/// - Returns: The decoded `YearRange`.
/// - Throws: Any error raised by `JSONDecoder.decode`.
public func yearRangeFromJson(_ json: String) throws -> YearRange {
    // A nil UTF-8 encoding falls back to empty data, handed to the decoder as-is.
    let payload = json.data(using: .utf8) ?? Data()
    let decoder = JSONDecoder()
    return try decoder.decode(YearRange.self, from: payload)
}

/// Decodes JSON text into `FictionBookMetadata` via `JSONDecoder`.
///
/// - Parameter json: JSON text to decode.
/// - Returns: The decoded `FictionBookMetadata`.
/// - Throws: Any error raised by `JSONDecoder.decode`.
public func fictionBookMetadataFromJson(_ json: String) throws -> FictionBookMetadata {
    // A nil UTF-8 encoding falls back to empty data, handed to the decoder as-is.
    let payload = json.data(using: .utf8) ?? Data()
    let decoder = JSONDecoder()
    return try decoder.decode(FictionBookMetadata.self, from: payload)
}

/// Decodes JSON text into `DbfMetadata` via `JSONDecoder`.
///
/// - Parameter json: JSON text to decode.
/// - Returns: The decoded `DbfMetadata`.
/// - Throws: Any error raised by `JSONDecoder.decode`.
public func dbfMetadataFromJson(_ json: String) throws -> DbfMetadata {
    // A nil UTF-8 encoding falls back to empty data, handed to the decoder as-is.
    let payload = json.data(using: .utf8) ?? Data()
    let decoder = JSONDecoder()
    return try decoder.decode(DbfMetadata.self, from: payload)
}

/// Decodes JSON text into `DbfFieldInfo` via `JSONDecoder`.
///
/// - Parameter json: JSON text to decode.
/// - Returns: The decoded `DbfFieldInfo`.
/// - Throws: Any error raised by `JSONDecoder.decode`.
public func dbfFieldInfoFromJson(_ json: String) throws -> DbfFieldInfo {
    // A nil UTF-8 encoding falls back to empty data, handed to the decoder as-is.
    let payload = json.data(using: .utf8) ?? Data()
    let decoder = JSONDecoder()
    return try decoder.decode(DbfFieldInfo.self, from: payload)
}

/// Parses JSON text into `JatsMetadata` by forwarding to `RustBridge.jatsMetadataFromJson`.
///
/// - Parameter json: JSON text passed unchanged to the bridge.
/// - Returns: The `JatsMetadata` produced by the bridge call.
/// - Throws: Any error thrown by the bridge call.
public func jatsMetadataFromJson(_ json: String) throws -> JatsMetadata {
    let parsed: JatsMetadata = try RustBridge.jatsMetadataFromJson(json)
    return parsed
}

/// Decodes JSON text into `ContributorRole` via `JSONDecoder`.
///
/// - Parameter json: JSON text to decode.
/// - Returns: The decoded `ContributorRole`.
/// - Throws: Any error raised by `JSONDecoder.decode`.
public func contributorRoleFromJson(_ json: String) throws -> ContributorRole {
    // A nil UTF-8 encoding falls back to empty data, handed to the decoder as-is.
    let payload = json.data(using: .utf8) ?? Data()
    let decoder = JSONDecoder()
    return try decoder.decode(ContributorRole.self, from: payload)
}

/// Decodes JSON text into `EpubMetadata` via `JSONDecoder`.
///
/// - Parameter json: JSON text to decode.
/// - Returns: The decoded `EpubMetadata`.
/// - Throws: Any error raised by `JSONDecoder.decode`.
public func epubMetadataFromJson(_ json: String) throws -> EpubMetadata {
    // A nil UTF-8 encoding falls back to empty data, handed to the decoder as-is.
    let payload = json.data(using: .utf8) ?? Data()
    let decoder = JSONDecoder()
    return try decoder.decode(EpubMetadata.self, from: payload)
}

/// Decodes JSON text into `PstMetadata` via `JSONDecoder`.
///
/// - Parameter json: JSON text to decode.
/// - Returns: The decoded `PstMetadata`.
/// - Throws: Any error raised by `JSONDecoder.decode`.
public func pstMetadataFromJson(_ json: String) throws -> PstMetadata {
    // A nil UTF-8 encoding falls back to empty data, handed to the decoder as-is.
    let payload = json.data(using: .utf8) ?? Data()
    let decoder = JSONDecoder()
    return try decoder.decode(PstMetadata.self, from: payload)
}

/// Decodes JSON text into `OcrConfidence` via `JSONDecoder`.
///
/// - Parameter json: JSON text to decode.
/// - Returns: The decoded `OcrConfidence`.
/// - Throws: Any error raised by `JSONDecoder.decode`.
public func ocrConfidenceFromJson(_ json: String) throws -> OcrConfidence {
    // A nil UTF-8 encoding falls back to empty data, handed to the decoder as-is.
    let payload = json.data(using: .utf8) ?? Data()
    let decoder = JSONDecoder()
    return try decoder.decode(OcrConfidence.self, from: payload)
}

/// Decodes JSON text into `OcrRotation` via `JSONDecoder`.
///
/// - Parameter json: JSON text to decode.
/// - Returns: The decoded `OcrRotation`.
/// - Throws: Any error raised by `JSONDecoder.decode`.
public func ocrRotationFromJson(_ json: String) throws -> OcrRotation {
    // A nil UTF-8 encoding falls back to empty data, handed to the decoder as-is.
    let payload = json.data(using: .utf8) ?? Data()
    let decoder = JSONDecoder()
    return try decoder.decode(OcrRotation.self, from: payload)
}

/// Parses JSON text into `OcrElement` by forwarding to `RustBridge.ocrElementFromJson`.
///
/// - Parameter json: JSON text passed unchanged to the bridge.
/// - Returns: The `OcrElement` produced by the bridge call.
/// - Throws: Any error thrown by the bridge call.
public func ocrElementFromJson(_ json: String) throws -> OcrElement {
    let parsed: OcrElement = try RustBridge.ocrElementFromJson(json)
    return parsed
}

/// Decodes JSON text into `OcrElementConfig` via `JSONDecoder`.
///
/// - Parameter json: JSON text to decode.
/// - Returns: The decoded `OcrElementConfig`.
/// - Throws: Any error raised by `JSONDecoder.decode`.
public func ocrElementConfigFromJson(_ json: String) throws -> OcrElementConfig {
    // A nil UTF-8 encoding falls back to empty data, handed to the decoder as-is.
    let payload = json.data(using: .utf8) ?? Data()
    let decoder = JSONDecoder()
    return try decoder.decode(OcrElementConfig.self, from: payload)
}

/// Decodes JSON text into `PageStructure` via `JSONDecoder`.
///
/// - Parameter json: JSON text to decode.
/// - Returns: The decoded `PageStructure`.
/// - Throws: Any error raised by `JSONDecoder.decode`.
public func pageStructureFromJson(_ json: String) throws -> PageStructure {
    // A nil UTF-8 encoding falls back to empty data, handed to the decoder as-is.
    let payload = json.data(using: .utf8) ?? Data()
    let decoder = JSONDecoder()
    return try decoder.decode(PageStructure.self, from: payload)
}

/// Decodes JSON text into `PageBoundary` via `JSONDecoder`.
///
/// - Parameter json: JSON text to decode.
/// - Returns: The decoded `PageBoundary`.
/// - Throws: Any error raised by `JSONDecoder.decode`.
public func pageBoundaryFromJson(_ json: String) throws -> PageBoundary {
    // A nil UTF-8 encoding falls back to empty data, handed to the decoder as-is.
    let payload = json.data(using: .utf8) ?? Data()
    let decoder = JSONDecoder()
    return try decoder.decode(PageBoundary.self, from: payload)
}

/// Decodes JSON text into `PageInfo` via `JSONDecoder`.
///
/// - Parameter json: JSON text to decode.
/// - Returns: The decoded `PageInfo`.
/// - Throws: Any error raised by `JSONDecoder.decode`.
public func pageInfoFromJson(_ json: String) throws -> PageInfo {
    // A nil UTF-8 encoding falls back to empty data, handed to the decoder as-is.
    let payload = json.data(using: .utf8) ?? Data()
    let decoder = JSONDecoder()
    return try decoder.decode(PageInfo.self, from: payload)
}

/// Decodes JSON text into `PageContent` via `JSONDecoder`.
///
/// - Parameter json: JSON text to decode.
/// - Returns: The decoded `PageContent`.
/// - Throws: Any error raised by `JSONDecoder.decode`.
public func pageContentFromJson(_ json: String) throws -> PageContent {
    // A nil UTF-8 encoding falls back to empty data, handed to the decoder as-is.
    let payload = json.data(using: .utf8) ?? Data()
    let decoder = JSONDecoder()
    return try decoder.decode(PageContent.self, from: payload)
}

/// Decodes JSON text into `LayoutRegion` via `JSONDecoder`.
///
/// - Parameter json: JSON text to decode.
/// - Returns: The decoded `LayoutRegion`.
/// - Throws: Any error raised by `JSONDecoder.decode`.
public func layoutRegionFromJson(_ json: String) throws -> LayoutRegion {
    // A nil UTF-8 encoding falls back to empty data, handed to the decoder as-is.
    let payload = json.data(using: .utf8) ?? Data()
    let decoder = JSONDecoder()
    return try decoder.decode(LayoutRegion.self, from: payload)
}

/// Decodes JSON text into `PageHierarchy` via `JSONDecoder`.
///
/// - Parameter json: JSON text to decode.
/// - Returns: The decoded `PageHierarchy`.
/// - Throws: Any error raised by `JSONDecoder.decode`.
public func pageHierarchyFromJson(_ json: String) throws -> PageHierarchy {
    // A nil UTF-8 encoding falls back to empty data, handed to the decoder as-is.
    let payload = json.data(using: .utf8) ?? Data()
    let decoder = JSONDecoder()
    return try decoder.decode(PageHierarchy.self, from: payload)
}

/// Decodes JSON text into `HierarchicalBlock` via `JSONDecoder`.
///
/// - Parameter json: JSON text to decode.
/// - Returns: The decoded `HierarchicalBlock`.
/// - Throws: Any error raised by `JSONDecoder.decode`.
public func hierarchicalBlockFromJson(_ json: String) throws -> HierarchicalBlock {
    // A nil UTF-8 encoding falls back to empty data, handed to the decoder as-is.
    let payload = json.data(using: .utf8) ?? Data()
    let decoder = JSONDecoder()
    return try decoder.decode(HierarchicalBlock.self, from: payload)
}

/// Decodes JSON text into `CellChange` via `JSONDecoder`.
///
/// - Parameter json: JSON text to decode.
/// - Returns: The decoded `CellChange`.
/// - Throws: Any error raised by `JSONDecoder.decode`.
public func cellChangeFromJson(_ json: String) throws -> CellChange {
    // A nil UTF-8 encoding falls back to empty data, handed to the decoder as-is.
    let payload = json.data(using: .utf8) ?? Data()
    let decoder = JSONDecoder()
    return try decoder.decode(CellChange.self, from: payload)
}

/// Decodes JSON text into `DocumentRevision` via `JSONDecoder`.
///
/// - Parameter json: JSON text to decode.
/// - Returns: The decoded `DocumentRevision`.
/// - Throws: Any error raised by `JSONDecoder.decode`.
public func documentRevisionFromJson(_ json: String) throws -> DocumentRevision {
    // A nil UTF-8 encoding falls back to empty data, handed to the decoder as-is.
    let payload = json.data(using: .utf8) ?? Data()
    let decoder = JSONDecoder()
    return try decoder.decode(DocumentRevision.self, from: payload)
}

/// Decodes JSON text into `RevisionDelta` via `JSONDecoder`.
///
/// - Parameter json: JSON text to decode.
/// - Returns: The decoded `RevisionDelta`.
/// - Throws: Any error raised by `JSONDecoder.decode`.
public func revisionDeltaFromJson(_ json: String) throws -> RevisionDelta {
    // A nil UTF-8 encoding falls back to empty data, handed to the decoder as-is.
    let payload = json.data(using: .utf8) ?? Data()
    let decoder = JSONDecoder()
    return try decoder.decode(RevisionDelta.self, from: payload)
}

/// Decodes JSON text into `Table` via `JSONDecoder`.
///
/// - Parameter json: JSON text to decode.
/// - Returns: The decoded `Table`.
/// - Throws: Any error raised by `JSONDecoder.decode`.
public func tableFromJson(_ json: String) throws -> Table {
    // A nil UTF-8 encoding falls back to empty data, handed to the decoder as-is.
    let payload = json.data(using: .utf8) ?? Data()
    let decoder = JSONDecoder()
    return try decoder.decode(Table.self, from: payload)
}

/// Decodes JSON text into `TableCell` via `JSONDecoder`.
///
/// - Parameter json: JSON text to decode.
/// - Returns: The decoded `TableCell`.
/// - Throws: Any error raised by `JSONDecoder.decode`.
public func tableCellFromJson(_ json: String) throws -> TableCell {
    // A nil UTF-8 encoding falls back to empty data, handed to the decoder as-is.
    let payload = json.data(using: .utf8) ?? Data()
    let decoder = JSONDecoder()
    return try decoder.decode(TableCell.self, from: payload)
}

/// Decodes JSON text into `ExtractedUri` via `JSONDecoder`.
///
/// - Parameter json: JSON text to decode.
/// - Returns: The decoded `ExtractedUri`.
/// - Throws: Any error raised by `JSONDecoder.decode`.
public func extractedUriFromJson(_ json: String) throws -> ExtractedUri {
    // A nil UTF-8 encoding falls back to empty data, handed to the decoder as-is.
    let payload = json.data(using: .utf8) ?? Data()
    let decoder = JSONDecoder()
    return try decoder.decode(ExtractedUri.self, from: payload)
}

/// Decodes JSON text into `DetectResponse` via `JSONDecoder`.
///
/// - Parameter json: JSON text to decode.
/// - Returns: The decoded `DetectResponse`.
/// - Throws: Any error raised by `JSONDecoder.decode`.
public func detectResponseFromJson(_ json: String) throws -> DetectResponse {
    // A nil UTF-8 encoding falls back to empty data, handed to the decoder as-is.
    let payload = json.data(using: .utf8) ?? Data()
    let decoder = JSONDecoder()
    return try decoder.decode(DetectResponse.self, from: payload)
}

/// Decodes JSON text into `DiffOptions` via `JSONDecoder`.
///
/// - Parameter json: JSON text to decode.
/// - Returns: The decoded `DiffOptions`.
/// - Throws: Any error raised by `JSONDecoder.decode`.
public func diffOptionsFromJson(_ json: String) throws -> DiffOptions {
    // A nil UTF-8 encoding falls back to empty data, handed to the decoder as-is.
    let payload = json.data(using: .utf8) ?? Data()
    let decoder = JSONDecoder()
    return try decoder.decode(DiffOptions.self, from: payload)
}

/// Parses JSON text into `ExtractionDiff` by forwarding to `RustBridge.extractionDiffFromJson`.
///
/// - Parameter json: JSON text passed unchanged to the bridge.
/// - Returns: The `ExtractionDiff` produced by the bridge call.
/// - Throws: Any error thrown by the bridge call.
public func extractionDiffFromJson(_ json: String) throws -> ExtractionDiff {
    let parsed: ExtractionDiff = try RustBridge.extractionDiffFromJson(json)
    return parsed
}

/// Decodes JSON text into `DiffHunk` via `JSONDecoder`.
///
/// - Parameter json: JSON text to decode.
/// - Returns: The decoded `DiffHunk`.
/// - Throws: Any error raised by `JSONDecoder.decode`.
public func diffHunkFromJson(_ json: String) throws -> DiffHunk {
    // A nil UTF-8 encoding falls back to empty data, handed to the decoder as-is.
    let payload = json.data(using: .utf8) ?? Data()
    let decoder = JSONDecoder()
    return try decoder.decode(DiffHunk.self, from: payload)
}

/// Decodes JSON text into `TableDiff` via `JSONDecoder`.
///
/// - Parameter json: JSON text to decode.
/// - Returns: The decoded `TableDiff`.
/// - Throws: Any error raised by `JSONDecoder.decode`.
public func tableDiffFromJson(_ json: String) throws -> TableDiff {
    // A nil UTF-8 encoding falls back to empty data, handed to the decoder as-is.
    let payload = json.data(using: .utf8) ?? Data()
    let decoder = JSONDecoder()
    return try decoder.decode(TableDiff.self, from: payload)
}

/// Parses JSON text into `EmbeddedChanges` by forwarding to `RustBridge.embeddedChangesFromJson`.
///
/// - Parameter json: JSON text passed unchanged to the bridge.
/// - Returns: The `EmbeddedChanges` produced by the bridge call.
/// - Throws: Any error thrown by the bridge call.
public func embeddedChangesFromJson(_ json: String) throws -> EmbeddedChanges {
    let parsed: EmbeddedChanges = try RustBridge.embeddedChangesFromJson(json)
    return parsed
}

/// Parses JSON text into `EmbeddedDiff` by forwarding to `RustBridge.embeddedDiffFromJson`.
///
/// - Parameter json: JSON text passed unchanged to the bridge.
/// - Returns: The `EmbeddedDiff` produced by the bridge call.
/// - Throws: Any error thrown by the bridge call.
public func embeddedDiffFromJson(_ json: String) throws -> EmbeddedDiff {
    let parsed: EmbeddedDiff = try RustBridge.embeddedDiffFromJson(json)
    return parsed
}

/// Decodes JSON text into `EmbeddingPreset` via `JSONDecoder`.
///
/// - Parameter json: JSON text to decode.
/// - Returns: The decoded `EmbeddingPreset`.
/// - Throws: Any error raised by `JSONDecoder.decode`.
public func embeddingPresetFromJson(_ json: String) throws -> EmbeddingPreset {
    // A nil UTF-8 encoding falls back to empty data, handed to the decoder as-is.
    let payload = json.data(using: .utf8) ?? Data()
    let decoder = JSONDecoder()
    return try decoder.decode(EmbeddingPreset.self, from: payload)
}

/// Decodes JSON text into `YakeParams` via `JSONDecoder`.
///
/// - Parameter json: JSON text to decode.
/// - Returns: The decoded `YakeParams`.
/// - Throws: Any error raised by `JSONDecoder.decode`.
public func yakeParamsFromJson(_ json: String) throws -> YakeParams {
    // A nil UTF-8 encoding falls back to empty data, handed to the decoder as-is.
    let payload = json.data(using: .utf8) ?? Data()
    let decoder = JSONDecoder()
    return try decoder.decode(YakeParams.self, from: payload)
}

/// Decodes JSON text into `RakeParams` via `JSONDecoder`.
///
/// - Parameter json: JSON text to decode.
/// - Returns: The decoded `RakeParams`.
/// - Throws: Any error raised by `JSONDecoder.decode`.
public func rakeParamsFromJson(_ json: String) throws -> RakeParams {
    // A nil UTF-8 encoding falls back to empty data, handed to the decoder as-is.
    let payload = json.data(using: .utf8) ?? Data()
    let decoder = JSONDecoder()
    return try decoder.decode(RakeParams.self, from: payload)
}

/// Decodes JSON text into `KeywordConfig` via `JSONDecoder`.
///
/// - Parameter json: JSON text to decode.
/// - Returns: The decoded `KeywordConfig`.
/// - Throws: Any error raised by `JSONDecoder.decode`.
public func keywordConfigFromJson(_ json: String) throws -> KeywordConfig {
    // A nil UTF-8 encoding falls back to empty data, handed to the decoder as-is.
    let payload = json.data(using: .utf8) ?? Data()
    let decoder = JSONDecoder()
    return try decoder.decode(KeywordConfig.self, from: payload)
}

/// Decodes JSON text into `Keyword` via `JSONDecoder`.
///
/// - Parameter json: JSON text to decode.
/// - Returns: The decoded `Keyword`.
/// - Throws: Any error raised by `JSONDecoder.decode`.
public func keywordFromJson(_ json: String) throws -> Keyword {
    // A nil UTF-8 encoding falls back to empty data, handed to the decoder as-is.
    let payload = json.data(using: .utf8) ?? Data()
    let decoder = JSONDecoder()
    return try decoder.decode(Keyword.self, from: payload)
}

/// Parses JSON text into `PaddleOcrConfig` by forwarding to `RustBridge.paddleOcrConfigFromJson`.
///
/// - Parameter json: JSON text passed unchanged to the bridge.
/// - Returns: The `PaddleOcrConfig` produced by the bridge call.
/// - Throws: Any error thrown by the bridge call.
public func paddleOcrConfigFromJson(_ json: String) throws -> PaddleOcrConfig {
    let parsed: PaddleOcrConfig = try RustBridge.paddleOcrConfigFromJson(json)
    return parsed
}

/// Parses JSON text into `ModelPaths` by forwarding to `RustBridge.modelPathsFromJson`.
///
/// - Parameter json: JSON text passed unchanged to the bridge.
/// - Returns: The `ModelPaths` produced by the bridge call.
/// - Throws: Any error thrown by the bridge call.
public func modelPathsFromJson(_ json: String) throws -> ModelPaths {
    let parsed: ModelPaths = try RustBridge.modelPathsFromJson(json)
    return parsed
}

/// Decodes JSON text into `OrientationResult` via `JSONDecoder`.
///
/// - Parameter json: JSON text to decode.
/// - Returns: The decoded `OrientationResult`.
/// - Throws: Any error raised by `JSONDecoder.decode`.
public func orientationResultFromJson(_ json: String) throws -> OrientationResult {
    // A nil UTF-8 encoding falls back to empty data, handed to the decoder as-is.
    let payload = json.data(using: .utf8) ?? Data()
    let decoder = JSONDecoder()
    return try decoder.decode(OrientationResult.self, from: payload)
}

/// Decodes JSON text into `BBox` via `JSONDecoder`.
///
/// - Parameter json: JSON text to decode.
/// - Returns: The decoded `BBox`.
/// - Throws: Any error raised by `JSONDecoder.decode`.
public func bBoxFromJson(_ json: String) throws -> BBox {
    // A nil UTF-8 encoding falls back to empty data, handed to the decoder as-is.
    let payload = json.data(using: .utf8) ?? Data()
    let decoder = JSONDecoder()
    return try decoder.decode(BBox.self, from: payload)
}

/// Decodes JSON text into `LayoutDetection` via `JSONDecoder`.
///
/// - Parameter json: JSON text to decode.
/// - Returns: The decoded `LayoutDetection`.
/// - Throws: Any error raised by `JSONDecoder.decode`.
public func layoutDetectionFromJson(_ json: String) throws -> LayoutDetection {
    // A nil UTF-8 encoding falls back to empty data, handed to the decoder as-is.
    let payload = json.data(using: .utf8) ?? Data()
    let decoder = JSONDecoder()
    return try decoder.decode(LayoutDetection.self, from: payload)
}

/// Decodes JSON text into `RecognizedTable` via `JSONDecoder`.
///
/// - Parameter json: JSON text to decode.
/// - Returns: The decoded `RecognizedTable`.
/// - Throws: Any error raised by `JSONDecoder.decode`.
public func recognizedTableFromJson(_ json: String) throws -> RecognizedTable {
    // A nil UTF-8 encoding falls back to empty data, handed to the decoder as-is.
    let payload = json.data(using: .utf8) ?? Data()
    let decoder = JSONDecoder()
    return try decoder.decode(RecognizedTable.self, from: payload)
}

/// Decodes JSON text into `DetectionResult` via `JSONDecoder`.
///
/// - Parameter json: JSON text to decode.
/// - Returns: The decoded `DetectionResult`.
/// - Throws: Any error raised by `JSONDecoder.decode`.
public func detectionResultFromJson(_ json: String) throws -> DetectionResult {
    // A nil UTF-8 encoding falls back to empty data, handed to the decoder as-is.
    let payload = json.data(using: .utf8) ?? Data()
    let decoder = JSONDecoder()
    return try decoder.decode(DetectionResult.self, from: payload)
}

/// Parses JSON text into `EmbeddedFile` by forwarding to `RustBridge.embeddedFileFromJson`.
///
/// - Parameter json: JSON text passed unchanged to the bridge.
/// - Returns: The `EmbeddedFile` produced by the bridge call.
/// - Throws: Any error thrown by the bridge call.
public func embeddedFileFromJson(_ json: String) throws -> EmbeddedFile {
    let parsed: EmbeddedFile = try RustBridge.embeddedFileFromJson(json)
    return parsed
}

/// Decodes JSON text into `PdfMetadata` via `JSONDecoder`.
///
/// - Parameter json: JSON text to decode.
/// - Returns: The decoded `PdfMetadata`.
/// - Throws: Any error raised by `JSONDecoder.decode`.
public func pdfMetadataFromJson(_ json: String) throws -> PdfMetadata {
    // A nil UTF-8 encoding falls back to empty data, handed to the decoder as-is.
    let payload = json.data(using: .utf8) ?? Data()
    let decoder = JSONDecoder()
    return try decoder.decode(PdfMetadata.self, from: payload)
}

/// Decodes JSON text into `ExecutionProviderType` via `JSONDecoder`.
///
/// - Parameter json: JSON text to decode.
/// - Returns: The decoded `ExecutionProviderType`.
/// - Throws: Any error raised by `JSONDecoder.decode`.
public func executionProviderTypeFromJson(_ json: String) throws -> ExecutionProviderType {
    // A nil UTF-8 encoding falls back to empty data, handed to the decoder as-is.
    let payload = json.data(using: .utf8) ?? Data()
    let decoder = JSONDecoder()
    return try decoder.decode(ExecutionProviderType.self, from: payload)
}

/// Decodes JSON text into `OutputFormat` via `JSONDecoder`.
///
/// - Parameter json: JSON text to decode.
/// - Returns: The decoded `OutputFormat`.
/// - Throws: Any error raised by `JSONDecoder.decode`.
public func outputFormatFromJson(_ json: String) throws -> OutputFormat {
    // A nil UTF-8 encoding falls back to empty data, handed to the decoder as-is.
    let payload = json.data(using: .utf8) ?? Data()
    let decoder = JSONDecoder()
    return try decoder.decode(OutputFormat.self, from: payload)
}

/// Decodes JSON text into `HtmlTheme` via `JSONDecoder`.
///
/// - Parameter json: JSON text to decode.
/// - Returns: The decoded `HtmlTheme`.
/// - Throws: Any error raised by `JSONDecoder.decode`.
public func htmlThemeFromJson(_ json: String) throws -> HtmlTheme {
    // A nil UTF-8 encoding falls back to empty data, handed to the decoder as-is.
    let payload = json.data(using: .utf8) ?? Data()
    let decoder = JSONDecoder()
    return try decoder.decode(HtmlTheme.self, from: payload)
}

/// Decodes JSON text into `TableModel` via `JSONDecoder`.
///
/// - Parameter json: JSON text to decode.
/// - Returns: The decoded `TableModel`.
/// - Throws: Any error raised by `JSONDecoder.decode`.
public func tableModelFromJson(_ json: String) throws -> TableModel {
    // A nil UTF-8 encoding falls back to empty data, handed to the decoder as-is.
    let payload = json.data(using: .utf8) ?? Data()
    let decoder = JSONDecoder()
    return try decoder.decode(TableModel.self, from: payload)
}

/// Decodes JSON text into `ChunkerType` via `JSONDecoder`.
///
/// - Parameter json: JSON text to decode.
/// - Returns: The decoded `ChunkerType`.
/// - Throws: Any error raised by `JSONDecoder.decode`.
public func chunkerTypeFromJson(_ json: String) throws -> ChunkerType {
    // A nil UTF-8 encoding falls back to empty data, handed to the decoder as-is.
    let payload = json.data(using: .utf8) ?? Data()
    let decoder = JSONDecoder()
    return try decoder.decode(ChunkerType.self, from: payload)
}

/// Decodes JSON text into `ChunkSizing` via `JSONDecoder`.
///
/// - Parameter json: JSON text to decode.
/// - Returns: The decoded `ChunkSizing`.
/// - Throws: Any error raised by `JSONDecoder.decode`.
public func chunkSizingFromJson(_ json: String) throws -> ChunkSizing {
    // A nil UTF-8 encoding falls back to empty data, handed to the decoder as-is.
    let payload = json.data(using: .utf8) ?? Data()
    let decoder = JSONDecoder()
    return try decoder.decode(ChunkSizing.self, from: payload)
}

/// Decodes JSON text into `EmbeddingModelType` via `JSONDecoder`.
///
/// - Parameter json: JSON text to decode.
/// - Returns: The decoded `EmbeddingModelType`.
/// - Throws: Any error raised by `JSONDecoder.decode`.
public func embeddingModelTypeFromJson(_ json: String) throws -> EmbeddingModelType {
    // A nil UTF-8 encoding falls back to empty data, handed to the decoder as-is.
    let payload = json.data(using: .utf8) ?? Data()
    let decoder = JSONDecoder()
    return try decoder.decode(EmbeddingModelType.self, from: payload)
}

/// Decodes JSON text into `CodeContentMode` via `JSONDecoder`.
///
/// - Parameter json: JSON text to decode.
/// - Returns: The decoded `CodeContentMode`.
/// - Throws: Any error raised by `JSONDecoder.decode`.
public func codeContentModeFromJson(_ json: String) throws -> CodeContentMode {
    // A nil UTF-8 encoding falls back to empty data, handed to the decoder as-is.
    let payload = json.data(using: .utf8) ?? Data()
    let decoder = JSONDecoder()
    return try decoder.decode(CodeContentMode.self, from: payload)
}

/// Decodes JSON text into `OcrBackendType` via `JSONDecoder`.
///
/// - Parameter json: JSON text to decode.
/// - Returns: The decoded `OcrBackendType`.
/// - Throws: Any error raised by `JSONDecoder.decode`.
public func ocrBackendTypeFromJson(_ json: String) throws -> OcrBackendType {
    // A nil UTF-8 encoding falls back to empty data, handed to the decoder as-is.
    let payload = json.data(using: .utf8) ?? Data()
    let decoder = JSONDecoder()
    return try decoder.decode(OcrBackendType.self, from: payload)
}

/// Decodes JSON text into `ProcessingStage` via `JSONDecoder`.
///
/// - Parameter json: JSON text to decode.
/// - Returns: The decoded `ProcessingStage`.
/// - Throws: Any error raised by `JSONDecoder.decode`.
public func processingStageFromJson(_ json: String) throws -> ProcessingStage {
    // A nil UTF-8 encoding falls back to empty data, handed to the decoder as-is.
    let payload = json.data(using: .utf8) ?? Data()
    let decoder = JSONDecoder()
    return try decoder.decode(ProcessingStage.self, from: payload)
}

/// Decodes JSON text into `ReductionLevel` via `JSONDecoder`.
///
/// - Parameter json: JSON text to decode.
/// - Returns: The decoded `ReductionLevel`.
/// - Throws: Any error raised by `JSONDecoder.decode`.
public func reductionLevelFromJson(_ json: String) throws -> ReductionLevel {
    // A nil UTF-8 encoding falls back to empty data, handed to the decoder as-is.
    let payload = json.data(using: .utf8) ?? Data()
    let decoder = JSONDecoder()
    return try decoder.decode(ReductionLevel.self, from: payload)
}

/// Decodes JSON text into `PdfAnnotationType` via `JSONDecoder`.
///
/// - Parameter json: JSON text to decode.
/// - Returns: The decoded `PdfAnnotationType`.
/// - Throws: Any error raised by `JSONDecoder.decode`.
public func pdfAnnotationTypeFromJson(_ json: String) throws -> PdfAnnotationType {
    // A nil UTF-8 encoding falls back to empty data, handed to the decoder as-is.
    let payload = json.data(using: .utf8) ?? Data()
    let decoder = JSONDecoder()
    return try decoder.decode(PdfAnnotationType.self, from: payload)
}

/// Decodes JSON text into `BlockType` via `JSONDecoder`.
///
/// - Parameter json: JSON text to decode.
/// - Returns: The decoded `BlockType`.
/// - Throws: Any error raised by `JSONDecoder.decode`.
public func blockTypeFromJson(_ json: String) throws -> BlockType {
    // A nil UTF-8 encoding falls back to empty data, handed to the decoder as-is.
    let payload = json.data(using: .utf8) ?? Data()
    let decoder = JSONDecoder()
    return try decoder.decode(BlockType.self, from: payload)
}

/// Decodes JSON text into `InlineType` via `JSONDecoder`.
///
/// - Parameter json: JSON text to decode.
/// - Returns: The decoded `InlineType`.
/// - Throws: Any error raised by `JSONDecoder.decode`.
public func inlineTypeFromJson(_ json: String) throws -> InlineType {
    // A nil UTF-8 encoding falls back to empty data, handed to the decoder as-is.
    let payload = json.data(using: .utf8) ?? Data()
    let decoder = JSONDecoder()
    return try decoder.decode(InlineType.self, from: payload)
}

/// Decodes JSON text into `RelationshipKind` via `JSONDecoder`.
///
/// - Parameter json: JSON text to decode.
/// - Returns: The decoded `RelationshipKind`.
/// - Throws: Any error raised by `JSONDecoder.decode`.
public func relationshipKindFromJson(_ json: String) throws -> RelationshipKind {
    // A nil UTF-8 encoding falls back to empty data, handed to the decoder as-is.
    let payload = json.data(using: .utf8) ?? Data()
    let decoder = JSONDecoder()
    return try decoder.decode(RelationshipKind.self, from: payload)
}

/// Decodes JSON text into `ContentLayer` via `JSONDecoder`.
///
/// - Parameter json: JSON text to decode.
/// - Returns: The decoded `ContentLayer`.
/// - Throws: Any error raised by `JSONDecoder.decode`.
public func contentLayerFromJson(_ json: String) throws -> ContentLayer {
    // A nil UTF-8 encoding falls back to empty data, handed to the decoder as-is.
    let payload = json.data(using: .utf8) ?? Data()
    let decoder = JSONDecoder()
    return try decoder.decode(ContentLayer.self, from: payload)
}

/// Decodes JSON text into `NodeContent` via `JSONDecoder`.
///
/// - Parameter json: JSON text to decode.
/// - Returns: The decoded `NodeContent`.
/// - Throws: Any error raised by `JSONDecoder.decode`.
public func nodeContentFromJson(_ json: String) throws -> NodeContent {
    // A nil UTF-8 encoding falls back to empty data, handed to the decoder as-is.
    let payload = json.data(using: .utf8) ?? Data()
    let decoder = JSONDecoder()
    return try decoder.decode(NodeContent.self, from: payload)
}

/// Decodes JSON text into `AnnotationKind` via `JSONDecoder`.
///
/// - Parameter json: JSON text to decode.
/// - Returns: The decoded `AnnotationKind`.
/// - Throws: Any error raised by `JSONDecoder.decode`.
public func annotationKindFromJson(_ json: String) throws -> AnnotationKind {
    // A nil UTF-8 encoding falls back to empty data, handed to the decoder as-is.
    let payload = json.data(using: .utf8) ?? Data()
    let decoder = JSONDecoder()
    return try decoder.decode(AnnotationKind.self, from: payload)
}

/// Decodes JSON text into `ExtractionMethod` via `JSONDecoder`.
///
/// - Parameter json: JSON text to decode.
/// - Returns: The decoded `ExtractionMethod`.
/// - Throws: Any error raised by `JSONDecoder.decode`.
public func extractionMethodFromJson(_ json: String) throws -> ExtractionMethod {
    // A nil UTF-8 encoding falls back to empty data, handed to the decoder as-is.
    let payload = json.data(using: .utf8) ?? Data()
    let decoder = JSONDecoder()
    return try decoder.decode(ExtractionMethod.self, from: payload)
}

/// Decodes JSON text into `ChunkType` via `JSONDecoder`.
///
/// - Parameter json: JSON text to decode.
/// - Returns: The decoded `ChunkType`.
/// - Throws: Any error raised by `JSONDecoder.decode`.
public func chunkTypeFromJson(_ json: String) throws -> ChunkType {
    // A nil UTF-8 encoding falls back to empty data, handed to the decoder as-is.
    let payload = json.data(using: .utf8) ?? Data()
    let decoder = JSONDecoder()
    return try decoder.decode(ChunkType.self, from: payload)
}

/// Decodes JSON text into `ImageKind` via `JSONDecoder`.
///
/// - Parameter json: JSON text to decode.
/// - Returns: The decoded `ImageKind`.
/// - Throws: Any error raised by `JSONDecoder.decode`.
public func imageKindFromJson(_ json: String) throws -> ImageKind {
    // A nil UTF-8 encoding falls back to empty data, handed to the decoder as-is.
    let payload = json.data(using: .utf8) ?? Data()
    let decoder = JSONDecoder()
    return try decoder.decode(ImageKind.self, from: payload)
}

/// Decodes JSON text into `ResultFormat` via `JSONDecoder`.
///
/// - Parameter json: JSON text to decode.
/// - Returns: The decoded `ResultFormat`.
/// - Throws: Any error raised by `JSONDecoder.decode`.
public func resultFormatFromJson(_ json: String) throws -> ResultFormat {
    // A nil UTF-8 encoding falls back to empty data, handed to the decoder as-is.
    let payload = json.data(using: .utf8) ?? Data()
    let decoder = JSONDecoder()
    return try decoder.decode(ResultFormat.self, from: payload)
}

/// Decodes JSON text into `ElementType` via `JSONDecoder`.
///
/// - Parameter json: JSON text to decode.
/// - Returns: The decoded `ElementType`.
/// - Throws: Any error raised by `JSONDecoder.decode`.
public func elementTypeFromJson(_ json: String) throws -> ElementType {
    // A nil UTF-8 encoding falls back to empty data, handed to the decoder as-is.
    let payload = json.data(using: .utf8) ?? Data()
    let decoder = JSONDecoder()
    return try decoder.decode(ElementType.self, from: payload)
}

/// Parses JSON text into `FormatMetadata` by forwarding to `RustBridge.formatMetadataFromJson`.
///
/// - Parameter json: JSON text passed unchanged to the bridge.
/// - Returns: The `FormatMetadata` produced by the bridge call.
/// - Throws: Any error thrown by the bridge call.
public func formatMetadataFromJson(_ json: String) throws -> FormatMetadata {
    let parsed: FormatMetadata = try RustBridge.formatMetadataFromJson(json)
    return parsed
}

/// Decodes JSON text into `TextDirection` via `JSONDecoder`.
///
/// - Parameter json: JSON text to decode.
/// - Returns: The decoded `TextDirection`.
/// - Throws: Any error raised by `JSONDecoder.decode`.
public func textDirectionFromJson(_ json: String) throws -> TextDirection {
    // A nil UTF-8 encoding falls back to empty data, handed to the decoder as-is.
    let payload = json.data(using: .utf8) ?? Data()
    let decoder = JSONDecoder()
    return try decoder.decode(TextDirection.self, from: payload)
}

/// Decodes JSON text into `LinkType` via `JSONDecoder`.
///
/// - Parameter json: JSON text to decode.
/// - Returns: The decoded `LinkType`.
/// - Throws: Any error raised by `JSONDecoder.decode`.
public func linkTypeFromJson(_ json: String) throws -> LinkType {
    // A nil UTF-8 encoding falls back to empty data, handed to the decoder as-is.
    let payload = json.data(using: .utf8) ?? Data()
    let decoder = JSONDecoder()
    return try decoder.decode(LinkType.self, from: payload)
}

/// Decodes JSON text into `ImageType` via `JSONDecoder`.
///
/// - Parameter json: JSON text to decode.
/// - Returns: The decoded `ImageType`.
/// - Throws: Any error raised by `JSONDecoder.decode`.
public func imageTypeFromJson(_ json: String) throws -> ImageType {
    // A nil UTF-8 encoding falls back to empty data, handed to the decoder as-is.
    let payload = json.data(using: .utf8) ?? Data()
    let decoder = JSONDecoder()
    return try decoder.decode(ImageType.self, from: payload)
}

/// Decodes JSON text into `StructuredDataType` via `JSONDecoder`.
///
/// - Parameter json: JSON text to decode.
/// - Returns: The decoded `StructuredDataType`.
/// - Throws: Any error raised by `JSONDecoder.decode`.
public func structuredDataTypeFromJson(_ json: String) throws -> StructuredDataType {
    // A nil UTF-8 encoding falls back to empty data, handed to the decoder as-is.
    let payload = json.data(using: .utf8) ?? Data()
    let decoder = JSONDecoder()
    return try decoder.decode(StructuredDataType.self, from: payload)
}

/// Decodes JSON text into `OcrBoundingGeometry` via `JSONDecoder`.
///
/// - Parameter json: JSON text to decode.
/// - Returns: The decoded `OcrBoundingGeometry`.
/// - Throws: Any error raised by `JSONDecoder.decode`.
public func ocrBoundingGeometryFromJson(_ json: String) throws -> OcrBoundingGeometry {
    // A nil UTF-8 encoding falls back to empty data, handed to the decoder as-is.
    let payload = json.data(using: .utf8) ?? Data()
    let decoder = JSONDecoder()
    return try decoder.decode(OcrBoundingGeometry.self, from: payload)
}

/// Decodes JSON text into `OcrElementLevel` via `JSONDecoder`.
///
/// - Parameter json: JSON text to decode.
/// - Returns: The decoded `OcrElementLevel`.
/// - Throws: Any error raised by `JSONDecoder.decode`.
public func ocrElementLevelFromJson(_ json: String) throws -> OcrElementLevel {
    // A nil UTF-8 encoding falls back to empty data, handed to the decoder as-is.
    let payload = json.data(using: .utf8) ?? Data()
    let decoder = JSONDecoder()
    return try decoder.decode(OcrElementLevel.self, from: payload)
}

/// Decodes JSON text into `PageUnitType` via `JSONDecoder`.
///
/// - Parameter json: JSON text to decode.
/// - Returns: The decoded `PageUnitType`.
/// - Throws: Any error raised by `JSONDecoder.decode`.
public func pageUnitTypeFromJson(_ json: String) throws -> PageUnitType {
    // A nil UTF-8 encoding falls back to empty data, handed to the decoder as-is.
    let payload = json.data(using: .utf8) ?? Data()
    let decoder = JSONDecoder()
    return try decoder.decode(PageUnitType.self, from: payload)
}

/// Decodes JSON text into `DiffLine` via `JSONDecoder`.
///
/// - Parameter json: JSON text to decode.
/// - Returns: The decoded `DiffLine`.
/// - Throws: Any error raised by `JSONDecoder.decode`.
public func diffLineFromJson(_ json: String) throws -> DiffLine {
    // A nil UTF-8 encoding falls back to empty data, handed to the decoder as-is.
    let payload = json.data(using: .utf8) ?? Data()
    let decoder = JSONDecoder()
    return try decoder.decode(DiffLine.self, from: payload)
}

/// Decodes JSON text into `RevisionKind` via `JSONDecoder`.
///
/// - Parameter json: JSON text to decode.
/// - Returns: The decoded `RevisionKind`.
/// - Throws: Any error raised by `JSONDecoder.decode`.
public func revisionKindFromJson(_ json: String) throws -> RevisionKind {
    // A nil UTF-8 encoding falls back to empty data, handed to the decoder as-is.
    let payload = json.data(using: .utf8) ?? Data()
    let decoder = JSONDecoder()
    return try decoder.decode(RevisionKind.self, from: payload)
}

/// Decodes JSON text into `RevisionAnchor` via `JSONDecoder`.
///
/// - Parameter json: JSON text to decode.
/// - Returns: The decoded `RevisionAnchor`.
/// - Throws: Any error raised by `JSONDecoder.decode`.
public func revisionAnchorFromJson(_ json: String) throws -> RevisionAnchor {
    // A nil UTF-8 encoding falls back to empty data, handed to the decoder as-is.
    let payload = json.data(using: .utf8) ?? Data()
    let decoder = JSONDecoder()
    return try decoder.decode(RevisionAnchor.self, from: payload)
}

/// Decodes JSON text into `UriKind` via `JSONDecoder`.
///
/// - Parameter json: JSON text to decode.
/// - Returns: The decoded `UriKind`.
/// - Throws: Any error raised by `JSONDecoder.decode`.
public func uriKindFromJson(_ json: String) throws -> UriKind {
    // A nil UTF-8 encoding falls back to empty data, handed to the decoder as-is.
    let payload = json.data(using: .utf8) ?? Data()
    let decoder = JSONDecoder()
    return try decoder.decode(UriKind.self, from: payload)
}

/// Decodes JSON text into `KeywordAlgorithm` via `JSONDecoder`.
///
/// - Parameter json: JSON text to decode.
/// - Returns: The decoded `KeywordAlgorithm`.
/// - Throws: Any error raised by `JSONDecoder.decode`.
public func keywordAlgorithmFromJson(_ json: String) throws -> KeywordAlgorithm {
    // A nil UTF-8 encoding falls back to empty data, handed to the decoder as-is.
    let payload = json.data(using: .utf8) ?? Data()
    let decoder = JSONDecoder()
    return try decoder.decode(KeywordAlgorithm.self, from: payload)
}

/// Decodes JSON text into `PSMMode` via `JSONDecoder`.
///
/// - Parameter json: JSON text to decode.
/// - Returns: The decoded `PSMMode`.
/// - Throws: Any error raised by `JSONDecoder.decode`.
public func psmModeFromJson(_ json: String) throws -> PSMMode {
    // A nil UTF-8 encoding falls back to empty data, handed to the decoder as-is.
    let payload = json.data(using: .utf8) ?? Data()
    let decoder = JSONDecoder()
    return try decoder.decode(PSMMode.self, from: payload)
}

/// Decodes JSON text into `PaddleLanguage` via `JSONDecoder`.
///
/// - Parameter json: JSON text to decode.
/// - Returns: The decoded `PaddleLanguage`.
/// - Throws: Any error raised by `JSONDecoder.decode`.
public func paddleLanguageFromJson(_ json: String) throws -> PaddleLanguage {
    // A nil UTF-8 encoding falls back to empty data, handed to the decoder as-is.
    let payload = json.data(using: .utf8) ?? Data()
    let decoder = JSONDecoder()
    return try decoder.decode(PaddleLanguage.self, from: payload)
}

/// Decodes JSON text into `LayoutClass` via `JSONDecoder`.
///
/// - Parameter json: JSON text to decode.
/// - Returns: The decoded `LayoutClass`.
/// - Throws: Any error raised by `JSONDecoder.decode`.
public func layoutClassFromJson(_ json: String) throws -> LayoutClass {
    // A nil UTF-8 encoding falls back to empty data, handed to the decoder as-is.
    let payload = json.data(using: .utf8) ?? Data()
    let decoder = JSONDecoder()
    return try decoder.decode(LayoutClass.self, from: payload)
}

// MARK: - Free-function Forwarders
// Re-export every public free function on the source Rust crate as a
// top-level `public func` on the host module so consumers do not need to
// `import RustBridge` directly. Forwarders take Swift-native parameter
// types and convert to the swift-bridge runtime types internally.

/// Synchronous wrapper for `extract_file`.
///
/// This is a convenience function that blocks the current thread until extraction completes.
/// For async code, use `extract_file` directly.
///
/// Uses the global Tokio runtime for 100x+ performance improvement over creating
/// a new runtime per call. Always uses the global runtime to avoid nested runtime issues.
///
/// This function is only available with the `tokio-runtime` feature. For WASM targets,
/// use a truly synchronous extraction approach instead.
///
/// # Example
///
/// ```rust,no_run
/// use kreuzberg::core::extractor::extract_file_sync;
/// use kreuzberg::core::config::ExtractionConfig;
///
/// let config = ExtractionConfig::default();
/// let result = extract_file_sync("document.pdf", None, &config)?;
/// println!("Content: {}", result.content);
/// ```
public func extractFileSync(path: String, mimeType: String?, config: ExtractionConfig) throws -> ExtractionResult {
    // Thin forwarder: the arguments go to the bridge unchanged and its result
    // (or thrown error) is handed straight back to the caller.
    let extraction: ExtractionResult = try RustBridge.extractFileSync(path, mimeType, config)
    return extraction
}

/// Synchronous wrapper for `extract_bytes`.
///
/// Uses the global Tokio runtime for 100x+ performance improvement over creating
/// a new runtime per call.
///
/// With the `tokio-runtime` feature, this blocks the current thread using the global
/// Tokio runtime. Without it (WASM), this calls a truly synchronous implementation.
///
/// # Example
///
/// ```rust,no_run
/// use kreuzberg::core::extractor::extract_bytes_sync;
/// use kreuzberg::core::config::ExtractionConfig;
///
/// let config = ExtractionConfig::default();
/// let bytes = b"Hello, world!";
/// let result = extract_bytes_sync(bytes, "text/plain", &config)?;
/// println!("Content: {}", result.content);
/// ```
public func extractBytesSync(content: [UInt8], mimeType: String, config: ExtractionConfig) throws -> ExtractionResult {
    // Copy the Swift bytes into a bridge-side vector, one element at a time.
    let rustBytes = RustVec<UInt8>()
    for byte in content {
        rustBytes.push(value: byte)
    }
    // The MIME type and config are forwarded unchanged.
    let extraction: ExtractionResult = try RustBridge.extractBytesSync(rustBytes, mimeType, config)
    return extraction
}

/// Synchronous wrapper for `batch_extract_files`.
///
/// Uses the global Tokio runtime for optimal performance.
/// Only available with `tokio-runtime` (WASM has no filesystem).
///
/// # Example
///
/// ```rust,no_run
/// use kreuzberg::core::extractor::batch_extract_files_sync;
/// use kreuzberg::core::config::{ExtractionConfig, BatchFileItem, FileExtractionConfig};
///
/// let config = ExtractionConfig::default();
/// let items = vec![
///     BatchFileItem {
///         path: "doc1.pdf".into(),
///         config: Some(FileExtractionConfig { force_ocr: Some(true), ..Default::default() }),
///     },
///     BatchFileItem { path: "doc2.pdf".into(), config: None },
/// ];
/// let results = batch_extract_files_sync(items, &config)?;
/// ```
public func batchExtractFilesSync(items: [BatchFileItem], config: ExtractionConfig) throws -> [ExtractionResult] {
    // Move every batch item into a bridge-side vector, preserving input order.
    let rustItems = RustVec<BatchFileItem>()
    for entry in items {
        rustItems.push(value: entry)
    }
    let bridged = try RustBridge.batchExtractFilesSync(rustItems, config)
    // Re-wrap each element reference as an `ExtractionResult` flagged as non-owning.
    // NOTE(review): the wrappers share pointers with `bridged`; confirm the backing
    // vector's storage outlives the returned array.
    var results: [ExtractionResult] = []
    for element in bridged {
        var wrapper = try RustBridge.ExtractionResult(ptr: element.ptr)
        wrapper.isOwned = false
        results.append(wrapper)
    }
    return results
}

/// Synchronous wrapper for `batch_extract_bytes`.
///
/// Uses the global Tokio runtime for optimal performance.
/// With the `tokio-runtime` feature, this blocks the current thread using the global
/// Tokio runtime. Without it (WASM), this calls a truly synchronous implementation
/// that iterates through items and calls `extract_bytes_sync()`.
///
/// # Example
///
/// ```rust,no_run
/// use kreuzberg::core::extractor::batch_extract_bytes_sync;
/// use kreuzberg::core::config::{ExtractionConfig, BatchBytesItem, FileExtractionConfig};
///
/// let config = ExtractionConfig::default();
/// let items = vec![
///     BatchBytesItem { content: b"content".to_vec(), mime_type: "text/plain".to_string(), config: None },
///     BatchBytesItem {
///         content: b"other".to_vec(),
///         mime_type: "text/plain".to_string(),
///         config: Some(FileExtractionConfig { force_ocr: Some(true), ..Default::default() }),
///     },
/// ];
/// let results = batch_extract_bytes_sync(items, &config)?;
/// ```
public func batchExtractBytesSync(items: [BatchBytesItem], config: ExtractionConfig) throws -> [ExtractionResult] {
    // Move every batch item into a bridge-side vector, preserving input order.
    let rustItems = RustVec<BatchBytesItem>()
    for entry in items {
        rustItems.push(value: entry)
    }
    let bridged = try RustBridge.batchExtractBytesSync(rustItems, config)
    // Re-wrap each element reference as an `ExtractionResult` flagged as non-owning.
    // NOTE(review): the wrappers share pointers with `bridged`; confirm the backing
    // vector's storage outlives the returned array.
    var results: [ExtractionResult] = []
    for element in bridged {
        var wrapper = try RustBridge.ExtractionResult(ptr: element.ptr)
        wrapper.isOwned = false
        results.append(wrapper)
    }
    return results
}

/// Extract content from multiple files concurrently.
///
/// This function processes multiple files in parallel, automatically managing
/// concurrency to prevent resource exhaustion. The concurrency limit can be
/// configured via `ExtractionConfig::max_concurrent_extractions` or defaults
/// to `(num_cpus * 1.5).ceil()`.
///
/// Each file can optionally specify a [`FileExtractionConfig`] that overrides specific
/// fields from the batch-level `config`. Pass `None` for a file to use the batch defaults.
/// Batch-level settings like `max_concurrent_extractions` and `use_cache` are always
/// taken from the batch-level `config`.
///
/// # Arguments
///
/// * `items` - Vector of `BatchFileItem` structs, each containing a path and optional
///   per-file configuration overrides.
/// * `config` - Batch-level extraction configuration (provides defaults and batch settings)
///
/// # Returns
///
/// A vector of `ExtractionResult` in the same order as the input items.
///
/// # Errors
///
/// Individual file errors are captured in the result metadata. System errors
/// (IO, RuntimeError equivalents) will bubble up and fail the entire batch.
///
/// # Examples
///
/// Simple usage with no per-file overrides:
///
/// ```rust,no_run
/// use kreuzberg::core::extractor::batch_extract_files;
/// use kreuzberg::core::config::{ExtractionConfig, BatchFileItem};
/// use std::path::PathBuf;
///
/// let config = ExtractionConfig::default();
/// let items = vec![
///     BatchFileItem { path: "doc1.pdf".into(), config: None },
///     BatchFileItem { path: "doc2.pdf".into(), config: None },
/// ];
/// let results = batch_extract_files(items, &config).await?;
/// println!("Processed {} files", results.len());
/// ```
///
/// Per-file configuration overrides:
///
/// ```rust,no_run
/// use kreuzberg::core::extractor::batch_extract_files;
/// use kreuzberg::core::config::{ExtractionConfig, BatchFileItem, FileExtractionConfig};
/// use std::path::PathBuf;
///
/// let config = ExtractionConfig::default();
/// let items = vec![
///     BatchFileItem {
///         path: "scan.pdf".into(),
///         config: Some(FileExtractionConfig { force_ocr: Some(true), ..Default::default() }),
///     },
///     BatchFileItem { path: "notes.txt".into(), config: None },
/// ];
/// let results = batch_extract_files(items, &config).await?;
/// ```
public func batchExtractFiles(items: [BatchFileItem], config: ExtractionConfig) async throws -> [ExtractionResult] {
    // Move every batch item into a bridge-side vector, preserving input order.
    let _rb_items: RustVec<BatchFileItem> = { let v = RustVec<BatchFileItem>(); for x in items { v.push(value: x) }; return v }()
    // The bridge call runs inside a detached task; its value (or error) is awaited here.
    return try await Task.detached(priority: .userInitiated) {
        let result = try RustBridge.batchExtractFiles(_rb_items, config)
        // The accumulator must be a flat `[ExtractionResult]` to match both the
        // appended elements and the declared return type. It was previously
        // declared as `[[ExtractionResult]]`, and its old name shadowed `items`.
        var results: [ExtractionResult] = []
        for ref in result {
            // Re-wrap each element reference as a wrapper flagged as non-owning.
            // NOTE(review): confirm the backing vector outlives these wrappers.
            var item = try RustBridge.ExtractionResult(ptr: ref.ptr)
            item.isOwned = false
            results.append(item)
        }
        return results
    }.value
}

/// Extract content from multiple byte arrays concurrently.
///
/// This function processes multiple byte arrays in parallel, automatically managing
/// concurrency to prevent resource exhaustion. The concurrency limit can be
/// configured via `ExtractionConfig::max_concurrent_extractions` or defaults
/// to `(num_cpus * 1.5).ceil()`.
///
/// Each item can optionally specify a [`FileExtractionConfig`] that overrides specific
/// fields from the batch-level `config`. Pass `None` as the config to use
/// the batch-level defaults for that item.
///
/// # Arguments
///
/// * `items` - Vector of `BatchBytesItem` structs, each containing content bytes,
///   MIME type, and optional per-item configuration overrides.
/// * `config` - Batch-level extraction configuration
///
/// # Returns
///
/// A vector of `ExtractionResult` in the same order as the input items.
///
/// # Examples
///
/// Simple usage with no per-item overrides:
///
/// ```rust,no_run
/// use kreuzberg::core::extractor::batch_extract_bytes;
/// use kreuzberg::core::config::{ExtractionConfig, BatchBytesItem};
///
/// let config = ExtractionConfig::default();
/// let items = vec![
///     BatchBytesItem { content: b"content 1".to_vec(), mime_type: "text/plain".to_string(), config: None },
///     BatchBytesItem { content: b"content 2".to_vec(), mime_type: "text/plain".to_string(), config: None },
/// ];
/// let results = batch_extract_bytes(items, &config).await?;
/// println!("Processed {} items", results.len());
/// ```
///
/// Per-item configuration overrides:
///
/// ```rust,no_run
/// use kreuzberg::core::extractor::batch_extract_bytes;
/// use kreuzberg::core::config::{ExtractionConfig, BatchBytesItem, FileExtractionConfig};
///
/// let config = ExtractionConfig::default();
/// let items = vec![
///     BatchBytesItem { content: b"content".to_vec(), mime_type: "text/plain".to_string(), config: None },
///     BatchBytesItem {
///         content: b"<html>test</html>".to_vec(),
///         mime_type: "text/html".to_string(),
///         config: Some(FileExtractionConfig { force_ocr: Some(true), ..Default::default() }),
///     },
/// ];
/// let results = batch_extract_bytes(items, &config).await?;
/// ```
public func batchExtractBytes(items: [BatchBytesItem], config: ExtractionConfig) async throws -> [ExtractionResult] {
    // Move every batch item into a bridge-side vector, preserving input order.
    let _rb_items: RustVec<BatchBytesItem> = { let v = RustVec<BatchBytesItem>(); for x in items { v.push(value: x) }; return v }()
    // The bridge call runs inside a detached task; its value (or error) is awaited here.
    return try await Task.detached(priority: .userInitiated) {
        let result = try RustBridge.batchExtractBytes(_rb_items, config)
        // The accumulator must be a flat `[ExtractionResult]` to match both the
        // appended elements and the declared return type. It was previously
        // declared as `[[ExtractionResult]]`, and its old name shadowed `items`.
        var results: [ExtractionResult] = []
        for ref in result {
            // Re-wrap each element reference as a wrapper flagged as non-owning.
            // NOTE(review): confirm the backing vector outlives these wrappers.
            var item = try RustBridge.ExtractionResult(ptr: ref.ptr)
            item.isOwned = false
            results.append(item)
        }
        return results
    }.value
}

/// Detect MIME type from raw file bytes.
///
/// Uses magic byte signatures to detect file type from content.
/// Falls back to `infer` crate for comprehensive detection.
///
/// For ZIP-based files, inspects contents to distinguish Office Open XML
/// formats (DOCX, XLSX, PPTX) from plain ZIP archives.
///
/// # Arguments
///
/// * `content` - Raw file bytes
///
/// # Returns
///
/// The detected MIME type string.
///
/// # Errors
///
/// Returns `KreuzbergError::UnsupportedFormat` if MIME type cannot be determined.
public func detectMimeTypeFromBytes(content: [UInt8]) throws -> String {
    // Copy the Swift bytes into a bridge-side vector, one element at a time.
    let rustBytes = RustVec<UInt8>()
    for byte in content {
        rustBytes.push(value: byte)
    }
    // Convert the bridge's string result into a native Swift `String`.
    let detected = try RustBridge.detectMimeTypeFromBytes(rustBytes)
    return detected.toString()
}

/// Get file extensions for a given MIME type.
///
/// Returns all known file extensions that map to the specified MIME type.
///
/// # Arguments
///
/// * `mime_type` - The MIME type to look up
///
/// # Returns
///
/// A vector of file extensions (without leading dot) for the MIME type.
///
/// # Example
///
/// ```
/// use kreuzberg::core::mime::get_extensions_for_mime;
///
/// let extensions = get_extensions_for_mime("application/pdf").unwrap();
/// assert_eq!(extensions, vec!["pdf"]);
///
/// let doc_extensions = get_extensions_for_mime("application/vnd.openxmlformats-officedocument.wordprocessingml.document").unwrap();
/// assert!(doc_extensions.contains(&"docx".to_string()));
/// ```
public func getExtensionsForMime(mimeType: String) throws -> [String] {
    let bridgedExtensions = try RustBridge.getExtensionsForMime(mimeType)
    // Convert each bridged string into a native Swift `String`, keeping order.
    return bridgedExtensions.map { entry in entry.as_str().toString() }
}

/// List the names of all registered embedding backends.
///
/// Used by `kreuzberg-cli`, the api/mcp endpoints, and generated language
/// bindings.
public func listEmbeddingBackends() throws -> [String] {
    let bridgedNames = try RustBridge.listEmbeddingBackends()
    // Convert each bridged string into a native Swift `String`, keeping order.
    return bridgedNames.map { entry in entry.as_str().toString() }
}

/// List names of all registered document extractors.
public func listDocumentExtractors() throws -> [String] {
    let bridgedNames = try RustBridge.listDocumentExtractors()
    // Convert each bridged string into a native Swift `String`, keeping order.
    return bridgedNames.map { entry in entry.as_str().toString() }
}

/// List all registered OCR backends.
///
/// Returns the names of all OCR backends currently registered in the global registry.
///
/// # Returns
///
/// A vector of OCR backend names.
///
/// # Example
///
/// ```rust
/// use kreuzberg::plugins::list_ocr_backends;
///
/// let backends = list_ocr_backends()?;
/// for name in backends {
///     println!("Registered OCR backend: {}", name);
/// }
/// ```
public func listOcrBackends() throws -> [String] {
    let bridgedNames = try RustBridge.listOcrBackends()
    // Convert each bridged string into a native Swift `String`, keeping order.
    return bridgedNames.map { entry in entry.as_str().toString() }
}

/// List all registered post-processor names.
///
/// Returns a vector of all post-processor names currently registered in the
/// global registry.
///
/// # Returns
///
/// - `Ok(Vec<String>)` - Vector of post-processor names
/// - `Err(...)` if the registry lock is poisoned
///
/// # Example
///
/// ```rust
/// use kreuzberg::plugins::list_post_processors;
///
/// let processors = list_post_processors()?;
/// for name in processors {
///     println!("Registered post-processor: {}", name);
/// }
/// ```
public func listPostProcessors() throws -> [String] {
    let bridgedNames = try RustBridge.listPostProcessors()
    // Convert each bridged string into a native Swift `String`, keeping order.
    return bridgedNames.map { entry in entry.as_str().toString() }
}

/// List names of all registered renderers.
///
/// # Errors
///
/// Returns an error if the registry lock is poisoned.
public func listRenderers() throws -> [String] {
    let bridgedNames = try RustBridge.listRenderers()
    // Convert each bridged string into a native Swift `String`, keeping order.
    return bridgedNames.map { entry in entry.as_str().toString() }
}

/// List names of all registered validators.
public func listValidators() throws -> [String] {
    let bridgedNames = try RustBridge.listValidators()
    // Convert each bridged string into a native Swift `String`, keeping order.
    return bridgedNames.map { entry in entry.as_str().toString() }
}

/// Compare two extraction results and return a structured diff.
///
/// The comparison is purely structural — no I/O, no side effects. All fields
/// of [`ExtractionDiff`] are populated according to the provided [`DiffOptions`].
///
/// # Arguments
///
/// * `a` — the "before" extraction result
/// * `b` — the "after" extraction result
/// * `opts` — controls which sections are compared and optional truncation
///
/// # Example
///
/// ```rust,no_run
/// use kreuzberg::{ExtractionResult, diff::{compare, DiffOptions}};
///
/// let mut a = ExtractionResult::default();
/// let mut b = ExtractionResult::default();
/// a.content = "Hello world".to_string();
/// b.content = "Hello Rust".to_string();
///
/// let diff = compare(&a, &b, &DiffOptions::default());
/// assert_eq!(diff.content_diff.len(), 1);
/// ```
public func compare(a: ExtractionResult, b: ExtractionResult, opts: DiffOptions) throws -> ExtractionDiff {
    // Only the options need converting (and that conversion may throw);
    // both extraction results are passed to the bridge as-is.
    let rustOptions = try opts.intoRust()
    let diff: ExtractionDiff = RustBridge.compare(a, b, rustOptions)
    return diff
}

/// Asynchronously generates embeddings for a list of text strings.
///
/// This is the async counterpart to `embedTexts`. The underlying Rust
/// function is documented as offloading the blocking ONNX inference work to a
/// dedicated blocking thread pool via Tokio's `spawn_blocking`, keeping the
/// async executor free. On the Swift side, the bridge call and the decoding
/// of its result both run inside a detached task at `.userInitiated` priority.
///
/// The bridge hands the embeddings back as JSON text, which is decoded into
/// `[[Float]]`. One embedding vector is returned per input text, in the same
/// order.
///
/// The equivalent call in the Rust core looks like this:
///
/// ```rust,ignore
/// use kreuzberg::{embed_texts_async, EmbeddingConfig};
///
/// let embeddings = embed_texts_async(
///     vec!["Hello!".to_string()],
///     &EmbeddingConfig::default(),
/// ).await?;
/// ```
///
/// - Parameters:
///   - texts: The strings to embed.
///   - config: Embedding configuration specifying model, batch size, and normalization.
/// - Returns: One embedding vector per entry of `texts`.
/// - Throws:
///   - `KreuzbergError::MissingDependency` if ONNX Runtime is not installed.
///   - `KreuzbergError::Embedding` if the preset name is unknown, model download fails,
///     or the blocking inference task panics.
///   - A decoding error if the JSON returned by the bridge cannot be parsed as `[[Float]]`.
public func embedTextsAsync(texts: [String], config: EmbeddingConfig) async throws -> [[Float]] {
    // Copy the Swift strings into a Rust-owned vector before entering the detached task.
    let bridgedTexts = RustVec<RustString>()
    for text in texts {
        bridgedTexts.push(value: RustString(text))
    }
    return try await Task.detached(priority: .userInitiated) {
        let embeddingsJSON = try RustBridge.embedTextsAsync(bridgedTexts, config).toString()
        // The bridge serialises the vectors as JSON text; decode straight from its UTF-8 bytes.
        return try JSONDecoder().decode([[Float]].self, from: Data(embeddingsJSON.utf8))
    }.value
}

/// Renders a single PDF page to PNG bytes.
///
/// Produces raw PNG-encoded bytes for the requested page at the given DPI.
/// The core library uses pdf_oxide with tiny-skia for pure-Rust rendering.
///
/// - Parameters:
///   - pdfBytes: Raw PDF file bytes.
///   - pageIndex: Zero-based page index.
///   - dpi: Resolution in dots per inch (default: 150).
///   - password: Optional password for encrypted PDFs.
/// - Returns: The PNG-encoded image as a Swift byte array.
/// - Throws: `KreuzbergError::Parsing` if the PDF cannot be opened, authenticated,
///   or rendered, or if `pageIndex` is out of range.
public func renderPdfPageToPng(pdfBytes: [UInt8], pageIndex: UInt, dpi: Int32?, password: String?) throws -> [UInt8] {
    // Copy the input into a Rust-owned byte vector, one element at a time.
    let bridgedPdf = RustVec<UInt8>()
    for byte in pdfBytes {
        bridgedPdf.push(value: byte)
    }
    let bridgedPng = try RustBridge.renderPdfPageToPng(bridgedPdf, pageIndex, dpi, password)
    // An identity map copies the bridged result back into a native array.
    return bridgedPng.map { pngByte in pngByte }
}

/// Detects the MIME type of the file at `path`.
///
/// Detection uses the file extension and optionally the file content.
///
/// - Parameters:
///   - path: Path of the file to inspect.
///   - checkExists: Pass `true` to verify the file exists before detection.
/// - Returns: The detected MIME type as a native Swift `String`.
/// - Throws: Any error thrown by `RustBridge.detectMimeType`.
public func detectMimeType(path: String, checkExists: Bool) throws -> String {
    let bridgedMimeType = try RustBridge.detectMimeType(path, checkExists)
    return bridgedMimeType.toString()
}

/// Embeds a list of texts using the configured embedding model.
///
/// The bridge hands the embeddings back as JSON text, which is decoded into
/// `[[Float]]` here.
///
/// - Parameters:
///   - texts: The strings to embed.
///   - config: The embedding configuration passed through to the bridge.
/// - Returns: A 2D array where each inner array is the embedding for the
///   corresponding text.
/// - Throws: Any error thrown by `RustBridge.embedTexts`, or a decoding error
///   if the returned JSON cannot be parsed as `[[Float]]`.
public func embedTexts(texts: [String], config: EmbeddingConfig) throws -> [[Float]] {
    // Copy the Swift strings into a Rust-owned vector for the bridge call.
    let bridgedTexts = RustVec<RustString>()
    for text in texts {
        bridgedTexts.push(value: RustString(text))
    }
    let embeddingsJSON = try RustBridge.embedTexts(bridgedTexts, config).toString()
    // Decode straight from the UTF-8 bytes of the JSON text.
    return try JSONDecoder().decode([[Float]].self, from: Data(embeddingsJSON.utf8))
}

/// Looks up an embedding preset by name.
///
/// The core library returns an owned clone so the value is safe to pass
/// across FFI boundaries.
///
/// - Parameter name: The preset name to look up.
/// - Returns: The matching preset, or `nil` if no preset with the given name exists.
/// - Throws: Any error raised while converting the bridged preset into `EmbeddingPreset`.
public func getEmbeddingPreset(name: String) throws -> EmbeddingPreset? {
    // `Optional.map` keeps the "not found" case as `nil` and converts only a real hit.
    return try RustBridge.getEmbeddingPreset(name).map { bridgedPreset in
        try EmbeddingPreset(bridgedPreset)
    }
}

/// Returns the names of all available embedding presets.
///
/// The core library returns owned `String`s so the values are safe to pass
/// across FFI boundaries; each one is copied into a native Swift `String`.
///
/// - Returns: The preset names reported by the bridge.
public func listEmbeddingPresets() -> [String] {
    let bridgedNames = RustBridge.listEmbeddingPresets()
    return bridgedNames.map { bridgedName in
        bridgedName.as_str().toString()
    }
}

// MARK: - Trait Bridge Registration Forwarders
// Top-level `public func` re-exports of the swift-bridge–generated
// `register_*` / `unregister_*` / `clear_*` plugin registration entry
// points so consumers do not need to `import RustBridge` for plugin work.

/// Register an inbound `OcrBackend` plugin implementation. The Swift
/// host wraps an `OcrBackend` conformer in a `SwiftOcrBackendBox` adapter
/// (see `Sources/RustBridge/Plugins.swift`); pass the wrapped instance to
/// register the plugin in the global registry.
///
/// This is a thin forwarder to `RustBridge.registerOcrBackend`.
///
/// - Parameter swiftBox: The adapter wrapping the plugin to register.
/// - Throws: Any error thrown by `RustBridge.registerOcrBackend`.
public func registerOcrBackend(_ swiftBox: SwiftOcrBackendBox) throws {
    try RustBridge.registerOcrBackend(swiftBox)
}

/// Unregister a previously-registered `OcrBackend` plugin by name.
///
/// This is a thin forwarder to `RustBridge.unregisterOcrBackend`.
///
/// - Parameter name: Name identifying the plugin to remove.
/// - Throws: Any error thrown by `RustBridge.unregisterOcrBackend`.
public func unregisterOcrBackend(_ name: String) throws {
    try RustBridge.unregisterOcrBackend(name)
}

/// Remove every registered `OcrBackend` plugin. Typically used in test teardown.
///
/// This is a thin forwarder to `RustBridge.clearOcrBackends`.
///
/// - Throws: Any error thrown by `RustBridge.clearOcrBackends`.
public func clearOcrBackends() throws {
    try RustBridge.clearOcrBackends()
}

/// Register an inbound `PostProcessor` plugin implementation. The Swift
/// host wraps a `PostProcessor` conformer in a `SwiftPostProcessorBox` adapter
/// (see `Sources/RustBridge/Plugins.swift`); pass the wrapped instance to
/// register the plugin in the global registry.
///
/// This is a thin forwarder to `RustBridge.registerPostProcessor`.
///
/// - Parameter swiftBox: The adapter wrapping the plugin to register.
/// - Throws: Any error thrown by `RustBridge.registerPostProcessor`.
public func registerPostProcessor(_ swiftBox: SwiftPostProcessorBox) throws {
    try RustBridge.registerPostProcessor(swiftBox)
}

/// Unregister a previously-registered `PostProcessor` plugin by name.
///
/// This is a thin forwarder to `RustBridge.unregisterPostProcessor`.
///
/// - Parameter name: Name identifying the plugin to remove.
/// - Throws: Any error thrown by `RustBridge.unregisterPostProcessor`.
public func unregisterPostProcessor(_ name: String) throws {
    try RustBridge.unregisterPostProcessor(name)
}

/// Remove every registered `PostProcessor` plugin. Typically used in test teardown.
///
/// This is a thin forwarder to `RustBridge.clearPostProcessors`.
///
/// - Throws: Any error thrown by `RustBridge.clearPostProcessors`.
public func clearPostProcessors() throws {
    try RustBridge.clearPostProcessors()
}

/// Register an inbound `Validator` plugin implementation. The Swift
/// host wraps a `Validator` conformer in a `SwiftValidatorBox` adapter
/// (see `Sources/RustBridge/Plugins.swift`); pass the wrapped instance to
/// register the plugin in the global registry.
///
/// This is a thin forwarder to `RustBridge.registerValidator`.
///
/// - Parameter swiftBox: The adapter wrapping the plugin to register.
/// - Throws: Any error thrown by `RustBridge.registerValidator`.
public func registerValidator(_ swiftBox: SwiftValidatorBox) throws {
    try RustBridge.registerValidator(swiftBox)
}

/// Unregister a previously-registered `Validator` plugin by name.
///
/// This is a thin forwarder to `RustBridge.unregisterValidator`.
///
/// - Parameter name: Name identifying the plugin to remove.
/// - Throws: Any error thrown by `RustBridge.unregisterValidator`.
public func unregisterValidator(_ name: String) throws {
    try RustBridge.unregisterValidator(name)
}

/// Remove every registered `Validator` plugin. Typically used in test teardown.
///
/// This is a thin forwarder to `RustBridge.clearValidators`.
///
/// - Throws: Any error thrown by `RustBridge.clearValidators`.
public func clearValidators() throws {
    try RustBridge.clearValidators()
}

/// Register an inbound `EmbeddingBackend` plugin implementation. The Swift
/// host wraps an `EmbeddingBackend` conformer in a `SwiftEmbeddingBackendBox` adapter
/// (see `Sources/RustBridge/Plugins.swift`); pass the wrapped instance to
/// register the plugin in the global registry.
///
/// This is a thin forwarder to `RustBridge.registerEmbeddingBackend`.
///
/// - Parameter swiftBox: The adapter wrapping the plugin to register.
/// - Throws: Any error thrown by `RustBridge.registerEmbeddingBackend`.
public func registerEmbeddingBackend(_ swiftBox: SwiftEmbeddingBackendBox) throws {
    try RustBridge.registerEmbeddingBackend(swiftBox)
}

/// Unregister a previously-registered `EmbeddingBackend` plugin by name.
///
/// This is a thin forwarder to `RustBridge.unregisterEmbeddingBackend`.
///
/// - Parameter name: Name identifying the plugin to remove.
/// - Throws: Any error thrown by `RustBridge.unregisterEmbeddingBackend`.
public func unregisterEmbeddingBackend(_ name: String) throws {
    try RustBridge.unregisterEmbeddingBackend(name)
}

/// Remove every registered `EmbeddingBackend` plugin. Typically used in test teardown.
///
/// This is a thin forwarder to `RustBridge.clearEmbeddingBackends`.
///
/// - Throws: Any error thrown by `RustBridge.clearEmbeddingBackends`.
public func clearEmbeddingBackends() throws {
    try RustBridge.clearEmbeddingBackends()
}

/// Register an inbound `DocumentExtractor` plugin implementation. The Swift
/// host wraps a `DocumentExtractor` conformer in a `SwiftDocumentExtractorBox` adapter
/// (see `Sources/RustBridge/Plugins.swift`); pass the wrapped instance to
/// register the plugin in the global registry.
///
/// This is a thin forwarder to `RustBridge.registerDocumentExtractor`.
///
/// - Parameter swiftBox: The adapter wrapping the plugin to register.
/// - Throws: Any error thrown by `RustBridge.registerDocumentExtractor`.
public func registerDocumentExtractor(_ swiftBox: SwiftDocumentExtractorBox) throws {
    try RustBridge.registerDocumentExtractor(swiftBox)
}

/// Unregister a previously-registered `DocumentExtractor` plugin by name.
///
/// This is a thin forwarder to `RustBridge.unregisterDocumentExtractor`.
///
/// - Parameter name: Name identifying the plugin to remove.
/// - Throws: Any error thrown by `RustBridge.unregisterDocumentExtractor`.
public func unregisterDocumentExtractor(_ name: String) throws {
    try RustBridge.unregisterDocumentExtractor(name)
}

/// Remove every registered `DocumentExtractor` plugin. Typically used in test teardown.
///
/// This is a thin forwarder to `RustBridge.clearDocumentExtractors`.
///
/// - Throws: Any error thrown by `RustBridge.clearDocumentExtractors`.
public func clearDocumentExtractors() throws {
    try RustBridge.clearDocumentExtractors()
}

/// Register an inbound `Renderer` plugin implementation. The Swift
/// host wraps a `Renderer` conformer in a `SwiftRendererBox` adapter
/// (see `Sources/RustBridge/Plugins.swift`); pass the wrapped instance to
/// register the plugin in the global registry.
///
/// This is a thin forwarder to `RustBridge.registerRenderer`.
///
/// - Parameter swiftBox: The adapter wrapping the plugin to register.
/// - Throws: Any error thrown by `RustBridge.registerRenderer`.
public func registerRenderer(_ swiftBox: SwiftRendererBox) throws {
    try RustBridge.registerRenderer(swiftBox)
}

/// Unregister a previously-registered `Renderer` plugin by name.
///
/// This is a thin forwarder to `RustBridge.unregisterRenderer`.
///
/// - Parameter name: Name identifying the plugin to remove.
/// - Throws: Any error thrown by `RustBridge.unregisterRenderer`.
public func unregisterRenderer(_ name: String) throws {
    try RustBridge.unregisterRenderer(name)
}

/// Remove every registered `Renderer` plugin. Typically used in test teardown.
///
/// This is a thin forwarder to `RustBridge.clearRenderers`.
///
/// - Throws: Any error thrown by `RustBridge.clearRenderers`.
public func clearRenderers() throws {
    try RustBridge.clearRenderers()
}

// MARK: - Sendable conformances for swift-bridge opaque types
// Every conformance below is `@unchecked`: the Swift compiler does not verify
// thread safety for these types, so correctness rests entirely on the per-type
// statement that the underlying Rust type is `Send + Sync`.
// NOTE(review): that statement cannot be checked from this file — confirm it
// against the Rust definitions whenever a type is added to this list.

// swift-bridge opaque type used across Task.detached boundaries — Rust type is Send + Sync.
extension RustBridge.ContentFilterConfig: @unchecked Sendable {}
// swift-bridge opaque type used across Task.detached boundaries — Rust type is Send + Sync.
extension RustBridge.ExtractionConfig: @unchecked Sendable {}
// swift-bridge opaque type used across Task.detached boundaries — Rust type is Send + Sync.
extension RustBridge.ImageExtractionConfig: @unchecked Sendable {}
// swift-bridge opaque type used across Task.detached boundaries — Rust type is Send + Sync.
extension RustBridge.TokenReductionOptions: @unchecked Sendable {}
// swift-bridge opaque type used across Task.detached boundaries — Rust type is Send + Sync.
extension RustBridge.LanguageDetectionConfig: @unchecked Sendable {}
// swift-bridge opaque type used across Task.detached boundaries — Rust type is Send + Sync.
extension RustBridge.HtmlOutputConfig: @unchecked Sendable {}
// swift-bridge opaque type used across Task.detached boundaries — Rust type is Send + Sync.
extension RustBridge.LayoutDetectionConfig: @unchecked Sendable {}
// swift-bridge opaque type used across Task.detached boundaries — Rust type is Send + Sync.
extension RustBridge.OcrQualityThresholds: @unchecked Sendable {}
// swift-bridge opaque type used across Task.detached boundaries — Rust type is Send + Sync.
extension RustBridge.OcrConfig: @unchecked Sendable {}
// swift-bridge opaque type used across Task.detached boundaries — Rust type is Send + Sync.
extension RustBridge.PageConfig: @unchecked Sendable {}
// swift-bridge opaque type used across Task.detached boundaries — Rust type is Send + Sync.
extension RustBridge.PdfConfig: @unchecked Sendable {}
// swift-bridge opaque type used across Task.detached boundaries — Rust type is Send + Sync.
extension RustBridge.HierarchyConfig: @unchecked Sendable {}
// swift-bridge opaque type used across Task.detached boundaries — Rust type is Send + Sync.
extension RustBridge.PostProcessorConfig: @unchecked Sendable {}
// swift-bridge opaque type used across Task.detached boundaries — Rust type is Send + Sync.
extension RustBridge.ChunkingConfig: @unchecked Sendable {}
// swift-bridge opaque type used across Task.detached boundaries — Rust type is Send + Sync.
extension RustBridge.EmbeddingConfig: @unchecked Sendable {}
// swift-bridge opaque type used across Task.detached boundaries — Rust type is Send + Sync.
extension RustBridge.TreeSitterConfig: @unchecked Sendable {}
// swift-bridge opaque type used across Task.detached boundaries — Rust type is Send + Sync.
extension RustBridge.TreeSitterProcessConfig: @unchecked Sendable {}
// swift-bridge opaque type used across Task.detached boundaries — Rust type is Send + Sync.
extension RustBridge.ServerConfig: @unchecked Sendable {}
// swift-bridge opaque type used across Task.detached boundaries — Rust type is Send + Sync.
extension RustBridge.SecurityLimits: @unchecked Sendable {}
// swift-bridge opaque type used across Task.detached boundaries — Rust type is Send + Sync.
extension RustBridge.TokenReductionConfig: @unchecked Sendable {}
// swift-bridge opaque type used across Task.detached boundaries — Rust type is Send + Sync.
extension RustBridge.DocumentStructure: @unchecked Sendable {}
// swift-bridge opaque type used across Task.detached boundaries — Rust type is Send + Sync.
extension RustBridge.ExtractionResult: @unchecked Sendable {}
// swift-bridge opaque type used across Task.detached boundaries — Rust type is Send + Sync.
extension RustBridge.ImagePreprocessingConfig: @unchecked Sendable {}
// swift-bridge opaque type used across Task.detached boundaries — Rust type is Send + Sync.
extension RustBridge.TesseractConfig: @unchecked Sendable {}
// swift-bridge opaque type used across Task.detached boundaries — Rust type is Send + Sync.
extension RustBridge.DiffOptions: @unchecked Sendable {}
// swift-bridge opaque type used across Task.detached boundaries — Rust type is Send + Sync.
extension RustBridge.ExtractionDiff: @unchecked Sendable {}
// swift-bridge opaque type used across Task.detached boundaries — Rust type is Send + Sync.
extension RustBridge.EmbeddingPreset: @unchecked Sendable {}
// swift-bridge opaque type used across Task.detached boundaries — Rust type is Send + Sync.
extension RustBridge.YakeParams: @unchecked Sendable {}
// swift-bridge opaque type used across Task.detached boundaries — Rust type is Send + Sync.
extension RustBridge.RakeParams: @unchecked Sendable {}
// swift-bridge opaque type used across Task.detached boundaries — Rust type is Send + Sync.
extension RustBridge.KeywordConfig: @unchecked Sendable {}
// swift-bridge opaque type used across Task.detached boundaries — Rust type is Send + Sync.
extension RustBridge.PaddleOcrConfig: @unchecked Sendable {}