# This file is auto-generated by alef. DO NOT EDIT. # alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75 language = "C" include_guard = "KREUZBERG_H" pragma_once = true autogen_warning = "/* This file is auto-generated by alef. DO NOT EDIT. */" after_includes = """ /* Opaque type forward declarations */ /** * Hardware acceleration configuration for ONNX Runtime models. * * Controls which execution provider (CPU, CoreML, CUDA, TensorRT) is used * for inference in layout detection and embedding generation. * \\code * use kreuzberg::AccelerationConfig; * * // Auto-select: CoreML on macOS, CUDA on Linux, CPU elsewhere * let config = AccelerationConfig::default(); * * // Force CPU only * let config = AccelerationConfig { * provider: kreuzberg::ExecutionProviderType::Cpu, * ..Default::default() * }; * \\endcode */ typedef struct KREUZBERGAccelerationConfig KREUZBERGAccelerationConfig; /** * Types of inline text annotations. */ typedef struct KREUZBERGAnnotationKind KREUZBERGAnnotationKind; /** * A single file extracted from an archive. * * When archives (ZIP, TAR, 7Z, GZIP) are extracted with recursive extraction * enabled, each processable file produces its own full `ExtractionResult`. */ typedef struct KREUZBERGArchiveEntry KREUZBERGArchiveEntry; /** * Archive (ZIP/TAR/7Z) metadata. * * Extracted from compressed archive files containing file lists and size information. */ typedef struct KREUZBERGArchiveMetadata KREUZBERGArchiveMetadata; /** * Bounding box in original image coordinates (x1, y1) top-left, (x2, y2) bottom-right. */ typedef struct KREUZBERGBBox KREUZBERGBBox; /** * Batch item for byte array extraction. * * Used with `batch_extract_bytes` and `batch_extract_bytes_sync` * to represent a single item in a batch extraction job. */ typedef struct KREUZBERGBatchBytesItem KREUZBERGBatchBytesItem; /** * Batch item for file extraction. 
* * Used with `batch_extract_files` and `batch_extract_files_sync` * to represent a single file in a batch extraction job. */ typedef struct KREUZBERGBatchFileItem KREUZBERGBatchFileItem; /** * BibTeX bibliography metadata. */ typedef struct KREUZBERGBibtexMetadata KREUZBERGBibtexMetadata; /** * Types of block-level elements in Djot. */ typedef struct KREUZBERGBlockType KREUZBERGBlockType; /** * Bounding box coordinates for element positioning. */ typedef struct KREUZBERGBoundingBox KREUZBERGBoundingBox; typedef struct KREUZBERGCacheStats KREUZBERGCacheStats; /** * A single changed cell within a table. * * Defined here (rather than only in `crate::diff`) so `RevisionDelta` can * reference it unconditionally, without requiring the `diff` Cargo feature. * `crate::diff` re-exports this type verbatim. */ typedef struct KREUZBERGCellChange KREUZBERGCellChange; /** * A text chunk with optional embedding and metadata. * * Chunks are created when chunking is enabled in `ExtractionConfig`. Each chunk * contains the text content, optional embedding vector (if embedding generation * is configured), and metadata about its position in the document. */ typedef struct KREUZBERGChunk KREUZBERGChunk; /** * Metadata about a chunk's position in the original document. */ typedef struct KREUZBERGChunkMetadata KREUZBERGChunkMetadata; /** * How chunk size is measured. * * Defaults to `Characters` (Unicode character count). When using token-based sizing, * chunks are sized by token count according to the specified tokenizer. * * Token-based sizing uses HuggingFace tokenizers loaded at runtime. Any tokenizer * available on HuggingFace Hub can be used, including OpenAI-compatible tokenizers * (e.g., `Xenova/gpt-4o`, `Xenova/cl100k_base`). */ typedef struct KREUZBERGChunkSizing KREUZBERGChunkSizing; /** * Semantic structural classification of a text chunk. * * Assigned by the heuristic classifier in `chunking::classifier`. * Defaults to `Unknown` when no rule matches. 
* Designed to be extended in future versions without breaking changes. */ typedef struct KREUZBERGChunkType KREUZBERGChunkType; /** * Type of text chunker to use. * * # Variants * * * `Text` - Generic text splitter, splits on whitespace and punctuation * * `Markdown` - Markdown-aware splitter, preserves formatting and structure * * `Yaml` - YAML-aware splitter, creates one chunk per top-level key * * `Semantic` - Topic-aware chunker. With an `EmbeddingConfig`, splits at * embedding-based topic shifts tuned by `topic_threshold` (default 0.75, * lower = more splits). Without an embedding, falls back to a * structural-boundary heuristic (ALL-CAPS headers, numbered sections, * blank-line paragraphs) and merges groups into chunks capped at * `max_characters` (default 1000). `topic_threshold` has no effect in the * fallback path. For best results, pair with an embedding model. */ typedef struct KREUZBERGChunkerType KREUZBERGChunkerType; /** * Chunking configuration. * * Configures text chunking for document content, including chunk size, * overlap, trimming behavior, and optional embeddings. * * Use `..Default::default()` when constructing to allow for future field additions: * ```rust * let config = ChunkingConfig { * max_characters: 500, * ..Default::default() * }; * ``` */ typedef struct KREUZBERGChunkingConfig KREUZBERGChunkingConfig; /** * Citation file metadata (RIS, PubMed, EndNote). */ typedef struct KREUZBERGCitationMetadata KREUZBERGCitationMetadata; /** * Content rendering mode for code extraction. * * Controls how extracted code content is represented in the `content` field * of `ExtractionResult`. */ typedef struct KREUZBERGCodeContentMode KREUZBERGCodeContentMode; /** * Cross-extractor content filtering configuration. * * Controls whether "furniture" content (headers, footers, page numbers, * watermarks, repeating text) is included in or stripped from extraction * results. Applies across all extractors (PDF, DOCX, RTF, ODT, HTML, etc.) 
* with format-specific implementation. * * When `None` on `ExtractionConfig`, each extractor uses its current * default behavior unchanged. */ typedef struct KREUZBERGContentFilterConfig KREUZBERGContentFilterConfig; /** * Content layer classification for document nodes. * * Replaces separate body/furniture arrays with per-node granularity. */ typedef struct KREUZBERGContentLayer KREUZBERGContentLayer; /** * JATS contributor with role. */ typedef struct KREUZBERGContributorRole KREUZBERGContributorRole; /** * Dublin Core metadata from docProps/core.xml * * Contains standard metadata fields defined by the Dublin Core standard * and Office-specific extensions. */ typedef struct KREUZBERGCoreProperties KREUZBERGCoreProperties; /** * CSV/TSV file metadata. */ typedef struct KREUZBERGCsvMetadata KREUZBERGCsvMetadata; /** * dBASE field information. */ typedef struct KREUZBERGDbfFieldInfo KREUZBERGDbfFieldInfo; /** * dBASE (DBF) file metadata. */ typedef struct KREUZBERGDbfMetadata KREUZBERGDbfMetadata; /** * MIME type detection response. */ typedef struct KREUZBERGDetectResponse KREUZBERGDetectResponse; /** * Page-level detection result containing all detections and page metadata. */ typedef struct KREUZBERGDetectionResult KREUZBERGDetectionResult; /** * A single contiguous hunk in a unified diff. */ typedef struct KREUZBERGDiffHunk KREUZBERGDiffHunk; /** * A single line in a unified-diff hunk. * * Defined here (rather than only in `crate::diff`) so `RevisionDelta` can * reference it unconditionally, without requiring the `diff` Cargo feature. * `crate::diff` re-exports this type verbatim. */ typedef struct KREUZBERGDiffLine KREUZBERGDiffLine; /** * Options controlling how two `ExtractionResult` values are compared. */ typedef struct KREUZBERGDiffOptions KREUZBERGDiffOptions; /** * Comprehensive Djot document structure with semantic preservation. 
* * This type captures the full richness of Djot markup, including: * - Block-level structures (headings, lists, blockquotes, code blocks, etc.) * - Inline formatting (emphasis, strong, highlight, subscript, superscript, etc.) * - Attributes (classes, IDs, key-value pairs) * - Links, images, footnotes * - Math expressions (inline and display) * - Tables with full structure * * Available when the `djot` feature is enabled. */ typedef struct KREUZBERGDjotContent KREUZBERGDjotContent; /** * Image element in Djot. */ typedef struct KREUZBERGDjotImage KREUZBERGDjotImage; /** * Link element in Djot. */ typedef struct KREUZBERGDjotLink KREUZBERGDjotLink; /** * Trait for document extractor plugins. * * Implement this trait to add support for new document formats or to override * built-in extraction behavior with custom logic. * * # Return Type * * Extractors return `InternalDocument`, a flat intermediate representation. * The pipeline converts this into the public `ExtractionResult` via the * derivation step. * * # Priority System * * When multiple extractors support the same MIME type, the registry selects * the extractor with the highest priority value. Use this to: * - Override built-in extractors (priority > 50) * - Provide fallback extractors (priority < 50) * - Implement specialized extractors for specific use cases * * Default priority is 50. * * # Thread Safety * * Extractors must be thread-safe (`Send + Sync`) to support concurrent extraction. 
* \\code * use kreuzberg::plugins::{Plugin, DocumentExtractor}; * use kreuzberg::{Result, ExtractionConfig}; * use kreuzberg::types::internal::InternalDocument; * use async_trait::async_trait; * use std::path::Path; * * /// Custom PDF extractor with premium features * struct PremiumPdfExtractor; * * impl Plugin for PremiumPdfExtractor { * fn name(&self) -> &str { "premium-pdf" } * fn version(&self) -> String { "2.0.0".to_string() } * fn initialize(&self) -> Result<()> { Ok(()) } * fn shutdown(&self) -> Result<()> { Ok(()) } * } * * #[async_trait] * impl DocumentExtractor for PremiumPdfExtractor { * async fn extract_bytes(&self, content: &[u8], mime_type: &str, config: &ExtractionConfig) * -> Result<InternalDocument> { * // Premium extraction logic with better accuracy * let mut doc = InternalDocument::new("pdf"); * // ... populate doc.elements, doc.metadata, etc. * Ok(doc) * } * * fn supported_mime_types(&self) -> &[&str] { * &["application/pdf"] * } * * fn priority(&self) -> i32 { * 100 // Higher than default (50) - will be preferred * } * } * \\endcode */ typedef struct KREUZBERGDocumentExtractor KREUZBERGDocumentExtractor; /** * A single node in the document tree. * * Each node has deterministic `id`, typed `content`, optional `parent`/`children` * for tree structure, and metadata like page number, bounding box, and content layer. */ typedef struct KREUZBERGDocumentNode KREUZBERGDocumentNode; /** * A resolved relationship between two nodes in the document tree. */ typedef struct KREUZBERGDocumentRelationship KREUZBERGDocumentRelationship; /** * A single tracked change embedded in a document. * * Populated by per-format extractors that understand change-tracking metadata * (DOCX `w:ins`/`w:del`/`w:rPrChange`, ODT `text:change-*`, …). Every * extractor defaults to `ExtractionResult.revisions = None` until a * format-specific implementation is added. */ typedef struct KREUZBERGDocumentRevision KREUZBERGDocumentRevision; /** * Top-level structured document representation. 
* * A flat array of nodes with index-based parent/child references forming a tree. * Root-level nodes have `parent: None`. Use `body_roots()` and `furniture_roots()` * to iterate over top-level content by layer. * * # Validation * * Call `validate()` after construction to verify all node indices are in bounds * and parent-child relationships are bidirectionally consistent. */ typedef struct KREUZBERGDocumentStructure KREUZBERGDocumentStructure; /** * Application properties from docProps/app.xml for DOCX * * Contains Word-specific document statistics and metadata. */ typedef struct KREUZBERGDocxAppProperties KREUZBERGDocxAppProperties; /** * Word document metadata. * * Extracted from DOCX files using shared Office Open XML metadata extraction. * Integrates with `office_metadata` module for core/app/custom properties. */ typedef struct KREUZBERGDocxMetadata KREUZBERGDocxMetadata; /** * Semantic element extracted from document. * * Represents a logical unit of content with semantic classification, * unique identifier, and metadata for tracking origin and position. */ typedef struct KREUZBERGElement KREUZBERGElement; /** * Metadata for a semantic element. */ typedef struct KREUZBERGElementMetadata KREUZBERGElementMetadata; /** * Semantic element type classification. * * Categorizes text content into semantic units for downstream processing. * Supports the element types commonly found in Unstructured documents. */ typedef struct KREUZBERGElementType KREUZBERGElementType; /** * Email attachment representation. * * Contains metadata and optionally the content of an email attachment. */ typedef struct KREUZBERGEmailAttachment KREUZBERGEmailAttachment; /** * Configuration for email extraction. */ typedef struct KREUZBERGEmailConfig KREUZBERGEmailConfig; /** * Email extraction result. * * Complete representation of an extracted email message (.eml or .msg) * including headers, body content, and attachments. 
*/ typedef struct KREUZBERGEmailExtractionResult KREUZBERGEmailExtractionResult; /** * Email metadata extracted from .eml and .msg files. * * Includes sender/recipient information, message ID, and attachment list. */ typedef struct KREUZBERGEmailMetadata KREUZBERGEmailMetadata; /** * Changes to embedded archive children between two results. */ typedef struct KREUZBERGEmbeddedChanges KREUZBERGEmbeddedChanges; /** * Diff for a single embedded archive entry that appears in both results. */ typedef struct KREUZBERGEmbeddedDiff KREUZBERGEmbeddedDiff; /** * Embedded file descriptor extracted from the PDF name tree. */ typedef struct KREUZBERGEmbeddedFile KREUZBERGEmbeddedFile; /** * Trait for in-process embedding backend plugins. * * Async to match the convention used by `OcrBackend`, * `DocumentExtractor`, and `PostProcessor`. * Host-language bridges (PyO3, napi-rs, Rustler, extendr, magnus, ext-php-rs, * C FFI, etc.) wrap their synchronous host callables in `spawn_blocking` or the * equivalent to satisfy the async signature. * * # Thread safety * * Backends must be `Send + Sync + 'static`. They are stored in * `Arc<dyn EmbeddingBackend>` and called concurrently from kreuzberg's chunking * pipeline. If the backend's underlying model isn't thread-safe, the backend * itself must serialize access internally (e.g. via `Mutex`). * * # Contract * * - `embed(texts)` MUST return exactly `texts.len()` vectors, each of length * `self.dimensions()`. The dispatcher in `embed_texts` * validates this before returning to downstream consumers; a non-conforming * backend surfaces as a `KreuzbergError::Validation`, not a panic. * - `embed` may be called from any thread. Its future must be `Send` * (enforced by `async_trait` when `#[async_trait]` is used on non-WASM targets). * - `dimensions()` is called exactly once at registration, immediately after * `initialize()` succeeds. The returned value is cached by the registry and * used for all subsequent shape validation. 
Lazy-loading implementations can * defer model loading into `initialize()` and report the real dimension * afterwards. Later mutations of the backend's reported dimension are not * observed by kreuzberg — implementations that need to change dimension * must unregister and re-register. * - `shutdown()` (inherited from `Plugin`) may be invoked * concurrently with an in-flight `embed()` call. Implementations must * tolerate this — e.g. by letting in-flight calls finish using resources * held via the `Arc` reference, and only releasing * shared state that isn't needed by `embed`. * * # Runtime * * The synchronous `embed_texts` entry uses * [`tokio::task::block_in_place`] to await the trait's async `embed`, which * requires a multi-thread tokio runtime. Callers running inside a * `current_thread` runtime (e.g. `#[tokio::test]` without `flavor = "multi_thread"`, * or `tokio::runtime::Builder::new_current_thread()`) must use * `embed_texts_async` instead, which awaits directly without * `block_in_place`. */ typedef struct KREUZBERGEmbeddingBackend KREUZBERGEmbeddingBackend; /** * Embedding configuration for text chunks. * * Configures embedding generation using ONNX models via the vendored embedding engine. * Requires the `embeddings` feature to be enabled. */ typedef struct KREUZBERGEmbeddingConfig KREUZBERGEmbeddingConfig; /** * Embedding model types supported by Kreuzberg. */ typedef struct KREUZBERGEmbeddingModelType KREUZBERGEmbeddingModelType; /** * Preset configurations for common RAG use cases. * * Each preset combines chunk size, overlap, and embedding model * to provide an optimized configuration for specific scenarios. * * All string fields are owned `String` for FFI compatibility — instances * are safe to clone and pass across language boundaries. */ typedef struct KREUZBERGEmbeddingPreset KREUZBERGEmbeddingPreset; /** * EPUB metadata (Dublin Core extensions). 
*/ typedef struct KREUZBERGEpubMetadata KREUZBERGEpubMetadata; /** * Error metadata (for batch operations). */ typedef struct KREUZBERGErrorMetadata KREUZBERGErrorMetadata; /** * Excel/spreadsheet format metadata. * * Identifies the document as a spreadsheet source via the `FormatMetadata::Excel` * discriminant. Sheet count and sheet names are stored inside this struct. */ typedef struct KREUZBERGExcelMetadata KREUZBERGExcelMetadata; /** * Single Excel worksheet. * * Represents one sheet from an Excel workbook with its content * converted to Markdown format and dimensional statistics. */ typedef struct KREUZBERGExcelSheet KREUZBERGExcelSheet; /** * Excel workbook representation. * * Contains all sheets from an Excel file (.xlsx, .xls, etc.) with * extracted content and metadata. */ typedef struct KREUZBERGExcelWorkbook KREUZBERGExcelWorkbook; /** * ONNX Runtime execution provider type. * * Determines which hardware backend is used for model inference. * `Auto` (default) selects the best available provider per platform. */ typedef struct KREUZBERGExecutionProviderType KREUZBERGExecutionProviderType; /** * Extracted image from a document. * * Contains raw image data, metadata, and optional nested OCR results. * Raw bytes allow cross-language compatibility - users can convert to * PIL.Image (Python), Sharp (Node.js), or other formats as needed. */ typedef struct KREUZBERGExtractedImage KREUZBERGExtractedImage; /** * A URI extracted from a document. * * Represents any link, reference, or resource pointer found during extraction. * The `kind` field classifies the URI semantically, while `label` carries * optional human-readable display text. */ typedef struct KREUZBERGExtractedUri KREUZBERGExtractedUri; /** * Main extraction configuration. * * This struct contains all configuration options for the extraction process. * It can be loaded from TOML, YAML, or JSON files, or created programmatically. 
* \\code * use kreuzberg::core::config::ExtractionConfig; * * // Create with defaults * let config = ExtractionConfig::default(); * * // Load from TOML file * // let config = ExtractionConfig::from_toml_file("kreuzberg.toml")?; * \\endcode */ typedef struct KREUZBERGExtractionConfig KREUZBERGExtractionConfig; /** * The complete diff between two `ExtractionResult` values. */ typedef struct KREUZBERGExtractionDiff KREUZBERGExtractionDiff; /** * How the extracted text was produced. */ typedef struct KREUZBERGExtractionMethod KREUZBERGExtractionMethod; /** * General extraction result used by the core extraction API. * * This is the main result type returned by all extraction functions. */ typedef struct KREUZBERGExtractionResult KREUZBERGExtractionResult; /** * FictionBook (FB2) metadata. */ typedef struct KREUZBERGFictionBookMetadata KREUZBERGFictionBookMetadata; /** * Per-file extraction configuration overrides for batch processing. * * All fields are `Option<T>` — `None` means "use the batch-level default." * This type is used with `batch_extract_files` and * `batch_extract_bytes` to allow heterogeneous * extraction settings within a single batch. * * # Excluded Fields * * The following `ExtractionConfig` fields are batch-level only and * cannot be overridden per file: * - `max_concurrent_extractions` — controls batch parallelism * - `use_cache` — global caching policy * - `acceleration` — shared ONNX execution provider * - `security_limits` — global archive security policy * \\code * use kreuzberg::FileExtractionConfig; * * // Override just OCR forcing for a specific file * let config = FileExtractionConfig { * force_ocr: Some(true), * ..Default::default() * }; * \\endcode */ typedef struct KREUZBERGFileExtractionConfig KREUZBERGFileExtractionConfig; /** * Footnote in Djot. */ typedef struct KREUZBERGFootnote KREUZBERGFootnote; /** * Format-specific metadata (discriminated union). * * Only one format type can exist per extraction result. 
This provides * type-safe, clean metadata without nested optionals. */ typedef struct KREUZBERGFormatMetadata KREUZBERGFormatMetadata; /** * Block-level element in a Djot document. * * Represents structural elements like headings, paragraphs, lists, code blocks, etc. */ typedef struct KREUZBERGFormattedBlock KREUZBERGFormattedBlock; /** * Individual grid cell with position and span metadata. */ typedef struct KREUZBERGGridCell KREUZBERGGridCell; /** * Header/heading element metadata. */ typedef struct KREUZBERGHeaderMetadata KREUZBERGHeaderMetadata; /** * Heading context for a chunk within a Markdown document. * * Contains the heading hierarchy from document root to this chunk's section. */ typedef struct KREUZBERGHeadingContext KREUZBERGHeadingContext; /** * A single heading in the hierarchy. */ typedef struct KREUZBERGHeadingLevel KREUZBERGHeadingLevel; /** * A text block with hierarchy level assignment. * * Represents a block of text with semantic heading information extracted from * font size clustering and hierarchical analysis. */ typedef struct KREUZBERGHierarchicalBlock KREUZBERGHierarchicalBlock; /** * Hierarchy extraction configuration for PDF text structure analysis. * * Enables extraction of document hierarchy levels (H1-H6) based on font size * clustering and semantic analysis. When enabled, hierarchical blocks are * included in page content. */ typedef struct KREUZBERGHierarchyConfig KREUZBERGHierarchyConfig; /** * HTML metadata extracted from HTML documents. * * Includes document-level metadata, Open Graph data, Twitter Card metadata, * and extracted structural elements (headers, links, images, structured data). */ typedef struct KREUZBERGHtmlMetadata KREUZBERGHtmlMetadata; /** * Configuration for styled HTML output. * * When set on [`ExtractionConfig::html_output`] alongside * `output_format = OutputFormat::Html`, the pipeline builds a * `StyledHtmlRenderer` (crate::rendering::StyledHtmlRenderer) instead of * the plain comrak-based renderer. 
* \\code * use kreuzberg::core::config::{HtmlOutputConfig, HtmlTheme}; * * let config = HtmlOutputConfig { * theme: HtmlTheme::GitHub, * css: Some(".kb-p { font-size: 1.1rem; }".to_string()), * ..Default::default() * }; * \\endcode */ typedef struct KREUZBERGHtmlOutputConfig KREUZBERGHtmlOutputConfig; /** * Built-in HTML theme selection. */ typedef struct KREUZBERGHtmlTheme KREUZBERGHtmlTheme; /** * Image extraction configuration. */ typedef struct KREUZBERGImageExtractionConfig KREUZBERGImageExtractionConfig; /** * Heuristic classification of what an image likely depicts. */ typedef struct KREUZBERGImageKind KREUZBERGImageKind; /** * Image metadata extracted from image files. * * Includes dimensions, format, and EXIF data. */ typedef struct KREUZBERGImageMetadata KREUZBERGImageMetadata; /** * Image element metadata. */ typedef struct KREUZBERGImageMetadataType KREUZBERGImageMetadataType; /** * Image preprocessing configuration for OCR. * * These settings control how images are preprocessed before OCR to improve * text recognition quality. Different preprocessing strategies work better * for different document types. */ typedef struct KREUZBERGImagePreprocessingConfig KREUZBERGImagePreprocessingConfig; /** * Image preprocessing metadata. * * Tracks the transformations applied to an image during OCR preprocessing, * including DPI normalization, resizing, and resampling. */ typedef struct KREUZBERGImagePreprocessingMetadata KREUZBERGImagePreprocessingMetadata; /** * Image type classification. */ typedef struct KREUZBERGImageType KREUZBERGImageType; /** * Inline element within a block. * * Represents text with formatting, links, images, etc. */ typedef struct KREUZBERGInlineElement KREUZBERGInlineElement; /** * Types of inline elements in Djot. */ typedef struct KREUZBERGInlineType KREUZBERGInlineType; /** * JATS (Journal Article Tag Suite) metadata. */ typedef struct KREUZBERGJatsMetadata KREUZBERGJatsMetadata; /** * Extracted keyword with metadata. 
*/ typedef struct KREUZBERGKeyword KREUZBERGKeyword; /** * Keyword algorithm selection. */ typedef struct KREUZBERGKeywordAlgorithm KREUZBERGKeywordAlgorithm; /** * Keyword extraction configuration. */ typedef struct KREUZBERGKeywordConfig KREUZBERGKeywordConfig; /** * Language detection configuration. */ typedef struct KREUZBERGLanguageDetectionConfig KREUZBERGLanguageDetectionConfig; /** * The 17 canonical document layout classes. * * All model backends (RT-DETR, YOLO, etc.) map their native class IDs * to this shared set. Models with fewer classes (DocLayNet: 11, PubLayNet: 5) * map to the closest equivalent. * * Wire format is snake_case in all serializers (JSON, TOML, YAML). */ typedef struct KREUZBERGLayoutClass KREUZBERGLayoutClass; /** * A single layout detection result. */ typedef struct KREUZBERGLayoutDetection KREUZBERGLayoutDetection; /** * Layout detection configuration. * * Controls layout detection behavior in the extraction pipeline. * When set on `ExtractionConfig` (super::ExtractionConfig), layout detection * is enabled for PDF extraction. */ typedef struct KREUZBERGLayoutDetectionConfig KREUZBERGLayoutDetectionConfig; /** * A detected layout region on a page. * * When layout detection is enabled, each page may have layout regions * identifying different content types (text, pictures, tables, etc.) * with confidence scores and spatial positions. */ typedef struct KREUZBERGLayoutRegion KREUZBERGLayoutRegion; /** * Link element metadata. */ typedef struct KREUZBERGLinkMetadata KREUZBERGLinkMetadata; /** * Link type classification. */ typedef struct KREUZBERGLinkType KREUZBERGLinkType; /** * Type of list detection. */ typedef struct KREUZBERGListType KREUZBERGListType; /** * Configuration for an LLM provider/model via liter-llm. * * Each feature (VLM OCR, VLM embeddings, structured extraction) carries * its own `LlmConfig`, allowing different providers per feature. * \\code * [structured_extraction.llm] * model = "openai/gpt-4o" * api_key = "sk-..." 
# or use KREUZBERG_LLM_API_KEY env var * \\endcode */ typedef struct KREUZBERGLlmConfig KREUZBERGLlmConfig; /** * Token usage and cost data for a single LLM call made during extraction. * * Populated when VLM OCR, structured extraction, or LLM-based embeddings * are used. Multiple entries may be present when multiple LLM calls occur * within one extraction (e.g. VLM OCR + structured extraction). */ typedef struct KREUZBERGLlmUsage KREUZBERGLlmUsage; /** * Extraction result metadata. * * Contains common fields applicable to all formats, format-specific metadata * via a discriminated union, and additional custom fields from postprocessors. */ typedef struct KREUZBERGMetadata KREUZBERGMetadata; /** * Combined paths to all models needed for OCR (backward compatibility). */ typedef struct KREUZBERGModelPaths KREUZBERGModelPaths; /** * Tagged enum for node content. Each variant carries only type-specific data. * * Uses `#[serde(tag = "node_type")]` to avoid "type" keyword collision in * Go/Java/TypeScript bindings. */ typedef struct KREUZBERGNodeContent KREUZBERGNodeContent; /** * Trait for OCR backend plugins. * * Implement this trait to add custom OCR capabilities. OCR backends can be: * - Native Rust implementations (like Tesseract) * - FFI bridges to Python libraries (like EasyOCR, PaddleOCR) * - Cloud-based OCR services (Google Vision, AWS Textract, etc.) * * # Thread Safety * * OCR backends must be thread-safe (`Send + Sync`) to support concurrent processing. 
* \\code * use kreuzberg::plugins::{Plugin, OcrBackend, OcrBackendType}; * use kreuzberg::{Result, OcrConfig}; * use async_trait::async_trait; * use std::borrow::Cow; * use std::path::Path; * use kreuzberg::types::{ExtractionResult, Metadata}; * * struct CustomOcrBackend; * * impl Plugin for CustomOcrBackend { * fn name(&self) -> &str { "custom-ocr" } * fn version(&self) -> String { "1.0.0".to_string() } * fn initialize(&self) -> Result<()> { Ok(()) } * fn shutdown(&self) -> Result<()> { Ok(()) } * } * * #[async_trait] * impl OcrBackend for CustomOcrBackend { * async fn process_image(&self, image_bytes: &[u8], config: &OcrConfig) -> Result<ExtractionResult> { * // Implement OCR logic here * Ok(ExtractionResult { * content: "Extracted text".to_string(), * mime_type: Cow::Borrowed("text/plain"), * ..Default::default() * }) * } * * async fn process_image_file(&self, path: &Path, config: &OcrConfig) -> Result<ExtractionResult> { * let bytes = std::fs::read(path)?; * self.process_image(&bytes, config).await * } * * fn supports_language(&self, lang: &str) -> bool { * matches!(lang, "eng" | "deu" | "fra") * } * * fn backend_type(&self) -> OcrBackendType { * OcrBackendType::Custom * } * } * \\endcode */ typedef struct KREUZBERGOcrBackend KREUZBERGOcrBackend; /** * OCR backend types. */ typedef struct KREUZBERGOcrBackendType KREUZBERGOcrBackendType; /** * Bounding geometry for an OCR element. * * Supports both axis-aligned rectangles (from Tesseract) and 4-point quadrilaterals * (from PaddleOCR and rotated text detection). */ typedef struct KREUZBERGOcrBoundingGeometry KREUZBERGOcrBoundingGeometry; /** * Confidence scores for an OCR element. * * Separates detection confidence (how confident that text exists at this location) * from recognition confidence (how confident about the actual text content). */ typedef struct KREUZBERGOcrConfidence KREUZBERGOcrConfidence; /** * OCR configuration. 
*/ typedef struct KREUZBERGOcrConfig KREUZBERGOcrConfig; /** * A unified OCR element representing detected text with full metadata. * * This is the primary type for structured OCR output, preserving all information * from both Tesseract and PaddleOCR backends. */ typedef struct KREUZBERGOcrElement KREUZBERGOcrElement; /** * Configuration for OCR element extraction. * * Controls how OCR elements are extracted and filtered. */ typedef struct KREUZBERGOcrElementConfig KREUZBERGOcrElementConfig; /** * Hierarchical level of an OCR element. * * Maps to Tesseract's page segmentation hierarchy and provides * equivalent semantics for PaddleOCR. */ typedef struct KREUZBERGOcrElementLevel KREUZBERGOcrElementLevel; /** * OCR extraction result. * * Result of performing OCR on an image or scanned document, * including recognized text and detected tables. */ typedef struct KREUZBERGOcrExtractionResult KREUZBERGOcrExtractionResult; /** * OCR processing metadata. * * Captures information about OCR processing configuration and results. */ typedef struct KREUZBERGOcrMetadata KREUZBERGOcrMetadata; /** * Multi-backend OCR pipeline with quality-based fallback. * * Backends are tried in priority order (highest first). After each backend * produces output, quality is evaluated. If it meets `quality_thresholds.pipeline_min_quality`, * the result is accepted. Otherwise the next backend is tried. */ typedef struct KREUZBERGOcrPipelineConfig KREUZBERGOcrPipelineConfig; /** * A single backend stage in the OCR pipeline. */ typedef struct KREUZBERGOcrPipelineStage KREUZBERGOcrPipelineStage; /** * Quality thresholds for OCR fallback decisions and pipeline quality gating. * * All fields default to the values that match the previous hardcoded behavior, * so `OcrQualityThresholds::default()` preserves existing semantics exactly. */ typedef struct KREUZBERGOcrQualityThresholds KREUZBERGOcrQualityThresholds; /** * Rotation information for an OCR element. 
*/ typedef struct KREUZBERGOcrRotation KREUZBERGOcrRotation; /** * Table detected via OCR. * * Represents a table structure recognized during OCR processing. */ typedef struct KREUZBERGOcrTable KREUZBERGOcrTable; /** * Bounding box for an OCR-detected table in pixel coordinates. */ typedef struct KREUZBERGOcrTableBoundingBox KREUZBERGOcrTableBoundingBox; /** * Document orientation detection result. */ typedef struct KREUZBERGOrientationResult KREUZBERGOrientationResult; /** * Output format for extraction results. * * Controls the format of the `content` field in `ExtractionResult`. * When set to `Markdown`, `Djot`, or `Html`, the output uses that format. * `Plain` returns the raw extracted text. * `Structured` returns JSON with full OCR element data including bounding * boxes and confidence scores. */ typedef struct KREUZBERGOutputFormat KREUZBERGOutputFormat; /** * Page Segmentation Mode for Tesseract OCR */ typedef struct KREUZBERGPSMMode KREUZBERGPSMMode; /** * Supported languages in PaddleOCR. * * Maps user-friendly language codes to paddle-ocr-rs language identifiers. */ typedef struct KREUZBERGPaddleLanguage KREUZBERGPaddleLanguage; /** * Configuration for PaddleOCR backend. * * Configures PaddleOCR text detection and recognition with multi-language support. * Uses a builder pattern for convenient configuration. * \\code * use kreuzberg::PaddleOcrConfig; * * // Create with default English configuration * let config = PaddleOcrConfig::new("en"); * * // Create with custom cache directory * let config = PaddleOcrConfig::new("ch") * .with_cache_dir("/path/to/cache".into()); * * // Enable table detection * let config = PaddleOcrConfig::new("en") * .with_table_detection(true); * \\endcode */ typedef struct KREUZBERGPaddleOcrConfig KREUZBERGPaddleOcrConfig; /** * Byte offset boundary for a page. * * Tracks where a specific page's content starts and ends in the main content string, * enabling mapping from byte positions to page numbers. 
Offsets are guaranteed to be * at valid UTF-8 character boundaries when using standard String methods (push_str, push, etc.). */ typedef struct KREUZBERGPageBoundary KREUZBERGPageBoundary; /** * Page extraction and tracking configuration. * * Controls how pages are extracted, tracked, and represented in the extraction results. * When `None`, page tracking is disabled. * * Page range tracking in chunk metadata (first_page/last_page) is automatically enabled * when page boundaries are available and chunking is configured. */ typedef struct KREUZBERGPageConfig KREUZBERGPageConfig; /** * Content for a single page/slide. * * When page extraction is enabled, documents are split into per-page content * with associated tables and images mapped to each page. * * # Performance * * Uses Arc-wrapped tables and images for memory efficiency: * - `Vec<Arc<Table>>` enables zero-copy sharing of table data * - `Vec<Arc<ExtractedImage>>` enables zero-copy sharing of image data * - Maintains exact JSON compatibility via custom Serialize/Deserialize * * This reduces memory overhead for documents with shared tables/images * by avoiding redundant copies during serialization. */ typedef struct KREUZBERGPageContent KREUZBERGPageContent; /** * Page hierarchy structure containing heading levels and block information. * * Used when PDF text hierarchy extraction is enabled. Contains hierarchical * blocks with heading levels (H1-H6) for semantic document structure. */ typedef struct KREUZBERGPageHierarchy KREUZBERGPageHierarchy; /** * Metadata for individual page/slide/sheet. * * Captures per-page information including dimensions, content counts, * and visibility state (for presentations). */ typedef struct KREUZBERGPageInfo KREUZBERGPageInfo; /** * Unified page structure for documents. * * Supports different page types (PDF pages, PPTX slides, Excel sheets) * with character offset boundaries for chunk-to-page mapping.
*/ typedef struct KREUZBERGPageStructure KREUZBERGPageStructure; /** * Type of paginated unit in a document. * * Distinguishes between different types of "pages" (PDF pages, presentation slides, spreadsheet * sheets). */ typedef struct KREUZBERGPageUnitType KREUZBERGPageUnitType; /** * A PDF annotation extracted from a document page. */ typedef struct KREUZBERGPdfAnnotation KREUZBERGPdfAnnotation; /** * Type of PDF annotation. */ typedef struct KREUZBERGPdfAnnotationType KREUZBERGPdfAnnotationType; /** * PDF-specific configuration. */ typedef struct KREUZBERGPdfConfig KREUZBERGPdfConfig; /** * PDF-specific metadata. * * Contains metadata fields specific to PDF documents that are not in the common * `Metadata` structure. Common fields like title, authors, keywords, and dates * are at the `Metadata` level. */ typedef struct KREUZBERGPdfMetadata KREUZBERGPdfMetadata; /** * Base trait that all plugins must implement. * * This trait provides common functionality for plugin lifecycle management, * identification, and metadata. * * # Thread Safety * * All plugins must be `Send + Sync` to support concurrent usage across threads. * \\code * use kreuzberg::plugins::Plugin; * use kreuzberg::Result; * use std::sync::atomic::{AtomicBool, Ordering}; * * struct MyPlugin { * initialized: AtomicBool, * } * * impl Plugin for MyPlugin { * fn name(&self) -> &str { * "my-plugin" * } * * fn version(&self) -> String { * "1.0.0".to_string() * } * * fn initialize(&self) -> Result<()> { * self.initialized.store(true, Ordering::Release); * println!("Plugin initialized!"); * Ok(()) * } * * fn shutdown(&self) -> Result<()> { * self.initialized.store(false, Ordering::Release); * println!("Plugin shutdown!"); * Ok(()) * } * } * \\endcode */ typedef struct KREUZBERGPlugin KREUZBERGPlugin; /** * Trait for post-processor plugins. * * Post-processors transform or enrich extraction results after the initial * extraction is complete. 
They can: * - Clean and normalize text * - Add metadata (language, keywords, entities) * - Split content into chunks * - Score quality * - Apply custom transformations * * # Processing Order * * Post-processors are executed in stage order: * 1. **Early** - Language detection, entity extraction * 2. **Middle** - Keyword extraction, token reduction * 3. **Late** - Custom hooks, final validation * * Within each stage, processors are executed in registration order. * * # Error Handling * * Post-processor errors are non-fatal by default - they're captured in metadata * and execution continues. To make errors fatal, return an error from `process()`. * * # Thread Safety * * Post-processors must be thread-safe (`Send + Sync`). * \\code * use kreuzberg::plugins::{Plugin, PostProcessor, ProcessingStage}; * use kreuzberg::{Result, ExtractionResult, ExtractionConfig}; * use async_trait::async_trait; * * /// Add word count metadata to extraction results * struct WordCountProcessor; * * impl Plugin for WordCountProcessor { * fn name(&self) -> &str { "word-count" } * fn version(&self) -> String { "1.0.0".to_string() } * fn initialize(&self) -> Result<()> { Ok(()) } * fn shutdown(&self) -> Result<()> { Ok(()) } * } * * #[async_trait] * impl PostProcessor for WordCountProcessor { * async fn process(&self, result: &mut ExtractionResult, config: &ExtractionConfig) * -> Result<()> { * // Count words * let word_count = result.content.split_whitespace().count(); * * // Add to metadata * result.metadata.additional.insert("word_count".to_string().into(), serde_json::json!(word_count)); * * Ok(()) * } * * fn processing_stage(&self) -> ProcessingStage { * ProcessingStage::Early * } * } * \\endcode */ typedef struct KREUZBERGPostProcessor KREUZBERGPostProcessor; /** * Post-processor configuration. */ typedef struct KREUZBERGPostProcessorConfig KREUZBERGPostProcessorConfig; /** * Application properties from docProps/app.xml for PPTX * * Contains PowerPoint-specific document metadata. 
*/ typedef struct KREUZBERGPptxAppProperties KREUZBERGPptxAppProperties; /** * PowerPoint (PPTX) extraction result. * * Contains extracted slide content, metadata, and embedded images/tables. */ typedef struct KREUZBERGPptxExtractionResult KREUZBERGPptxExtractionResult; /** * PowerPoint presentation metadata. * * Extracted from PPTX files containing slide counts and presentation details. */ typedef struct KREUZBERGPptxMetadata KREUZBERGPptxMetadata; /** * Processing stages for post-processors. * * Post-processors are executed in stage order (Early → Middle → Late). * Use stages to control the order of post-processing operations. */ typedef struct KREUZBERGProcessingStage KREUZBERGProcessingStage; /** * A non-fatal warning from a processing pipeline stage. * * Captures errors from optional features that don't prevent extraction * but may indicate degraded results. */ typedef struct KREUZBERGProcessingWarning KREUZBERGProcessingWarning; /** * Outlook PST archive metadata. */ typedef struct KREUZBERGPstMetadata KREUZBERGPstMetadata; /** * RAKE-specific parameters. */ typedef struct KREUZBERGRakeParams KREUZBERGRakeParams; /** * Pre-computed table markdown for a table detection region. * * Produced by the TATR-based table structure recognizer and surfaced as part of * layout-aware OCR results. The struct lives here (under `layout-types`, pure-Rust) * so that consumers who do not enable `layout-detection` (ORT) can still reference * the type in their own code. */ typedef struct KREUZBERGRecognizedTable KREUZBERGRecognizedTable; typedef struct KREUZBERGReductionLevel KREUZBERGReductionLevel; /** * Semantic kind of a relationship between document elements. */ typedef struct KREUZBERGRelationshipKind KREUZBERGRelationshipKind; /** * Trait for document renderers that convert [`InternalDocument`] to output strings. 
* * Renderers are typically stateless converters that transform the internal * document representation into a specific output format (Markdown, HTML, * Djot, plain text, etc.). They participate in the standard [`Plugin`] * lifecycle so custom renderers can be registered from any supported binding * language. * * The format name is exposed via [`Plugin::name`]. For stateless renderers * the [`Plugin`] lifecycle methods (`version`, `initialize`, `shutdown`) all * take no-op defaults and need not be overridden. * * # Thread Safety * * Renderers must be `Send + Sync` (inherited from [`Plugin`]). * \\code * use kreuzberg::plugins::{Plugin, Renderer}; * use kreuzberg::types::internal::InternalDocument; * use kreuzberg::Result; * * struct CustomRenderer; * * impl Plugin for CustomRenderer { * fn name(&self) -> &str { "custom" } * } * * impl Renderer for CustomRenderer { * fn render(&self, doc: &InternalDocument) -> Result<String> { * Ok(format!("Custom output with {} elements", doc.elements.len())) * } * } * \\endcode */ typedef struct KREUZBERGRenderer KREUZBERGRenderer; /** * Result-shape selection for extraction results. * * Distinct from `OutputFormat` (which controls rendering — Plain, Markdown, * HTML, etc.). `ResultFormat` controls the *shape* of the result: a unified content * blob vs. an element-based decomposition. */ typedef struct KREUZBERGResultFormat KREUZBERGResultFormat; /** * Best-effort document location for a revision. */ typedef struct KREUZBERGRevisionAnchor KREUZBERGRevisionAnchor; /** * The content changes that make up a single revision. * * For insertions and deletions the `content` field carries the added/removed * lines as `DiffLine::Added` / `DiffLine::Removed` entries. For format * changes, `content` is empty — the property diff is left as a TODO for a * later enrichment pass. */ typedef struct KREUZBERGRevisionDelta KREUZBERGRevisionDelta; /** * Semantic classification of a tracked change.
*/ typedef struct KREUZBERGRevisionKind KREUZBERGRevisionKind; /** * Configuration for security limits across extractors. * * All limits are intentionally conservative to prevent DoS attacks * while still supporting legitimate documents. */ typedef struct KREUZBERGSecurityLimits KREUZBERGSecurityLimits; /** * API server configuration. * * This struct holds all configuration options for the Kreuzberg API server, * including host/port settings, CORS configuration, and upload limits. * * # Defaults * * - `host`: "127.0.0.1" (localhost only) * - `port`: 8000 * - `cors_origins`: empty vector (allows all origins) * - `max_request_body_bytes`: 104_857_600 (100 MB) * - `max_multipart_field_bytes`: 104_857_600 (100 MB) */ typedef struct KREUZBERGServerConfig KREUZBERGServerConfig; /** * Structured data (Schema.org, microdata, RDFa) block. */ typedef struct KREUZBERGStructuredData KREUZBERGStructuredData; typedef struct KREUZBERGStructuredDataResult KREUZBERGStructuredDataResult; /** * Structured data type classification. */ typedef struct KREUZBERGStructuredDataType KREUZBERGStructuredDataType; /** * Configuration for LLM-based structured data extraction. * * Sends extracted document content to a VLM with a JSON schema, * returning structured data that conforms to the schema. * \\code * [structured_extraction] * schema_name = "invoice_data" * strict = true * * [structured_extraction.schema] * type = "object" * properties.vendor = { type = "string" } * properties.total = { type = "number" } * required = ["vendor", "total"] * * [structured_extraction.llm] * model = "openai/gpt-4o" * \\endcode */ typedef struct KREUZBERGStructuredExtractionConfig KREUZBERGStructuredExtractionConfig; /** * A supported document format entry. * * Represents a file extension and its corresponding MIME type that Kreuzberg can process. */ typedef struct KREUZBERGSupportedFormat KREUZBERGSupportedFormat; /** * Extracted table structure. 
* * Represents a table detected and extracted from a document (PDF, image, etc.). * Tables are converted to both structured cell data and Markdown format. */ typedef struct KREUZBERGTable KREUZBERGTable; /** * Individual table cell with content and optional styling. * * Future extension point for rich table support with cell-level metadata. */ typedef struct KREUZBERGTableCell KREUZBERGTableCell; /** * Cell-level changes for a pair of tables that share the same index. */ typedef struct KREUZBERGTableDiff KREUZBERGTableDiff; /** * Structured table grid with cell-level metadata. * * Stores row/column dimensions and a flat list of cells with position info. */ typedef struct KREUZBERGTableGrid KREUZBERGTableGrid; /** * Which table structure recognition model to use. * * Controls the model used for table cell detection within layout-detected * table regions. Wire format is snake_case in all serializers (JSON, TOML, * YAML). */ typedef struct KREUZBERGTableModel KREUZBERGTableModel; /** * Tesseract OCR configuration. * * Provides fine-grained control over Tesseract OCR engine parameters. * Most users can use the defaults, but these settings allow optimization * for specific document types (invoices, handwriting, etc.). */ typedef struct KREUZBERGTesseractConfig KREUZBERGTesseractConfig; /** * Inline text annotation — byte-range based formatting and links. * * Annotations reference byte offsets into the node's text content, * enabling precise identification of formatted regions. */ typedef struct KREUZBERGTextAnnotation KREUZBERGTextAnnotation; /** * Text direction enumeration for HTML documents. */ typedef struct KREUZBERGTextDirection KREUZBERGTextDirection; /** * Plain text and Markdown extraction result. * * Contains the extracted text along with statistics and, * for Markdown files, structural elements like headers and links. */ typedef struct KREUZBERGTextExtractionResult KREUZBERGTextExtractionResult; /** * Text/Markdown metadata. 
* * Extracted from plain text and Markdown files. Includes word counts and, * for Markdown, structural elements like headers and links. */ typedef struct KREUZBERGTextMetadata KREUZBERGTextMetadata; typedef struct KREUZBERGTokenReductionConfig KREUZBERGTokenReductionConfig; /** * Token reduction configuration. */ typedef struct KREUZBERGTokenReductionOptions KREUZBERGTokenReductionOptions; /** * Configuration for tree-sitter language pack integration. * * Controls grammar download behavior and code analysis options. * * # Example (TOML) * * ```toml * [tree_sitter] * languages = ["python", "rust"] * groups = ["web"] * * [tree_sitter.process] * structure = true * comments = true * docstrings = true * ``` */ typedef struct KREUZBERGTreeSitterConfig KREUZBERGTreeSitterConfig; /** * Processing options for tree-sitter code analysis. * * Controls which analysis features are enabled when extracting code files. */ typedef struct KREUZBERGTreeSitterProcessConfig KREUZBERGTreeSitterProcessConfig; /** * Semantic classification of an extracted URI. */ typedef struct KREUZBERGUriKind KREUZBERGUriKind; /** * Trait for validator plugins. * * Validators check extraction results for quality, completeness, or correctness. * Unlike post-processors, validator errors **fail fast** - if a validator returns * an error, the extraction fails immediately. * * # Use Cases * * - **Quality Gates**: Ensure extracted content meets minimum quality standards * - **Compliance**: Verify content meets regulatory requirements * - **Content Filtering**: Reject documents containing unwanted content * - **Format Validation**: Verify extracted content structure * - **Security Checks**: Scan for malicious content * * # Error Handling * * Validator errors are **fatal** - they cause the extraction to fail and bubble up * to the caller. Use validators for hard requirements that must be met. * * For non-fatal checks, use post-processors instead. 
* * # Thread Safety * * Validators must be thread-safe (`Send + Sync`). * \\code * use kreuzberg::plugins::{Plugin, Validator}; * use kreuzberg::{Result, ExtractionResult, ExtractionConfig, KreuzbergError}; * use async_trait::async_trait; * * /// Validate that extracted content has minimum length * struct MinimumLengthValidator { * min_length: usize, * } * * impl Plugin for MinimumLengthValidator { * fn name(&self) -> &str { "min-length-validator" } * fn version(&self) -> String { "1.0.0".to_string() } * fn initialize(&self) -> Result<()> { Ok(()) } * fn shutdown(&self) -> Result<()> { Ok(()) } * } * * #[async_trait] * impl Validator for MinimumLengthValidator { * async fn validate(&self, result: &ExtractionResult, config: &ExtractionConfig) * -> Result<()> { * if result.content.len() < self.min_length { * return Err(KreuzbergError::validation(format!( * "Content too short: {} < {} characters", * result.content.len(), * self.min_length * ))); * } * Ok(()) * } * } * \\endcode */ typedef struct KREUZBERGValidator KREUZBERGValidator; /** * Application properties from docProps/app.xml for XLSX * * Contains Excel-specific document metadata. */ typedef struct KREUZBERGXlsxAppProperties KREUZBERGXlsxAppProperties; /** * XML extraction result. * * Contains extracted text content from XML files along with * structural statistics about the XML document. */ typedef struct KREUZBERGXmlExtractionResult KREUZBERGXmlExtractionResult; /** * XML metadata extracted during XML parsing. * * Provides statistics about XML document structure. */ typedef struct KREUZBERGXmlMetadata KREUZBERGXmlMetadata; /** * YAKE-specific parameters. */ typedef struct KREUZBERGYakeParams KREUZBERGYakeParams; /** * Year range for bibliographic metadata. */ typedef struct KREUZBERGYearRange KREUZBERGYearRange; """ [defines] "target_os = windows" = "SKIF_WINDOWS" [export] prefix = "KREUZBERG" include = [] exclude = [] [fn] args = "vertical"