# This file is auto-generated by alef — DO NOT EDIT. # alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75 # To regenerate: alef generate # To verify freshness: alef verify --exit-code # Issues & docs: https://github.com/kreuzberg-dev/alef # frozen_string_literal: true require "json" require "sorbet-runtime" require "kreuzberg_rb" module Kreuzberg # How chunk size is measured. # # Defaults to `Characters` (Unicode character count). When using token-based sizing, # chunks are sized by token count according to the specified tokenizer. # # Token-based sizing uses HuggingFace tokenizers loaded at runtime. Any tokenizer # available on HuggingFace Hub can be used, including OpenAI-compatible tokenizers # (e.g., `Xenova/gpt-4o`, `Xenova/cl100k_base`). module ChunkSizing extend T::Helpers extend T::Sig interface! # Dispatch from a Hash to the appropriate variant constructor. # @param hash [Hash] with discriminator field and variant-specific fields # @return [variant_class] an instance of the appropriate variant sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.untyped) } def self.from_hash(hash) discriminator = hash[:type] || hash["type"] case discriminator when "characters" then ChunkSizingCharacters.from_hash(hash) when "tokenizer" then ChunkSizingTokenizer.from_hash(hash) else raise "Unknown discriminator: #{discriminator}" end end end # Size measured in Unicode characters (default). ChunkSizingCharacters = Data.define do include ChunkSizing extend T::Sig sig { returns(T::Boolean) } def characters? = true sig { returns(T::Boolean) } def tokenizer? = false # @param hash [Hash] deserialized from the native extension # @return [self] sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) } def self.from_hash(hash) new end end # Size measured in tokens from a HuggingFace tokenizer. ChunkSizingTokenizer = Data.define(:model, :cache_dir) do include ChunkSizing extend T::Sig # HuggingFace model ID or path, e.g. 
"Xenova/gpt-4o", "bert-base-uncased". sig { returns(String) } def model = super # rubocop:disable Lint/UselessMethodDefinition # Optional cache directory override for tokenizer files. # Defaults to hf-hub's standard cache (`~/.cache/huggingface/`). # Can also be set via `KREUZBERG_TOKENIZER_CACHE_DIR` environment variable. sig { returns(T.nilable(String)) } def cache_dir = super # rubocop:disable Lint/UselessMethodDefinition sig { returns(T::Boolean) } def characters? = false sig { returns(T::Boolean) } def tokenizer? = true # @param hash [Hash] deserialized from the native extension # @return [self] sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) } def self.from_hash(hash) new(model: hash[:model] || hash["model"], cache_dir: hash[:cache_dir] || hash["cache_dir"]) end end end module Kreuzberg # Embedding model types supported by Kreuzberg. module EmbeddingModelType extend T::Helpers extend T::Sig interface! # Dispatch from a Hash to the appropriate variant constructor. # @param hash [Hash] with discriminator field and variant-specific fields # @return [variant_class] an instance of the appropriate variant sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.untyped) } def self.from_hash(hash) discriminator = hash[:type] || hash["type"] case discriminator when "preset" then EmbeddingModelTypePreset.from_hash(hash) when "custom" then EmbeddingModelTypeCustom.from_hash(hash) when "llm" then EmbeddingModelTypeLlm.from_hash(hash) when "plugin" then EmbeddingModelTypePlugin.from_hash(hash) else raise "Unknown discriminator: #{discriminator}" end end end # Use a preset model configuration (recommended) EmbeddingModelTypePreset = Data.define(:name) do include EmbeddingModelType extend T::Sig # @return [String] sig { returns(String) } def name = super # rubocop:disable Lint/UselessMethodDefinition sig { returns(T::Boolean) } def preset? = true sig { returns(T::Boolean) } def custom? = false sig { returns(T::Boolean) } def llm? 
= false sig { returns(T::Boolean) } def plugin? = false # @param hash [Hash] deserialized from the native extension # @return [self] sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) } def self.from_hash(hash) new(name: hash[:name] || hash["name"]) end end # Use a custom ONNX model from HuggingFace EmbeddingModelTypeCustom = Data.define(:model_id, :dimensions) do include EmbeddingModelType extend T::Sig # @return [String] sig { returns(String) } def model_id = super # rubocop:disable Lint/UselessMethodDefinition # @return [Integer] sig { returns(Integer) } def dimensions = super # rubocop:disable Lint/UselessMethodDefinition sig { returns(T::Boolean) } def preset? = false sig { returns(T::Boolean) } def custom? = true sig { returns(T::Boolean) } def llm? = false sig { returns(T::Boolean) } def plugin? = false # @param hash [Hash] deserialized from the native extension # @return [self] sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) } def self.from_hash(hash) new(model_id: hash[:model_id] || hash["model_id"], dimensions: hash[:dimensions] || hash["dimensions"]) end end # Provider-hosted embedding model via liter-llm. # # Uses the model specified in the nested `LlmConfig` (e.g., # `"openai/text-embedding-3-small"`). EmbeddingModelTypeLlm = Data.define(:llm) do include EmbeddingModelType extend T::Sig # @return [LlmConfig] sig { returns(LlmConfig) } def llm = super # rubocop:disable Lint/UselessMethodDefinition sig { returns(T::Boolean) } def preset? = false sig { returns(T::Boolean) } def custom? = false sig { returns(T::Boolean) } def llm? = true sig { returns(T::Boolean) } def plugin? = false # @param hash [Hash] deserialized from the native extension # @return [self] sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) } def self.from_hash(hash) new(llm: hash[:llm] || hash["llm"]) end end # In-process embedding backend registered via the plugin system. 
# # The caller registers an [`EmbeddingBackend`](crate::plugins::EmbeddingBackend) once # (e.g. a wrapper around an already-loaded `llama-cpp-python`, `sentence-transformers`, # or tuned ONNX model), then references it by name in config. Kreuzberg calls back # into the registered backend during chunking and standalone embed requests — # no HuggingFace download, no ONNX Runtime requirement, no HTTP sidecar. # # When this variant is selected, only the following [`EmbeddingConfig`] fields # apply: `normalize` (post-call L2 normalization) and `max_embed_duration_secs` # (dispatcher timeout). Model-loading fields (`batch_size`, `cache_dir`, # `show_download_progress`, `acceleration`) are ignored — the host owns the # model lifecycle. # # Semantic chunking falls back to [`ChunkingConfig::max_characters`] when this variant # is used, since there is no preset to look a chunk-size ceiling up against — size your # context window via `max_characters` directly. # # See `register_embedding_backend`. EmbeddingModelTypePlugin = Data.define(:name) do include EmbeddingModelType extend T::Sig # @return [String] sig { returns(String) } def name = super # rubocop:disable Lint/UselessMethodDefinition sig { returns(T::Boolean) } def preset? = false sig { returns(T::Boolean) } def custom? = false sig { returns(T::Boolean) } def llm? = false sig { returns(T::Boolean) } def plugin? = true # @param hash [Hash] deserialized from the native extension # @return [self] sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) } def self.from_hash(hash) new(name: hash[:name] || hash["name"]) end end end module Kreuzberg # Tagged enum for node content. Each variant carries only type-specific data. # # Uses `#[serde(tag = "node_type")]` to avoid "type" keyword collision in # Go/Java/TypeScript bindings. module NodeContent extend T::Helpers extend T::Sig interface! # Dispatch from a Hash to the appropriate variant constructor. 
# @param hash [Hash] with discriminator field and variant-specific fields # @return [variant_class] an instance of the appropriate variant sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.untyped) } def self.from_hash(hash) discriminator = hash[:node_type] || hash["node_type"] case discriminator when "title" then NodeContentTitle.from_hash(hash) when "heading" then NodeContentHeading.from_hash(hash) when "paragraph" then NodeContentParagraph.from_hash(hash) when "list" then NodeContentList.from_hash(hash) when "list_item" then NodeContentListItem.from_hash(hash) when "table" then NodeContentTable.from_hash(hash) when "image" then NodeContentImage.from_hash(hash) when "code" then NodeContentCode.from_hash(hash) when "quote" then NodeContentQuote.from_hash(hash) when "formula" then NodeContentFormula.from_hash(hash) when "footnote" then NodeContentFootnote.from_hash(hash) when "group" then NodeContentGroup.from_hash(hash) when "page_break" then NodeContentPageBreak.from_hash(hash) when "slide" then NodeContentSlide.from_hash(hash) when "definition_list" then NodeContentDefinitionList.from_hash(hash) when "definition_item" then NodeContentDefinitionItem.from_hash(hash) when "citation" then NodeContentCitation.from_hash(hash) when "admonition" then NodeContentAdmonition.from_hash(hash) when "raw_block" then NodeContentRawBlock.from_hash(hash) when "metadata_block" then NodeContentMetadataBlock.from_hash(hash) else raise "Unknown discriminator: #{discriminator}" end end end # Document title. NodeContentTitle = Data.define(:text) do include NodeContent extend T::Sig # @return [String] sig { returns(String) } def text = super # rubocop:disable Lint/UselessMethodDefinition sig { returns(T::Boolean) } def title? = true sig { returns(T::Boolean) } def heading? = false sig { returns(T::Boolean) } def paragraph? = false sig { returns(T::Boolean) } def list? = false sig { returns(T::Boolean) } def list_item? = false sig { returns(T::Boolean) } def table? 
= false sig { returns(T::Boolean) } def image? = false sig { returns(T::Boolean) } def code? = false sig { returns(T::Boolean) } def quote? = false sig { returns(T::Boolean) } def formula? = false sig { returns(T::Boolean) } def footnote? = false sig { returns(T::Boolean) } def group? = false sig { returns(T::Boolean) } def page_break? = false sig { returns(T::Boolean) } def slide? = false sig { returns(T::Boolean) } def definition_list? = false sig { returns(T::Boolean) } def definition_item? = false sig { returns(T::Boolean) } def citation? = false sig { returns(T::Boolean) } def admonition? = false sig { returns(T::Boolean) } def raw_block? = false sig { returns(T::Boolean) } def metadata_block? = false # @param hash [Hash] deserialized from the native extension # @return [self] sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) } def self.from_hash(hash) new(text: hash[:text] || hash["text"]) end end # Section heading with level (1-6). NodeContentHeading = Data.define(:level, :text) do include NodeContent extend T::Sig # @return [Integer] sig { returns(Integer) } def level = super # rubocop:disable Lint/UselessMethodDefinition # @return [String] sig { returns(String) } def text = super # rubocop:disable Lint/UselessMethodDefinition sig { returns(T::Boolean) } def title? = false sig { returns(T::Boolean) } def heading? = true sig { returns(T::Boolean) } def paragraph? = false sig { returns(T::Boolean) } def list? = false sig { returns(T::Boolean) } def list_item? = false sig { returns(T::Boolean) } def table? = false sig { returns(T::Boolean) } def image? = false sig { returns(T::Boolean) } def code? = false sig { returns(T::Boolean) } def quote? = false sig { returns(T::Boolean) } def formula? = false sig { returns(T::Boolean) } def footnote? = false sig { returns(T::Boolean) } def group? = false sig { returns(T::Boolean) } def page_break? = false sig { returns(T::Boolean) } def slide? 
= false sig { returns(T::Boolean) } def definition_list? = false sig { returns(T::Boolean) } def definition_item? = false sig { returns(T::Boolean) } def citation? = false sig { returns(T::Boolean) } def admonition? = false sig { returns(T::Boolean) } def raw_block? = false sig { returns(T::Boolean) } def metadata_block? = false # @param hash [Hash] deserialized from the native extension # @return [self] sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) } def self.from_hash(hash) new(level: hash[:level] || hash["level"], text: hash[:text] || hash["text"]) end end # Body text paragraph. NodeContentParagraph = Data.define(:text) do include NodeContent extend T::Sig # @return [String] sig { returns(String) } def text = super # rubocop:disable Lint/UselessMethodDefinition sig { returns(T::Boolean) } def title? = false sig { returns(T::Boolean) } def heading? = false sig { returns(T::Boolean) } def paragraph? = true sig { returns(T::Boolean) } def list? = false sig { returns(T::Boolean) } def list_item? = false sig { returns(T::Boolean) } def table? = false sig { returns(T::Boolean) } def image? = false sig { returns(T::Boolean) } def code? = false sig { returns(T::Boolean) } def quote? = false sig { returns(T::Boolean) } def formula? = false sig { returns(T::Boolean) } def footnote? = false sig { returns(T::Boolean) } def group? = false sig { returns(T::Boolean) } def page_break? = false sig { returns(T::Boolean) } def slide? = false sig { returns(T::Boolean) } def definition_list? = false sig { returns(T::Boolean) } def definition_item? = false sig { returns(T::Boolean) } def citation? = false sig { returns(T::Boolean) } def admonition? = false sig { returns(T::Boolean) } def raw_block? = false sig { returns(T::Boolean) } def metadata_block? 
= false # @param hash [Hash] deserialized from the native extension # @return [self] sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) } def self.from_hash(hash) new(text: hash[:text] || hash["text"]) end end # List container — children are `ListItem` nodes. NodeContentList = Data.define(:ordered) do include NodeContent extend T::Sig # @return [T::Boolean] sig { returns(T::Boolean) } def ordered = super # rubocop:disable Lint/UselessMethodDefinition sig { returns(T::Boolean) } def title? = false sig { returns(T::Boolean) } def heading? = false sig { returns(T::Boolean) } def paragraph? = false sig { returns(T::Boolean) } def list? = true sig { returns(T::Boolean) } def list_item? = false sig { returns(T::Boolean) } def table? = false sig { returns(T::Boolean) } def image? = false sig { returns(T::Boolean) } def code? = false sig { returns(T::Boolean) } def quote? = false sig { returns(T::Boolean) } def formula? = false sig { returns(T::Boolean) } def footnote? = false sig { returns(T::Boolean) } def group? = false sig { returns(T::Boolean) } def page_break? = false sig { returns(T::Boolean) } def slide? = false sig { returns(T::Boolean) } def definition_list? = false sig { returns(T::Boolean) } def definition_item? = false sig { returns(T::Boolean) } def citation? = false sig { returns(T::Boolean) } def admonition? = false sig { returns(T::Boolean) } def raw_block? = false sig { returns(T::Boolean) } def metadata_block? = false # @param hash [Hash] deserialized from the native extension # @return [self] sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) } def self.from_hash(hash) new(ordered: hash[:ordered] || hash["ordered"]) end end # Individual list item. NodeContentListItem = Data.define(:text) do include NodeContent extend T::Sig # @return [String] sig { returns(String) } def text = super # rubocop:disable Lint/UselessMethodDefinition sig { returns(T::Boolean) } def title? 
= false sig { returns(T::Boolean) } def heading? = false sig { returns(T::Boolean) } def paragraph? = false sig { returns(T::Boolean) } def list? = false sig { returns(T::Boolean) } def list_item? = true sig { returns(T::Boolean) } def table? = false sig { returns(T::Boolean) } def image? = false sig { returns(T::Boolean) } def code? = false sig { returns(T::Boolean) } def quote? = false sig { returns(T::Boolean) } def formula? = false sig { returns(T::Boolean) } def footnote? = false sig { returns(T::Boolean) } def group? = false sig { returns(T::Boolean) } def page_break? = false sig { returns(T::Boolean) } def slide? = false sig { returns(T::Boolean) } def definition_list? = false sig { returns(T::Boolean) } def definition_item? = false sig { returns(T::Boolean) } def citation? = false sig { returns(T::Boolean) } def admonition? = false sig { returns(T::Boolean) } def raw_block? = false sig { returns(T::Boolean) } def metadata_block? = false # @param hash [Hash] deserialized from the native extension # @return [self] sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) } def self.from_hash(hash) new(text: hash[:text] || hash["text"]) end end # Table with structured cell grid. NodeContentTable = Data.define(:grid) do include NodeContent extend T::Sig # @return [TableGrid] sig { returns(TableGrid) } def grid = super # rubocop:disable Lint/UselessMethodDefinition sig { returns(T::Boolean) } def title? = false sig { returns(T::Boolean) } def heading? = false sig { returns(T::Boolean) } def paragraph? = false sig { returns(T::Boolean) } def list? = false sig { returns(T::Boolean) } def list_item? = false sig { returns(T::Boolean) } def table? = true sig { returns(T::Boolean) } def image? = false sig { returns(T::Boolean) } def code? = false sig { returns(T::Boolean) } def quote? = false sig { returns(T::Boolean) } def formula? = false sig { returns(T::Boolean) } def footnote? = false sig { returns(T::Boolean) } def group? 
= false sig { returns(T::Boolean) } def page_break? = false sig { returns(T::Boolean) } def slide? = false sig { returns(T::Boolean) } def definition_list? = false sig { returns(T::Boolean) } def definition_item? = false sig { returns(T::Boolean) } def citation? = false sig { returns(T::Boolean) } def admonition? = false sig { returns(T::Boolean) } def raw_block? = false sig { returns(T::Boolean) } def metadata_block? = false # @param hash [Hash] deserialized from the native extension # @return [self] sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) } def self.from_hash(hash) new(grid: hash[:grid] || hash["grid"]) end end # Image reference. NodeContentImage = Data.define(:description, :image_index, :src) do include NodeContent extend T::Sig # @return [T.nilable(String)] sig { returns(T.nilable(String)) } def description = super # rubocop:disable Lint/UselessMethodDefinition # @return [T.nilable(Integer)] sig { returns(T.nilable(Integer)) } def image_index = super # rubocop:disable Lint/UselessMethodDefinition # Source URL or path of the image (from `` or `![](src)`). sig { returns(T.nilable(String)) } def src = super # rubocop:disable Lint/UselessMethodDefinition sig { returns(T::Boolean) } def title? = false sig { returns(T::Boolean) } def heading? = false sig { returns(T::Boolean) } def paragraph? = false sig { returns(T::Boolean) } def list? = false sig { returns(T::Boolean) } def list_item? = false sig { returns(T::Boolean) } def table? = false sig { returns(T::Boolean) } def image? = true sig { returns(T::Boolean) } def code? = false sig { returns(T::Boolean) } def quote? = false sig { returns(T::Boolean) } def formula? = false sig { returns(T::Boolean) } def footnote? = false sig { returns(T::Boolean) } def group? = false sig { returns(T::Boolean) } def page_break? = false sig { returns(T::Boolean) } def slide? = false sig { returns(T::Boolean) } def definition_list? = false sig { returns(T::Boolean) } def definition_item? 
= false sig { returns(T::Boolean) } def citation? = false sig { returns(T::Boolean) } def admonition? = false sig { returns(T::Boolean) } def raw_block? = false sig { returns(T::Boolean) } def metadata_block? = false # @param hash [Hash] deserialized from the native extension # @return [self] sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) } def self.from_hash(hash) new(description: hash[:description] || hash["description"], image_index: hash[:image_index] || hash["image_index"], src: hash[:src] || hash["src"]) end end # Code block. NodeContentCode = Data.define(:text, :language) do include NodeContent extend T::Sig # @return [String] sig { returns(String) } def text = super # rubocop:disable Lint/UselessMethodDefinition # @return [T.nilable(String)] sig { returns(T.nilable(String)) } def language = super # rubocop:disable Lint/UselessMethodDefinition sig { returns(T::Boolean) } def title? = false sig { returns(T::Boolean) } def heading? = false sig { returns(T::Boolean) } def paragraph? = false sig { returns(T::Boolean) } def list? = false sig { returns(T::Boolean) } def list_item? = false sig { returns(T::Boolean) } def table? = false sig { returns(T::Boolean) } def image? = false sig { returns(T::Boolean) } def code? = true sig { returns(T::Boolean) } def quote? = false sig { returns(T::Boolean) } def formula? = false sig { returns(T::Boolean) } def footnote? = false sig { returns(T::Boolean) } def group? = false sig { returns(T::Boolean) } def page_break? = false sig { returns(T::Boolean) } def slide? = false sig { returns(T::Boolean) } def definition_list? = false sig { returns(T::Boolean) } def definition_item? = false sig { returns(T::Boolean) } def citation? = false sig { returns(T::Boolean) } def admonition? = false sig { returns(T::Boolean) } def raw_block? = false sig { returns(T::Boolean) } def metadata_block? 
= false # @param hash [Hash] deserialized from the native extension # @return [self] sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) } def self.from_hash(hash) new(text: hash[:text] || hash["text"], language: hash[:language] || hash["language"]) end end # Block quote — container, children carry the quoted content. NodeContentQuote = Data.define do include NodeContent extend T::Sig sig { returns(T::Boolean) } def title? = false sig { returns(T::Boolean) } def heading? = false sig { returns(T::Boolean) } def paragraph? = false sig { returns(T::Boolean) } def list? = false sig { returns(T::Boolean) } def list_item? = false sig { returns(T::Boolean) } def table? = false sig { returns(T::Boolean) } def image? = false sig { returns(T::Boolean) } def code? = false sig { returns(T::Boolean) } def quote? = true sig { returns(T::Boolean) } def formula? = false sig { returns(T::Boolean) } def footnote? = false sig { returns(T::Boolean) } def group? = false sig { returns(T::Boolean) } def page_break? = false sig { returns(T::Boolean) } def slide? = false sig { returns(T::Boolean) } def definition_list? = false sig { returns(T::Boolean) } def definition_item? = false sig { returns(T::Boolean) } def citation? = false sig { returns(T::Boolean) } def admonition? = false sig { returns(T::Boolean) } def raw_block? = false sig { returns(T::Boolean) } def metadata_block? = false # @param hash [Hash] deserialized from the native extension # @return [self] sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) } def self.from_hash(hash) new end end # Mathematical formula / equation. NodeContentFormula = Data.define(:text) do include NodeContent extend T::Sig # @return [String] sig { returns(String) } def text = super # rubocop:disable Lint/UselessMethodDefinition sig { returns(T::Boolean) } def title? = false sig { returns(T::Boolean) } def heading? = false sig { returns(T::Boolean) } def paragraph? 
= false sig { returns(T::Boolean) } def list? = false sig { returns(T::Boolean) } def list_item? = false sig { returns(T::Boolean) } def table? = false sig { returns(T::Boolean) } def image? = false sig { returns(T::Boolean) } def code? = false sig { returns(T::Boolean) } def quote? = false sig { returns(T::Boolean) } def formula? = true sig { returns(T::Boolean) } def footnote? = false sig { returns(T::Boolean) } def group? = false sig { returns(T::Boolean) } def page_break? = false sig { returns(T::Boolean) } def slide? = false sig { returns(T::Boolean) } def definition_list? = false sig { returns(T::Boolean) } def definition_item? = false sig { returns(T::Boolean) } def citation? = false sig { returns(T::Boolean) } def admonition? = false sig { returns(T::Boolean) } def raw_block? = false sig { returns(T::Boolean) } def metadata_block? = false # @param hash [Hash] deserialized from the native extension # @return [self] sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) } def self.from_hash(hash) new(text: hash[:text] || hash["text"]) end end # Footnote reference content. NodeContentFootnote = Data.define(:text) do include NodeContent extend T::Sig # @return [String] sig { returns(String) } def text = super # rubocop:disable Lint/UselessMethodDefinition sig { returns(T::Boolean) } def title? = false sig { returns(T::Boolean) } def heading? = false sig { returns(T::Boolean) } def paragraph? = false sig { returns(T::Boolean) } def list? = false sig { returns(T::Boolean) } def list_item? = false sig { returns(T::Boolean) } def table? = false sig { returns(T::Boolean) } def image? = false sig { returns(T::Boolean) } def code? = false sig { returns(T::Boolean) } def quote? = false sig { returns(T::Boolean) } def formula? = false sig { returns(T::Boolean) } def footnote? = true sig { returns(T::Boolean) } def group? = false sig { returns(T::Boolean) } def page_break? = false sig { returns(T::Boolean) } def slide? 
= false sig { returns(T::Boolean) } def definition_list? = false sig { returns(T::Boolean) } def definition_item? = false sig { returns(T::Boolean) } def citation? = false sig { returns(T::Boolean) } def admonition? = false sig { returns(T::Boolean) } def raw_block? = false sig { returns(T::Boolean) } def metadata_block? = false # @param hash [Hash] deserialized from the native extension # @return [self] sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) } def self.from_hash(hash) new(text: hash[:text] || hash["text"]) end end # Logical grouping container (section, key-value area). # # `heading_level` + `heading_text` capture the section heading directly # rather than relying on a first-child positional convention. NodeContentGroup = Data.define(:label, :heading_level, :heading_text) do include NodeContent extend T::Sig # @return [T.nilable(String)] sig { returns(T.nilable(String)) } def label = super # rubocop:disable Lint/UselessMethodDefinition # @return [T.nilable(Integer)] sig { returns(T.nilable(Integer)) } def heading_level = super # rubocop:disable Lint/UselessMethodDefinition # @return [T.nilable(String)] sig { returns(T.nilable(String)) } def heading_text = super # rubocop:disable Lint/UselessMethodDefinition sig { returns(T::Boolean) } def title? = false sig { returns(T::Boolean) } def heading? = false sig { returns(T::Boolean) } def paragraph? = false sig { returns(T::Boolean) } def list? = false sig { returns(T::Boolean) } def list_item? = false sig { returns(T::Boolean) } def table? = false sig { returns(T::Boolean) } def image? = false sig { returns(T::Boolean) } def code? = false sig { returns(T::Boolean) } def quote? = false sig { returns(T::Boolean) } def formula? = false sig { returns(T::Boolean) } def footnote? = false sig { returns(T::Boolean) } def group? = true sig { returns(T::Boolean) } def page_break? = false sig { returns(T::Boolean) } def slide? = false sig { returns(T::Boolean) } def definition_list? 
= false sig { returns(T::Boolean) } def definition_item? = false sig { returns(T::Boolean) } def citation? = false sig { returns(T::Boolean) } def admonition? = false sig { returns(T::Boolean) } def raw_block? = false sig { returns(T::Boolean) } def metadata_block? = false # @param hash [Hash] deserialized from the native extension # @return [self] sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) } def self.from_hash(hash) new(label: hash[:label] || hash["label"], heading_level: hash[:heading_level] || hash["heading_level"], heading_text: hash[:heading_text] || hash["heading_text"]) end end # Page break marker. NodeContentPageBreak = Data.define do include NodeContent extend T::Sig sig { returns(T::Boolean) } def title? = false sig { returns(T::Boolean) } def heading? = false sig { returns(T::Boolean) } def paragraph? = false sig { returns(T::Boolean) } def list? = false sig { returns(T::Boolean) } def list_item? = false sig { returns(T::Boolean) } def table? = false sig { returns(T::Boolean) } def image? = false sig { returns(T::Boolean) } def code? = false sig { returns(T::Boolean) } def quote? = false sig { returns(T::Boolean) } def formula? = false sig { returns(T::Boolean) } def footnote? = false sig { returns(T::Boolean) } def group? = false sig { returns(T::Boolean) } def page_break? = true sig { returns(T::Boolean) } def slide? = false sig { returns(T::Boolean) } def definition_list? = false sig { returns(T::Boolean) } def definition_item? = false sig { returns(T::Boolean) } def citation? = false sig { returns(T::Boolean) } def admonition? = false sig { returns(T::Boolean) } def raw_block? = false sig { returns(T::Boolean) } def metadata_block? = false # @param hash [Hash] deserialized from the native extension # @return [self] sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) } def self.from_hash(hash) new end end # Presentation slide container — children are the slide's content nodes. 
NodeContentSlide = Data.define(:number, :title) do include NodeContent extend T::Sig # 1-indexed slide number. sig { returns(Integer) } def number = super # rubocop:disable Lint/UselessMethodDefinition # @return [T.nilable(String)] sig { returns(T.nilable(String)) } def title = super # rubocop:disable Lint/UselessMethodDefinition sig { returns(T::Boolean) } def title? = false sig { returns(T::Boolean) } def heading? = false sig { returns(T::Boolean) } def paragraph? = false sig { returns(T::Boolean) } def list? = false sig { returns(T::Boolean) } def list_item? = false sig { returns(T::Boolean) } def table? = false sig { returns(T::Boolean) } def image? = false sig { returns(T::Boolean) } def code? = false sig { returns(T::Boolean) } def quote? = false sig { returns(T::Boolean) } def formula? = false sig { returns(T::Boolean) } def footnote? = false sig { returns(T::Boolean) } def group? = false sig { returns(T::Boolean) } def page_break? = false sig { returns(T::Boolean) } def slide? = true sig { returns(T::Boolean) } def definition_list? = false sig { returns(T::Boolean) } def definition_item? = false sig { returns(T::Boolean) } def citation? = false sig { returns(T::Boolean) } def admonition? = false sig { returns(T::Boolean) } def raw_block? = false sig { returns(T::Boolean) } def metadata_block? = false # @param hash [Hash] deserialized from the native extension # @return [self] sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) } def self.from_hash(hash) new(number: hash[:number] || hash["number"], title: hash[:title] || hash["title"]) end end # Definition list container — children are `DefinitionItem` nodes. NodeContentDefinitionList = Data.define do include NodeContent extend T::Sig sig { returns(T::Boolean) } def title? = false sig { returns(T::Boolean) } def heading? = false sig { returns(T::Boolean) } def paragraph? = false sig { returns(T::Boolean) } def list? = false sig { returns(T::Boolean) } def list_item? 
= false sig { returns(T::Boolean) } def table? = false sig { returns(T::Boolean) } def image? = false sig { returns(T::Boolean) } def code? = false sig { returns(T::Boolean) } def quote? = false sig { returns(T::Boolean) } def formula? = false sig { returns(T::Boolean) } def footnote? = false sig { returns(T::Boolean) } def group? = false sig { returns(T::Boolean) } def page_break? = false sig { returns(T::Boolean) } def slide? = false sig { returns(T::Boolean) } def definition_list? = true sig { returns(T::Boolean) } def definition_item? = false sig { returns(T::Boolean) } def citation? = false sig { returns(T::Boolean) } def admonition? = false sig { returns(T::Boolean) } def raw_block? = false sig { returns(T::Boolean) } def metadata_block? = false # @param hash [Hash] deserialized from the native extension # @return [self] sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) } def self.from_hash(hash) new end end # Individual definition list entry with term and definition. NodeContentDefinitionItem = Data.define(:term, :definition) do include NodeContent extend T::Sig # @return [String] sig { returns(String) } def term = super # rubocop:disable Lint/UselessMethodDefinition # @return [String] sig { returns(String) } def definition = super # rubocop:disable Lint/UselessMethodDefinition sig { returns(T::Boolean) } def title? = false sig { returns(T::Boolean) } def heading? = false sig { returns(T::Boolean) } def paragraph? = false sig { returns(T::Boolean) } def list? = false sig { returns(T::Boolean) } def list_item? = false sig { returns(T::Boolean) } def table? = false sig { returns(T::Boolean) } def image? = false sig { returns(T::Boolean) } def code? = false sig { returns(T::Boolean) } def quote? = false sig { returns(T::Boolean) } def formula? = false sig { returns(T::Boolean) } def footnote? = false sig { returns(T::Boolean) } def group? = false sig { returns(T::Boolean) } def page_break? = false sig { returns(T::Boolean) } def slide? 
= false sig { returns(T::Boolean) } def definition_list? = false sig { returns(T::Boolean) } def definition_item? = true sig { returns(T::Boolean) } def citation? = false sig { returns(T::Boolean) } def admonition? = false sig { returns(T::Boolean) } def raw_block? = false sig { returns(T::Boolean) } def metadata_block? = false # @param hash [Hash] deserialized from the native extension # @return [self] sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) } def self.from_hash(hash) new(term: hash[:term] || hash["term"], definition: hash[:definition] || hash["definition"]) end end # Citation or bibliographic reference. NodeContentCitation = Data.define(:key, :text) do include NodeContent extend T::Sig # @return [String] sig { returns(String) } def key = super # rubocop:disable Lint/UselessMethodDefinition # @return [String] sig { returns(String) } def text = super # rubocop:disable Lint/UselessMethodDefinition sig { returns(T::Boolean) } def title? = false sig { returns(T::Boolean) } def heading? = false sig { returns(T::Boolean) } def paragraph? = false sig { returns(T::Boolean) } def list? = false sig { returns(T::Boolean) } def list_item? = false sig { returns(T::Boolean) } def table? = false sig { returns(T::Boolean) } def image? = false sig { returns(T::Boolean) } def code? = false sig { returns(T::Boolean) } def quote? = false sig { returns(T::Boolean) } def formula? = false sig { returns(T::Boolean) } def footnote? = false sig { returns(T::Boolean) } def group? = false sig { returns(T::Boolean) } def page_break? = false sig { returns(T::Boolean) } def slide? = false sig { returns(T::Boolean) } def definition_list? = false sig { returns(T::Boolean) } def definition_item? = false sig { returns(T::Boolean) } def citation? = true sig { returns(T::Boolean) } def admonition? = false sig { returns(T::Boolean) } def raw_block? = false sig { returns(T::Boolean) } def metadata_block? 
= false # @param hash [Hash] deserialized from the native extension # @return [self] sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) } def self.from_hash(hash) new(key: hash[:key] || hash["key"], text: hash[:text] || hash["text"]) end end # Admonition / callout container (note, warning, tip, etc.). # # Children carry the admonition body content. NodeContentAdmonition = Data.define(:kind, :title) do include NodeContent extend T::Sig # Kind of admonition (e.g. "note", "warning", "tip", "danger"). sig { returns(String) } def kind = super # rubocop:disable Lint/UselessMethodDefinition # @return [T.nilable(String)] sig { returns(T.nilable(String)) } def title = super # rubocop:disable Lint/UselessMethodDefinition sig { returns(T::Boolean) } def title? = false sig { returns(T::Boolean) } def heading? = false sig { returns(T::Boolean) } def paragraph? = false sig { returns(T::Boolean) } def list? = false sig { returns(T::Boolean) } def list_item? = false sig { returns(T::Boolean) } def table? = false sig { returns(T::Boolean) } def image? = false sig { returns(T::Boolean) } def code? = false sig { returns(T::Boolean) } def quote? = false sig { returns(T::Boolean) } def formula? = false sig { returns(T::Boolean) } def footnote? = false sig { returns(T::Boolean) } def group? = false sig { returns(T::Boolean) } def page_break? = false sig { returns(T::Boolean) } def slide? = false sig { returns(T::Boolean) } def definition_list? = false sig { returns(T::Boolean) } def definition_item? = false sig { returns(T::Boolean) } def citation? = false sig { returns(T::Boolean) } def admonition? = true sig { returns(T::Boolean) } def raw_block? = false sig { returns(T::Boolean) } def metadata_block? 
= false # @param hash [Hash] deserialized from the native extension # @return [self] sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) } def self.from_hash(hash) new(kind: hash[:kind] || hash["kind"], title: hash[:title] || hash["title"]) end end # Raw block preserved verbatim from the source format. # # Used for content that cannot be mapped to a semantic node type # (e.g. JSX in MDX, raw LaTeX in markdown, embedded HTML). NodeContentRawBlock = Data.define(:format, :content) do include NodeContent extend T::Sig # Source format identifier (e.g. "html", "latex", "jsx"). sig { returns(String) } def format = super # rubocop:disable Lint/UselessMethodDefinition # @return [String] sig { returns(String) } def content = super # rubocop:disable Lint/UselessMethodDefinition sig { returns(T::Boolean) } def title? = false sig { returns(T::Boolean) } def heading? = false sig { returns(T::Boolean) } def paragraph? = false sig { returns(T::Boolean) } def list? = false sig { returns(T::Boolean) } def list_item? = false sig { returns(T::Boolean) } def table? = false sig { returns(T::Boolean) } def image? = false sig { returns(T::Boolean) } def code? = false sig { returns(T::Boolean) } def quote? = false sig { returns(T::Boolean) } def formula? = false sig { returns(T::Boolean) } def footnote? = false sig { returns(T::Boolean) } def group? = false sig { returns(T::Boolean) } def page_break? = false sig { returns(T::Boolean) } def slide? = false sig { returns(T::Boolean) } def definition_list? = false sig { returns(T::Boolean) } def definition_item? = false sig { returns(T::Boolean) } def citation? = false sig { returns(T::Boolean) } def admonition? = false sig { returns(T::Boolean) } def raw_block? = true sig { returns(T::Boolean) } def metadata_block? 
= false # @param hash [Hash] deserialized from the native extension # @return [self] sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) } def self.from_hash(hash) new(format: hash[:format] || hash["format"], content: hash[:content] || hash["content"]) end end # Structured metadata block (email headers, YAML frontmatter, etc.). NodeContentMetadataBlock = Data.define(:entries) do include NodeContent extend T::Sig # @return [T::Array[T::Array[String]]] sig { returns(T::Array[T::Array[String]]) } def entries = super # rubocop:disable Lint/UselessMethodDefinition sig { returns(T::Boolean) } def title? = false sig { returns(T::Boolean) } def heading? = false sig { returns(T::Boolean) } def paragraph? = false sig { returns(T::Boolean) } def list? = false sig { returns(T::Boolean) } def list_item? = false sig { returns(T::Boolean) } def table? = false sig { returns(T::Boolean) } def image? = false sig { returns(T::Boolean) } def code? = false sig { returns(T::Boolean) } def quote? = false sig { returns(T::Boolean) } def formula? = false sig { returns(T::Boolean) } def footnote? = false sig { returns(T::Boolean) } def group? = false sig { returns(T::Boolean) } def page_break? = false sig { returns(T::Boolean) } def slide? = false sig { returns(T::Boolean) } def definition_list? = false sig { returns(T::Boolean) } def definition_item? = false sig { returns(T::Boolean) } def citation? = false sig { returns(T::Boolean) } def admonition? = false sig { returns(T::Boolean) } def raw_block? = false sig { returns(T::Boolean) } def metadata_block? = true # @param hash [Hash] deserialized from the native extension # @return [self] sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) } def self.from_hash(hash) new(entries: hash[:entries] || hash["entries"]) end end end module Kreuzberg # Types of inline text annotations. module AnnotationKind extend T::Helpers extend T::Sig interface! 
# Dispatch from a Hash to the appropriate variant constructor. # @param hash [Hash] with discriminator field and variant-specific fields # @return [variant_class] an instance of the appropriate variant sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.untyped) } def self.from_hash(hash) discriminator = hash[:annotation_type] || hash["annotation_type"] case discriminator when "bold" then AnnotationKindBold.from_hash(hash) when "italic" then AnnotationKindItalic.from_hash(hash) when "underline" then AnnotationKindUnderline.from_hash(hash) when "strikethrough" then AnnotationKindStrikethrough.from_hash(hash) when "code" then AnnotationKindCode.from_hash(hash) when "subscript" then AnnotationKindSubscript.from_hash(hash) when "superscript" then AnnotationKindSuperscript.from_hash(hash) when "link" then AnnotationKindLink.from_hash(hash) when "highlight" then AnnotationKindHighlight.from_hash(hash) when "color" then AnnotationKindColor.from_hash(hash) when "font_size" then AnnotationKindFontSize.from_hash(hash) when "custom" then AnnotationKindCustom.from_hash(hash) else raise "Unknown discriminator: #{discriminator}" end end end # Variant AnnotationKindBold of the AnnotationKind sum type. AnnotationKindBold = Data.define do include AnnotationKind extend T::Sig sig { returns(T::Boolean) } def bold? = true sig { returns(T::Boolean) } def italic? = false sig { returns(T::Boolean) } def underline? = false sig { returns(T::Boolean) } def strikethrough? = false sig { returns(T::Boolean) } def code? = false sig { returns(T::Boolean) } def subscript? = false sig { returns(T::Boolean) } def superscript? = false sig { returns(T::Boolean) } def link? = false sig { returns(T::Boolean) } def highlight? = false sig { returns(T::Boolean) } def color? = false sig { returns(T::Boolean) } def font_size? = false sig { returns(T::Boolean) } def custom? 
= false # @param hash [Hash] deserialized from the native extension # @return [self] sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) } def self.from_hash(hash) new end end # Variant AnnotationKindItalic of the AnnotationKind sum type. AnnotationKindItalic = Data.define do include AnnotationKind extend T::Sig sig { returns(T::Boolean) } def bold? = false sig { returns(T::Boolean) } def italic? = true sig { returns(T::Boolean) } def underline? = false sig { returns(T::Boolean) } def strikethrough? = false sig { returns(T::Boolean) } def code? = false sig { returns(T::Boolean) } def subscript? = false sig { returns(T::Boolean) } def superscript? = false sig { returns(T::Boolean) } def link? = false sig { returns(T::Boolean) } def highlight? = false sig { returns(T::Boolean) } def color? = false sig { returns(T::Boolean) } def font_size? = false sig { returns(T::Boolean) } def custom? = false # @param hash [Hash] deserialized from the native extension # @return [self] sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) } def self.from_hash(hash) new end end # Variant AnnotationKindUnderline of the AnnotationKind sum type. AnnotationKindUnderline = Data.define do include AnnotationKind extend T::Sig sig { returns(T::Boolean) } def bold? = false sig { returns(T::Boolean) } def italic? = false sig { returns(T::Boolean) } def underline? = true sig { returns(T::Boolean) } def strikethrough? = false sig { returns(T::Boolean) } def code? = false sig { returns(T::Boolean) } def subscript? = false sig { returns(T::Boolean) } def superscript? = false sig { returns(T::Boolean) } def link? = false sig { returns(T::Boolean) } def highlight? = false sig { returns(T::Boolean) } def color? = false sig { returns(T::Boolean) } def font_size? = false sig { returns(T::Boolean) } def custom? 
= false # @param hash [Hash] deserialized from the native extension # @return [self] sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) } def self.from_hash(hash) new end end # Variant AnnotationKindStrikethrough of the AnnotationKind sum type. AnnotationKindStrikethrough = Data.define do include AnnotationKind extend T::Sig sig { returns(T::Boolean) } def bold? = false sig { returns(T::Boolean) } def italic? = false sig { returns(T::Boolean) } def underline? = false sig { returns(T::Boolean) } def strikethrough? = true sig { returns(T::Boolean) } def code? = false sig { returns(T::Boolean) } def subscript? = false sig { returns(T::Boolean) } def superscript? = false sig { returns(T::Boolean) } def link? = false sig { returns(T::Boolean) } def highlight? = false sig { returns(T::Boolean) } def color? = false sig { returns(T::Boolean) } def font_size? = false sig { returns(T::Boolean) } def custom? = false # @param hash [Hash] deserialized from the native extension # @return [self] sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) } def self.from_hash(hash) new end end # Variant AnnotationKindCode of the AnnotationKind sum type. AnnotationKindCode = Data.define do include AnnotationKind extend T::Sig sig { returns(T::Boolean) } def bold? = false sig { returns(T::Boolean) } def italic? = false sig { returns(T::Boolean) } def underline? = false sig { returns(T::Boolean) } def strikethrough? = false sig { returns(T::Boolean) } def code? = true sig { returns(T::Boolean) } def subscript? = false sig { returns(T::Boolean) } def superscript? = false sig { returns(T::Boolean) } def link? = false sig { returns(T::Boolean) } def highlight? = false sig { returns(T::Boolean) } def color? = false sig { returns(T::Boolean) } def font_size? = false sig { returns(T::Boolean) } def custom? 
= false # @param hash [Hash] deserialized from the native extension # @return [self] sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) } def self.from_hash(hash) new end end # Variant AnnotationKindSubscript of the AnnotationKind sum type. AnnotationKindSubscript = Data.define do include AnnotationKind extend T::Sig sig { returns(T::Boolean) } def bold? = false sig { returns(T::Boolean) } def italic? = false sig { returns(T::Boolean) } def underline? = false sig { returns(T::Boolean) } def strikethrough? = false sig { returns(T::Boolean) } def code? = false sig { returns(T::Boolean) } def subscript? = true sig { returns(T::Boolean) } def superscript? = false sig { returns(T::Boolean) } def link? = false sig { returns(T::Boolean) } def highlight? = false sig { returns(T::Boolean) } def color? = false sig { returns(T::Boolean) } def font_size? = false sig { returns(T::Boolean) } def custom? = false # @param hash [Hash] deserialized from the native extension # @return [self] sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) } def self.from_hash(hash) new end end # Variant AnnotationKindSuperscript of the AnnotationKind sum type. AnnotationKindSuperscript = Data.define do include AnnotationKind extend T::Sig sig { returns(T::Boolean) } def bold? = false sig { returns(T::Boolean) } def italic? = false sig { returns(T::Boolean) } def underline? = false sig { returns(T::Boolean) } def strikethrough? = false sig { returns(T::Boolean) } def code? = false sig { returns(T::Boolean) } def subscript? = false sig { returns(T::Boolean) } def superscript? = true sig { returns(T::Boolean) } def link? = false sig { returns(T::Boolean) } def highlight? = false sig { returns(T::Boolean) } def color? = false sig { returns(T::Boolean) } def font_size? = false sig { returns(T::Boolean) } def custom? 
= false # @param hash [Hash] deserialized from the native extension # @return [self] sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) } def self.from_hash(hash) new end end # Variant AnnotationKindLink of the AnnotationKind sum type. AnnotationKindLink = Data.define(:url, :title) do include AnnotationKind extend T::Sig # @return [String] sig { returns(String) } def url = super # rubocop:disable Lint/UselessMethodDefinition # @return [T.nilable(String)] sig { returns(T.nilable(String)) } def title = super # rubocop:disable Lint/UselessMethodDefinition sig { returns(T::Boolean) } def bold? = false sig { returns(T::Boolean) } def italic? = false sig { returns(T::Boolean) } def underline? = false sig { returns(T::Boolean) } def strikethrough? = false sig { returns(T::Boolean) } def code? = false sig { returns(T::Boolean) } def subscript? = false sig { returns(T::Boolean) } def superscript? = false sig { returns(T::Boolean) } def link? = true sig { returns(T::Boolean) } def highlight? = false sig { returns(T::Boolean) } def color? = false sig { returns(T::Boolean) } def font_size? = false sig { returns(T::Boolean) } def custom? = false # @param hash [Hash] deserialized from the native extension # @return [self] sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) } def self.from_hash(hash) new(url: hash[:url] || hash["url"], title: hash[:title] || hash["title"]) end end # Highlighted text (PDF highlights, HTML ``). AnnotationKindHighlight = Data.define do include AnnotationKind extend T::Sig sig { returns(T::Boolean) } def bold? = false sig { returns(T::Boolean) } def italic? = false sig { returns(T::Boolean) } def underline? = false sig { returns(T::Boolean) } def strikethrough? = false sig { returns(T::Boolean) } def code? = false sig { returns(T::Boolean) } def subscript? = false sig { returns(T::Boolean) } def superscript? = false sig { returns(T::Boolean) } def link? 
= false sig { returns(T::Boolean) } def highlight? = true sig { returns(T::Boolean) } def color? = false sig { returns(T::Boolean) } def font_size? = false sig { returns(T::Boolean) } def custom? = false # @param hash [Hash] deserialized from the native extension # @return [self] sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) } def self.from_hash(hash) new end end # Text color (CSS-compatible value, e.g. "#ff0000", "red"). AnnotationKindColor = Data.define(:value) do include AnnotationKind extend T::Sig # @return [String] sig { returns(String) } def value = super # rubocop:disable Lint/UselessMethodDefinition sig { returns(T::Boolean) } def bold? = false sig { returns(T::Boolean) } def italic? = false sig { returns(T::Boolean) } def underline? = false sig { returns(T::Boolean) } def strikethrough? = false sig { returns(T::Boolean) } def code? = false sig { returns(T::Boolean) } def subscript? = false sig { returns(T::Boolean) } def superscript? = false sig { returns(T::Boolean) } def link? = false sig { returns(T::Boolean) } def highlight? = false sig { returns(T::Boolean) } def color? = true sig { returns(T::Boolean) } def font_size? = false sig { returns(T::Boolean) } def custom? = false # @param hash [Hash] deserialized from the native extension # @return [self] sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) } def self.from_hash(hash) new(value: hash[:value] || hash["value"]) end end # Font size with units (e.g. "12pt", "1.2em", "16px"). AnnotationKindFontSize = Data.define(:value) do include AnnotationKind extend T::Sig # @return [String] sig { returns(String) } def value = super # rubocop:disable Lint/UselessMethodDefinition sig { returns(T::Boolean) } def bold? = false sig { returns(T::Boolean) } def italic? = false sig { returns(T::Boolean) } def underline? = false sig { returns(T::Boolean) } def strikethrough? = false sig { returns(T::Boolean) } def code? 
= false sig { returns(T::Boolean) } def subscript? = false sig { returns(T::Boolean) } def superscript? = false sig { returns(T::Boolean) } def link? = false sig { returns(T::Boolean) } def highlight? = false sig { returns(T::Boolean) } def color? = false sig { returns(T::Boolean) } def font_size? = true sig { returns(T::Boolean) } def custom? = false # @param hash [Hash] deserialized from the native extension # @return [self] sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) } def self.from_hash(hash) new(value: hash[:value] || hash["value"]) end end # Extensible annotation for format-specific styling. AnnotationKindCustom = Data.define(:name, :value) do include AnnotationKind extend T::Sig # @return [String] sig { returns(String) } def name = super # rubocop:disable Lint/UselessMethodDefinition # @return [T.nilable(String)] sig { returns(T.nilable(String)) } def value = super # rubocop:disable Lint/UselessMethodDefinition sig { returns(T::Boolean) } def bold? = false sig { returns(T::Boolean) } def italic? = false sig { returns(T::Boolean) } def underline? = false sig { returns(T::Boolean) } def strikethrough? = false sig { returns(T::Boolean) } def code? = false sig { returns(T::Boolean) } def subscript? = false sig { returns(T::Boolean) } def superscript? = false sig { returns(T::Boolean) } def link? = false sig { returns(T::Boolean) } def highlight? = false sig { returns(T::Boolean) } def color? = false sig { returns(T::Boolean) } def font_size? = false sig { returns(T::Boolean) } def custom? = true # @param hash [Hash] deserialized from the native extension # @return [self] sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) } def self.from_hash(hash) new(name: hash[:name] || hash["name"], value: hash[:value] || hash["value"]) end end end module Kreuzberg # Format-specific metadata (discriminated union). # # Only one format type can exist per extraction result. 
This provides # type-safe, clean metadata without nested optionals. module FormatMetadata extend T::Helpers extend T::Sig interface! # Dispatch from a Hash to the appropriate variant constructor. # @param hash [Hash] with discriminator field and variant-specific fields # @return [variant_class] an instance of the appropriate variant sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.untyped) } def self.from_hash(hash) discriminator = hash[:format_type] || hash["format_type"] case discriminator when "pdf" then FormatMetadataPdf.from_hash(hash) when "docx" then FormatMetadataDocx.from_hash(hash) when "excel" then FormatMetadataExcel.from_hash(hash) when "email" then FormatMetadataEmail.from_hash(hash) when "pptx" then FormatMetadataPptx.from_hash(hash) when "archive" then FormatMetadataArchive.from_hash(hash) when "image" then FormatMetadataImage.from_hash(hash) when "xml" then FormatMetadataXml.from_hash(hash) when "text" then FormatMetadataText.from_hash(hash) when "html" then FormatMetadataHtml.from_hash(hash) when "ocr" then FormatMetadataOcr.from_hash(hash) when "csv" then FormatMetadataCsv.from_hash(hash) when "bibtex" then FormatMetadataBibtex.from_hash(hash) when "citation" then FormatMetadataCitation.from_hash(hash) when "fiction_book" then FormatMetadataFictionBook.from_hash(hash) when "dbf" then FormatMetadataDbf.from_hash(hash) when "jats" then FormatMetadataJats.from_hash(hash) when "epub" then FormatMetadataEpub.from_hash(hash) when "pst" then FormatMetadataPst.from_hash(hash) when "code" then FormatMetadataCode.from_hash(hash) else raise "Unknown discriminator: #{discriminator}" end end end # Variant FormatMetadataPdf of the FormatMetadata sum type. FormatMetadataPdf = Data.define(:value) do include FormatMetadata extend T::Sig # @return [PdfMetadata] sig { returns(PdfMetadata) } def value = super # rubocop:disable Lint/UselessMethodDefinition sig { returns(T::Boolean) } def pdf? = true sig { returns(T::Boolean) } def docx? 
= false sig { returns(T::Boolean) } def excel? = false sig { returns(T::Boolean) } def email? = false sig { returns(T::Boolean) } def pptx? = false sig { returns(T::Boolean) } def archive? = false sig { returns(T::Boolean) } def image? = false sig { returns(T::Boolean) } def xml? = false sig { returns(T::Boolean) } def text? = false sig { returns(T::Boolean) } def html? = false sig { returns(T::Boolean) } def ocr? = false sig { returns(T::Boolean) } def csv? = false sig { returns(T::Boolean) } def bibtex? = false sig { returns(T::Boolean) } def citation? = false sig { returns(T::Boolean) } def fiction_book? = false sig { returns(T::Boolean) } def dbf? = false sig { returns(T::Boolean) } def jats? = false sig { returns(T::Boolean) } def epub? = false sig { returns(T::Boolean) } def pst? = false sig { returns(T::Boolean) } def code? = false # @param hash [Hash] deserialized from the native extension # @return [self] sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) } def self.from_hash(hash) new(value: hash[:_0] || hash["_0"]) end end # Variant FormatMetadataDocx of the FormatMetadata sum type. FormatMetadataDocx = Data.define(:value) do include FormatMetadata extend T::Sig # @return [DocxMetadata] sig { returns(DocxMetadata) } def value = super # rubocop:disable Lint/UselessMethodDefinition sig { returns(T::Boolean) } def pdf? = false sig { returns(T::Boolean) } def docx? = true sig { returns(T::Boolean) } def excel? = false sig { returns(T::Boolean) } def email? = false sig { returns(T::Boolean) } def pptx? = false sig { returns(T::Boolean) } def archive? = false sig { returns(T::Boolean) } def image? = false sig { returns(T::Boolean) } def xml? = false sig { returns(T::Boolean) } def text? = false sig { returns(T::Boolean) } def html? = false sig { returns(T::Boolean) } def ocr? = false sig { returns(T::Boolean) } def csv? = false sig { returns(T::Boolean) } def bibtex? = false sig { returns(T::Boolean) } def citation? 
= false sig { returns(T::Boolean) } def fiction_book? = false sig { returns(T::Boolean) } def dbf? = false sig { returns(T::Boolean) } def jats? = false sig { returns(T::Boolean) } def epub? = false sig { returns(T::Boolean) } def pst? = false sig { returns(T::Boolean) } def code? = false # @param hash [Hash] deserialized from the native extension # @return [self] sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) } def self.from_hash(hash) new(value: hash[:_0] || hash["_0"]) end end # Variant FormatMetadataExcel of the FormatMetadata sum type. FormatMetadataExcel = Data.define(:value) do include FormatMetadata extend T::Sig # @return [ExcelMetadata] sig { returns(ExcelMetadata) } def value = super # rubocop:disable Lint/UselessMethodDefinition sig { returns(T::Boolean) } def pdf? = false sig { returns(T::Boolean) } def docx? = false sig { returns(T::Boolean) } def excel? = true sig { returns(T::Boolean) } def email? = false sig { returns(T::Boolean) } def pptx? = false sig { returns(T::Boolean) } def archive? = false sig { returns(T::Boolean) } def image? = false sig { returns(T::Boolean) } def xml? = false sig { returns(T::Boolean) } def text? = false sig { returns(T::Boolean) } def html? = false sig { returns(T::Boolean) } def ocr? = false sig { returns(T::Boolean) } def csv? = false sig { returns(T::Boolean) } def bibtex? = false sig { returns(T::Boolean) } def citation? = false sig { returns(T::Boolean) } def fiction_book? = false sig { returns(T::Boolean) } def dbf? = false sig { returns(T::Boolean) } def jats? = false sig { returns(T::Boolean) } def epub? = false sig { returns(T::Boolean) } def pst? = false sig { returns(T::Boolean) } def code? = false # @param hash [Hash] deserialized from the native extension # @return [self] sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) } def self.from_hash(hash) new(value: hash[:_0] || hash["_0"]) end end # Variant FormatMetadataEmail of the FormatMetadata sum type. 
FormatMetadataEmail = Data.define(:value) do
  include FormatMetadata
  extend T::Sig

  # The wrapped metadata payload.
  # @return [EmailMetadata]
  sig { returns(EmailMetadata) }
  def value = super # rubocop:disable Lint/UselessMethodDefinition

  # Variant predicates: only `email?` is true for this variant.
  sig { returns(T::Boolean) }
  def email? = true

  sig { returns(T::Boolean) }
  def pdf? = false

  sig { returns(T::Boolean) }
  def docx? = false

  sig { returns(T::Boolean) }
  def excel? = false

  sig { returns(T::Boolean) }
  def pptx? = false

  sig { returns(T::Boolean) }
  def archive? = false

  sig { returns(T::Boolean) }
  def image? = false

  sig { returns(T::Boolean) }
  def xml? = false

  sig { returns(T::Boolean) }
  def text? = false

  sig { returns(T::Boolean) }
  def html? = false

  sig { returns(T::Boolean) }
  def ocr? = false

  sig { returns(T::Boolean) }
  def csv? = false

  sig { returns(T::Boolean) }
  def bibtex? = false

  sig { returns(T::Boolean) }
  def citation? = false

  sig { returns(T::Boolean) }
  def fiction_book? = false

  sig { returns(T::Boolean) }
  def dbf? = false

  sig { returns(T::Boolean) }
  def jats? = false

  sig { returns(T::Boolean) }
  def epub? = false

  sig { returns(T::Boolean) }
  def pst? = false

  sig { returns(T::Boolean) }
  def code? = false

  # Reads the payload from the `_0` entry (symbol key first, then string).
  # NOTE(review): the payload is stored as-is; presumably the native
  # extension already supplies an EmailMetadata here - confirm against caller.
  # @param hash [Hash] deserialized from the native extension
  # @return [self]
  sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) }
  def self.from_hash(hash)
    payload = hash[:_0] || hash["_0"]
    new(value: payload)
  end
end

# PPTX variant of the FormatMetadata sum type; wraps a single payload.
FormatMetadataPptx = Data.define(:value) do
  include FormatMetadata
  extend T::Sig

  # The wrapped metadata payload.
  # @return [PptxMetadata]
  sig { returns(PptxMetadata) }
  def value = super # rubocop:disable Lint/UselessMethodDefinition

  # Variant predicates: only `pptx?` is true for this variant.
  sig { returns(T::Boolean) }
  def pptx? = true

  sig { returns(T::Boolean) }
  def pdf? = false

  sig { returns(T::Boolean) }
  def docx? = false

  sig { returns(T::Boolean) }
  def excel? = false

  sig { returns(T::Boolean) }
  def email? = false

  sig { returns(T::Boolean) }
  def archive? = false

  sig { returns(T::Boolean) }
  def image? = false

  sig { returns(T::Boolean) }
  def xml? = false

  sig { returns(T::Boolean) }
  def text? = false

  sig { returns(T::Boolean) }
  def html? = false

  sig { returns(T::Boolean) }
  def ocr? = false

  sig { returns(T::Boolean) }
  def csv? = false

  sig { returns(T::Boolean) }
  def bibtex? = false

  sig { returns(T::Boolean) }
  def citation? = false

  sig { returns(T::Boolean) }
  def fiction_book? = false

  sig { returns(T::Boolean) }
  def dbf? = false

  sig { returns(T::Boolean) }
  def jats? = false

  sig { returns(T::Boolean) }
  def epub? = false

  sig { returns(T::Boolean) }
  def pst? = false

  sig { returns(T::Boolean) }
  def code? = false

  # Reads the payload from the `_0` entry (symbol key first, then string).
  # @param hash [Hash] deserialized from the native extension
  # @return [self]
  sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) }
  def self.from_hash(hash)
    payload = hash[:_0] || hash["_0"]
    new(value: payload)
  end
end

# Archive variant of the FormatMetadata sum type; wraps a single payload.
FormatMetadataArchive = Data.define(:value) do
  include FormatMetadata
  extend T::Sig

  # The wrapped metadata payload.
  # @return [ArchiveMetadata]
  sig { returns(ArchiveMetadata) }
  def value = super # rubocop:disable Lint/UselessMethodDefinition

  # Variant predicates: only `archive?` is true for this variant.
  sig { returns(T::Boolean) }
  def archive? = true

  sig { returns(T::Boolean) }
  def pdf? = false

  sig { returns(T::Boolean) }
  def docx? = false

  sig { returns(T::Boolean) }
  def excel? = false

  sig { returns(T::Boolean) }
  def email? = false

  sig { returns(T::Boolean) }
  def pptx? = false

  sig { returns(T::Boolean) }
  def image? = false

  sig { returns(T::Boolean) }
  def xml? = false

  sig { returns(T::Boolean) }
  def text? = false

  sig { returns(T::Boolean) }
  def html? = false

  sig { returns(T::Boolean) }
  def ocr? = false

  sig { returns(T::Boolean) }
  def csv? = false

  sig { returns(T::Boolean) }
  def bibtex? = false

  sig { returns(T::Boolean) }
  def citation? = false

  sig { returns(T::Boolean) }
  def fiction_book? = false

  sig { returns(T::Boolean) }
  def dbf? = false

  sig { returns(T::Boolean) }
  def jats? = false

  sig { returns(T::Boolean) }
  def epub? = false

  sig { returns(T::Boolean) }
  def pst? = false

  sig { returns(T::Boolean) }
  def code? = false

  # Reads the payload from the `_0` entry (symbol key first, then string).
  # @param hash [Hash] deserialized from the native extension
  # @return [self]
  sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) }
  def self.from_hash(hash)
    payload = hash[:_0] || hash["_0"]
    new(value: payload)
  end
end

# Image variant of the FormatMetadata sum type; wraps a single payload.
FormatMetadataImage = Data.define(:value) do
  include FormatMetadata
  extend T::Sig

  # The wrapped metadata payload.
  # @return [ImageMetadata]
  sig { returns(ImageMetadata) }
  def value = super # rubocop:disable Lint/UselessMethodDefinition

  # Variant predicates: only `image?` is true for this variant.
  sig { returns(T::Boolean) }
  def image? = true

  sig { returns(T::Boolean) }
  def pdf? = false

  sig { returns(T::Boolean) }
  def docx? = false

  sig { returns(T::Boolean) }
  def excel? = false

  sig { returns(T::Boolean) }
  def email? = false

  sig { returns(T::Boolean) }
  def pptx? = false

  sig { returns(T::Boolean) }
  def archive? = false

  sig { returns(T::Boolean) }
  def xml? = false

  sig { returns(T::Boolean) }
  def text? = false

  sig { returns(T::Boolean) }
  def html? = false

  sig { returns(T::Boolean) }
  def ocr? = false

  sig { returns(T::Boolean) }
  def csv? = false

  sig { returns(T::Boolean) }
  def bibtex? = false

  sig { returns(T::Boolean) }
  def citation? = false

  sig { returns(T::Boolean) }
  def fiction_book? = false

  sig { returns(T::Boolean) }
  def dbf? = false

  sig { returns(T::Boolean) }
  def jats? = false

  sig { returns(T::Boolean) }
  def epub? = false

  sig { returns(T::Boolean) }
  def pst? = false

  sig { returns(T::Boolean) }
  def code? = false

  # Reads the payload from the `_0` entry (symbol key first, then string).
  # @param hash [Hash] deserialized from the native extension
  # @return [self]
  sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) }
  def self.from_hash(hash)
    payload = hash[:_0] || hash["_0"]
    new(value: payload)
  end
end

# XML variant of the FormatMetadata sum type; wraps a single payload.
FormatMetadataXml = Data.define(:value) do
  include FormatMetadata
  extend T::Sig

  # The wrapped metadata payload.
  # @return [XmlMetadata]
  sig { returns(XmlMetadata) }
  def value = super # rubocop:disable Lint/UselessMethodDefinition

  # Variant predicates: only `xml?` is true for this variant.
  sig { returns(T::Boolean) }
  def xml? = true

  sig { returns(T::Boolean) }
  def pdf? = false

  sig { returns(T::Boolean) }
  def docx? = false

  sig { returns(T::Boolean) }
  def excel? = false

  sig { returns(T::Boolean) }
  def email? = false

  sig { returns(T::Boolean) }
  def pptx? = false

  sig { returns(T::Boolean) }
  def archive? = false

  sig { returns(T::Boolean) }
  def image? = false

  sig { returns(T::Boolean) }
  def text? = false

  sig { returns(T::Boolean) }
  def html? = false

  sig { returns(T::Boolean) }
  def ocr? = false

  sig { returns(T::Boolean) }
  def csv? = false

  sig { returns(T::Boolean) }
  def bibtex? = false

  sig { returns(T::Boolean) }
  def citation? = false

  sig { returns(T::Boolean) }
  def fiction_book? = false

  sig { returns(T::Boolean) }
  def dbf? = false

  sig { returns(T::Boolean) }
  def jats? = false

  sig { returns(T::Boolean) }
  def epub? = false

  sig { returns(T::Boolean) }
  def pst? = false

  sig { returns(T::Boolean) }
  def code? = false

  # Reads the payload from the `_0` entry (symbol key first, then string).
  # @param hash [Hash] deserialized from the native extension
  # @return [self]
  sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) }
  def self.from_hash(hash)
    payload = hash[:_0] || hash["_0"]
    new(value: payload)
  end
end

# Plain-text variant of the FormatMetadata sum type; wraps a single payload.
FormatMetadataText = Data.define(:value) do
  include FormatMetadata
  extend T::Sig

  # The wrapped metadata payload.
  # @return [TextMetadata]
  sig { returns(TextMetadata) }
  def value = super # rubocop:disable Lint/UselessMethodDefinition

  # Variant predicates: only `text?` is true for this variant.
  sig { returns(T::Boolean) }
  def text? = true

  sig { returns(T::Boolean) }
  def pdf? = false

  sig { returns(T::Boolean) }
  def docx? = false

  sig { returns(T::Boolean) }
  def excel? = false

  sig { returns(T::Boolean) }
  def email? = false

  sig { returns(T::Boolean) }
  def pptx? = false

  sig { returns(T::Boolean) }
  def archive? = false

  sig { returns(T::Boolean) }
  def image? = false

  sig { returns(T::Boolean) }
  def xml? = false

  sig { returns(T::Boolean) }
  def html? = false

  sig { returns(T::Boolean) }
  def ocr? = false

  sig { returns(T::Boolean) }
  def csv? = false

  sig { returns(T::Boolean) }
  def bibtex? = false

  sig { returns(T::Boolean) }
  def citation? = false

  sig { returns(T::Boolean) }
  def fiction_book? = false

  sig { returns(T::Boolean) }
  def dbf? = false

  sig { returns(T::Boolean) }
  def jats? = false

  sig { returns(T::Boolean) }
  def epub? = false

  sig { returns(T::Boolean) }
  def pst? = false

  sig { returns(T::Boolean) }
  def code? = false

  # Reads the payload from the `_0` entry (symbol key first, then string).
  # @param hash [Hash] deserialized from the native extension
  # @return [self]
  sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) }
  def self.from_hash(hash)
    payload = hash[:_0] || hash["_0"]
    new(value: payload)
  end
end

# HTML variant of the FormatMetadata sum type; wraps a single payload.
FormatMetadataHtml = Data.define(:value) do
  include FormatMetadata
  extend T::Sig

  # The wrapped metadata payload.
  # @return [HtmlMetadata]
  sig { returns(HtmlMetadata) }
  def value = super # rubocop:disable Lint/UselessMethodDefinition

  # Variant predicates: only `html?` is true for this variant.
  sig { returns(T::Boolean) }
  def html? = true

  sig { returns(T::Boolean) }
  def pdf? = false

  sig { returns(T::Boolean) }
  def docx? = false

  sig { returns(T::Boolean) }
  def excel? = false

  sig { returns(T::Boolean) }
  def email? = false

  sig { returns(T::Boolean) }
  def pptx? = false

  sig { returns(T::Boolean) }
  def archive? = false

  sig { returns(T::Boolean) }
  def image? = false

  sig { returns(T::Boolean) }
  def xml? = false

  sig { returns(T::Boolean) }
  def text? = false

  sig { returns(T::Boolean) }
  def ocr? = false

  sig { returns(T::Boolean) }
  def csv? = false

  sig { returns(T::Boolean) }
  def bibtex? = false

  sig { returns(T::Boolean) }
  def citation? = false

  sig { returns(T::Boolean) }
  def fiction_book? = false

  sig { returns(T::Boolean) }
  def dbf? = false

  sig { returns(T::Boolean) }
  def jats? = false

  sig { returns(T::Boolean) }
  def epub? = false

  sig { returns(T::Boolean) }
  def pst? = false

  sig { returns(T::Boolean) }
  def code? = false

  # Reads the payload from the `_0` entry (symbol key first, then string).
  # @param hash [Hash] deserialized from the native extension
  # @return [self]
  sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) }
  def self.from_hash(hash)
    payload = hash[:_0] || hash["_0"]
    new(value: payload)
  end
end

# Variant FormatMetadataOcr of the FormatMetadata sum type.
FormatMetadataOcr = Data.define(:value) do include FormatMetadata extend T::Sig # @return [OcrMetadata] sig { returns(OcrMetadata) } def value = super # rubocop:disable Lint/UselessMethodDefinition sig { returns(T::Boolean) } def pdf? = false sig { returns(T::Boolean) } def docx? = false sig { returns(T::Boolean) } def excel? = false sig { returns(T::Boolean) } def email? = false sig { returns(T::Boolean) } def pptx? = false sig { returns(T::Boolean) } def archive? = false sig { returns(T::Boolean) } def image? = false sig { returns(T::Boolean) } def xml? = false sig { returns(T::Boolean) } def text? = false sig { returns(T::Boolean) } def html? = false sig { returns(T::Boolean) } def ocr? = true sig { returns(T::Boolean) } def csv? = false sig { returns(T::Boolean) } def bibtex? = false sig { returns(T::Boolean) } def citation? = false sig { returns(T::Boolean) } def fiction_book? = false sig { returns(T::Boolean) } def dbf? = false sig { returns(T::Boolean) } def jats? = false sig { returns(T::Boolean) } def epub? = false sig { returns(T::Boolean) } def pst? = false sig { returns(T::Boolean) } def code? = false # @param hash [Hash] deserialized from the native extension # @return [self] sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) } def self.from_hash(hash) new(value: hash[:_0] || hash["_0"]) end end # Variant FormatMetadataCsv of the FormatMetadata sum type. FormatMetadataCsv = Data.define(:value) do include FormatMetadata extend T::Sig # @return [CsvMetadata] sig { returns(CsvMetadata) } def value = super # rubocop:disable Lint/UselessMethodDefinition sig { returns(T::Boolean) } def pdf? = false sig { returns(T::Boolean) } def docx? = false sig { returns(T::Boolean) } def excel? = false sig { returns(T::Boolean) } def email? = false sig { returns(T::Boolean) } def pptx? = false sig { returns(T::Boolean) } def archive? = false sig { returns(T::Boolean) } def image? = false sig { returns(T::Boolean) } def xml? 
= false sig { returns(T::Boolean) } def text? = false sig { returns(T::Boolean) } def html? = false sig { returns(T::Boolean) } def ocr? = false sig { returns(T::Boolean) } def csv? = true sig { returns(T::Boolean) } def bibtex? = false sig { returns(T::Boolean) } def citation? = false sig { returns(T::Boolean) } def fiction_book? = false sig { returns(T::Boolean) } def dbf? = false sig { returns(T::Boolean) } def jats? = false sig { returns(T::Boolean) } def epub? = false sig { returns(T::Boolean) } def pst? = false sig { returns(T::Boolean) } def code? = false # @param hash [Hash] deserialized from the native extension # @return [self] sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) } def self.from_hash(hash) new(value: hash[:_0] || hash["_0"]) end end # Variant FormatMetadataBibtex of the FormatMetadata sum type. FormatMetadataBibtex = Data.define(:value) do include FormatMetadata extend T::Sig # @return [BibtexMetadata] sig { returns(BibtexMetadata) } def value = super # rubocop:disable Lint/UselessMethodDefinition sig { returns(T::Boolean) } def pdf? = false sig { returns(T::Boolean) } def docx? = false sig { returns(T::Boolean) } def excel? = false sig { returns(T::Boolean) } def email? = false sig { returns(T::Boolean) } def pptx? = false sig { returns(T::Boolean) } def archive? = false sig { returns(T::Boolean) } def image? = false sig { returns(T::Boolean) } def xml? = false sig { returns(T::Boolean) } def text? = false sig { returns(T::Boolean) } def html? = false sig { returns(T::Boolean) } def ocr? = false sig { returns(T::Boolean) } def csv? = false sig { returns(T::Boolean) } def bibtex? = true sig { returns(T::Boolean) } def citation? = false sig { returns(T::Boolean) } def fiction_book? = false sig { returns(T::Boolean) } def dbf? = false sig { returns(T::Boolean) } def jats? = false sig { returns(T::Boolean) } def epub? = false sig { returns(T::Boolean) } def pst? = false sig { returns(T::Boolean) } def code? 
= false # @param hash [Hash] deserialized from the native extension # @return [self] sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) } def self.from_hash(hash) new(value: hash[:_0] || hash["_0"]) end end # Variant FormatMetadataCitation of the FormatMetadata sum type. FormatMetadataCitation = Data.define(:value) do include FormatMetadata extend T::Sig # @return [CitationMetadata] sig { returns(CitationMetadata) } def value = super # rubocop:disable Lint/UselessMethodDefinition sig { returns(T::Boolean) } def pdf? = false sig { returns(T::Boolean) } def docx? = false sig { returns(T::Boolean) } def excel? = false sig { returns(T::Boolean) } def email? = false sig { returns(T::Boolean) } def pptx? = false sig { returns(T::Boolean) } def archive? = false sig { returns(T::Boolean) } def image? = false sig { returns(T::Boolean) } def xml? = false sig { returns(T::Boolean) } def text? = false sig { returns(T::Boolean) } def html? = false sig { returns(T::Boolean) } def ocr? = false sig { returns(T::Boolean) } def csv? = false sig { returns(T::Boolean) } def bibtex? = false sig { returns(T::Boolean) } def citation? = true sig { returns(T::Boolean) } def fiction_book? = false sig { returns(T::Boolean) } def dbf? = false sig { returns(T::Boolean) } def jats? = false sig { returns(T::Boolean) } def epub? = false sig { returns(T::Boolean) } def pst? = false sig { returns(T::Boolean) } def code? = false # @param hash [Hash] deserialized from the native extension # @return [self] sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) } def self.from_hash(hash) new(value: hash[:_0] || hash["_0"]) end end # Variant FormatMetadataFictionBook of the FormatMetadata sum type. FormatMetadataFictionBook = Data.define(:value) do include FormatMetadata extend T::Sig # @return [FictionBookMetadata] sig { returns(FictionBookMetadata) } def value = super # rubocop:disable Lint/UselessMethodDefinition sig { returns(T::Boolean) } def pdf? 
= false sig { returns(T::Boolean) } def docx? = false sig { returns(T::Boolean) } def excel? = false sig { returns(T::Boolean) } def email? = false sig { returns(T::Boolean) } def pptx? = false sig { returns(T::Boolean) } def archive? = false sig { returns(T::Boolean) } def image? = false sig { returns(T::Boolean) } def xml? = false sig { returns(T::Boolean) } def text? = false sig { returns(T::Boolean) } def html? = false sig { returns(T::Boolean) } def ocr? = false sig { returns(T::Boolean) } def csv? = false sig { returns(T::Boolean) } def bibtex? = false sig { returns(T::Boolean) } def citation? = false sig { returns(T::Boolean) } def fiction_book? = true sig { returns(T::Boolean) } def dbf? = false sig { returns(T::Boolean) } def jats? = false sig { returns(T::Boolean) } def epub? = false sig { returns(T::Boolean) } def pst? = false sig { returns(T::Boolean) } def code? = false # @param hash [Hash] deserialized from the native extension # @return [self] sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) } def self.from_hash(hash) new(value: hash[:_0] || hash["_0"]) end end # Variant FormatMetadataDbf of the FormatMetadata sum type. FormatMetadataDbf = Data.define(:value) do include FormatMetadata extend T::Sig # @return [DbfMetadata] sig { returns(DbfMetadata) } def value = super # rubocop:disable Lint/UselessMethodDefinition sig { returns(T::Boolean) } def pdf? = false sig { returns(T::Boolean) } def docx? = false sig { returns(T::Boolean) } def excel? = false sig { returns(T::Boolean) } def email? = false sig { returns(T::Boolean) } def pptx? = false sig { returns(T::Boolean) } def archive? = false sig { returns(T::Boolean) } def image? = false sig { returns(T::Boolean) } def xml? = false sig { returns(T::Boolean) } def text? = false sig { returns(T::Boolean) } def html? = false sig { returns(T::Boolean) } def ocr? = false sig { returns(T::Boolean) } def csv? = false sig { returns(T::Boolean) } def bibtex? 
= false sig { returns(T::Boolean) } def citation? = false sig { returns(T::Boolean) } def fiction_book? = false sig { returns(T::Boolean) } def dbf? = true sig { returns(T::Boolean) } def jats? = false sig { returns(T::Boolean) } def epub? = false sig { returns(T::Boolean) } def pst? = false sig { returns(T::Boolean) } def code? = false # @param hash [Hash] deserialized from the native extension # @return [self] sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) } def self.from_hash(hash) new(value: hash[:_0] || hash["_0"]) end end # Variant FormatMetadataJats of the FormatMetadata sum type. FormatMetadataJats = Data.define(:value) do include FormatMetadata extend T::Sig # @return [JatsMetadata] sig { returns(JatsMetadata) } def value = super # rubocop:disable Lint/UselessMethodDefinition sig { returns(T::Boolean) } def pdf? = false sig { returns(T::Boolean) } def docx? = false sig { returns(T::Boolean) } def excel? = false sig { returns(T::Boolean) } def email? = false sig { returns(T::Boolean) } def pptx? = false sig { returns(T::Boolean) } def archive? = false sig { returns(T::Boolean) } def image? = false sig { returns(T::Boolean) } def xml? = false sig { returns(T::Boolean) } def text? = false sig { returns(T::Boolean) } def html? = false sig { returns(T::Boolean) } def ocr? = false sig { returns(T::Boolean) } def csv? = false sig { returns(T::Boolean) } def bibtex? = false sig { returns(T::Boolean) } def citation? = false sig { returns(T::Boolean) } def fiction_book? = false sig { returns(T::Boolean) } def dbf? = false sig { returns(T::Boolean) } def jats? = true sig { returns(T::Boolean) } def epub? = false sig { returns(T::Boolean) } def pst? = false sig { returns(T::Boolean) } def code? 
= false # @param hash [Hash] deserialized from the native extension # @return [self] sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) } def self.from_hash(hash) new(value: hash[:_0] || hash["_0"]) end end # Variant FormatMetadataEpub of the FormatMetadata sum type. FormatMetadataEpub = Data.define(:value) do include FormatMetadata extend T::Sig # @return [EpubMetadata] sig { returns(EpubMetadata) } def value = super # rubocop:disable Lint/UselessMethodDefinition sig { returns(T::Boolean) } def pdf? = false sig { returns(T::Boolean) } def docx? = false sig { returns(T::Boolean) } def excel? = false sig { returns(T::Boolean) } def email? = false sig { returns(T::Boolean) } def pptx? = false sig { returns(T::Boolean) } def archive? = false sig { returns(T::Boolean) } def image? = false sig { returns(T::Boolean) } def xml? = false sig { returns(T::Boolean) } def text? = false sig { returns(T::Boolean) } def html? = false sig { returns(T::Boolean) } def ocr? = false sig { returns(T::Boolean) } def csv? = false sig { returns(T::Boolean) } def bibtex? = false sig { returns(T::Boolean) } def citation? = false sig { returns(T::Boolean) } def fiction_book? = false sig { returns(T::Boolean) } def dbf? = false sig { returns(T::Boolean) } def jats? = false sig { returns(T::Boolean) } def epub? = true sig { returns(T::Boolean) } def pst? = false sig { returns(T::Boolean) } def code? = false # @param hash [Hash] deserialized from the native extension # @return [self] sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) } def self.from_hash(hash) new(value: hash[:_0] || hash["_0"]) end end # Variant FormatMetadataPst of the FormatMetadata sum type. FormatMetadataPst = Data.define(:value) do include FormatMetadata extend T::Sig # @return [PstMetadata] sig { returns(PstMetadata) } def value = super # rubocop:disable Lint/UselessMethodDefinition sig { returns(T::Boolean) } def pdf? = false sig { returns(T::Boolean) } def docx? 
= false sig { returns(T::Boolean) } def excel? = false sig { returns(T::Boolean) } def email? = false sig { returns(T::Boolean) } def pptx? = false sig { returns(T::Boolean) } def archive? = false sig { returns(T::Boolean) } def image? = false sig { returns(T::Boolean) } def xml? = false sig { returns(T::Boolean) } def text? = false sig { returns(T::Boolean) } def html? = false sig { returns(T::Boolean) } def ocr? = false sig { returns(T::Boolean) } def csv? = false sig { returns(T::Boolean) } def bibtex? = false sig { returns(T::Boolean) } def citation? = false sig { returns(T::Boolean) } def fiction_book? = false sig { returns(T::Boolean) } def dbf? = false sig { returns(T::Boolean) } def jats? = false sig { returns(T::Boolean) } def epub? = false sig { returns(T::Boolean) } def pst? = true sig { returns(T::Boolean) } def code? = false # @param hash [Hash] deserialized from the native extension # @return [self] sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) } def self.from_hash(hash) new(value: hash[:_0] || hash["_0"]) end end # Variant FormatMetadataCode of the FormatMetadata sum type. FormatMetadataCode = Data.define(:value) do include FormatMetadata extend T::Sig # @return [String] sig { returns(String) } def value = super # rubocop:disable Lint/UselessMethodDefinition sig { returns(T::Boolean) } def pdf? = false sig { returns(T::Boolean) } def docx? = false sig { returns(T::Boolean) } def excel? = false sig { returns(T::Boolean) } def email? = false sig { returns(T::Boolean) } def pptx? = false sig { returns(T::Boolean) } def archive? = false sig { returns(T::Boolean) } def image? = false sig { returns(T::Boolean) } def xml? = false sig { returns(T::Boolean) } def text? = false sig { returns(T::Boolean) } def html? = false sig { returns(T::Boolean) } def ocr? = false sig { returns(T::Boolean) } def csv? = false sig { returns(T::Boolean) } def bibtex? = false sig { returns(T::Boolean) } def citation? 
= false sig { returns(T::Boolean) } def fiction_book? = false sig { returns(T::Boolean) } def dbf? = false sig { returns(T::Boolean) } def jats? = false sig { returns(T::Boolean) } def epub? = false sig { returns(T::Boolean) } def pst? = false sig { returns(T::Boolean) } def code? = true # @param hash [Hash] deserialized from the native extension # @return [self] sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) } def self.from_hash(hash) new(value: hash[:_0] || hash["_0"]) end end end module Kreuzberg # Bounding geometry for an OCR element. # # Supports both axis-aligned rectangles (from Tesseract) and 4-point quadrilaterals # (from PaddleOCR and rotated text detection). module OcrBoundingGeometry extend T::Helpers extend T::Sig interface! # Dispatch from a Hash to the appropriate variant constructor. # @param hash [Hash] with discriminator field and variant-specific fields # @return [variant_class] an instance of the appropriate variant sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.untyped) } def self.from_hash(hash) discriminator = hash[:type] || hash["type"] case discriminator when "rectangle" then OcrBoundingGeometryRectangle.from_hash(hash) when "quadrilateral" then OcrBoundingGeometryQuadrilateral.from_hash(hash) else raise "Unknown discriminator: #{discriminator}" end end end # Axis-aligned bounding box (typical for Tesseract output). 
OcrBoundingGeometryRectangle = Data.define(:left, :top, :width, :height) do include OcrBoundingGeometry extend T::Sig # Left x-coordinate in pixels sig { returns(Integer) } def left = super # rubocop:disable Lint/UselessMethodDefinition # Top y-coordinate in pixels sig { returns(Integer) } def top = super # rubocop:disable Lint/UselessMethodDefinition # Width in pixels sig { returns(Integer) } def width = super # rubocop:disable Lint/UselessMethodDefinition # Height in pixels sig { returns(Integer) } def height = super # rubocop:disable Lint/UselessMethodDefinition sig { returns(T::Boolean) } def rectangle? = true sig { returns(T::Boolean) } def quadrilateral? = false # @param hash [Hash] deserialized from the native extension # @return [self] sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) } def self.from_hash(hash) new(left: hash[:left] || hash["left"], top: hash[:top] || hash["top"], width: hash[:width] || hash["width"], height: hash[:height] || hash["height"]) end end # 4-point quadrilateral for rotated/skewed text (PaddleOCR). # # Points are in clockwise order starting from top-left: # `[top_left, top_right, bottom_right, bottom_left]` OcrBoundingGeometryQuadrilateral = Data.define(:points) do include OcrBoundingGeometry extend T::Sig # Four corner points as `[(x, y), ...]` in clockwise order sig { returns(String) } def points = super # rubocop:disable Lint/UselessMethodDefinition sig { returns(T::Boolean) } def rectangle? = false sig { returns(T::Boolean) } def quadrilateral? = true # @param hash [Hash] deserialized from the native extension # @return [self] sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) } def self.from_hash(hash) new(points: hash[:points] || hash["points"]) end end end module Kreuzberg # A single line in a unified-diff hunk. # # Defined here (rather than only in `crate::diff`) so `RevisionDelta` can # reference it unconditionally, without requiring the `diff` Cargo feature. 
# `crate::diff` re-exports this type verbatim.
module DiffLine
  extend T::Helpers
  extend T::Sig

  interface!

  # Picks the variant class named by the `kind` entry (Symbol or String key)
  # and delegates construction to that class's `from_hash`.
  # @param hash [Hash] with discriminator field and variant-specific fields
  # @return [variant_class] an instance of the appropriate variant
  # @raise [RuntimeError] when the discriminator matches no known variant
  sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.untyped) }
  def self.from_hash(hash)
    discriminator = hash[:kind] || hash["kind"]
    variant =
      case discriminator
      when "context" then DiffLineContext
      when "added" then DiffLineAdded
      when "removed" then DiffLineRemoved
      else raise "Unknown discriminator: #{discriminator}"
      end
    variant.from_hash(hash)
  end
end

# Unchanged context line.
DiffLineContext = Data.define(:value) do
  include DiffLine
  extend T::Sig

  # Payload wrapped by this variant.
  # @return [String]
  sig { returns(String) }
  def value = super # rubocop:disable Lint/UselessMethodDefinition

  # Variant predicates: `context?` is the only one that returns true.
  sig { returns(T::Boolean) }
  def context? = true
  sig { returns(T::Boolean) }
  def added? = false
  sig { returns(T::Boolean) }
  def removed? = false

  # Builds this variant from a Hash, reading the payload under `:_0` or `"_0"`.
  # @param hash [Hash] deserialized from the native extension
  # @return [self]
  sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) }
  def self.from_hash(hash)
    payload = hash[:_0] || hash["_0"]
    new(value: payload)
  end
end

# Line added in the "after" version.
DiffLineAdded = Data.define(:value) do
  include DiffLine
  extend T::Sig

  # Payload wrapped by this variant.
  # @return [String]
  sig { returns(String) }
  def value = super # rubocop:disable Lint/UselessMethodDefinition

  # Variant predicates: `added?` is the only one that returns true.
  sig { returns(T::Boolean) }
  def context? = false
  sig { returns(T::Boolean) }
  def added? = true
  sig { returns(T::Boolean) }
  def removed? = false

  # Builds this variant from a Hash, reading the payload under `:_0` or `"_0"`.
  # @param hash [Hash] deserialized from the native extension
  # @return [self]
  sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) }
  def self.from_hash(hash)
    payload = hash[:_0] || hash["_0"]
    new(value: payload)
  end
end

# Line removed from the "before" version.
DiffLineRemoved = Data.define(:value) do include DiffLine extend T::Sig # @return [String] sig { returns(String) } def value = super # rubocop:disable Lint/UselessMethodDefinition sig { returns(T::Boolean) } def context? = false sig { returns(T::Boolean) } def added? = false sig { returns(T::Boolean) } def removed? = true # @param hash [Hash] deserialized from the native extension # @return [self] sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) } def self.from_hash(hash) new(value: hash[:_0] || hash["_0"]) end end end module Kreuzberg # Best-effort document location for a revision. module RevisionAnchor extend T::Helpers extend T::Sig interface! # Dispatch from a Hash to the appropriate variant constructor. # @param hash [Hash] with discriminator field and variant-specific fields # @return [variant_class] an instance of the appropriate variant sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.untyped) } def self.from_hash(hash) discriminator = hash[:type] || hash["type"] case discriminator when "paragraph" then RevisionAnchorParagraph.from_hash(hash) when "table_cell" then RevisionAnchorTableCell.from_hash(hash) when "page" then RevisionAnchorPage.from_hash(hash) when "slide" then RevisionAnchorSlide.from_hash(hash) when "sheet" then RevisionAnchorSheet.from_hash(hash) else raise "Unknown discriminator: #{discriminator}" end end end # Body paragraph, identified by its zero-based index in the document flow. RevisionAnchorParagraph = Data.define(:index) do include RevisionAnchor extend T::Sig # Zero-based index of the paragraph in document order. sig { returns(Integer) } def index = super # rubocop:disable Lint/UselessMethodDefinition sig { returns(T::Boolean) } def paragraph? = true sig { returns(T::Boolean) } def table_cell? = false sig { returns(T::Boolean) } def page? = false sig { returns(T::Boolean) } def slide? = false sig { returns(T::Boolean) } def sheet? 
= false # @param hash [Hash] deserialized from the native extension # @return [self] sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) } def self.from_hash(hash) new(index: hash[:index] || hash["index"]) end end # Cell inside a table. RevisionAnchorTableCell = Data.define(:row, :col, :table_index) do include RevisionAnchor extend T::Sig # Zero-based row index within the table. sig { returns(Integer) } def row = super # rubocop:disable Lint/UselessMethodDefinition # Zero-based column index within the table. sig { returns(Integer) } def col = super # rubocop:disable Lint/UselessMethodDefinition # Zero-based index of the table in document order. sig { returns(Integer) } def table_index = super # rubocop:disable Lint/UselessMethodDefinition sig { returns(T::Boolean) } def paragraph? = false sig { returns(T::Boolean) } def table_cell? = true sig { returns(T::Boolean) } def page? = false sig { returns(T::Boolean) } def slide? = false sig { returns(T::Boolean) } def sheet? = false # @param hash [Hash] deserialized from the native extension # @return [self] sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) } def self.from_hash(hash) new(row: hash[:row] || hash["row"], col: hash[:col] || hash["col"], table_index: hash[:table_index] || hash["table_index"]) end end # Page, identified by its zero-based index. RevisionAnchorPage = Data.define(:index) do include RevisionAnchor extend T::Sig # Zero-based page index. sig { returns(Integer) } def index = super # rubocop:disable Lint/UselessMethodDefinition sig { returns(T::Boolean) } def paragraph? = false sig { returns(T::Boolean) } def table_cell? = false sig { returns(T::Boolean) } def page? = true sig { returns(T::Boolean) } def slide? = false sig { returns(T::Boolean) } def sheet? 
= false # @param hash [Hash] deserialized from the native extension # @return [self] sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) } def self.from_hash(hash) new(index: hash[:index] || hash["index"]) end end # Presentation slide, identified by its zero-based index. RevisionAnchorSlide = Data.define(:index) do include RevisionAnchor extend T::Sig # Zero-based slide index. sig { returns(Integer) } def index = super # rubocop:disable Lint/UselessMethodDefinition sig { returns(T::Boolean) } def paragraph? = false sig { returns(T::Boolean) } def table_cell? = false sig { returns(T::Boolean) } def page? = false sig { returns(T::Boolean) } def slide? = true sig { returns(T::Boolean) } def sheet? = false # @param hash [Hash] deserialized from the native extension # @return [self] sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) } def self.from_hash(hash) new(index: hash[:index] || hash["index"]) end end # Spreadsheet cell or range, identified by sheet index and optional name. RevisionAnchorSheet = Data.define(:index, :name) do include RevisionAnchor extend T::Sig # Zero-based sheet index. sig { returns(Integer) } def index = super # rubocop:disable Lint/UselessMethodDefinition # Sheet display name when available. sig { returns(T.nilable(String)) } def name = super # rubocop:disable Lint/UselessMethodDefinition sig { returns(T::Boolean) } def paragraph? = false sig { returns(T::Boolean) } def table_cell? = false sig { returns(T::Boolean) } def page? = false sig { returns(T::Boolean) } def slide? = false sig { returns(T::Boolean) } def sheet? = true # @param hash [Hash] deserialized from the native extension # @return [self] sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) } def self.from_hash(hash) new(index: hash[:index] || hash["index"], name: hash[:name] || hash["name"]) end end end