Files
fil/packages/elixir/lib/kreuzberg/extraction_config.ex

112 lines
3.6 KiB
Elixir
Raw Permalink Normal View History

2026-06-01 23:40:55 +02:00
# This file is auto-generated by alef — DO NOT EDIT.
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
# To regenerate: alef generate
# To verify freshness: alef verify --exit-code
# Issues & docs: https://github.com/kreuzberg-dev/alef
defmodule Kreuzberg.ExtractionConfig do
  @moduledoc """
  Main extraction configuration.

  This struct contains all configuration options for the extraction process.
  It can be loaded from TOML, YAML, or JSON files, or created programmatically.

  Every field has a default, so `%Kreuzberg.ExtractionConfig{}` is a valid
  configuration. When the struct is encoded through `Jason.Encoder`, fields
  whose value is `nil` are omitted from the encoded output.

  ## Example

      # Create with defaults
      config = %Kreuzberg.ExtractionConfig{}

      # Override selected options
      config = %Kreuzberg.ExtractionConfig{force_ocr: true, use_cache: false}

  Rust-side example (uses the `kreuzberg` crate API, not this module):

  ```rust
  use kreuzberg::core::config::ExtractionConfig;
  // Create with defaults
  let config = ExtractionConfig::default();
  // Load from TOML file
  // let config = ExtractionConfig::from_toml_file("kreuzberg.toml")?;
  ```
  """

  @typedoc "Main extraction configuration."
  @type t :: %__MODULE__{
          use_cache: boolean(),
          enable_quality_processing: boolean(),
          ocr: map() | nil,
          force_ocr: boolean(),
          force_ocr_pages: [non_neg_integer()] | nil,
          disable_ocr: boolean(),
          chunking: map() | nil,
          content_filter: map() | nil,
          images: map() | nil,
          pdf_options: map() | nil,
          token_reduction: map() | nil,
          language_detection: map() | nil,
          pages: map() | nil,
          keywords: map() | nil,
          postprocessor: map() | nil,
          html_options: String.t() | nil,
          html_output: map() | nil,
          extraction_timeout_secs: non_neg_integer() | nil,
          max_concurrent_extractions: non_neg_integer() | nil,
          # The struct default is the atom `:unified`, so atoms must be part of
          # the type alongside strings.
          result_format: atom() | String.t() | nil,
          security_limits: map() | nil,
          max_embedded_file_bytes: non_neg_integer() | nil,
          # The struct default is the atom `:plain`, so atoms must be part of
          # the type alongside strings.
          output_format: atom() | String.t() | nil,
          layout: map() | nil,
          use_layout_for_markdown: boolean(),
          include_document_structure: boolean(),
          acceleration: map() | nil,
          cache_namespace: String.t() | nil,
          cache_ttl_secs: non_neg_integer() | nil,
          email: map() | nil,
          concurrency: String.t() | nil,
          max_archive_depth: non_neg_integer(),
          tree_sitter: map() | nil,
          structured_extraction: map() | nil,
          cancel_token: String.t() | nil
        }

  defstruct use_cache: true,
            enable_quality_processing: true,
            ocr: nil,
            force_ocr: false,
            force_ocr_pages: nil,
            disable_ocr: false,
            chunking: nil,
            content_filter: nil,
            images: nil,
            pdf_options: nil,
            token_reduction: nil,
            language_detection: nil,
            pages: nil,
            keywords: nil,
            postprocessor: nil,
            html_options: nil,
            html_output: nil,
            extraction_timeout_secs: nil,
            max_concurrent_extractions: nil,
            result_format: :unified,
            security_limits: nil,
            max_embedded_file_bytes: nil,
            output_format: :plain,
            layout: nil,
            use_layout_for_markdown: false,
            include_document_structure: false,
            acceleration: nil,
            cache_namespace: nil,
            cache_ttl_secs: nil,
            email: nil,
            concurrency: nil,
            max_archive_depth: 0,
            tree_sitter: nil,
            structured_extraction: nil,
            cancel_token: nil

  defimpl Jason.Encoder do
    @doc false
    def encode(value, opts) do
      # Keep only the fields that are set: nil-valued fields are left out of
      # the encoded object entirely instead of being written as JSON null.
      present =
        for {key, val} <- Map.from_struct(value), not is_nil(val), into: %{} do
          {key, val}
        end

      # Hand the plain map back to the protocol so the map implementation of
      # Jason.Encoder produces the output.
      Jason.Encoder.encode(present, opts)
    end
  end
end