# This file is auto-generated by alef — DO NOT EDIT.
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
# To regenerate: alef generate
# To verify freshness: alef verify --exit-code
# Issues & docs: https://github.com/kreuzberg-dev/alef
defmodule Kreuzberg.ExtractionConfig do
  @moduledoc """
  Main extraction configuration.

  This struct contains all configuration options for the extraction process.
  It mirrors the Rust type `kreuzberg::core::config::ExtractionConfig`; on the
  Rust side the configuration can be loaded from TOML, YAML, or JSON files, or
  created programmatically.

  Every field has a default, so `%Kreuzberg.ExtractionConfig{}` is a valid
  configuration. Override only the fields you need.

  ## Examples

      iex> config = %Kreuzberg.ExtractionConfig{force_ocr: true}
      iex> config.force_ocr
      true
      iex> config.use_cache
      true
      iex> config.output_format
      :plain

  ## JSON encoding

  The `Jason.Encoder` implementation drops every field whose value is `nil`
  before encoding, so only explicitly set fields and fields with non-`nil`
  defaults appear in the resulting JSON object.
  """

  @typedoc """
  Main extraction configuration.

  `result_format` and `output_format` default to atoms (`:unified` and
  `:plain`), so both atoms and strings are accepted for those fields.
  """
  @type t :: %__MODULE__{
          use_cache: boolean(),
          enable_quality_processing: boolean(),
          ocr: map() | nil,
          force_ocr: boolean(),
          force_ocr_pages: [non_neg_integer()] | nil,
          disable_ocr: boolean(),
          chunking: map() | nil,
          content_filter: map() | nil,
          images: map() | nil,
          pdf_options: map() | nil,
          token_reduction: map() | nil,
          language_detection: map() | nil,
          pages: map() | nil,
          keywords: map() | nil,
          postprocessor: map() | nil,
          html_options: String.t() | nil,
          html_output: map() | nil,
          extraction_timeout_secs: non_neg_integer() | nil,
          max_concurrent_extractions: non_neg_integer() | nil,
          result_format: atom() | String.t() | nil,
          security_limits: map() | nil,
          max_embedded_file_bytes: non_neg_integer() | nil,
          output_format: atom() | String.t() | nil,
          layout: map() | nil,
          use_layout_for_markdown: boolean(),
          include_document_structure: boolean(),
          acceleration: map() | nil,
          cache_namespace: String.t() | nil,
          cache_ttl_secs: non_neg_integer() | nil,
          email: map() | nil,
          concurrency: String.t() | nil,
          max_archive_depth: non_neg_integer(),
          tree_sitter: map() | nil,
          structured_extraction: map() | nil,
          cancel_token: String.t() | nil
        }

  defstruct use_cache: true,
            enable_quality_processing: true,
            ocr: nil,
            force_ocr: false,
            force_ocr_pages: nil,
            disable_ocr: false,
            chunking: nil,
            content_filter: nil,
            images: nil,
            pdf_options: nil,
            token_reduction: nil,
            language_detection: nil,
            pages: nil,
            keywords: nil,
            postprocessor: nil,
            html_options: nil,
            html_output: nil,
            extraction_timeout_secs: nil,
            max_concurrent_extractions: nil,
            result_format: :unified,
            security_limits: nil,
            max_embedded_file_bytes: nil,
            output_format: :plain,
            layout: nil,
            use_layout_for_markdown: false,
            include_document_structure: false,
            acceleration: nil,
            cache_namespace: nil,
            cache_ttl_secs: nil,
            email: nil,
            concurrency: nil,
            max_archive_depth: 0,
            tree_sitter: nil,
            structured_extraction: nil,
            cancel_token: nil

  defimpl Jason.Encoder do
    # Encodes the config as a JSON object, omitting unset (`nil`) fields.
    # `false` and `0` are kept: only `nil` is filtered out.
    @doc false
    def encode(value, opts) do
      value
      |> Map.from_struct()
      |> Enum.reject(fn {_key, field} -> is_nil(field) end)
      |> Map.new()
      |> Jason.Encoder.encode(opts)
    end
  end
end