Files
fil/packages/elixir/lib/kreuzberg/file_extraction_config.ex

97 lines
3.0 KiB
Elixir
Raw Normal View History

2026-06-01 23:40:55 +02:00
# This file is auto-generated by alef — DO NOT EDIT.
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
# To regenerate: alef generate
# To verify freshness: alef verify --exit-code
# Issues & docs: https://github.com/kreuzberg-dev/alef
defmodule Kreuzberg.FileExtractionConfig do
  @moduledoc """
  Per-file extraction configuration overrides for batch processing.

  All fields are optional and default to `nil` (the counterpart of the Rust
  `Option<T>` value `None`); `nil` means "use the batch-level default."

  This type is used with `batch_extract_files` and `batch_extract_bytes` to
  allow heterogeneous extraction settings within a single batch.

  ## Excluded Fields

  The following `ExtractionConfig` fields are batch-level only and cannot be
  overridden per file:

    * `max_concurrent_extractions` - controls batch parallelism
    * `use_cache` - global caching policy
    * `acceleration` - shared ONNX execution provider
    * `security_limits` - global archive security policy

  ## Example

      # Override just OCR forcing for a specific file
      config = %Kreuzberg.FileExtractionConfig{force_ocr: true}

  ## JSON encoding

  The `Jason.Encoder` implementation in this module omits every field whose
  value is `nil`, so only explicitly set overrides appear in the encoded
  object.
  """

  @typedoc "Per-file extraction configuration overrides for batch processing."
  @type t :: %__MODULE__{
          enable_quality_processing: boolean() | nil,
          ocr: map() | nil,
          force_ocr: boolean() | nil,
          force_ocr_pages: [non_neg_integer()] | nil,
          disable_ocr: boolean() | nil,
          chunking: map() | nil,
          content_filter: map() | nil,
          images: map() | nil,
          pdf_options: map() | nil,
          token_reduction: map() | nil,
          language_detection: map() | nil,
          pages: map() | nil,
          keywords: map() | nil,
          postprocessor: map() | nil,
          html_options: String.t() | nil,
          result_format: String.t() | nil,
          output_format: String.t() | nil,
          include_document_structure: boolean() | nil,
          layout: map() | nil,
          timeout_secs: non_neg_integer() | nil,
          tree_sitter: map() | nil,
          structured_extraction: map() | nil
        }

  # Every field defaults to nil, i.e. "no per-file override".
  defstruct [
    :enable_quality_processing,
    :ocr,
    :force_ocr,
    :force_ocr_pages,
    :disable_ocr,
    :chunking,
    :content_filter,
    :images,
    :pdf_options,
    :token_reduction,
    :language_detection,
    :pages,
    :keywords,
    :postprocessor,
    :html_options,
    :result_format,
    :output_format,
    :include_document_structure,
    :layout,
    :timeout_secs,
    :tree_sitter,
    :structured_extraction
  ]

  defimpl Jason.Encoder do
    @doc false
    def encode(value, opts) do
      # Drop unset (nil) fields so that only explicit overrides are serialized,
      # then encode the remaining fields as a plain JSON object.
      value
      |> Map.from_struct()
      |> Enum.reject(fn {_field, setting} -> is_nil(setting) end)
      |> Map.new()
      |> Jason.Encode.map(opts)
    end
  end
end