97 lines
3.0 KiB
Elixir
Generated
97 lines
3.0 KiB
Elixir
Generated
# This file is auto-generated by alef — DO NOT EDIT.
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
# To regenerate: alef generate
# To verify freshness: alef verify --exit-code
# Issues & docs: https://github.com/kreuzberg-dev/alef
defmodule Kreuzberg.FileExtractionConfig do
  @moduledoc """
  Per-file extraction configuration overrides for batch processing.

  Every field is optional and defaults to `nil`; `nil` means "use the
  batch-level default." This type is used with `batch_extract_files` and
  `batch_extract_bytes` to allow heterogeneous extraction settings within a
  single batch.

  When the struct is encoded with `Jason`, fields that are still `nil` are
  left out of the resulting JSON object, so only explicit overrides are
  serialized.

  ## Excluded Fields

  The following `ExtractionConfig` fields are batch-level only and
  cannot be overridden per file:

  - `max_concurrent_extractions` — controls batch parallelism
  - `use_cache` — global caching policy
  - `acceleration` — shared ONNX execution provider
  - `security_limits` — global archive security policy

  ## Example

      # Override just OCR forcing for a specific file
      config = %Kreuzberg.FileExtractionConfig{force_ocr: true}
  """

  @typedoc "Per-file extraction configuration overrides for batch processing."
  @type t :: %__MODULE__{
          enable_quality_processing: boolean() | nil,
          ocr: map() | nil,
          force_ocr: boolean() | nil,
          force_ocr_pages: [non_neg_integer()] | nil,
          disable_ocr: boolean() | nil,
          chunking: map() | nil,
          content_filter: map() | nil,
          images: map() | nil,
          pdf_options: map() | nil,
          token_reduction: map() | nil,
          language_detection: map() | nil,
          pages: map() | nil,
          keywords: map() | nil,
          postprocessor: map() | nil,
          html_options: String.t() | nil,
          result_format: String.t() | nil,
          output_format: String.t() | nil,
          include_document_structure: boolean() | nil,
          layout: map() | nil,
          timeout_secs: non_neg_integer() | nil,
          tree_sitter: map() | nil,
          structured_extraction: map() | nil
        }

  # Every field defaults to `nil` (no override).
  defstruct [
    :enable_quality_processing,
    :ocr,
    :force_ocr,
    :force_ocr_pages,
    :disable_ocr,
    :chunking,
    :content_filter,
    :images,
    :pdf_options,
    :token_reduction,
    :language_detection,
    :pages,
    :keywords,
    :postprocessor,
    :html_options,
    :result_format,
    :output_format,
    :include_document_structure,
    :layout,
    :timeout_secs,
    :tree_sitter,
    :structured_extraction
  ]

  defimpl Jason.Encoder do
    # Encodes the struct as a JSON object containing only the fields that were
    # explicitly set. Fields left at `nil` are dropped before encoding, so an
    # all-default struct serializes to `{}`. Note that `false` is a real
    # override and is kept; only `nil` is filtered out.
    @doc false
    def encode(value, opts) do
      value
      |> Map.from_struct()
      |> Enum.reject(fn {_key, field} -> is_nil(field) end)
      |> Map.new()
      |> Jason.Encode.map(opts)
    end
  end
end
|