# This file is auto-generated by alef — DO NOT EDIT.
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
# To regenerate: alef generate
# To verify freshness: alef verify --exit-code
# Issues & docs: https://github.com/kreuzberg-dev/alef
defmodule Kreuzberg.ChunkingConfig do
  @moduledoc """
  Chunking configuration.

  Configures text chunking for document content, including chunk size,
  overlap, trimming behavior, and optional embeddings.

  Set only the fields you want to override when building the struct. Every
  other field keeps the default declared by `defstruct`, so code written this
  way keeps working when fields are added later:

  ```elixir
  config = %Kreuzberg.ChunkingConfig{max_characters: 500}
  ```

  When the struct is encoded through `Jason.Encoder`, fields whose value is
  `nil` are left out of the encoded map.
  """

  # NOTE(review): this file is generated (see the header comment). Mirror any
  # change made here in the alef template, or regeneration will undo it.

  @typedoc "Chunking configuration."
  @type t :: %__MODULE__{
          max_characters: non_neg_integer(),
          overlap: non_neg_integer(),
          trim: boolean(),
          # The default below is the atom `:text`, so atoms must be part of the
          # type. Strings remain accepted for backward compatibility.
          chunker_type: atom() | String.t() | nil,
          embedding: map() | nil,
          preset: String.t() | nil,
          # The default below is the atom `:characters`; same reasoning as
          # `chunker_type`.
          sizing: atom() | String.t() | nil,
          prepend_heading_context: boolean(),
          topic_threshold: float() | nil
        }

  defstruct max_characters: 1_000,
            overlap: 200,
            trim: true,
            chunker_type: :text,
            embedding: nil,
            preset: nil,
            sizing: :characters,
            prepend_heading_context: false,
            topic_threshold: nil

  defimpl Jason.Encoder do
    # Encodes the struct as a plain map, omitting every field whose value is
    # `nil`. `opts` is the encoder option term handed in by Jason and is
    # passed through unchanged.
    @doc false
    def encode(value, opts) do
      value
      |> Map.from_struct()
      |> Enum.reject(fn {_key, field} -> is_nil(field) end)
      |> Map.new()
      # Encode the map directly instead of re-dispatching through the protocol.
      |> Jason.Encode.map(opts)
    end
  end
end