This commit is contained in:
61
packages/elixir/lib/kreuzberg/ocr_quality_thresholds.ex
generated
Normal file
61
packages/elixir/lib/kreuzberg/ocr_quality_thresholds.ex
generated
Normal file
@@ -0,0 +1,61 @@
|
||||
# This file is auto-generated by alef — DO NOT EDIT.
|
||||
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
# To regenerate: alef generate
|
||||
# To verify freshness: alef verify --exit-code
|
||||
# Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
defmodule Kreuzberg.OcrQualityThresholds do
  @moduledoc """
  Quality thresholds for OCR fallback decisions and pipeline quality gating.

  All fields default to the values that match the previous hardcoded behavior,
  so a bare `%Kreuzberg.OcrQualityThresholds{}` (the Elixir counterpart of the
  `OcrQualityThresholds::default()` constructor this documentation was derived
  from) preserves existing semantics exactly.

  Override individual thresholds with struct update syntax:

      %Kreuzberg.OcrQualityThresholds{pipeline_min_quality: 0.75}

  The struct implements `Jason.Encoder`; fields set to `nil` are omitted from
  the encoded JSON object.
  """

  @typedoc "Quality thresholds for OCR fallback decisions and pipeline quality gating."
  @type t :: %__MODULE__{
          min_total_non_whitespace: non_neg_integer(),
          min_non_whitespace_per_page: float(),
          min_meaningful_word_len: non_neg_integer(),
          min_meaningful_words: non_neg_integer(),
          min_alnum_ratio: float(),
          min_garbage_chars: non_neg_integer(),
          max_fragmented_word_ratio: float(),
          critical_fragmented_word_ratio: float(),
          min_avg_word_length: float(),
          min_words_for_avg_length_check: non_neg_integer(),
          min_consecutive_repeat_ratio: float(),
          min_words_for_repeat_check: non_neg_integer(),
          substantive_min_chars: non_neg_integer(),
          non_text_min_chars: non_neg_integer(),
          alnum_ws_ratio_threshold: float(),
          pipeline_min_quality: float()
        }

  # Defaults for fields typed `float()` are written as float literals: in
  # Elixir `32` and `2` are integers and would not satisfy `t()`.
  defstruct min_total_non_whitespace: 64,
            min_non_whitespace_per_page: 32.0,
            min_meaningful_word_len: 4,
            min_meaningful_words: 3,
            min_alnum_ratio: 0.3,
            min_garbage_chars: 5,
            max_fragmented_word_ratio: 0.6,
            critical_fragmented_word_ratio: 0.8,
            min_avg_word_length: 2.0,
            min_words_for_avg_length_check: 50,
            min_consecutive_repeat_ratio: 0.08,
            min_words_for_repeat_check: 50,
            substantive_min_chars: 100,
            non_text_min_chars: 20,
            alnum_ws_ratio_threshold: 0.4,
            pipeline_min_quality: 0.5

  defimpl Jason.Encoder do
    # Encodes the struct as a plain JSON object. Fields whose value is `nil`
    # are dropped before delegating to the map encoder, so they do not appear
    # in the output at all (rather than being emitted as `null`).
    @doc false
    def encode(value, opts) do
      value
      |> Map.from_struct()
      |> Enum.reject(fn {_key, field} -> is_nil(field) end)
      |> Map.new()
      |> Jason.Encoder.encode(opts)
    end
  end
end
|
||||
Reference in New Issue
Block a user