# This file is auto-generated by alef — DO NOT EDIT.
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
# To regenerate: alef generate
# To verify freshness: alef verify --exit-code
# Issues & docs: https://github.com/kreuzberg-dev/alef
defmodule Kreuzberg.TesseractConfig do
  @moduledoc """
  Tesseract OCR configuration.

  Provides fine-grained control over Tesseract OCR engine parameters.
  Most users can use the defaults, but these settings allow optimization
  for specific document types (invoices, handwriting, etc.).

  ## Defaults

  `%Kreuzberg.TesseractConfig{}` is fully populated: every field has a default
  declared in `defstruct/1` below, and `preprocessing` is the only field that
  defaults to `nil`.

  ## JSON encoding

  The struct implements `Jason.Encoder`. Fields whose value is `nil` are
  omitted from the encoded object; every other field is emitted under its
  field name.
  """

  @typedoc "Tesseract OCR configuration."
  @type t :: %__MODULE__{
          language: String.t() | nil,
          psm: integer(),
          output_format: String.t() | nil,
          oem: integer(),
          min_confidence: float(),
          preprocessing: map() | nil,
          enable_table_detection: boolean(),
          table_min_confidence: float(),
          table_column_threshold: integer(),
          table_row_threshold_ratio: float(),
          use_cache: boolean(),
          classify_use_pre_adapted_templates: boolean(),
          language_model_ngram_on: boolean(),
          tessedit_dont_blkrej_good_wds: boolean(),
          tessedit_dont_rowrej_good_wds: boolean(),
          tessedit_enable_dict_correction: boolean(),
          tessedit_char_whitelist: String.t() | nil,
          tessedit_char_blacklist: String.t() | nil,
          tessedit_use_primary_params_model: boolean(),
          textord_space_size_is_variable: boolean(),
          thresholding_method: boolean()
        }

  # The confidence defaults are written as float literals (`0.0`, not `0`) so
  # that a default struct satisfies the `float()` fields declared in `t()`.
  defstruct language: "eng",
            psm: 3,
            output_format: "markdown",
            oem: 3,
            min_confidence: 0.0,
            preprocessing: nil,
            enable_table_detection: true,
            table_min_confidence: 0.0,
            table_column_threshold: 50,
            table_row_threshold_ratio: 0.5,
            use_cache: true,
            classify_use_pre_adapted_templates: true,
            language_model_ngram_on: false,
            tessedit_dont_blkrej_good_wds: true,
            tessedit_dont_rowrej_good_wds: true,
            tessedit_enable_dict_correction: true,
            tessedit_char_whitelist: "",
            tessedit_char_blacklist: "",
            tessedit_use_primary_params_model: true,
            textord_space_size_is_variable: true,
            thresholding_method: false

  defimpl Jason.Encoder do
    # Encodes the config as a JSON object, leaving out every field that is nil.
    @doc false
    def encode(value, opts) do
      # Converting to a plain map drops the `__struct__` key; the filter then
      # removes unset (nil) fields so they never appear in the payload.
      present =
        for {field, setting} <- Map.from_struct(value), setting != nil, into: %{} do
          {field, setting}
        end

      Jason.Encode.map(present, opts)
    end
  end
end
|