Files
fil/docs/snippets/elixir/config/advanced_config.md
Henrik Jess Nielsen b4c07d3693
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s
Nomad changes
2026-06-01 23:40:55 +02:00

1.2 KiB

config_json = Jason.encode!(%{
  "use_cache" => true,
  "enable_quality_processing" => true,
  "ocr" => %{
    "backend" => "tesseract",
    "language" => "eng"
  },
  "chunking" => %{
    "max_characters" => 1000,
    "overlap" => 200,
    "embedding" => %{
      "model" => %{
        "preset" => %{
          "name" => "balanced"
        }
      },
      "batch_size" => 32,
      "normalize" => true,
      "show_download_progress" => false
    }
  },
  "language_detection" => %{
    "enabled" => true,
    "min_confidence" => 0.8,
    "detect_multiple" => false
  },
  "keywords" => %{
    "algorithm" => "Yake",
    "max_keywords" => 10,
    "min_score" => 0.1,
    "ngram_range" => [1, 3],
    "language" => "en"
  },
  "token_reduction" => %{
    "mode" => "moderate",
    "preserve_important_words" => true
  },
  "postprocessor" => %{
    "enabled" => true
  }
})

{:ok, result} = Kreuzberg.extract_file_sync("document.pdf", "application/pdf", config_json)
IO.puts("Content: #{result.content}")

if result.detected_languages do
  IO.puts("Languages: #{inspect(result.detected_languages)}")
end

chunks_count = if result.chunks, do: length(result.chunks), else: 0
IO.puts("Chunks: #{chunks_count}")