This commit is contained in:
155
docs/snippets/elixir/utils/error_handling.exs
Normal file
155
docs/snippets/elixir/utils/error_handling.exs
Normal file
@@ -0,0 +1,155 @@
|
||||
# Error handling patterns for Kreuzberg extraction
|
||||
defmodule ErrorHandlingUtils do
  @moduledoc """
  Error handling helpers built around `Kreuzberg.extract_file/3`: retrying,
  batch extraction, result validation, error formatting and metrics logging.
  """

  # Delay before the second attempt; doubled before every further attempt.
  @base_backoff_ms 100

  # Human-readable messages for the error atoms this module knows about.
  @error_messages %{
    file_not_found: "The specified file could not be found",
    invalid_format: "The file format is not supported",
    extraction_failed: "Failed to extract content from the file",
    timeout: "Extraction operation timed out",
    permission_denied: "Permission denied when accessing the file"
  }

  @doc """
  Safely extract a file with retry logic and error recovery.

  Calls `Kreuzberg.extract_file(file_path, nil, config)` up to `max_retries`
  times in total. Each failure is printed, and the function waits with
  exponential backoff (100ms, 200ms, 400ms, ...) before the next attempt.
  There is no wait after the final attempt.

  Returns `{:ok, result}` from the first successful attempt, or
  `{:error, message}` once all attempts are used up. The message includes the
  last error when at least one attempt was made. With `max_retries <= 0` no
  attempt is made at all.
  """
  @spec extract_with_retry(term(), term(), integer()) :: {:ok, term()} | {:error, String.t()}
  def extract_with_retry(file_path, config, max_retries \\ 3) when is_integer(max_retries) do
    do_extract_with_retry(file_path, config, max_retries, 1, nil)
  end

  # All attempts used up: report the failure together with the last error seen.
  defp do_extract_with_retry(_file_path, _config, max_retries, attempt, last_error)
       when attempt > max_retries do
    {:error, max_retries_message(max_retries, last_error)}
  end

  defp do_extract_with_retry(file_path, config, max_retries, attempt, _last_error) do
    case Kreuzberg.extract_file(file_path, nil, config) do
      {:ok, _result} = success ->
        success

      {:error, reason} ->
        IO.puts("Attempt #{attempt} failed: #{inspect(reason)}")

        # Only back off when another attempt follows; sleeping before giving
        # up would just delay the error.
        if attempt < max_retries, do: Process.sleep(backoff_ms(attempt))

        do_extract_with_retry(file_path, config, max_retries, attempt + 1, reason)
    end
  end

  # Exponential backoff: base delay doubled for each attempt already made.
  defp backoff_ms(attempt), do: @base_backoff_ms * Integer.pow(2, attempt - 1)

  # `nil` means no attempt was ever made (max_retries <= 0).
  defp max_retries_message(max_retries, nil), do: "Max retries (#{max_retries}) exceeded"

  defp max_retries_message(max_retries, last_error) do
    "Max retries (#{max_retries}) exceeded; last error: #{format_error(last_error)}"
  end

  @doc """
  Extract multiple files and collect results and errors separately.

  Returns `%{successes: [{file, data}], failures: [{file, reason}]}`; both
  lists keep the order of `files`. An empty `files` list yields two empty
  lists.
  """
  @spec extract_multiple(Enumerable.t(), term()) :: %{
          successes: [{term(), term()}],
          failures: [{term(), term()}]
        }
  def extract_multiple(files, config) do
    files
    |> Enum.reduce(%{successes: [], failures: []}, fn file, acc ->
      case Kreuzberg.extract_file(file, nil, config) do
        {:ok, data} -> %{acc | successes: [{file, data} | acc.successes]}
        {:error, reason} -> %{acc | failures: [{file, reason} | acc.failures]}
      end
    end)
    # Entries were prepended, so restore input order once at the end.
    |> then(fn %{successes: successes, failures: failures} ->
      %{successes: Enum.reverse(successes), failures: Enum.reverse(failures)}
    end)
  end

  @doc """
  Validate extraction result and return detailed error information.

  For `{:ok, data}`, checks that every entry of `required_fields` is a key of
  `data`. Field names given as strings or atoms match keys of either kind by
  name, so `"chunks"` is satisfied by a `:chunks` key. Returns `{:ok, data}`
  when nothing is missing, otherwise `{:error, message}` listing the missing
  fields.

  For `{:error, reason}`, returns `{:error, format_error(reason)}`.

  NOTE(review): the default list requires a `text` field, while
  `log_metrics/3` reads `content` from the result; confirm the default
  against the actual result struct.
  """
  @spec validate_result({:ok, map()} | {:error, term()}, [term()]) ::
          {:ok, map()} | {:error, String.t()}
  def validate_result(result, required_fields \\ ["text", "metadata"])

  def validate_result({:ok, data}, required_fields) do
    case Enum.reject(required_fields, &field_present?(data, &1)) do
      [] -> {:ok, data}
      missing -> {:error, "Missing required fields: #{inspect(missing)}"}
    end
  end

  def validate_result({:error, reason}, _required_fields) do
    {:error, format_error(reason)}
  end

  # A field is present when the exact key exists, or when a string/atom key
  # with the same name exists (no atoms are created from the input).
  defp field_present?(data, field) do
    Map.has_key?(data, field) or name_present?(data, field_name(field))
  end

  defp name_present?(_data, nil), do: false
  defp name_present?(data, name), do: Enum.any?(Map.keys(data), &(field_name(&1) == name))

  # Textual name of a string or atom key; `nil` for any other key type.
  defp field_name(key) when is_binary(key), do: key
  defp field_name(key) when is_atom(key), do: Atom.to_string(key)
  defp field_name(_key), do: nil

  @doc """
  Format errors into human-readable messages.

  Strings are returned unchanged, known error atoms map to a fixed message,
  unknown atoms become `"Unknown error: ..."`, and any other term is rendered
  with `inspect/1`.
  """
  @spec format_error(term()) :: String.t()
  def format_error(reason) when is_binary(reason), do: reason

  def format_error(reason) when is_atom(reason) do
    Map.get_lazy(@error_messages, reason, fn -> "Unknown error: #{inspect(reason)}" end)
  end

  def format_error(reason), do: inspect(reason)

  @doc """
  Log extraction metrics for debugging and monitoring.

  Prints a timestamped summary (file, status, duration) followed by the chunk
  count and text length on success, or the formatted error on failure.
  Returns `:ok`.
  """
  @spec log_metrics(term(), {:ok, term()} | {:error, term()}, term()) :: :ok
  def log_metrics(file_path, result, duration_ms) do
    IO.puts("""
    [#{DateTime.utc_now()}] Extraction Metrics
    - File: #{file_path}
    - Status: #{status_label(result)}
    - Duration: #{duration_ms}ms
    """)

    case result do
      {:ok, data} ->
        # `chunks` and `content` may be nil; treat them as empty.
        IO.puts("- Chunks: #{length(data.chunks || [])}")
        IO.puts("- Text length: #{String.length(data.content || "")}")

      {:error, reason} ->
        IO.puts("- Error: #{format_error(reason)}")
    end
  end

  defp status_label({:ok, _}), do: "success"
  defp status_label({:error, _}), do: "failure"
end
|
||||
|
||||
# Example usage with error handling
|
||||
# Extraction config with chunking enabled (max_characters: 1000).
config = %Kreuzberg.ExtractionConfig{
  chunking: %{"enabled" => true, "max_characters" => 1000}
}

IO.puts("=== Extract with Retry ===")

case ErrorHandlingUtils.extract_with_retry("document.pdf", config, 3) do
  {:ok, result} ->
    IO.puts("Extraction succeeded")
    IO.inspect(result)

  {:error, reason} ->
    IO.puts("Extraction failed: #{reason}")
end

IO.puts("\n=== Extract Multiple Files ===")

files = ["doc1.pdf", "doc2.pdf", "doc3.pdf"]

# extract_multiple/2 always returns the successes/failures map, so there is
# nothing to branch on here.
results = ErrorHandlingUtils.extract_multiple(files, config)

IO.puts("Successes: #{length(results.successes)}")
IO.puts("Failures: #{length(results.failures)}")
IO.inspect(results)

IO.puts("\n=== Validate Result ===")

# validate_result/2 matches on the tagged tuple, so pass the return value of
# extract_file/3 as-is instead of unwrapping it; this also keeps a failed
# extraction from crashing the example.
extraction = Kreuzberg.extract_file("test.pdf", nil, config)

# "content" is the field log_metrics/3 reads from a successful result.
case ErrorHandlingUtils.validate_result(extraction, ["content", "chunks"]) do
  {:ok, _data} ->
    IO.puts("Validation passed")

  {:error, reason} ->
    IO.puts("Validation failed: #{reason}")
end
|
||||
Reference in New Issue
Block a user