# Listing metadata: 156 lines, 4.3 KiB, Elixir
# Error handling patterns for Kreuzberg extraction
defmodule ErrorHandlingUtils do
  @moduledoc """
  Error-handling helpers built around `Kreuzberg.extract_file/3`.

  Covers retrying a single extraction, extracting a batch while keeping
  successes and failures apart, validating a result, turning error reasons
  into readable messages, and printing simple metrics.
  """

  # Delay before the first retry; doubled for each subsequent retry.
  @base_backoff_ms 100

  @doc """
  Safely extract a file with retry logic and error recovery.

  Calls `Kreuzberg.extract_file(file_path, nil, config)` up to `max_retries`
  times. Each failed attempt is printed, and the process sleeps with an
  exponentially growing delay (100ms, 200ms, 400ms, ...) between attempts.
  No delay is added after the final attempt.

  Returns `{:ok, result}` on the first success. Once every attempt has
  failed it returns `{:error, message}`, where `message` starts with
  `"Max retries (N) exceeded"` and, when an attempt actually ran, ends with
  the formatted reason of the last failure.
  """
  @spec extract_with_retry(term(), term(), integer()) :: {:ok, term()} | {:error, String.t()}
  def extract_with_retry(file_path, config, max_retries \\ 3) do
    do_extract_with_retry(file_path, config, max_retries, 1, nil)
  end

  @doc """
  Extract multiple files and collect results and errors separately.

  Returns `%{successes: [{file, data}], failures: [{file, reason}]}`; both
  lists keep the order of `files`.
  """
  @spec extract_multiple(Enumerable.t(), term()) :: %{
          successes: [{term(), term()}],
          failures: [{term(), term()}]
        }
  def extract_multiple(files, config) do
    collected =
      Enum.reduce(files, %{successes: [], failures: []}, fn file, acc ->
        case Kreuzberg.extract_file(file, nil, config) do
          {:ok, data} -> %{acc | successes: [{file, data} | acc.successes]}
          {:error, reason} -> %{acc | failures: [{file, reason} | acc.failures]}
        end
      end)

    # Entries were prepended while reducing, so restore the input order once.
    %{
      collected
      | successes: Enum.reverse(collected.successes),
        failures: Enum.reverse(collected.failures)
    }
  end

  @doc """
  Validate extraction result and return detailed error information.

  `result` is the tagged tuple returned by an extraction call:

    * `{:ok, data}` - returned unchanged when every entry of `required_fields`
      is a key of `data`, otherwise `{:error, "Missing required fields: ..."}`
      listing the absent ones.
    * `{:error, reason}` - returned as `{:error, format_error(reason)}`.

  A field name matches a key spelled either as a string or as an atom, so
  `"chunks"` is satisfied by both `%{"chunks" => _}` and `%{chunks: _}`.
  """
  # NOTE(review): log_metrics/3 reads `data.content`, not `text` - confirm the
  # default field names against the actual Kreuzberg result shape.
  @spec validate_result({:ok, term()} | {:error, term()}, Enumerable.t()) ::
          {:ok, term()} | {:error, String.t()}
  def validate_result(result, required_fields \\ ["text", "metadata"]) do
    case result do
      {:ok, data} ->
        case Enum.reject(required_fields, &field_present?(data, &1)) do
          [] -> {:ok, data}
          missing -> {:error, "Missing required fields: #{inspect(missing)}"}
        end

      {:error, reason} ->
        {:error, format_error(reason)}
    end
  end

  @doc """
  Format errors into human-readable messages.

  Strings are returned as-is, known error atoms map to fixed sentences,
  other atoms become `"Unknown error: ..."`, and any other term is rendered
  with `inspect/1`.
  """
  @spec format_error(term()) :: String.t()
  def format_error(reason) when is_binary(reason), do: reason
  def format_error(:file_not_found), do: "The specified file could not be found"
  def format_error(:invalid_format), do: "The file format is not supported"
  def format_error(:extraction_failed), do: "Failed to extract content from the file"
  def format_error(:timeout), do: "Extraction operation timed out"
  def format_error(:permission_denied), do: "Permission denied when accessing the file"
  def format_error(reason) when is_atom(reason), do: "Unknown error: #{inspect(reason)}"
  def format_error(reason), do: inspect(reason)

  @doc """
  Log extraction metrics for debugging and monitoring.

  Prints a timestamped summary (file, status, duration) followed by either
  the chunk count and text length of a successful result or the formatted
  error of a failed one. Returns `:ok`.
  """
  @spec log_metrics(term(), {:ok, term()} | {:error, term()}, term()) :: :ok
  def log_metrics(file_path, result, duration_ms) do
    status =
      case result do
        {:ok, _} -> "success"
        {:error, _} -> "failure"
      end

    IO.puts("""
    [#{DateTime.utc_now()}] Extraction Metrics
    - File: #{file_path}
    - Status: #{status}
    - Duration: #{duration_ms}ms
    """)

    case result do
      {:ok, data} ->
        # `chunks` / `content` may be nil; fall back to empty values.
        IO.puts("- Chunks: #{length(data.chunks || [])}")
        IO.puts("- Text length: #{String.length(data.content || "")}")

      {:error, reason} ->
        IO.puts("- Error: #{format_error(reason)}")
    end
  end

  # Retry budget exhausted: report it together with the last failure, if any.
  defp do_extract_with_retry(_file_path, _config, max_retries, attempt, last_error)
       when attempt > max_retries do
    {:error, retries_exceeded_message(max_retries, last_error)}
  end

  # Runs one attempt and recurses with the failure reason when it fails.
  defp do_extract_with_retry(file_path, config, max_retries, attempt, _last_error) do
    case Kreuzberg.extract_file(file_path, nil, config) do
      {:ok, result} ->
        {:ok, result}

      {:error, reason} ->
        IO.puts("Attempt #{attempt} failed: #{inspect(reason)}")

        # Waiting only makes sense when another attempt will follow.
        if attempt < max_retries, do: Process.sleep(backoff_ms(attempt))

        do_extract_with_retry(file_path, config, max_retries, attempt + 1, reason)
    end
  end

  # Builds the final error message; `nil` means no attempt was ever made.
  defp retries_exceeded_message(max_retries, nil) do
    "Max retries (#{max_retries}) exceeded"
  end

  defp retries_exceeded_message(max_retries, last_error) do
    "Max retries (#{max_retries}) exceeded; last error: #{format_error(last_error)}"
  end

  # Exponential backoff: base delay doubled for every attempt already made.
  defp backoff_ms(attempt), do: @base_backoff_ms * Integer.pow(2, attempt - 1)

  # True when `field` is a key of `data`, spelled as given or as its
  # string/atom counterpart. Existing keys are compared by name, so no atoms
  # are ever created from the field names.
  defp field_present?(data, field) when is_binary(field) do
    Map.has_key?(data, field) or atom_key_named?(data, field)
  end

  defp field_present?(data, field) when is_atom(field) do
    Map.has_key?(data, field) or Map.has_key?(data, Atom.to_string(field))
  end

  defp field_present?(data, field), do: Map.has_key?(data, field)

  # Map.keys/1 is used because structs do not implement Enumerable.
  defp atom_key_named?(data, name) do
    data
    |> Map.keys()
    |> Enum.any?(&(is_atom(&1) and Atom.to_string(&1) == name))
  end
end
|
|
|
|
# Example usage with error handling
config = %Kreuzberg.ExtractionConfig{
  chunking: %{"enabled" => true, "max_characters" => 1000}
}

IO.puts("=== Extract with Retry ===")

case ErrorHandlingUtils.extract_with_retry("document.pdf", config, 3) do
  {:ok, result} ->
    IO.puts("Extraction succeeded")
    IO.inspect(result)

  {:error, reason} ->
    IO.puts("Extraction failed: #{reason}")
end

IO.puts("\n=== Extract Multiple Files ===")

files = ["doc1.pdf", "doc2.pdf", "doc3.pdf"]

# extract_multiple/2 always returns the same map shape, so bind it directly.
results = ErrorHandlingUtils.extract_multiple(files, config)

IO.puts("Successes: #{length(results.successes)}")
IO.puts("Failures: #{length(results.failures)}")
IO.inspect(results)

IO.puts("\n=== Validate Result ===")

# validate_result/2 matches on the tagged tuple ({:ok, data} / {:error, reason}),
# so hand it the extraction outcome as-is instead of unwrapping it first; this
# also lets an extraction failure reach the error branch below.
validation =
  "test.pdf"
  |> Kreuzberg.extract_file(nil, config)
  |> ErrorHandlingUtils.validate_result(["text", "chunks"])

case validation do
  {:ok, _data} ->
    IO.puts("Validation passed")

  {:error, reason} ->
    IO.puts("Validation failed: #{reason}")
end
|