This commit is contained in:
54
docs/snippets/elixir/core/batch_extract_bytes_sync.exs
Normal file
54
docs/snippets/elixir/core/batch_extract_bytes_sync.exs
Normal file
@@ -0,0 +1,54 @@
|
||||
```elixir title="Elixir"
|
||||
# Extract from multiple binary inputs in a batch operation.
# Useful for processing multiple documents that are already in memory.

# Prepare binary data from multiple sources. The assertive `{:ok, _}` matches
# stop the script with a MatchError if a file cannot be read.
{:ok, pdf_data_1} = File.read("document1.pdf")
{:ok, pdf_data_2} = File.read("document2.pdf")
{:ok, pdf_data_3} = File.read("document3.pdf")

data_list = [pdf_data_1, pdf_data_2, pdf_data_3]

# Option 1: Use single MIME type for all inputs
{:ok, results} = Kreuzberg.batch_extract_bytes(data_list, "application/pdf")

# Process results
Enum.each(results, fn result ->
  # byte_size/1 counts bytes, not characters, so the value is labelled as bytes.
  IO.puts("Content length: #{byte_size(result.content)} bytes")
  IO.puts("MIME type: #{result.mime_type}")
  IO.puts("Tables found: #{length(result.tables)}")
  IO.puts("---")
end)

IO.puts("Total documents processed: #{length(results)}")
|
||||
|
||||
# Option 2: Use different MIME types for each input
# Each MIME type is meant to describe the input at the same position, so the
# data list and the MIME type list are built side by side. The second input is
# real HTML data here, matching the "text/html" entry.
{:ok, html_data} = File.read("page.html")

mixed_data = [pdf_data_1, html_data, pdf_data_3]
mime_types = ["application/pdf", "text/html", "application/pdf"]

{:ok, mixed_results} = Kreuzberg.batch_extract_bytes(mixed_data, mime_types)

IO.puts("Mixed documents processed: #{length(mixed_results)}")
|
||||
|
||||
# Option 3: Batch extraction with configuration
# The config enables OCR with the "tesseract" backend and turns on image extraction.
ocr_settings = %{"enabled" => true, "backend" => "tesseract"}
config = %Kreuzberg.ExtractionConfig{ocr: ocr_settings, extract_images: true}

configured_outcome = Kreuzberg.batch_extract_bytes(data_list, "application/pdf", config)

case configured_outcome do
  {:error, reason} ->
    IO.puts("Batch extraction failed: #{reason}")

  {:ok, extracted} ->
    IO.puts("Successfully extracted #{length(extracted)} documents")

    # Print a short preview (the slice 0..100) of each document's content.
    Enum.each(extracted, &IO.puts("Content: #{String.slice(&1.content, 0..100)}..."))
end
|
||||
|
||||
# Option 4: Using the bang variant (raises on error)
# The bang function returns the results directly, so a failure surfaces as an
# exception that is rescued below instead of an `{:error, reason}` tuple.
try do
  document_count =
    data_list
    |> Kreuzberg.batch_extract_bytes!("application/pdf")
    |> length()

  IO.puts("Extracted #{document_count} documents successfully")
rescue
  exception in Kreuzberg.Error ->
    IO.puts("Error: #{exception.message}")
end
|
||||
```
|
||||
14
docs/snippets/elixir/core/batch_extract_files_sync.exs
Normal file
14
docs/snippets/elixir/core/batch_extract_files_sync.exs
Normal file
@@ -0,0 +1,14 @@
|
||||
```elixir title="Elixir"
|
||||
# Extract several files in one batch call and print a short summary per result.
file_paths = ["document1.pdf", "document2.pdf", "document3.pdf"]

# Assertive match: the script stops with a MatchError if the batch call fails.
{:ok, results} = Kreuzberg.batch_extract_files(file_paths)

Enum.each(results, fn result ->
  # This value is the result's MIME type, not a file name, so label it as such.
  IO.puts("MIME type: #{result.mime_type}")
  # byte_size/1 counts bytes, not characters.
  IO.puts("Content length: #{byte_size(result.content)} bytes")
  IO.puts("Tables: #{length(result.tables)}")
  IO.puts("---")
end)

IO.puts("Total files processed: #{length(results)}")
|
||||
```
|
||||
163
docs/snippets/elixir/core/client_extract_multiple_files.exs
Normal file
163
docs/snippets/elixir/core/client_extract_multiple_files.exs
Normal file
@@ -0,0 +1,163 @@
|
||||
```elixir title="Elixir"
|
||||
# Client wrapper for batch file extraction
|
||||
# Provides error handling, logging, and result aggregation
|
||||
|
||||
defmodule BatchDocumentClient do
  @moduledoc """
  Client wrapper for batch file document extraction.

  Wraps `Kreuzberg.batch_extract_files/3` and adds debug logging, aggregate
  statistics, and per-result transformation.
  """

  # Logger's logging functions are macros, so the module must be required.
  require Logger

  alias Kreuzberg.ExtractionResult

  @doc """
  Extracts content from multiple files in a single batch call.

  Returns `{:ok, results}` when the batch call succeeds and
  `{:error, reason}` when it fails. Both outcomes are logged at debug level;
  error logging can be switched off with `:log_errors`.

  ## Options

    * `:mime_type` - MIME type for all files (optional; `nil` is passed through when omitted)
    * `:config` - ExtractionConfig struct with options (optional)
    * `:log_errors` - Whether to log extraction errors (default: `true`)
    * `:fail_fast` - Accepted, but currently has no effect in this wrapper

  ## Examples

      {:ok, results} =
        BatchDocumentClient.extract_files(
          ["doc1.pdf", "doc2.pdf", "doc3.pdf"],
          mime_type: "application/pdf"
        )
  """
  @spec extract_files([String.t()], keyword()) ::
          {:ok, [ExtractionResult.t()]} | {:error, String.t()}
  def extract_files(paths, opts \\ []) do
    mime_type = Keyword.get(opts, :mime_type)
    config = Keyword.get(opts, :config)
    log_errors? = Keyword.get(opts, :log_errors, true)

    case Kreuzberg.batch_extract_files(paths, mime_type, config) do
      {:ok, results} = ok ->
        Logger.debug("Successfully extracted #{length(results)} files")
        ok

      {:error, reason} = error ->
        if log_errors?, do: Logger.debug("Batch extraction error: #{reason}")
        error
    end
  end

  @doc """
  Extracts files and returns aggregated statistics.

  On success the returned map contains:

    * `:total_files` - number of results
    * `:total_content_size` - sum of `byte_size/1` over every result's content
    * `:total_tables` - sum of the lengths of every result's `tables`
    * `:total_images` - sum of the lengths of every result's `images` (`nil` counts as 0)
    * `:processing_time_ms` - elapsed monotonic time in milliseconds
    * `:avg_time_per_file_ms` - elapsed time divided (integer division) by the file count
    * `:results` - the raw extraction results

  Errors from `extract_files/2` are returned unchanged.
  """
  @spec extract_files_with_stats([String.t()], keyword()) ::
          {:ok, map()} | {:error, String.t()}
  def extract_files_with_stats(paths, opts \\ []) do
    start_time = System.monotonic_time(:millisecond)

    case extract_files(paths, opts) do
      {:ok, results} ->
        elapsed_ms = System.monotonic_time(:millisecond) - start_time
        file_count = length(results)

        {:ok,
         %{
           total_files: file_count,
           total_content_size: sum_by(results, &byte_size(&1.content)),
           total_tables: sum_by(results, &length(&1.tables)),
           total_images: sum_by(results, &length(&1.images || [])),
           processing_time_ms: elapsed_ms,
           # max/2 avoids a division by zero when the batch is empty.
           avg_time_per_file_ms: div(elapsed_ms, max(file_count, 1)),
           results: results
         }}

      {:error, _reason} = error ->
        error
    end
  end

  @doc """
  Extracts files and applies `transform_fn` to each extraction result.

  Every result is transformed, and an exception raised by `transform_fn` is
  logged at debug level. If at least one transform raised, the first failure
  is returned as `{:error, "Transform failed: ..."}`; otherwise the
  transformed values are returned in result order as `{:ok, values}`.
  Errors from `extract_files/2` are returned unchanged.
  """
  @spec extract_and_transform([String.t()], function(), keyword()) ::
          {:ok, [any()]} | {:error, String.t()}
  def extract_and_transform(paths, transform_fn, opts \\ []) do
    case extract_files(paths, opts) do
      {:ok, results} ->
        results
        |> Enum.map(&safe_transform(transform_fn, &1))
        |> collect_transformed()

      {:error, _reason} = error ->
        error
    end
  end

  # Sums `fun.(item)` over all items.
  defp sum_by(items, fun), do: Enum.reduce(items, 0, &(fun.(&1) + &2))

  # Applies the transform to one result, turning a raised exception into an
  # error tuple so that the remaining results are still processed.
  defp safe_transform(transform_fn, result) do
    {:ok, transform_fn.(result)}
  rescue
    error ->
      Logger.debug("Transform error: #{inspect(error)}")
      {:error, error}
  end

  # Returns every transformed value, or the first transform failure if any.
  defp collect_transformed(transformed) do
    case Enum.find(transformed, &match?({:error, _}, &1)) do
      nil -> {:ok, Enum.map(transformed, fn {:ok, value} -> value end)}
      {:error, error} -> {:error, "Transform failed: #{inspect(error)}"}
    end
  end
end
|
||||
|
||||
# Usage examples

# Extract multiple files and print the size of each result
batch_paths = ["doc1.pdf", "doc2.pdf", "doc3.pdf"]

case BatchDocumentClient.extract_files(batch_paths) do
  {:error, reason} ->
    IO.puts("Error: #{reason}")

  {:ok, extracted} ->
    Enum.each(extracted, &IO.puts("Extracted: #{byte_size(&1.content)} bytes"))
end

# Extract with statistics
stats_outcome = BatchDocumentClient.extract_files_with_stats(["doc1.pdf", "doc2.pdf"])

case stats_outcome do
  {:error, reason} ->
    IO.puts("Error: #{reason}")

  {:ok, report} ->
    IO.puts("Total files: #{report.total_files}")
    IO.puts("Total size: #{report.total_content_size} bytes")
    IO.puts("Processing time: #{report.processing_time_ms}ms")
end

# Extract and transform: reduce every result to a small summary map
transform = fn doc ->
  %{mime: doc.mime_type, size: byte_size(doc.content), tables: length(doc.tables)}
end

case BatchDocumentClient.extract_and_transform(["doc1.pdf", "doc2.pdf"], transform) do
  {:error, reason} ->
    IO.puts("Error: #{reason}")

  {:ok, summaries} ->
    IO.inspect(summaries)
end
|
||||
```
|
||||
107
docs/snippets/elixir/core/client_extract_single_file.exs
Normal file
107
docs/snippets/elixir/core/client_extract_single_file.exs
Normal file
@@ -0,0 +1,107 @@
|
||||
```elixir title="Elixir"
|
||||
# Reusable client pattern for single file extraction
|
||||
# Encapsulates extraction logic with consistent error handling
|
||||
|
||||
defmodule DocumentClient do
  @moduledoc """
  Client wrapper for single file document extraction.

  Provides a consistent interface around `Kreuzberg.extract_file/3` and
  `Kreuzberg.extract_file!/3`.
  """

  # Logger's logging functions are macros, so the module must be required.
  require Logger

  alias Kreuzberg.ExtractionResult

  @doc """
  Extracts content from a single file.

  Returns `{:ok, result}` or `{:error, reason}`; the outcome is logged at
  debug level either way.

  ## Options

    * `:mime_type` - MIME type of the file (optional; `nil` is passed through when omitted)
    * `:config` - ExtractionConfig struct with options (optional)

  ## Examples

      {:ok, result} = DocumentClient.extract_file("document.pdf")
      {:ok, result} = DocumentClient.extract_file("document.pdf", mime_type: "application/pdf")
  """
  @spec extract_file(String.t(), keyword()) ::
          {:ok, ExtractionResult.t()} | {:error, String.t()}
  def extract_file(path, opts \\ []) do
    {mime_type, config} = extraction_args(opts)

    case Kreuzberg.extract_file(path, mime_type, config) do
      {:ok, _result} = ok ->
        Logger.debug("Successfully extracted file: #{path}")
        ok

      {:error, reason} = error ->
        Logger.debug("Failed to extract file: #{path} - #{reason}")
        error
    end
  end

  @doc """
  Extracts content from a file, raising on error.

  Delegates to `Kreuzberg.extract_file!/3` and accepts the same options as
  `extract_file/2`. No logging is performed by this variant.
  """
  @spec extract_file!(String.t(), keyword()) :: ExtractionResult.t()
  def extract_file!(path, opts \\ []) do
    {mime_type, config} = extraction_args(opts)

    Kreuzberg.extract_file!(path, mime_type, config)
  end

  @doc """
  Extracts a file and returns its content together with basic statistics.

  On success the returned map contains `:content`, `:mime_type`, `:metadata`,
  `:table_count`, `:image_count` (`nil` images count as 0) and
  `:processing_time_ms` (elapsed monotonic time in milliseconds).
  Errors from `extract_file/2` are returned unchanged.
  """
  @spec extract_with_stats(String.t(), keyword()) ::
          {:ok, map()} | {:error, String.t()}
  def extract_with_stats(path, opts \\ []) do
    start_time = System.monotonic_time(:millisecond)

    case extract_file(path, opts) do
      {:ok, result} ->
        elapsed_ms = System.monotonic_time(:millisecond) - start_time

        {:ok,
         %{
           content: result.content,
           mime_type: result.mime_type,
           metadata: result.metadata,
           table_count: length(result.tables),
           image_count: length(result.images || []),
           processing_time_ms: elapsed_ms
         }}

      {:error, _reason} = error ->
        error
    end
  end

  # Reads the options shared by extract_file/2 and extract_file!/2.
  defp extraction_args(opts) do
    {Keyword.get(opts, :mime_type), Keyword.get(opts, :config)}
  end
end
|
||||
|
||||
# Usage examples

# Plain extraction: print the size of the extracted content
file_outcome = DocumentClient.extract_file("document.pdf")

case file_outcome do
  {:error, reason} ->
    IO.puts("Extraction failed: #{reason}")

  {:ok, extraction} ->
    IO.puts("Content length: #{byte_size(extraction.content)} bytes")
end

# Extract with statistics
stats_outcome = DocumentClient.extract_with_stats("document.pdf")

case stats_outcome do
  {:error, reason} ->
    IO.puts("Error: #{reason}")

  {:ok, report} ->
    IO.puts("Processing time: #{report.processing_time_ms}ms")
    IO.puts("Tables found: #{report.table_count}")
end
|
||||
```
|
||||
247
docs/snippets/elixir/core/client_extract_with_config.exs
Normal file
247
docs/snippets/elixir/core/client_extract_with_config.exs
Normal file
@@ -0,0 +1,247 @@
|
||||
```elixir title="Elixir"
|
||||
# Client wrapper with comprehensive configuration
|
||||
# Demonstrates advanced extraction patterns with OCR, chunking, and validation
|
||||
|
||||
defmodule ConfiguredDocumentClient do
  @moduledoc """
  Client wrapper for document extraction with advanced configuration.

  Each function builds a `Kreuzberg.ExtractionConfig` for one scenario (OCR,
  chunking, language detection, or all of them combined) and runs the
  extraction with it.
  """

  alias Kreuzberg.{ExtractionConfig, ExtractionResult}

  @doc """
  Extracts with OCR enabled, from either a file path or in-memory data.

  If `input` is a binary naming an existing file, it is extracted with
  `Kreuzberg.extract_file/3`. Otherwise it is passed to `Kreuzberg.extract/3`
  as document data, with the MIME type defaulting to `"application/pdf"`.

  Note that a path that does not exist on disk (for example a typo) takes the
  in-memory branch rather than producing a "file not found" error.

  ## Options

    * `:ocr_backend` - OCR backend name (default: `"tesseract"`)
    * `:force_ocr` - value of the config's `force_ocr` field (default: `false`)
    * `:mime_type` - MIME type of the input (optional)
  """
  @spec extract_with_ocr(String.t() | binary(), keyword()) ::
          {:ok, ExtractionResult.t()} | {:error, String.t()}
  def extract_with_ocr(input, opts \\ []) do
    config = %ExtractionConfig{
      ocr: %{
        "enabled" => true,
        "backend" => Keyword.get(opts, :ocr_backend, "tesseract")
      },
      force_ocr: Keyword.get(opts, :force_ocr, false)
    }

    mime_type = Keyword.get(opts, :mime_type)

    if is_binary(input) and File.exists?(input) do
      Kreuzberg.extract_file(input, mime_type, config)
    else
      Kreuzberg.extract(input, mime_type || "application/pdf", config)
    end
  end

  @doc """
  Extracts with text chunking for embedding or RAG pipelines.

  On success returns a map with `:content`, `:chunks` (an empty list when the
  result has none), `:chunk_count` and `:metadata`.

  ## Options

    * `:chunk_size` - value of the `"max_characters"` chunking setting (default: `1000`)
    * `:chunk_overlap` - value of the `"overlap"` chunking setting (default: `100`)
    * `:mime_type` - MIME type of the file (optional)
  """
  @spec extract_with_chunking(String.t(), keyword()) ::
          {:ok, map()} | {:error, String.t()}
  def extract_with_chunking(path, opts \\ []) do
    config = %ExtractionConfig{chunking: chunking_settings(opts)}
    mime_type = Keyword.get(opts, :mime_type)

    case Kreuzberg.extract_file(path, mime_type, config) do
      {:ok, result} ->
        chunks = result.chunks || []

        {:ok,
         %{
           content: result.content,
           chunks: chunks,
           chunk_count: length(chunks),
           metadata: result.metadata
         }}

      {:error, _reason} = error ->
        error
    end
  end

  @doc """
  Extracts with language detection and image extraction enabled.

  On success returns a map with `:content`, `:detected_languages`,
  `:mime_type`, `:tables` and `:images`; missing (`nil`) languages or images
  are returned as empty lists.

  ## Options

    * `:mime_type` - MIME type of the file (optional)
  """
  @spec extract_with_language_detection(String.t(), keyword()) ::
          {:ok, map()} | {:error, String.t()}
  def extract_with_language_detection(path, opts \\ []) do
    config = %ExtractionConfig{
      language_detection: %{"enabled" => true},
      extract_images: true
    }

    mime_type = Keyword.get(opts, :mime_type)

    case Kreuzberg.extract_file(path, mime_type, config) do
      {:ok, result} ->
        {:ok,
         %{
           content: result.content,
           detected_languages: result.detected_languages || [],
           mime_type: result.mime_type,
           tables: result.tables,
           images: result.images || []
         }}

      {:error, _reason} = error ->
        error
    end
  end

  @doc """
  Extracts with a combined OCR, chunking, caching and language detection config.

  On success returns a summary map with the file path, MIME type, content
  length in bytes, a content preview (the slice `0..200`), detected languages,
  table/image/chunk counts and metadata. On failure the reason is prefixed
  with `"Extraction failed: "`.

  ## Options

    * `:ocr_enabled` - enable OCR (default: `true`)
    * `:ocr_backend` - OCR backend name (default: `"tesseract"`)
    * `:chunk_size` - value of the `"max_characters"` chunking setting (default: `1000`)
    * `:chunk_overlap` - value of the `"overlap"` chunking setting (default: `100`)
    * `:detect_language` - enable language detection (default: `true`)
    * `:use_cache` - value of the config's `use_cache` field (default: `true`)
    * `:extract_images` - value of the config's `extract_images` field (default: `true`)
    * `:mime_type` - MIME type of the file (optional)

  Table extraction is always enabled.
  """
  @spec extract_with_full_config(String.t(), keyword()) ::
          {:ok, map()} | {:error, String.t()}
  def extract_with_full_config(path, opts \\ []) do
    config = %ExtractionConfig{
      # OCR settings
      ocr: %{
        "enabled" => Keyword.get(opts, :ocr_enabled, true),
        "backend" => Keyword.get(opts, :ocr_backend, "tesseract")
      },
      # Chunking for embeddings
      chunking: chunking_settings(opts),
      # Language detection
      language_detection: %{"enabled" => Keyword.get(opts, :detect_language, true)},
      # Cache results
      use_cache: Keyword.get(opts, :use_cache, true),
      # Extract various content types
      extract_images: Keyword.get(opts, :extract_images, true),
      extract_tables: true
    }

    mime_type = Keyword.get(opts, :mime_type)

    case Kreuzberg.extract_file(path, mime_type, config) do
      {:ok, result} ->
        {:ok,
         %{
           file_path: path,
           mime_type: result.mime_type,
           content_length: byte_size(result.content),
           content_preview: String.slice(result.content, 0..200),
           detected_languages: result.detected_languages || [],
           table_count: length(result.tables),
           image_count: length(result.images || []),
           chunk_count: length(result.chunks || []),
           metadata: result.metadata
         }}

      {:error, reason} ->
        {:error, "Extraction failed: #{reason}"}
    end
  end

  @doc """
  Validates a file before extraction.

  Returns `:ok` when the file exists and
  `Kreuzberg.detect_mime_type_from_path/1` succeeds for it, and
  `{:error, message}` otherwise.
  """
  @spec validate_file(String.t()) :: :ok | {:error, String.t()}
  def validate_file(path) do
    if File.exists?(path) do
      case Kreuzberg.detect_mime_type_from_path(path) do
        {:ok, _mime_type} -> :ok
        {:error, reason} -> {:error, "Cannot determine MIME type: #{reason}"}
      end
    else
      {:error, "File not found: #{path}"}
    end
  end

  @doc """
  Validates the file, then extracts it with the full configuration.

  Accepts the same options as `extract_with_full_config/2`. A validation
  failure is returned as-is and extraction is not attempted.
  """
  @spec extract_safely(String.t(), keyword()) ::
          {:ok, map()} | {:error, String.t()}
  def extract_safely(path, opts \\ []) do
    # A non-matching validation result falls through `with` unchanged, so no
    # `else` clause is needed.
    with :ok <- validate_file(path) do
      extract_with_full_config(path, opts)
    end
  end

  # Builds the chunking settings shared by the chunking and full-config paths.
  defp chunking_settings(opts) do
    %{
      "max_characters" => Keyword.get(opts, :chunk_size, 1000),
      "overlap" => Keyword.get(opts, :chunk_overlap, 100)
    }
  end
end
|
||||
|
||||
# Usage examples

# Extract with OCR
ocr_outcome =
  ConfiguredDocumentClient.extract_with_ocr("scanned_document.pdf", ocr_backend: "tesseract")

case ocr_outcome do
  {:error, reason} ->
    IO.puts("Error: #{reason}")

  {:ok, extraction} ->
    IO.puts("OCR extraction successful")
    IO.puts("Content: #{String.slice(extraction.content, 0..100)}...")
end

# Extract with chunking
chunk_opts = [chunk_size: 500, chunk_overlap: 50]

case ConfiguredDocumentClient.extract_with_chunking("document.pdf", chunk_opts) do
  {:error, reason} ->
    IO.puts("Error: #{reason}")

  {:ok, chunked} ->
    IO.puts("Chunks: #{chunked.chunk_count}")
end

# Extract with language detection
language_outcome = ConfiguredDocumentClient.extract_with_language_detection("multilingual.pdf")

case language_outcome do
  {:error, reason} ->
    IO.puts("Error: #{reason}")

  {:ok, detection} ->
    IO.puts("Detected languages: #{inspect(detection.detected_languages)}")
end

# Comprehensive extraction with validation
safe_opts = [ocr_enabled: true, detect_language: true, extract_images: true, use_cache: true]

case ConfiguredDocumentClient.extract_safely("document.pdf", safe_opts) do
  {:error, reason} ->
    IO.puts("Error: #{reason}")

  {:ok, report} ->
    IO.puts("File: #{report.file_path}")
    IO.puts("MIME: #{report.mime_type}")
    IO.puts("Size: #{report.content_length} bytes")
    IO.puts("Tables: #{report.table_count}")
    IO.puts("Languages: #{inspect(report.detected_languages)}")
end
|
||||
```
|
||||
31
docs/snippets/elixir/core/error_handling.exs
Normal file
31
docs/snippets/elixir/core/error_handling.exs
Normal file
@@ -0,0 +1,31 @@
|
||||
```elixir title="Elixir"
|
||||
# Example: Handling extraction errors
case Kreuzberg.extract_file("document.pdf") do
  {:ok, result} ->
    IO.puts("Successfully extracted content")
    # byte_size/1 counts bytes, not characters, so the value is labelled as bytes.
    IO.puts("Content length: #{byte_size(result.content)} bytes")

  {:error, reason} ->
    IO.puts("Extraction failed: #{reason}")
end

# Example: Handling with custom error message
result = Kreuzberg.extract_file("nonexistent.pdf")

case result do
  # The payload is not used here; the underscore prefix avoids an
  # unused-variable compiler warning.
  {:ok, _data} ->
    IO.puts("File processed successfully")

  {:error, error} ->
    # inspect/1 renders any error term, not only strings.
    IO.puts("Error details: #{inspect(error)}")
end

# Example: Extract with pattern matching
# The guard separates string error messages from any other error term.
case Kreuzberg.extract(<<>>, "application/pdf") do
  {:ok, result} ->
    IO.puts("Content: #{result.content}")

  {:error, msg} when is_binary(msg) ->
    IO.puts("Validation error: #{msg}")

  {:error, reason} ->
    IO.puts("Unknown error: #{inspect(reason)}")
end
|
||||
```
|
||||
13
docs/snippets/elixir/core/extract_bytes_sync.exs
Normal file
13
docs/snippets/elixir/core/extract_bytes_sync.exs
Normal file
@@ -0,0 +1,13 @@
|
||||
```elixir title="Elixir"
|
||||
# Load the document into memory, then extract from the binary data.
# Both assertive matches stop the script with a MatchError on failure.
{:ok, pdf_bytes} = File.read("document.pdf")
{:ok, extraction} = Kreuzberg.extract(pdf_bytes, "application/pdf")

text = extraction.content

IO.puts("Extracted content:")
IO.puts(text)
IO.puts("MIME type: #{extraction.mime_type}")
IO.puts("Tables found: #{length(extraction.tables)}")
|
||||
```
|
||||
12
docs/snippets/elixir/core/extract_file_async.exs
Normal file
12
docs/snippets/elixir/core/extract_file_async.exs
Normal file
@@ -0,0 +1,12 @@
|
||||
```elixir title="Elixir"
|
||||
# Start the extraction in a task and wait for its result.
task = Kreuzberg.extract_file_async("document.pdf")

# Task.await/1 uses the default timeout of 5000 ms and exits the caller if the
# task has not replied by then; pass a second argument for longer extractions.
{:ok, result} = Task.await(task)

content = result.content
table_count = length(result.tables)
metadata = result.metadata

# byte_size/1 counts bytes, not characters, so the value is labelled as bytes.
IO.puts("Content length: #{byte_size(content)} bytes")
IO.puts("Tables: #{table_count}")
IO.puts("Metadata keys: #{inspect(Map.keys(metadata))}")
|
||||
```
|
||||
11
docs/snippets/elixir/core/extract_file_sync.exs
Normal file
11
docs/snippets/elixir/core/extract_file_sync.exs
Normal file
@@ -0,0 +1,11 @@
|
||||
```elixir title="Elixir"
|
||||
# Extract a single file synchronously; the assertive match stops the script
# with a MatchError if extraction fails.
{:ok, result} = Kreuzberg.extract_file("document.pdf")

content = result.content
table_count = length(result.tables)
metadata = result.metadata

# byte_size/1 counts bytes, not characters, so the value is labelled as bytes.
IO.puts("Content length: #{byte_size(content)} bytes")
IO.puts("Tables: #{table_count}")
IO.puts("Metadata keys: #{inspect(Map.keys(metadata))}")
|
||||
```
|
||||
Reference in New Issue
Block a user