This commit is contained in:
25
docs/snippets/elixir/api/batch_extract_bytes_sync.md
Normal file
25
docs/snippets/elixir/api/batch_extract_bytes_sync.md
Normal file
@@ -0,0 +1,25 @@
|
||||
```elixir title="Elixir"
|
||||
defmodule Example do
  @moduledoc """
  Batch extraction from in-memory bytes, fanned out with `Task.async_stream/3`.
  """

  # Upper bound for one extraction before its task is killed; tune for your workload.
  @task_timeout_ms 30_000

  @doc """
  Reads each file, extracts it with `Kreuzberg.extract_bytes_sync/3`, and prints
  one line per file.

  A file that cannot be read, has an unrecognised extension, or whose task
  exceeds the timeout is reported through the `{:error, reason}` branch.
  """
  def batch_extract_bytes do
    # Note: Batch extraction in Elixir is done via Task.async_stream over sync calls
    files = ["doc1.pdf", "doc2.docx", "report.pdf"]
    config = nil

    results =
      files
      |> Task.async_stream(&extract_one(&1, config),
        max_concurrency: 4,
        timeout: @task_timeout_ms,
        # Yield {:exit, :timeout} for a slow task instead of exiting the caller.
        on_timeout: :kill_task
      )
      |> Enum.map(fn
        {:ok, result} -> result
        {:exit, reason} -> {:error, "task exited: #{inspect(reason)}"}
      end)

    Enum.each(results, fn
      {:ok, content} -> IO.puts("Extracted: #{String.length(content)} chars")
      {:error, reason} -> IO.puts("Error: #{reason}")
    end)
  end

  # Reads one file and extracts it using the MIME type derived from its extension.
  # File.read/1 (not File.read!/1) keeps read failures as {:error, posix} tuples.
  defp extract_one(file, config) do
    with {:ok, mime_type} <- mime_type_for(file),
         {:ok, content} <- File.read(file) do
      Kreuzberg.extract_bytes_sync(content, mime_type, config)
    end
  end

  # Maps a file extension to the MIME type passed to the extractor.
  defp mime_type_for(file) do
    case file |> Path.extname() |> String.downcase() do
      ".pdf" ->
        {:ok, "application/pdf"}

      ".docx" ->
        {:ok, "application/vnd.openxmlformats-officedocument.wordprocessingml.document"}

      other ->
        {:error, "unsupported file extension #{inspect(other)} for #{file}"}
    end
  end
end
|
||||
```
|
||||
23
docs/snippets/elixir/api/batch_extract_files_sync.md
Normal file
23
docs/snippets/elixir/api/batch_extract_files_sync.md
Normal file
@@ -0,0 +1,23 @@
|
||||
```elixir title="Elixir"
|
||||
defmodule Example do
  @moduledoc """
  Batch extraction from file paths, fanned out with `Task.async_stream/3`.
  """

  # Upper bound for one extraction before its task is killed; tune for your workload.
  @task_timeout_ms 30_000

  @doc """
  Extracts each file with `Kreuzberg.extract_file_sync/3` (up to four at a time)
  and prints one line per file.

  A task that exceeds the timeout is reported through the `{:error, reason}`
  branch rather than exiting the caller.
  """
  def batch_extract_files do
    files = ["doc1.pdf", "doc2.docx", "report.pdf"]
    config = nil

    results =
      files
      |> Task.async_stream(
        fn file -> Kreuzberg.extract_file_sync(file, nil, config) end,
        max_concurrency: 4,
        timeout: @task_timeout_ms,
        # Yield {:exit, :timeout} for a slow task instead of exiting the caller.
        on_timeout: :kill_task
      )
      |> Enum.map(fn
        {:ok, result} -> result
        {:exit, reason} -> {:error, "task exited: #{inspect(reason)}"}
      end)

    Enum.each(results, fn
      {:ok, content} -> IO.puts("File extracted: #{String.length(content)} chars")
      {:error, reason} -> IO.puts("Error: #{reason}")
    end)
  end
end
|
||||
```
|
||||
30
docs/snippets/elixir/api/client_chunk_text.md
Normal file
30
docs/snippets/elixir/api/client_chunk_text.md
Normal file
@@ -0,0 +1,30 @@
|
||||
```elixir title="Elixir"
|
||||
defmodule Example do
  @moduledoc """
  Chunking text through the HTTP API with `Req`.
  """

  @doc """
  POSTs the text and a chunking config as JSON to the `/chunk` endpoint.

  Returns `{:ok, chunks}` for a 200 response whose body holds a `"chunks"` list,
  and `{:error, description}` for any other response or transport error.
  """
  def chunk_text_via_http do
    text = "Your long document text here..."

    config = %{
      "chunking" => %{
        "max_characters" => 800,
        "overlap" => 100,
        "chunker_type" => "Markdown"
      }
    }

    payload = %{"text" => text, "config" => config}

    # Req decodes JSON response bodies itself, so the body is matched as a map
    # here instead of being passed through Jason.decode/1 a second time.
    case Req.post("http://localhost:8000/chunk", json: payload) do
      {:ok, %{status: 200, body: %{"chunks" => chunks}}} when is_list(chunks) ->
        IO.puts("Created #{length(chunks)} chunks")
        {:ok, chunks}

      {:ok, %{status: status, body: body}} ->
        {:error, "unexpected response (HTTP #{status}): #{inspect(body)}"}

      error ->
        {:error, inspect(error)}
    end
  end
end
|
||||
```
|
||||
19
docs/snippets/elixir/api/client_extract_single_file.md
Normal file
19
docs/snippets/elixir/api/client_extract_single_file.md
Normal file
@@ -0,0 +1,19 @@
|
||||
```elixir title="Elixir"
|
||||
defmodule Example do
  @moduledoc """
  Extracting a single file through the HTTP API with `Req`.
  """

  @doc """
  Uploads `document.pdf` to the `/extract` endpoint as multipart form data.

  Returns `{:ok, body}` for a 200 response with a decoded map body, and
  `{:error, description}` when the file cannot be read, the request fails, or
  the response is not the expected shape.
  """
  def extract_via_http do
    file_path = "document.pdf"

    with {:ok, bytes} <- File.read(file_path),
         {:ok, %{status: 200, body: body}} when is_map(body) <- upload(file_path, bytes) do
      # Req has already decoded the JSON body, so no Jason.decode/1 is needed.
      IO.puts("Extracted content: #{body["content"]}")
      {:ok, body}
    else
      {:ok, %{status: status, body: body}} ->
        {:error, "unexpected response (HTTP #{status}): #{inspect(body)}"}

      error ->
        {:error, inspect(error)}
    end
  end

  # Sends the file bytes as a multipart part. `form:` would URL-encode the fields
  # and cannot carry a file, so `form_multipart:` is used instead.
  # NOTE(review): confirm the multipart field name the server expects.
  defp upload(file_path, bytes) do
    Req.post(
      "http://localhost:8000/extract",
      form_multipart: [file: {bytes, filename: Path.basename(file_path)}]
    )
  end
end
|
||||
```
|
||||
41
docs/snippets/elixir/api/combining_all_features.md
Normal file
41
docs/snippets/elixir/api/combining_all_features.md
Normal file
@@ -0,0 +1,41 @@
|
||||
```elixir title="Elixir"
|
||||
defmodule Example do
  @moduledoc """
  A single extraction call that enables several features at once.
  """

  @doc """
  Encodes a combined configuration to JSON, extracts `report.pdf` with it, and
  prints the outcome. Returns `:ok` on success and `:error` on failure.
  """
  def full_extraction_pipeline do
    # Each feature's settings are named separately, then assembled and encoded.
    ocr_settings = %{"backend" => "tesseract", "language" => "eng"}

    chunking_settings = %{
      "max_characters" => 800,
      "overlap" => 100,
      "chunker_type" => "Markdown",
      "prepend_heading_context" => true
    }

    settings = %{
      "use_cache" => true,
      "enable_quality_processing" => true,
      "force_ocr" => false,
      "ocr" => ocr_settings,
      "chunking" => chunking_settings,
      "output_format" => "Markdown",
      "include_document_structure" => true,
      "images" => %{"extract_images" => true},
      "language_detection" => %{"detect" => true}
    }

    config_json = Jason.encode!(settings)

    "report.pdf"
    |> Kreuzberg.extract_file_sync(nil, config_json)
    |> case do
      {:ok, extracted} ->
        IO.puts("Extraction successful")
        IO.puts("Content length: #{String.length(extracted)} chars")
        :ok

      {:error, why} ->
        IO.puts("Extraction failed: #{why}")
        :error
    end
  end
end
|
||||
```
|
||||
24
docs/snippets/elixir/api/error_handling.md
Normal file
24
docs/snippets/elixir/api/error_handling.md
Normal file
@@ -0,0 +1,24 @@
|
||||
```elixir title="Elixir"
|
||||
defmodule Example do
  @moduledoc """
  Handling the `{:error, reason}` results returned by an extraction call.
  """

  @doc """
  Extracts `document.txt` and prints either the content length or a message
  classified from the error reason.
  """
  def handle_extraction_errors do
    # The MIME type and config arguments are both left as nil here.
    case Kreuzberg.extract_file_sync("document.txt", nil, nil) do
      {:ok, result} ->
        IO.puts("Success: #{String.length(result)} chars")

      {:error, reason} when is_binary(reason) ->
        # Error is a string description
        IO.puts(describe_error(reason))

      {:error, reason} ->
        # Non-string reasons cannot be interpolated safely; inspect them instead.
        IO.puts("Extraction failed: #{inspect(reason)}")
    end
  end

  # String.contains?/2 is a remote call and is not allowed in a `when` guard,
  # so the substring checks run in a `cond` instead.
  @doc false
  def describe_error(msg) when is_binary(msg) do
    cond do
      String.contains?(msg, "unsupported") -> "Unsupported format: #{msg}"
      String.contains?(msg, "not found") -> "File not found: #{msg}"
      true -> "Extraction failed: #{msg}"
    end
  end
end
|
||||
```
|
||||
28
docs/snippets/elixir/api/error_handling_extract.md
Normal file
28
docs/snippets/elixir/api/error_handling_extract.md
Normal file
@@ -0,0 +1,28 @@
|
||||
```elixir title="Elixir"
|
||||
defmodule Example do
  @moduledoc """
  Extraction with every failure mode mapped to a descriptive `{:error, message}`.
  """

  @doc """
  Reads `path`, detects its MIME type from the bytes, and extracts the content.

  Returns `{:ok, result}` on success, or `{:error, message}` where `message`
  names the step that failed (read, MIME detection, or extraction).
  """
  def robust_extract(path) do
    # Each step returns a self-describing error, so no `else` block is needed.
    with {:ok, content} <- read_file(path),
         {:ok, mime_type} <- detect_mime_type(content) do
      extract(content, mime_type)
    end
  end

  # Reads the file directly instead of checking File.exists?/1 first; the
  # separate check was redundant and could race with the read.
  defp read_file(path) do
    case File.read(path) do
      {:ok, content} -> {:ok, content}
      {:error, reason} when reason in [:enoent, :enotdir] -> {:error, "File not found: #{path}"}
      {:error, reason} -> {:error, "Failed to read file: #{inspect(reason)}"}
    end
  end

  # Wraps the MIME detection call and labels its failure.
  defp detect_mime_type(content) do
    case Kreuzberg.detect_mime_type_from_bytes(content) do
      {:ok, mime_type} -> {:ok, mime_type}
      {:error, reason} -> {:error, "MIME detection failed: #{reason}"}
    end
  end

  # Wraps the extraction call and labels its failure.
  defp extract(content, mime_type) do
    case Kreuzberg.extract_bytes_sync(content, mime_type, nil) do
      {:ok, result} -> {:ok, result}
      {:error, reason} -> {:error, "Extraction failed: #{reason}"}
    end
  end
end
|
||||
```
|
||||
22
docs/snippets/elixir/api/extract_bytes_async.md
Normal file
22
docs/snippets/elixir/api/extract_bytes_async.md
Normal file
@@ -0,0 +1,22 @@
|
||||
```elixir title="Elixir"
|
||||
defmodule Example do
  @moduledoc """
  Extraction from in-memory bytes, run inside a `Task`.
  """

  @doc """
  Reads `document.pdf`, calls `Kreuzberg.extract_bytes_async/3` in a task,
  awaits it, and prints the outcome. Returns `:ok` or `:error`.
  """
  def extract_from_bytes_async do
    pdf_bytes = File.read!("document.pdf")
    options = nil

    pending =
      Task.async(fn ->
        Kreuzberg.extract_bytes_async(pdf_bytes, "application/pdf", options)
      end)

    # Block until the task replies, then branch on the tagged result.
    pending
    |> Task.await()
    |> case do
      {:ok, extracted} ->
        IO.puts("Content: #{extracted}")
        :ok

      {:error, why} ->
        IO.puts("Error: #{why}")
        :error
    end
  end
end
|
||||
```
|
||||
18
docs/snippets/elixir/api/extract_bytes_sync.md
Normal file
18
docs/snippets/elixir/api/extract_bytes_sync.md
Normal file
@@ -0,0 +1,18 @@
|
||||
```elixir title="Elixir"
|
||||
defmodule Example do
  @moduledoc """
  Synchronous extraction from in-memory bytes.
  """

  @doc """
  Reads `document.pdf`, extracts it with `Kreuzberg.extract_bytes_sync/3`, and
  prints the outcome. Returns `:ok` or `:error`.
  """
  def extract_from_bytes do
    options = nil

    "document.pdf"
    |> File.read!()
    |> Kreuzberg.extract_bytes_sync("application/pdf", options)
    |> case do
      {:ok, extracted} ->
        IO.puts("Content: #{extracted}")
        :ok

      {:error, why} ->
        IO.puts("Error: #{why}")
        :error
    end
  end
end
|
||||
```
|
||||
21
docs/snippets/elixir/api/extract_file_async.md
Normal file
21
docs/snippets/elixir/api/extract_file_async.md
Normal file
@@ -0,0 +1,21 @@
|
||||
```elixir title="Elixir"
|
||||
defmodule Example do
  @moduledoc """
  Extraction from a file path, run inside a `Task`.
  """

  @doc """
  Calls `Kreuzberg.extract_file_async/3` for `document.pdf` in a task, awaits
  it, and prints the outcome. Returns `:ok` or `:error`.
  """
  def extract_file_async do
    options = nil

    pending =
      Task.async(fn ->
        Kreuzberg.extract_file_async("document.pdf", nil, options)
      end)

    # Block until the task replies, then branch on the tagged result.
    pending
    |> Task.await()
    |> case do
      {:ok, extracted} ->
        IO.puts("Content: #{extracted}")
        :ok

      {:error, why} ->
        IO.puts("Error: #{why}")
        :error
    end
  end
end
|
||||
```
|
||||
17
docs/snippets/elixir/api/extract_file_sync.md
Normal file
17
docs/snippets/elixir/api/extract_file_sync.md
Normal file
@@ -0,0 +1,17 @@
|
||||
```elixir title="Elixir"
|
||||
defmodule Example do
  @moduledoc """
  Synchronous extraction from a file path.
  """

  @doc """
  Extracts `document.pdf` with `Kreuzberg.extract_file_sync/3` and prints the
  outcome. Returns `:ok` or `:error`.
  """
  def extract_file do
    options = nil

    "document.pdf"
    |> Kreuzberg.extract_file_sync(nil, options)
    |> case do
      {:ok, extracted} ->
        IO.puts("Content: #{extracted}")
        :ok

      {:error, why} ->
        IO.puts("Error: #{why}")
        :error
    end
  end
end
|
||||
```
|
||||
Reference in New Issue
Block a user