Nomad changes
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s

This commit is contained in:
Henrik Jess Nielsen
2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions

View File

@@ -0,0 +1,25 @@
```elixir title="Elixir"
defmodule Example do
  @moduledoc """
  Example: extracting several documents from in-memory bytes concurrently.
  """

  # Extension -> MIME type for the formats used in this example. Each file
  # must be sent with its own MIME type; sending "application/pdf" for a
  # .docx file mislabels the payload.
  @mime_types %{
    ".pdf" => "application/pdf",
    ".docx" => "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
  }

  @doc """
  Reads each listed file, extracts it with `Kreuzberg.extract_bytes_sync/3`
  (at most four files at a time) and prints one line per file.

  A file that cannot be read, or whose extension has no known MIME type,
  yields an `{:error, reason}` result for that file instead of crashing the
  whole batch. Returns `:ok`.
  """
  def batch_extract_bytes do
    # Note: Batch extraction in Elixir is done via Task.async_stream over sync calls
    files = ["doc1.pdf", "doc2.docx", "report.pdf"]
    config = nil

    results =
      files
      |> Task.async_stream(&extract_one(&1, config), max_concurrency: 4)
      |> Enum.map(fn {:ok, result} -> result end)

    Enum.each(results, fn
      {:ok, content} -> IO.puts("Extracted: #{String.length(content)} chars")
      {:error, reason} -> IO.puts("Error: #{reason}")
    end)
  end

  # Resolves the MIME type, reads the file and runs the extraction; the first
  # failing step's `{:error, reason}` is returned unchanged.
  defp extract_one(file, config) do
    with {:ok, mime_type} <- mime_type_for(file),
         {:ok, content} <- read_file(file) do
      Kreuzberg.extract_bytes_sync(content, mime_type, config)
    end
  end

  # Looks up the MIME type from the (case-insensitive) file extension.
  defp mime_type_for(file) do
    extension = file |> Path.extname() |> String.downcase()

    case Map.fetch(@mime_types, extension) do
      {:ok, mime_type} -> {:ok, mime_type}
      :error -> {:error, "unsupported file extension #{inspect(extension)} for #{file}"}
    end
  end

  # Non-raising read so one missing file does not take down the batch.
  defp read_file(file) do
    case File.read(file) do
      {:ok, content} -> {:ok, content}
      {:error, posix} -> {:error, "cannot read #{file}: #{:file.format_error(posix)}"}
    end
  end
end
```

View File

@@ -0,0 +1,23 @@
```elixir title="Elixir"
defmodule Example do
  @moduledoc """
  Example: extracting several files concurrently by path.
  """

  @doc """
  Runs `Kreuzberg.extract_file_sync/3` over a fixed list of paths with at most
  four concurrent tasks, collects every result, then prints one line per file.

  Returns `:ok`.
  """
  def batch_extract_files do
    paths = ["doc1.pdf", "doc2.docx", "report.pdf"]

    stream =
      Task.async_stream(
        paths,
        fn path -> Kreuzberg.extract_file_sync(path, nil, nil) end,
        max_concurrency: 4
      )

    # Unwrap the task envelope; all results are collected before any printing.
    outcomes = Enum.map(stream, fn {:ok, outcome} -> outcome end)

    Enum.each(outcomes, &report/1)
  end

  # Prints the outcome of a single extraction.
  defp report({:ok, content}) do
    IO.puts("File extracted: #{String.length(content)} chars")
  end

  defp report({:error, reason}) do
    IO.puts("Error: #{reason}")
  end
end
```

View File

@@ -0,0 +1,30 @@
```elixir title="Elixir"
defmodule Example do
  @moduledoc """
  Example: chunking text through the HTTP `/chunk` endpoint.
  """

  @doc """
  Posts a text and a chunking configuration to
  `http://localhost:8000/chunk` and prints how many chunks came back.

  Returns `{:ok, chunks}` when the server answers with a 2xx status and a JSON
  body containing a `"chunks"` list. Any other outcome (transport error,
  non-2xx status, invalid JSON, missing `"chunks"`) is returned as
  `{:error, inspected_term}`.
  """
  def chunk_text_via_http do
    text = "Your long document text here..."

    config = %{
      "chunking" => %{
        "max_characters" => 800,
        "overlap" => 100,
        "chunker_type" => "Markdown"
      }
    }

    with {:ok, %{status: status, body: raw}} when status in 200..299 <-
           Req.post(
             "http://localhost:8000/chunk",
             json: %{
               "text" => text,
               "config" => config
             },
             # Req decodes JSON bodies by default; keep the raw body so the
             # explicit Jason.decode/1 below does not receive a map.
             decode_body: false
           ),
         {:ok, %{"chunks" => chunks}} when is_list(chunks) <- Jason.decode(raw) do
      IO.puts("Created #{length(chunks)} chunks")
      {:ok, chunks}
    else
      error -> {:error, inspect(error)}
    end
  end
end
```

View File

@@ -0,0 +1,19 @@
```elixir title="Elixir"
defmodule Example do
  @moduledoc """
  Example: extracting a document through the HTTP `/extract` endpoint.
  """

  @doc """
  Reads `document.pdf`, uploads it as a multipart `file` field to
  `http://localhost:8000/extract` and prints the `"content"` entry of the
  decoded JSON response.

  Returns `{:ok, body}` with the decoded JSON on a 2xx response. Any other
  outcome (unreadable file, transport error, non-2xx status, invalid JSON) is
  returned as `{:error, inspected_term}`.
  """
  def extract_via_http do
    file_path = "document.pdf"

    with {:ok, file} <- File.read(file_path),
         {:ok, %{status: status, body: raw}} when status in 200..299 <-
           Req.post(
             "http://localhost:8000/extract",
             # File uploads need a multipart body; `form:` is URL-encoded and
             # cannot carry file contents.
             form_multipart: [file: {file, filename: Path.basename(file_path)}],
             # Req decodes JSON bodies by default; keep the raw body so the
             # explicit Jason.decode/1 below does not receive a map.
             decode_body: false
           ),
         {:ok, body} <- Jason.decode(raw) do
      IO.puts("Extracted content: #{body["content"]}")
      {:ok, body}
    else
      error -> {:error, inspect(error)}
    end
  end
end
```

View File

@@ -0,0 +1,41 @@
```elixir title="Elixir"
defmodule Example do
  @moduledoc """
  Example: a single extraction call driven by a comprehensive configuration.
  """

  @doc """
  Encodes the settings map to a JSON string with `Jason.encode!/1`, passes it
  as the third argument of `Kreuzberg.extract_file_sync/3` for `report.pdf`
  and prints the outcome.

  Returns `:ok` on `{:ok, _}` and `:error` on `{:error, _}`.
  """
  def full_extraction_pipeline do
    encoded_settings = Jason.encode!(extraction_settings())
    outcome = Kreuzberg.extract_file_sync("report.pdf", nil, encoded_settings)

    case outcome do
      {:error, why} ->
        IO.puts("Extraction failed: #{why}")
        :error

      {:ok, extracted} ->
        IO.puts("Extraction successful")
        IO.puts("Content length: #{String.length(extracted)} chars")
        :ok
    end
  end

  # The extraction settings, kept as a plain map with string keys.
  defp extraction_settings do
    %{
      "use_cache" => true,
      "enable_quality_processing" => true,
      "force_ocr" => false,
      "ocr" => %{"backend" => "tesseract", "language" => "eng"},
      "chunking" => %{
        "max_characters" => 800,
        "overlap" => 100,
        "chunker_type" => "Markdown",
        "prepend_heading_context" => true
      },
      "output_format" => "Markdown",
      "include_document_structure" => true,
      "images" => %{"extract_images" => true},
      "language_detection" => %{"detect" => true}
    }
  end
end
```

View File

@@ -0,0 +1,24 @@
```elixir title="Elixir"
defmodule Example do
  @moduledoc """
  Example: reacting to the different ways an extraction can fail.
  """

  @doc """
  Extracts `document.txt` without an explicit MIME type or configuration and
  prints a message describing the outcome.

  String error reasons are classified by the text they contain; any other
  error reason is printed with `inspect/1`.
  """
  def handle_extraction_errors do
    case Kreuzberg.extract_file_sync("document.txt", nil, nil) do
      {:ok, result} ->
        IO.puts("Success: #{String.length(result)} chars")

      {:error, reason} when is_binary(reason) ->
        # Error is a string description
        IO.puts(describe_error(reason))

      {:error, reason} ->
        # Non-string reasons would otherwise raise a CaseClauseError.
        IO.puts("Extraction failed: #{inspect(reason)}")
    end
  end

  # Classifies a textual error. `String.contains?/2` is not allowed in guards,
  # so the checks are done with `cond` instead of `when` clauses.
  defp describe_error(msg) do
    cond do
      String.contains?(msg, "unsupported") -> "Unsupported format: #{msg}"
      String.contains?(msg, "not found") -> "File not found: #{msg}"
      true -> "Extraction failed: #{msg}"
    end
  end
end
```

View File

@@ -0,0 +1,28 @@
```elixir title="Elixir"
defmodule Example do
  @moduledoc """
  Example: a step-by-step extraction that reports which step failed.
  """

  @doc """
  Checks that `path` exists, reads it, detects its MIME type from the bytes
  and extracts it with `Kreuzberg.extract_bytes_sync/3`.

  Returns `{:ok, result}` or `{:error, message}`, where the message names the
  step that failed.
  """
  def robust_extract(path) do
    if File.exists?(path) do
      read_and_extract(path)
    else
      {:error, "File not found: #{path}"}
    end
  end

  # Runs the read -> detect -> extract steps; each step is tagged so the
  # `else` branch can tell which one produced the error.
  defp read_and_extract(path) do
    with {:read, {:ok, bytes}} <- {:read, File.read(path)},
         {:mime, {:ok, mime}} <- {:mime, Kreuzberg.detect_mime_type_from_bytes(bytes)},
         {:extract, {:ok, extracted}} <-
           {:extract, Kreuzberg.extract_bytes_sync(bytes, mime, nil)} do
      {:ok, extracted}
    else
      {:read, {:error, why}} -> {:error, "Failed to read file: #{inspect(why)}"}
      {:mime, {:error, why}} -> {:error, "MIME detection failed: #{why}"}
      {:extract, {:error, why}} -> {:error, "Extraction failed: #{why}"}
    end
  end
end
```

View File

@@ -0,0 +1,22 @@
```elixir title="Elixir"
defmodule Example do
  @moduledoc """
  Example: extracting in-memory bytes inside a `Task`.
  """

  @doc """
  Reads `document.pdf`, calls `Kreuzberg.extract_bytes_async/3` inside a task,
  awaits it and prints the outcome.

  Returns `:ok` on `{:ok, _}` and `:error` on `{:error, _}`.
  """
  def extract_from_bytes_async do
    bytes = File.read!("document.pdf")
    job = fn -> Kreuzberg.extract_bytes_async(bytes, "application/pdf", nil) end

    outcome =
      job
      |> Task.async()
      |> Task.await()

    case outcome do
      {:error, why} ->
        IO.puts("Error: #{why}")
        :error

      {:ok, extracted} ->
        IO.puts("Content: #{extracted}")
        :ok
    end
  end
end
```

View File

@@ -0,0 +1,18 @@
```elixir title="Elixir"
defmodule Example do
  @moduledoc """
  Example: extracting a document from in-memory bytes.
  """

  @doc """
  Reads `document.pdf`, passes its bytes to `Kreuzberg.extract_bytes_sync/3`
  as `application/pdf` and prints the outcome.

  Returns `:ok` on `{:ok, _}` and `:error` on `{:error, _}`.
  """
  def extract_from_bytes do
    bytes = File.read!("document.pdf")
    outcome = Kreuzberg.extract_bytes_sync(bytes, "application/pdf", nil)

    case outcome do
      {:error, why} ->
        IO.puts("Error: #{why}")
        :error

      {:ok, extracted} ->
        IO.puts("Content: #{extracted}")
        :ok
    end
  end
end
```

View File

@@ -0,0 +1,21 @@
```elixir title="Elixir"
defmodule Example do
  @moduledoc """
  Example: extracting a file by path inside a `Task`.
  """

  @doc """
  Calls `Kreuzberg.extract_file_async/3` for `document.pdf` inside a task,
  awaits it and prints the outcome.

  Returns `:ok` on `{:ok, _}` and `:error` on `{:error, _}`.
  """
  def extract_file_async do
    job = fn -> Kreuzberg.extract_file_async("document.pdf", nil, nil) end

    outcome =
      job
      |> Task.async()
      |> Task.await()

    case outcome do
      {:error, why} ->
        IO.puts("Error: #{why}")
        :error

      {:ok, extracted} ->
        IO.puts("Content: #{extracted}")
        :ok
    end
  end
end
```

View File

@@ -0,0 +1,17 @@
```elixir title="Elixir"
defmodule Example do
  @moduledoc """
  Example: extracting a file by path.
  """

  @doc """
  Calls `Kreuzberg.extract_file_sync/3` for `document.pdf` with no explicit
  MIME type or configuration and prints the outcome.

  Returns `:ok` on `{:ok, _}` and `:error` on `{:error, _}`.
  """
  def extract_file do
    outcome = Kreuzberg.extract_file_sync("document.pdf", nil, nil)

    case outcome do
      {:error, why} ->
        IO.puts("Error: #{why}")
        :error

      {:ok, extracted} ->
        IO.puts("Content: #{extracted}")
        :ok
    end
  end
end
```