Nomad changes
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s

This commit is contained in:
Henrik Jess Nielsen
2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions

View File

@@ -0,0 +1,13 @@
```elixir title="Elixir"
# End-to-end example: extract a file, then print its text, format, and table count.
# The assertive match raises MatchError when the call does not return {:ok, _}.
{:ok, extraction} = Kreuzberg.extract_file("document.pdf")

IO.puts("Extracted Content:")
IO.puts(extraction.content)

IO.puts("\nMetadata:")
format_label = inspect(extraction.metadata.format)
IO.puts("Format: " <> format_label)

table_count = length(extraction.tables)
IO.puts("Tables found: #{table_count}")
```

View File

@@ -0,0 +1,19 @@
```elixir title="Elixir"
defmodule BasicUsage do
  @moduledoc """
  Example of extracting a document with the default configuration.
  """

  @doc """
  Extracts `document.pdf` with a `nil` configuration and prints the outcome.

  Returns `:ok` after printing the extracted content, or `:error` after
  printing the failure reason.
  """
  @spec extract_with_default_config() :: :ok | :error
  def extract_with_default_config do
    # Use default configuration (nil)
    config = nil

    case Kreuzberg.extract_file_sync("document.pdf", nil, config) do
      {:ok, content} ->
        IO.puts("Extracted content:")
        IO.puts(content)
        :ok

      {:error, reason} ->
        # inspect/1 renders any term; bare interpolation raises
        # Protocol.UndefinedError for terms without String.Chars (tuples, maps).
        IO.puts("Extraction failed: #{inspect(reason)}")
        :error
    end
  end
end
```

View File

@@ -0,0 +1,13 @@
```elixir title="Elixir"
# Extract from different file types (PDF, DOCX, etc.), handling failure
# explicitly instead of crashing on a failed match.
outcome = Kreuzberg.extract_file("document.pdf")

case outcome do
  {:error, why} ->
    IO.puts("Extraction failed: #{inspect(why)}")

  {:ok, extraction} ->
    IO.puts("Content: #{extraction.content}")

    format_label = inspect(extraction.metadata.format)
    IO.puts("Format: #{format_label}")

    table_count = length(extraction.tables)
    IO.puts("Tables: #{table_count}")
end
```

View File

@@ -0,0 +1,19 @@
```elixir title="Elixir"
defmodule ExtractFile do
  @moduledoc """
  Example of extracting a file and reporting details about the extracted content.
  """

  @doc """
  Extracts `document.pdf` and prints the content length, a separator, and the content.

  Returns `:ok` on success, or `:error` after printing the failure reason.
  """
  @spec show_extraction_details() :: :ok | :error
  def show_extraction_details do
    # Extract from a file
    case Kreuzberg.extract_file_sync("document.pdf", nil, nil) do
      {:ok, result} ->
        # The result is treated as a string here: it is passed to
        # String.length/1 and printed directly.
        IO.puts("Content length: #{String.length(result)} characters")
        IO.puts("---")
        IO.puts(result)
        :ok

      {:error, reason} ->
        # inspect/1 renders any term; bare interpolation raises
        # Protocol.UndefinedError for terms without String.Chars (tuples, maps).
        IO.puts("Failed to extract: #{inspect(reason)}")
        :error
    end
  end
end
```

View File

@@ -0,0 +1,19 @@
```elixir title="Elixir"
# OCR example for scanned documents: the extraction config carries an OCR
# section that selects the Tesseract backend and the "eng" language.
extraction_config = %Kreuzberg.Config.Extraction{
  ocr: %Kreuzberg.Config.OCR{backend: "tesseract", language: "eng"}
}

# The assertive match raises MatchError when the call does not return {:ok, _}.
{:ok, extraction} = Kreuzberg.extract_file("scanned.pdf", config: extraction_config)

IO.puts("Extracted text from scanned document:")
IO.puts(extraction.content)
IO.puts("Used OCR backend: tesseract")
```

View File

@@ -0,0 +1,19 @@
```elixir title="Elixir"
defmodule ExtractWithOcr do
  @moduledoc """
  Example of extracting a scanned document with OCR settings supplied as a JSON string.
  """

  @doc """
  Extracts `scanned.pdf` using a JSON configuration that sets the OCR backend
  to `"tesseract"` and the language to `"eng"`, then prints the result.

  Returns `:ok` on success, or `:error` after printing the failure reason.
  """
  @spec extract_scanned_document() :: :ok | :error
  def extract_scanned_document do
    # Build configuration with OCR settings as JSON string
    config = ~s({"ocr": {"backend": "tesseract", "language": "eng"}})

    case Kreuzberg.extract_file_sync("scanned.pdf", nil, config) do
      {:ok, result} ->
        IO.puts("Extracted via OCR:")
        IO.puts(result)
        :ok

      {:error, reason} ->
        # inspect/1 renders any term; bare interpolation raises
        # Protocol.UndefinedError for terms without String.Chars (tuples, maps).
        IO.puts("OCR extraction failed: #{inspect(reason)}")
        :error
    end
  end
end
```

View File

@@ -0,0 +1,5 @@
```elixir title="Elixir"
# Minimal first program: extract a PDF and print its text content.
# The assertive match raises MatchError when the call does not return {:ok, _}.
{:ok, extraction} = Kreuzberg.extract_file("document.pdf")

text = extraction.content
IO.puts(text)
```

View File

@@ -0,0 +1,14 @@
```elixir title="Elixir"
defmodule HelloWorld do
  @moduledoc """
  Minimal example that extracts a document and prints the outcome.
  """

  @doc """
  Extracts `document.pdf` and prints either the extracted result or the error.

  Both branches end with `IO.puts/1`, so the function returns `:ok` either way.
  """
  @spec main() :: :ok
  def main do
    case Kreuzberg.extract_file_sync("document.pdf", nil, nil) do
      {:ok, result} ->
        IO.puts("Extraction succeeded!")
        IO.puts(result)

      {:error, reason} ->
        # inspect/1 renders any term; bare interpolation raises
        # Protocol.UndefinedError for terms without String.Chars (tuples, maps).
        IO.puts("Error: #{inspect(reason)}")
    end
  end
end
```

View File

@@ -0,0 +1,5 @@
```elixir title="Elixir"
# Installation smoke test: a successful extraction shows the library is usable.
# The assertive match raises MatchError when the call does not return {:ok, _}.
{:ok, extraction} = Kreuzberg.extract_file("sample.pdf")

char_count = String.length(extraction.content)
IO.puts("Installation verified! Extracted #{char_count} characters")
```

View File

@@ -0,0 +1,18 @@
```elixir title="Elixir"
defmodule InstallVerify do
  @moduledoc """
  Example that checks a Kreuzberg installation by listing extractors and
  running one extraction.
  """

  @doc """
  Prints the available document extractors, then attempts to extract `test.txt`
  and prints whether that succeeded.

  Raises `MatchError` if listing the extractors does not return `{:ok, _}`.
  Both extraction branches end with `IO.puts/1`, so the function returns `:ok`.
  """
  @spec verify_install() :: :ok
  def verify_install do
    # Verify Kreuzberg module is available
    {:ok, extractors} = Kreuzberg.list_document_extractors()
    IO.puts("Available extractors: #{inspect(extractors)}")

    # Verify a simple extraction works
    case Kreuzberg.extract_file_sync("test.txt", nil, nil) do
      {:ok, _result} ->
        IO.puts("Kreuzberg is properly installed and working!")

      {:error, reason} ->
        # inspect/1 renders any term; bare interpolation raises
        # Protocol.UndefinedError for terms without String.Chars (tuples, maps).
        IO.puts("Extraction failed: #{inspect(reason)}")
    end
  end
end
```

View File

@@ -0,0 +1,22 @@
```elixir title="Elixir"
# Tour of the extraction result: text content, tables, images, and metadata.
# The assertive match raises MatchError when the call does not return {:ok, _}.
{:ok, extraction} = Kreuzberg.extract_file("document.pdf")

# Main text content
char_count = String.length(extraction.content)
IO.puts("Content length: #{char_count} characters")

# Tables
table_count = length(extraction.tables)
IO.puts("Tables found: #{table_count}")

# Images
image_count = length(extraction.images)
IO.puts("Images found: #{image_count}")

# Metadata
format_label = inspect(extraction.metadata.format)
IO.puts("Format: #{format_label}")
```

View File

@@ -0,0 +1,24 @@
```elixir title="Elixir"
defmodule ReadContent do
  @moduledoc """
  Example of extracting a document and processing its content line by line.
  """

  @doc """
  Extracts `document.pdf`, splits the content on newlines (dropping empty
  lines), and prints each line.

  Returns `:ok` on success, or `:error` after printing the failure reason.
  """
  @spec process_extracted_content() :: :ok | :error
  def process_extracted_content do
    # Extract content and iterate over lines
    case Kreuzberg.extract_file_sync("document.pdf", nil, nil) do
      {:ok, content} ->
        IO.puts("Processing extracted content:")

        # trim: true drops the empty strings produced by blank lines.
        content
        |> String.split("\n", trim: true)
        |> Enum.each(fn line ->
          IO.puts(" #{line}")
        end)

        :ok

      {:error, reason} ->
        # inspect/1 renders any term; bare interpolation raises
        # Protocol.UndefinedError for terms without String.Chars (tuples, maps).
        IO.puts("Error: #{inspect(reason)}")
        :error
    end
  end
end
```