This commit is contained in:
13
docs/snippets/elixir/getting-started/basic_usage.exs
Normal file
13
docs/snippets/elixir/getting-started/basic_usage.exs
Normal file
@@ -0,0 +1,13 @@
|
||||
```elixir title="Elixir"
|
||||
# Minimal end-to-end flow: extract a file, then print its content and metadata.
# The assertive match crashes the script if extraction does not return {:ok, _}.
{:ok, extraction} = Kreuzberg.extract_file("document.pdf")

IO.puts("Extracted Content:")
IO.puts(extraction.content)

IO.puts("\nMetadata:")
format = inspect(extraction.metadata.format)
IO.puts("Format: #{format}")
table_count = length(extraction.tables)
IO.puts("Tables found: #{table_count}")
|
||||
```
|
||||
19
docs/snippets/elixir/getting-started/basic_usage.md
Normal file
19
docs/snippets/elixir/getting-started/basic_usage.md
Normal file
@@ -0,0 +1,19 @@
|
||||
```elixir title="Elixir"
|
||||
defmodule BasicUsage do
  @moduledoc """
  Getting-started example: extract a document with the default configuration.
  """

  @doc """
  Extracts `"document.pdf"`, passing `nil` for both the second argument and the
  configuration.

  Prints the extracted content and returns `:ok` on success; prints the failure
  reason and returns `:error` otherwise.
  """
  @spec extract_with_default_config() :: :ok | :error
  def extract_with_default_config do
    # Use default configuration (nil)
    config = nil

    case Kreuzberg.extract_file_sync("document.pdf", nil, config) do
      {:ok, content} ->
        IO.puts("Extracted content:")
        IO.puts(content)
        :ok

      {:error, reason} ->
        # `reason` may be any term; `inspect/1` avoids the Protocol.UndefinedError
        # that plain interpolation raises for terms without a String.Chars impl.
        IO.puts("Extraction failed: #{inspect(reason)}")
        :error
    end
  end
end
|
||||
```
|
||||
13
docs/snippets/elixir/getting-started/extract_file.exs
Normal file
13
docs/snippets/elixir/getting-started/extract_file.exs
Normal file
@@ -0,0 +1,13 @@
|
||||
```elixir title="Elixir"
|
||||
# Same call regardless of input format (PDF, DOCX, etc.); handle both outcomes.
extraction = Kreuzberg.extract_file("document.pdf")

case extraction do
  {:error, why} ->
    IO.puts("Extraction failed: #{inspect(why)}")

  {:ok, doc} ->
    IO.puts("Content: #{doc.content}")
    IO.puts("Format: #{inspect(doc.metadata.format)}")
    IO.puts("Tables: #{length(doc.tables)}")
end
|
||||
```
|
||||
19
docs/snippets/elixir/getting-started/extract_file.md
Normal file
19
docs/snippets/elixir/getting-started/extract_file.md
Normal file
@@ -0,0 +1,19 @@
|
||||
```elixir title="Elixir"
|
||||
defmodule ExtractFile do
  @moduledoc """
  Getting-started example: extract a file and print details about the result.
  """

  @doc """
  Extracts `"document.pdf"` and prints the content length followed by the content.

  Returns `:ok` on success, or `:error` after printing the failure reason.
  """
  @spec show_extraction_details() :: :ok | :error
  def show_extraction_details do
    # Extract from a file
    case Kreuzberg.extract_file_sync("document.pdf", nil, nil) do
      {:ok, result} ->
        # Result is a string containing extracted content
        IO.puts("Content length: #{String.length(result)} characters")
        IO.puts("---")
        IO.puts(result)
        :ok

      {:error, reason} ->
        # `inspect/1` keeps this branch from crashing when `reason` is a term
        # that does not implement String.Chars (e.g. a tuple or map).
        IO.puts("Failed to extract: #{inspect(reason)}")
        :error
    end
  end
end
|
||||
```
|
||||
19
docs/snippets/elixir/getting-started/extract_with_ocr.exs
Normal file
19
docs/snippets/elixir/getting-started/extract_with_ocr.exs
Normal file
@@ -0,0 +1,19 @@
|
||||
```elixir title="Elixir"
|
||||
# OCR example: run a scanned PDF through the Tesseract backend.
# The OCR settings are nested directly inside the extraction config.
config = %Kreuzberg.Config.Extraction{
  ocr: %Kreuzberg.Config.OCR{backend: "tesseract", language: "eng"}
}

{:ok, result} = Kreuzberg.extract_file("scanned.pdf", config: config)

IO.puts("Extracted text from scanned document:")
IO.puts(result.content)
IO.puts("Used OCR backend: tesseract")
|
||||
```
|
||||
19
docs/snippets/elixir/getting-started/extract_with_ocr.md
Normal file
19
docs/snippets/elixir/getting-started/extract_with_ocr.md
Normal file
@@ -0,0 +1,19 @@
|
||||
```elixir title="Elixir"
|
||||
defmodule ExtractWithOcr do
  @moduledoc """
  Getting-started example: extract text from a scanned document using OCR.
  """

  @doc """
  Extracts `"scanned.pdf"` with an OCR configuration supplied as a JSON string.

  Prints the extracted text and returns `:ok` on success; prints the failure
  reason and returns `:error` otherwise.
  """
  @spec extract_scanned_document() :: :ok | :error
  def extract_scanned_document do
    # Build configuration with OCR settings as JSON string
    config = ~s({"ocr": {"backend": "tesseract", "language": "eng"}})

    case Kreuzberg.extract_file_sync("scanned.pdf", nil, config) do
      {:ok, result} ->
        IO.puts("Extracted via OCR:")
        IO.puts(result)
        :ok

      {:error, reason} ->
        # `inspect/1` renders any error term; bare interpolation would raise
        # Protocol.UndefinedError for reasons lacking a String.Chars impl.
        IO.puts("OCR extraction failed: #{inspect(reason)}")
        :error
    end
  end
end
|
||||
```
|
||||
5
docs/snippets/elixir/getting-started/hello_world.exs
Normal file
5
docs/snippets/elixir/getting-started/hello_world.exs
Normal file
@@ -0,0 +1,5 @@
|
||||
```elixir title="Elixir"
|
||||
# Hello world: pull the text out of a PDF and print it.
{:ok, doc} = Kreuzberg.extract_file("document.pdf")
text = doc.content
IO.puts(text)
|
||||
```
|
||||
14
docs/snippets/elixir/getting-started/hello_world.md
Normal file
14
docs/snippets/elixir/getting-started/hello_world.md
Normal file
@@ -0,0 +1,14 @@
|
||||
```elixir title="Elixir"
|
||||
defmodule HelloWorld do
  @moduledoc """
  Getting-started example: the smallest possible extraction program.
  """

  @doc """
  Extracts `"document.pdf"` and prints either the extracted result or the error.

  Both branches end in `IO.puts/1`, so the function returns `:ok` either way.
  """
  @spec main() :: :ok
  def main do
    case Kreuzberg.extract_file_sync("document.pdf", nil, nil) do
      {:ok, result} ->
        IO.puts("Extraction succeeded!")
        IO.puts(result)

      {:error, reason} ->
        # `inspect/1` is safe for any term; interpolating `reason` directly
        # raises when it does not implement String.Chars.
        IO.puts("Error: #{inspect(reason)}")
    end
  end
end
|
||||
```
|
||||
5
docs/snippets/elixir/getting-started/install_verify.exs
Normal file
5
docs/snippets/elixir/getting-started/install_verify.exs
Normal file
@@ -0,0 +1,5 @@
|
||||
```elixir title="Elixir"
|
||||
# Smoke test: if this extraction succeeds, Kreuzberg is installed and working.
{:ok, extraction} = Kreuzberg.extract_file("sample.pdf")
char_count = String.length(extraction.content)
IO.puts("Installation verified! Extracted #{char_count} characters")
|
||||
```
|
||||
18
docs/snippets/elixir/getting-started/install_verify.md
Normal file
18
docs/snippets/elixir/getting-started/install_verify.md
Normal file
@@ -0,0 +1,18 @@
|
||||
```elixir title="Elixir"
|
||||
defmodule InstallVerify do
  @moduledoc """
  Getting-started example: check that Kreuzberg is installed and usable.
  """

  @doc """
  Lists the available document extractors, then attempts to extract `"test.txt"`.

  Prints a confirmation when the extraction succeeds, or the failure reason
  otherwise. Crashes with a `MatchError` if listing the extractors does not
  return `{:ok, extractors}`.
  """
  @spec verify_install() :: :ok
  def verify_install do
    # Verify Kreuzberg module is available
    {:ok, extractors} = Kreuzberg.list_document_extractors()
    IO.puts("Available extractors: #{inspect(extractors)}")

    # Verify a simple extraction works
    case Kreuzberg.extract_file_sync("test.txt", nil, nil) do
      {:ok, _result} ->
        IO.puts("Kreuzberg is properly installed and working!")

      {:error, reason} ->
        # `inspect/1` handles any error term; plain interpolation would raise
        # for terms without a String.Chars implementation.
        IO.puts("Extraction failed: #{inspect(reason)}")
    end
  end
end
|
||||
```
|
||||
22
docs/snippets/elixir/getting-started/read_content.exs
Normal file
22
docs/snippets/elixir/getting-started/read_content.exs
Normal file
@@ -0,0 +1,22 @@
|
||||
```elixir title="Elixir"
|
||||
# Tour of the extraction result: content, tables, images, and metadata.
{:ok, extraction} = Kreuzberg.extract_file("document.pdf")

# Main text content
IO.puts("Content length: #{String.length(extraction.content)} characters")

# Extracted tables
IO.puts("Tables found: #{length(extraction.tables)}")

# Extracted images
IO.puts("Images found: #{length(extraction.images)}")

# Document metadata
IO.puts("Format: #{inspect(extraction.metadata.format)}")
|
||||
```
|
||||
24
docs/snippets/elixir/getting-started/read_content.md
Normal file
24
docs/snippets/elixir/getting-started/read_content.md
Normal file
@@ -0,0 +1,24 @@
|
||||
```elixir title="Elixir"
|
||||
defmodule ReadContent do
  @moduledoc """
  Getting-started example: post-process extracted content line by line.
  """

  @doc """
  Extracts `"document.pdf"` and prints every non-empty line of the content.

  Returns `:ok` on success, or `:error` after printing the failure reason.
  """
  @spec process_extracted_content() :: :ok | :error
  def process_extracted_content do
    # Extract content and iterate over lines
    case Kreuzberg.extract_file_sync("document.pdf", nil, nil) do
      {:ok, content} ->
        IO.puts("Processing extracted content:")

        # Split content into lines and iterate; `trim: true` drops empty lines
        content
        |> String.split("\n", trim: true)
        |> Enum.each(fn line ->
          IO.puts(" #{line}")
        end)

        :ok

      {:error, reason} ->
        # `inspect/1` prints any error term; interpolating `reason` directly
        # raises when it does not implement String.Chars.
        IO.puts("Error: #{inspect(reason)}")
        :error
    end
  end
end
|
||||
```
|
||||
Reference in New Issue
Block a user