This commit is contained in:
312
docs/snippets/elixir/mcp/mcp_custom_client.exs
Normal file
312
docs/snippets/elixir/mcp/mcp_custom_client.exs
Normal file
@@ -0,0 +1,312 @@
|
||||
```elixir title="Elixir"
|
||||
# MCP Custom Client - Connect to Kreuzberg MCP servers
|
||||
# Demonstrates creating a reusable MCP client for document extraction
|
||||
|
||||
defmodule KreuzbergMCPClient do
|
||||
@moduledoc """
|
||||
MCP client for communicating with Kreuzberg extraction servers.
|
||||
|
||||
Provides methods for extracting documents from remote MCP servers
|
||||
with support for caching, retry logic, and error handling.
|
||||
"""
|
||||
|
||||
require Logger
|
||||
|
||||
defmodule Config do
  @moduledoc """
  Connection settings for the MCP client.

  Build a value with `new/1`. Every field has a default; `:cache_dir`
  defaults to `nil`, which disables caching in `extract_file/3`.
  """

  defstruct [:host, :port, :timeout_ms, :max_retries, :retry_delay_ms, :cache_dir]

  # The public specs of the enclosing module refer to `Config.t()`, so the
  # type has to exist for Dialyzer and ExDoc to resolve it.
  @type t :: %__MODULE__{
          host: String.t(),
          port: non_neg_integer(),
          timeout_ms: non_neg_integer(),
          max_retries: non_neg_integer(),
          retry_delay_ms: non_neg_integer(),
          cache_dir: String.t() | nil
        }

  @doc """
  Builds a configuration from keyword options, filling in defaults.

  ## Options

    * `:host` - server host (default: `"localhost"`)
    * `:port` - server port (default: `8080`)
    * `:timeout_ms` - request timeout in milliseconds (default: `30000`)
    * `:max_retries` - number of request attempts (default: `3`)
    * `:retry_delay_ms` - pause between attempts in milliseconds (default: `1000`)
    * `:cache_dir` - directory for cached results (default: `nil`)
  """
  @spec new(keyword()) :: t()
  def new(opts \\ []) do
    %__MODULE__{
      host: Keyword.get(opts, :host, "localhost"),
      port: Keyword.get(opts, :port, 8080),
      timeout_ms: Keyword.get(opts, :timeout_ms, 30000),
      max_retries: Keyword.get(opts, :max_retries, 3),
      retry_delay_ms: Keyword.get(opts, :retry_delay_ms, 1000),
      cache_dir: Keyword.get(opts, :cache_dir, nil)
    }
  end
end
|
||||
|
||||
@doc """
Extracts a document by path through the MCP server.

The request is sent to the remote Kreuzberg server. When `:use_cache` is
enabled and the configuration has a `cache_dir`, a previously cached result
is returned if one exists; otherwise the freshly fetched result is written
to the cache before being returned.

## Options

  * `:mime_type` - MIME type of document
  * `:config` - Extraction configuration map
  * `:use_cache` - Enable result caching (default: false)
"""
@spec extract_file(Config.t(), String.t(), keyword()) ::
        {:ok, map()} | {:error, String.t()}
def extract_file(config, file_path, opts \\ []) do
  mime = Keyword.get(opts, :mime_type)
  settings = Keyword.get(opts, :config)
  caching? = Keyword.get(opts, :use_cache, false)

  if caching? and config.cache_dir do
    key = compute_cache_key(file_path, mime, settings)

    case get_from_cache(config.cache_dir, key) do
      :miss ->
        # Nothing cached yet: ask the server and remember a successful answer.
        outcome = fetch_from_server(config, file_path, mime, settings)

        with {:ok, fresh} <- outcome do
          store_in_cache(config.cache_dir, key, fresh)
        end

        outcome

      {:ok, _cached} = hit ->
        Logger.debug("Cache hit for #{file_path}")
        hit
    end
  else
    fetch_from_server(config, file_path, mime, settings)
  end
end
|
||||
|
||||
@doc """
Uploads a local file to the MCP server and returns the extraction result.

The whole file is read into memory and posted as the raw request body to
`/extract/file`, with the base name of the file in the `X-File-Name` header.

The third argument is accepted for signature compatibility and is currently
ignored.

Returns `{:error, message}` when the file is missing or unreadable, when the
HTTP request fails, or when the server reports an error.
"""
@spec upload_and_extract(Config.t(), String.t(), keyword()) ::
        {:ok, map()} | {:error, String.t()}
def upload_and_extract(config, file_path, _opts \\ []) do
  # Read directly instead of checking File.exists?/1 first: one syscall, and
  # no window in which the file can disappear between the check and the read.
  case File.read(file_path) do
    {:ok, body} ->
      url = "http://#{config.host}:#{config.port}/extract/file"

      headers = [
        {"Content-Type", "application/octet-stream"},
        {"X-File-Name", Path.basename(file_path)}
      ]

      # `:timeout` only bounds connection establishment; `:recv_timeout`
      # bounds the wait for the response and would otherwise stay at
      # HTTPoison's 5 second default.
      request_opts = [timeout: config.timeout_ms, recv_timeout: config.timeout_ms]

      case HTTPoison.post(url, body, headers, request_opts) do
        {:ok, response} ->
          handle_response(response)

        {:error, reason} ->
          Logger.error("Upload failed: #{inspect(reason)}")
          {:error, "Upload failed: #{inspect(reason)}"}
      end

    {:error, :enoent} ->
      {:error, "File not found: #{file_path}"}

    {:error, reason} ->
      {:error, "Failed to read file: #{inspect(reason)}"}
  end
end
|
||||
|
||||
@doc """
Checks the health endpoint of the MCP server.

Returns `{:ok, payload}` with the decoded JSON object when the server answers
`GET /health` with status 200, and `{:error, message}` otherwise.

The payload is decoded here rather than through the extraction response
handling, because a health payload is not required to carry a `"success"`
flag.
"""
@spec health_check(Config.t()) :: {:ok, map()} | {:error, String.t()}
def health_check(config) do
  url = "http://#{config.host}:#{config.port}/health"

  # `:timeout` covers connecting only; `:recv_timeout` covers the response.
  request_opts = [timeout: config.timeout_ms, recv_timeout: config.timeout_ms]

  case HTTPoison.get(url, [], request_opts) do
    {:ok, %HTTPoison.Response{status_code: 200, body: body}} ->
      case Jason.decode(body) do
        {:ok, %{} = data} ->
          {:ok, data}

        {:ok, other} ->
          {:error, "Unexpected health payload: #{inspect(other)}"}

        {:error, reason} ->
          {:error, "Failed to decode response: #{inspect(reason)}"}
      end

    {:ok, %HTTPoison.Response{status_code: status, body: body}} ->
      {:error, "Server error (#{status}): #{body}"}

    {:error, reason} ->
      {:error, "Health check failed: #{inspect(reason)}"}
  end
end
|
||||
|
||||
@doc """
Extracts several documents concurrently.

Each path is passed to `extract_file/3` with the same `opts`, using
`Task.async_stream/3`. The call always returns `{:ok, results}`, where
`results` holds one `{:ok, map}` or `{:error, message}` entry per input path,
in input order. A task that exceeds its time budget is killed and reported as
an `{:error, message}` entry instead of taking down the caller.
"""
@spec batch_extract(Config.t(), [String.t()], keyword()) ::
        {:ok, [{:ok, map()} | {:error, String.t()}]}
def batch_extract(config, file_paths, opts \\ []) do
  total = length(file_paths)
  Logger.info("Batch extracting #{total} documents")

  # Task.async_stream/3 defaults to a 5 second per-task timeout that exits
  # the caller. Budget for the worst case instead: every attempt may use up
  # the connect and the receive timeout and is followed by the retry delay.
  task_timeout =
    max(config.max_retries, 1) * (2 * config.timeout_ms + config.retry_delay_ms) + 1_000

  results =
    file_paths
    |> Task.async_stream(&extract_file(config, &1, opts),
      timeout: task_timeout,
      on_timeout: :kill_task
    )
    |> Enum.zip(file_paths)
    |> Enum.map(fn
      {{:ok, result}, _path} ->
        result

      {{:exit, reason}, path} ->
        {:error, "Extraction of #{path} exited: #{inspect(reason)}"}
    end)

  success_count = Enum.count(results, &match?({:ok, _}, &1))
  Logger.info("Batch extraction complete: #{success_count}/#{total} succeeded")

  {:ok, results}
end
|
||||
|
||||
# Private helpers
|
||||
|
||||
# Posts an extraction request for `file_path` to the server's `/extract`
# endpoint (with retries) and interprets the response.
#
# Only the path is sent, not the file contents.
# NOTE(review): the path is presumably resolved on the server's filesystem -
# confirm against the server implementation.
defp fetch_from_server(config, file_path, mime_type, extraction_config) do
  url = "http://#{config.host}:#{config.port}/extract"

  body =
    Jason.encode!(%{
      file_path: file_path,
      mime_type: mime_type,
      config: extraction_config
    })

  headers = [{"Content-Type", "application/json"}]

  # `:timeout` only bounds connection establishment; without `:recv_timeout`
  # the response wait stays at HTTPoison's 5 second default regardless of
  # `timeout_ms`.
  request_opts = [timeout: config.timeout_ms, recv_timeout: config.timeout_ms]

  case retry_request(config, fn -> HTTPoison.post(url, body, headers, request_opts) end) do
    {:ok, response} -> handle_response(response)
    error -> error
  end
end
|
||||
|
||||
# Runs `request_fn` until it returns `{:ok, response}` or `config.max_retries`
# attempts have failed. Failed attempts are logged and separated by
# `config.retry_delay_ms` milliseconds.
defp retry_request(config, request_fn) do
  retry_request(config, request_fn, 0)
end

defp retry_request(config, request_fn, attempt) when attempt < config.max_retries do
  case request_fn.() do
    {:ok, response} ->
      {:ok, response}

    {:error, reason} ->
      Logger.warning("Request failed (attempt #{attempt + 1}): #{inspect(reason)}")

      # Only pause when another attempt will follow; sleeping after the last
      # failure would just delay the error.
      if attempt + 1 < config.max_retries do
        Process.sleep(config.retry_delay_ms)
      end

      retry_request(config, request_fn, attempt + 1)
  end
end

# Attempt budget exhausted (or `max_retries` is 0, in which case no request
# is made at all).
defp retry_request(_config, _request_fn, _attempt) do
  {:error, "Max retries exceeded"}
end
|
||||
|
||||
# Turns an HTTP response into `{:ok, data}` or `{:error, message}`.
#
# A 200 response must carry a JSON object whose "success" value is truthy;
# otherwise the object's "error" value (or "Unknown error") is returned.
# Any other status is reported together with the raw body.
defp handle_response(%HTTPoison.Response{status_code: 200, body: body}) do
  case Jason.decode(body) do
    {:ok, %{"success" => success} = data} when success not in [nil, false] ->
      {:ok, data}

    {:ok, %{} = data} ->
      {:error, Map.get(data, "error", "Unknown error")}

    {:ok, other} ->
      # Valid JSON that is not an object (list, string, number, ...) used to
      # raise BadMapError in Map.get/2.
      {:error, "Unexpected response payload: #{inspect(other)}"}

    {:error, reason} ->
      {:error, "Failed to decode response: #{inspect(reason)}"}
  end
end

defp handle_response(%HTTPoison.Response{status_code: status, body: body}) do
  {:error, "Server error (#{status}): #{body}"}
end
|
||||
|
||||
# Derives the cache key: the lowercase hex SHA-256 digest of the file path,
# MIME type and inspected extraction config, joined with "|".
defp compute_cache_key(file_path, mime_type, config) do
  fingerprint = [to_string(file_path), "|", to_string(mime_type), "|", inspect(config)]
  digest = :crypto.hash(:sha256, fingerprint)
  Base.encode16(digest, case: :lower)
end
|
||||
|
||||
# Looks up `cache_key` in `cache_dir`.
#
# Returns `{:ok, decoded}` when `<cache_key>.json` exists and holds valid
# JSON, and `:miss` in every other case. A missing, unreadable or corrupt
# cache entry is treated as a miss so the caller falls back to the server
# instead of crashing.
defp get_from_cache(cache_dir, cache_key) do
  cache_file = Path.join(cache_dir, "#{cache_key}.json")

  # File.read/1 reports failures as {:error, reason} (never a bare :error),
  # and a missing file is just {:error, :enoent}, so no exists? check is
  # needed.
  with {:ok, content} <- File.read(cache_file),
       {:ok, data} <- Jason.decode(content) do
    {:ok, data}
  else
    _unreadable_or_corrupt -> :miss
  end
end
|
||||
|
||||
# Writes `result` as JSON to `<cache_key>.json` inside `cache_dir`, creating
# the directory if needed.
#
# Caching is best effort: a failure to encode or write is logged and `:ok`
# is still returned, so a cache problem never turns a successful extraction
# into a crash.
defp store_in_cache(cache_dir, cache_key, result) do
  cache_file = Path.join(cache_dir, "#{cache_key}.json")

  with {:ok, json} <- Jason.encode(result),
       :ok <- File.mkdir_p(cache_dir),
       :ok <- File.write(cache_file, json) do
    :ok
  else
    {:error, reason} ->
      Logger.warning("Failed to cache result in #{cache_file}: #{inspect(reason)}")
      :ok
  end
end
|
||||
end
|
||||
|
||||
# Usage examples
|
||||
IO.puts("=== Kreuzberg MCP Client ===\n")

# Client settings: local server, with results cached on disk.
config =
  KreuzbergMCPClient.Config.new(
    host: "localhost",
    port: 8080,
    timeout_ms: 30000,
    max_retries: 3,
    cache_dir: "/tmp/kreuzberg_cache"
  )

# 1. Make sure the server is reachable.
IO.puts("Checking server health...")

health_outcome = KreuzbergMCPClient.health_check(config)

case health_outcome do
  {:error, reason} ->
    IO.puts("Health check failed: #{reason}\n")

  {:ok, health} ->
    IO.puts("Server status: #{health["status"]}")
    IO.puts("Service: #{health["service"]}\n")
end

# 2. Extract one document, going through the cache.
IO.puts("Extracting document...")

single_outcome = KreuzbergMCPClient.extract_file(config, "document.pdf", use_cache: true)

case single_outcome do
  {:error, reason} ->
    IO.puts("Extraction failed: #{reason}")

  {:ok, result} ->
    IO.puts("Success!")
    IO.puts("Content size: #{byte_size(result["content"])} bytes")
    IO.puts("MIME type: #{result["mime_type"]}")
    IO.puts("Tables found: #{length(result["tables"])}")
end

IO.puts("")

# 3. Extract several documents concurrently.
IO.puts("Batch extracting multiple documents...")

documents = ~w(doc1.pdf doc2.pdf doc3.pdf)

batch_outcome = KreuzbergMCPClient.batch_extract(config, documents)

case batch_outcome do
  {:error, reason} ->
    IO.puts("Batch extraction failed: #{reason}")

  {:ok, results} ->
    IO.puts("Batch extraction complete!")
    successful = Enum.count(results, &match?({:ok, _}, &1))
    IO.puts("Successful: #{successful}/#{length(results)}")
end
|
||||
```
|
||||
300
docs/snippets/elixir/mcp/mcp_server_start.exs
Normal file
300
docs/snippets/elixir/mcp/mcp_server_start.exs
Normal file
@@ -0,0 +1,300 @@
|
||||
```elixir title="Elixir"
|
||||
# MCP Server Integration - Start a Kreuzberg MCP server
|
||||
# Demonstrates how to set up and manage an MCP server for remote document extraction
|
||||
|
||||
defmodule KreuzbergMCPServer do
|
||||
@moduledoc """
|
||||
MCP (Model Context Protocol) server for Kreuzberg document extraction.
|
||||
|
||||
Provides a standardized interface for remote clients to extract documents
|
||||
using the Kreuzberg library via the Model Context Protocol.
|
||||
"""
|
||||
|
||||
require Logger
|
||||
alias Kreuzberg.ExtractionConfig
|
||||
|
||||
@doc """
Starts the HTTP listener for the MCP server.

Registers the `/extract`, `/extract/file` and `/health` routes on a Cowboy
clear-text listener named `:kreuzberg_http`.

## Options

  * `:host` - IP address or host name to bind to (default: "127.0.0.1")
  * `:port` - Server port (default: 8080)
  * `:max_connections` - Maximum concurrent connections (default: 10)

Returns `{:ok, message}` once the listener is running, or `{:error, reason}`
when the host cannot be resolved, Cowboy cannot be started, or the listener
fails to start (for example because the port is in use).
"""
@spec start_server(keyword()) :: {:ok, String.t()} | {:error, term()}
def start_server(opts \\ []) do
  host = Keyword.get(opts, :host, "127.0.0.1")
  port = Keyword.get(opts, :port, 8080)
  max_connections = Keyword.get(opts, :max_connections, 10)

  Logger.info("Starting Kreuzberg MCP server on #{host}:#{port}")

  # Cowboy expects the routing table in compiled form under the `:dispatch`
  # key of the `env` map; a raw route list is not usable.
  dispatch =
    :cowboy_router.compile([
      {:_,
       [
         {"/extract", KreuzbergMCPServer.Handler, []},
         {"/extract/file", KreuzbergMCPServer.FileHandler, []},
         {"/health", KreuzbergMCPServer.HealthHandler, []}
       ]}
    ])

  with {:ok, ip} <- resolve_bind_address(host),
       {:ok, _started} <- Application.ensure_all_started(:cowboy),
       {:ok, _pid} <-
         :cowboy.start_clear(
           :kreuzberg_http,
           # Bind to the requested address only; without `:ip` the listener
           # would accept connections on every interface.
           %{socket_opts: [ip: ip, port: port], max_connections: max_connections},
           %{env: %{dispatch: dispatch}}
         ) do
    Logger.info("MCP server started successfully")
    {:ok, "Server running on #{host}:#{port}"}
  else
    {:error, reason} ->
      Logger.error("Failed to start MCP server: #{inspect(reason)}")
      {:error, reason}
  end
end

# Converts the `:host` option into an IP tuple: literal addresses are parsed,
# anything else is resolved as an IPv4 host name.
defp resolve_bind_address(host) do
  charlist = String.to_charlist(host)

  case :inet.parse_address(charlist) do
    {:ok, ip} -> {:ok, ip}
    {:error, _not_an_ip_literal} -> :inet.getaddr(charlist, :inet)
  end
end
|
||||
|
||||
@doc """
Stops the `:kreuzberg_http` listener.

Always returns `:ok`; when no such listener is running this is logged as a
warning instead of being reported as a successful stop.
"""
@spec stop_server() :: :ok
def stop_server do
  Logger.info("Stopping Kreuzberg MCP server")

  case :cowboy.stop_listener(:kreuzberg_http) do
    :ok ->
      Logger.info("MCP server stopped")

    {:error, :not_found} ->
      Logger.warning("MCP server was not running")
  end

  :ok
end
|
||||
end
|
||||
|
||||
# Handler for extraction requests
|
||||
defmodule KreuzbergMCPServer.Handler do
  @moduledoc """
  Cowboy handler for `POST /extract`.

  Expects a JSON object with a required `"file_path"` and optional
  `"mime_type"` and `"config"` entries, runs the extraction and replies with
  a JSON document. Every reply, including errors, is sent on the request that
  is being handled.
  """

  require Logger

  @doc """
  Cowboy entry point: answers the request and returns `{:ok, req, state}`.

  Only `POST` is accepted; any other method is answered with 405.
  """
  def init(req, state) do
    req =
      case :cowboy_req.method(req) do
        "POST" -> handle_extraction(req)
        _ -> error_response(405, "Method not allowed", req)
      end

    {:ok, req, state}
  end

  # Reads and decodes the JSON request body, then dispatches the extraction.
  defp handle_extraction(req) do
    case :cowboy_req.read_body(req) do
      {:ok, body, req} ->
        case Jason.decode(body) do
          {:ok, params} when is_map(params) ->
            extract_from_params(params, req)

          {:ok, _not_an_object} ->
            error_response(400, "Request body must be a JSON object", req)

          {:error, reason} ->
            error_response(400, "Invalid JSON: #{inspect(reason)}", req)
        end

      {:more, _partial, req} ->
        # read_body/1 returns {:more, ...} when the body exceeds its default
        # read length; a path-based request has no reason to be that large.
        error_response(413, "Request body too large", req)
    end
  end

  # SECURITY: `file_path` comes from the client and is handed to the
  # extractor as is, so any file the server process can read is reachable.
  # Restrict it to an allow-listed directory before exposing this endpoint.
  defp extract_from_params(%{"file_path" => file_path} = params, req)
       when is_binary(file_path) do
    mime_type = Map.get(params, "mime_type")
    config = build_config(Map.get(params, "config", %{}))

    case Kreuzberg.extract_file(file_path, mime_type, config) do
      {:ok, result} ->
        response_data = %{
          success: true,
          content: result.content,
          mime_type: result.mime_type,
          metadata: result.metadata || %{},
          tables: result.tables || [],
          chunks: result.chunks || [],
          images: result.images || [],
          detected_languages: result.detected_languages || []
        }

        success_response(200, response_data, req)

      {:error, reason} ->
        error_response(400, "Extraction failed: #{inspect(reason)}", req)
    end
  end

  defp extract_from_params(_params, req) do
    error_response(400, "Missing required parameter: file_path", req)
  end

  # Maps the JSON "config" object onto an extraction config struct. Anything
  # that is not a map (for example JSON null) yields `nil`.
  defp build_config(opts) when is_map(opts) do
    %Kreuzberg.ExtractionConfig{
      ocr: opts["ocr"],
      chunking: opts["chunking"],
      quality_processing: opts["quality_processing"],
      language_detection: opts["language_detection"],
      images: opts["images"],
      use_cache: Map.get(opts, "use_cache", true)
    }
  end

  defp build_config(_), do: nil

  # Sends `data` as a JSON reply and returns the updated request.
  defp success_response(status, data, req) do
    json_reply(status, data, req)
  end

  # Sends a `{success: false, error: message}` JSON reply and returns the
  # updated request.
  defp error_response(status, message, req) do
    json_reply(status, %{success: false, error: message}, req)
  end

  defp json_reply(status, payload, req) do
    :cowboy_req.reply(
      status,
      %{"content-type" => "application/json"},
      Jason.encode!(payload),
      req
    )
  end
end
|
||||
|
||||
# Health check handler
|
||||
defmodule KreuzbergMCPServer.HealthHandler do
  @moduledoc """
  Health check endpoint for the MCP server.

  Answers every request with status 200 and a JSON object describing the
  service.
  """

  @doc """
  Cowboy entry point: replies with the health payload.

  The payload carries `success: true` like the other endpoints, so clients
  that check that flag on every response also accept the health reply.
  """
  def init(req, state) do
    payload = %{
      success: true,
      status: "healthy",
      service: "kreuzberg-mcp",
      timestamp: DateTime.to_iso8601(DateTime.utc_now())
    }

    req =
      :cowboy_req.reply(
        200,
        %{"content-type" => "application/json"},
        Jason.encode!(payload),
        req
      )

    {:ok, req, state}
  end
end
|
||||
|
||||
# File upload handler
|
||||
defmodule KreuzbergMCPServer.FileHandler do
  @moduledoc """
  Cowboy handler for `POST /extract/file`.

  The request body is the raw file content (not a multipart form). It is
  written to a temporary file, extracted, and the temporary file is removed
  again whether or not extraction succeeds.
  """

  require Logger

  @doc """
  Cowboy entry point: answers the request and returns `{:ok, req, state}`.

  Only `POST` is accepted; any other method is answered with 405.
  """
  def init(req, state) do
    req =
      case :cowboy_req.method(req) do
        "POST" -> handle_file_upload(req)
        _ -> :cowboy_req.reply(405, %{}, "Method not allowed", req)
      end

    {:ok, req, state}
  end

  # Stores the upload in a fresh temporary file, extracts it and replies.
  defp handle_file_upload(req) do
    temp_path =
      Path.join(
        System.tmp_dir!(),
        "kreuzberg_#{System.unique_integer([:positive])}#{upload_extension(req)}"
      )

    # :exclusive refuses to reuse an existing path, so a leftover or planted
    # file is never overwritten.
    case File.open(temp_path, [:write, :binary, :exclusive]) do
      {:ok, device} ->
        try do
          req = write_body(req, device)
          # Flush and close before the extractor opens the file.
          File.close(device)
          extract_and_reply(temp_path, req)
        after
          # Runs on success and on any raise: never leak the temp file.
          File.close(device)
          File.rm(temp_path)
        end

      {:error, reason} ->
        Logger.error("Failed to create upload file: #{inspect(reason)}")
        json_reply(500, %{success: false, error: inspect(reason)}, req)
    end
  end

  # Streams the request body to `device` chunk by chunk; read_body/1 hands
  # back {:more, ...} until the last chunk arrives as {:ok, ...}.
  defp write_body(req, device) do
    case :cowboy_req.read_body(req) do
      {:ok, chunk, req} ->
        :ok = IO.binwrite(device, chunk)
        req

      {:more, chunk, req} ->
        :ok = IO.binwrite(device, chunk)
        write_body(req, device)
    end
  end

  defp extract_and_reply(path, req) do
    case Kreuzberg.extract_file(path) do
      {:ok, result} ->
        payload = %{
          success: true,
          content_size: byte_size(result.content),
          mime_type: result.mime_type,
          metadata: result.metadata
        }

        json_reply(200, payload, req)

      {:error, reason} ->
        json_reply(400, %{success: false, error: inspect(reason)}, req)
    end
  end

  # Keeps the extension of the client-supplied `x-file-name` header on the
  # temporary file. The header is untrusted, so only a short alphanumeric
  # extension is accepted and nothing else from the name is used.
  # NOTE(review): presumably the extractor uses the extension to detect the
  # document type - confirm against Kreuzberg.extract_file/1.
  defp upload_extension(req) do
    extension =
      "x-file-name"
      |> :cowboy_req.header(req, "")
      |> Path.basename()
      |> Path.extname()

    if extension =~ ~r/\A\.[A-Za-z0-9]{1,16}\z/, do: extension, else: ""
  end

  defp json_reply(status, payload, req) do
    :cowboy_req.reply(
      status,
      %{"content-type" => "application/json"},
      Jason.encode!(payload),
      req
    )
  end
end
|
||||
|
||||
# Usage example - start the server
|
||||
IO.puts("=== Kreuzberg MCP Server ===\n")

startup = KreuzbergMCPServer.start_server(port: 8080)

case startup do
  {:error, reason} ->
    IO.puts("Failed to start server: #{inspect(reason)}")

  {:ok, message} ->
    # Print the banner line by line, then block forever so the listener
    # stays up until the script is interrupted.
    banner = [
      message,
      "\nServer is running and ready to accept requests:",
      " - POST /extract - Extract from file path",
      " - POST /extract/file - Upload and extract",
      " - GET /health - Health check",
      "\nServer started. Press Ctrl+C to stop."
    ]

    Enum.each(banner, &IO.puts/1)
    Process.sleep(:infinity)
end
|
||||
```
|
||||
Reference in New Issue
Block a user