Files
fil/docs/snippets/elixir/advanced/vector_database_integration.md
Henrik Jess Nielsen b4c07d3693
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s
Nomad changes
2026-06-01 23:40:55 +02:00

1.1 KiB

config_json = Jason.encode!(%{
  "chunking" => %{
    "enabled" => true,
    "max_characters" => 512,
    "overlap" => 50
  },
  "embeddings" => %{
    "enabled" => true,
    "model" => "all-MiniLM-L6-v2"
  }
})

{:ok, result} = Kreuzberg.extract_file_sync("document.pdf", "application/pdf", config_json)

# Prepare data for vector database storage
vector_db_records = result.chunks
  |> Enum.with_index(1)
  |> Enum.map(fn {chunk, idx} ->
    %{
      "id" => "#{result.document_id}-chunk-#{idx}",
      "vector" => chunk["embedding"],
      "metadata" => %{
        "content" => chunk["content"],
        "page" => chunk["page"],
        "document_id" => result.document_id,
        "chunk_index" => idx
      }
    }
  end)

IO.puts("Generated #{length(vector_db_records)} records for vector database")
IO.inspect(List.first(vector_db_records), label: "Sample Record")

# Example: Insert into Pinecone-like vector database
Enum.each(vector_db_records, fn record ->
  # vector_db_client.upsert(record)
  IO.puts("Would insert: #{record["id"]}")
end)