Files
fil/docs/snippets/elixir/metadata/vector_database_integration.md
Henrik Jess Nielsen b4c07d3693
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s
Nomad changes
2026-06-01 23:40:55 +02:00

1.8 KiB

defmodule VectorRecord do
  @moduledoc """
  One embedded text chunk, shaped for insertion into a vector database.

  Fields (all default to `nil`):

    * `:id` - identifier of the record
    * `:content` - text of the chunk
    * `:embedding` - embedding vector of the chunk
    * `:metadata` - extra key/value data stored alongside the vector
  """

  defstruct id: nil, content: nil, embedding: nil, metadata: nil
end

defmodule VectorIntegration do
  @moduledoc """
  Converts a document into `VectorRecord` structs for a vector database.

  The document is extracted with `Kreuzberg.extract_file_sync/3` using a
  configuration that enables chunking and per-chunk embeddings. Each returned
  chunk that carries an embedding becomes one `VectorRecord`.
  """

  # Extraction configuration; it is JSON-encoded before being handed to Kreuzberg.
  @extraction_config %{
    "chunking" => %{
      "max_characters" => 512,
      "overlap" => 50,
      "embedding" => %{
        "model" => %{"preset" => %{"name" => "balanced"}},
        "normalize" => true,
        "batch_size" => 32
      }
    }
  }

  @doc """
  Extracts `document_path` and builds one `VectorRecord` per embedded chunk.

  ## Parameters

    * `document_path` - passed unchanged to `Kreuzberg.extract_file_sync/3`
    * `document_id` - interpolated into every record id
      (`"<document_id>_chunk_<index>"`) and stored under the `"document_id"`
      metadata key

  ## Returns

    * `{:ok, records}` - a list of `VectorRecord` structs, possibly empty.
      Chunks lacking a list `"embedding"` or a binary `"content"` are skipped.
      `"chunk_index"` is the chunk's position in the extraction result, so
      skipped chunks leave gaps in the numbering.
    * `{:error, "No chunks in extraction result"}` - the decoded result has no
      `"chunks"` list
    * `{:error, reason}` - the extraction failed, or the extraction result is
      not valid JSON (in which case `reason` is the decode error returned by
      `Jason.decode/1`)
  """
  @spec extract_and_vectorize(String.t(), String.Chars.t()) ::
          {:ok, [%VectorRecord{}]} | {:error, term()}
  def extract_and_vectorize(document_path, document_id) do
    config = Jason.encode!(@extraction_config)

    # Any non-`{:ok, _}` value from either step is returned to the caller as-is,
    # so a malformed JSON payload yields an error tuple instead of raising.
    with {:ok, result} <- Kreuzberg.extract_file_sync(document_path, nil, config),
         {:ok, decoded} <- Jason.decode(result) do
      build_records(decoded, document_id)
    end
  end

  # Builds the record list from a decoded extraction result.
  defp build_records(%{"chunks" => chunks}, document_id) when is_list(chunks) do
    # The generator pattern and the filter drop chunks that cannot be turned
    # into a record; indexing happens first so indices refer to the full list.
    records =
      for {%{"embedding" => embedding, "content" => content}, index} <-
            Enum.with_index(chunks),
          is_list(embedding) and is_binary(content) do
        build_record(document_id, index, content, embedding)
      end

    {:ok, records}
  end

  defp build_records(_decoded, _document_id) do
    {:error, "No chunks in extraction result"}
  end

  # Assembles a single record; metadata values are stored as strings.
  defp build_record(document_id, index, content, embedding) do
    %VectorRecord{
      id: "#{document_id}_chunk_#{index}",
      content: content,
      embedding: embedding,
      metadata: %{
        "document_id" => document_id,
        "chunk_index" => Integer.to_string(index),
        "content_length" => Integer.to_string(String.length(content))
      }
    }
  end
end