Files
fil/docs/snippets/elixir/advanced/chunk_page_mapping.exs

14 lines
442 B
Elixir
Raw Permalink Normal View History

2026-06-01 23:40:55 +02:00
# Example: extract a PDF with chunking enabled, then report which page each
# chunk came from.

# Chunking options: enabled, with at most 500 characters per chunk.
chunking_opts = %{"enabled" => true, "max_characters" => 500}

# NOTE(review): `track_page_boundaries` is assumed to be a field of
# Kreuzberg.ExtractionConfig; struct literals fail at compile time on
# unknown keys -- confirm against the installed Kreuzberg version.
config = %Kreuzberg.ExtractionConfig{
  chunking: chunking_opts,
  track_page_boundaries: true
}

# Any return value other than {:ok, _} raises MatchError here.
{:ok, result} = Kreuzberg.extract_file("document.pdf", nil, config)

# Fall back to an empty list when the result carries no chunks.
chunks = result.chunks || []

# Walk the chunks with a 1-based counter and print the page for each one.
chunks
|> Enum.with_index(1)
|> Enum.each(fn {chunk, idx} ->
  # NOTE(review): assumes each chunk supports string-key Access and exposes
  # its page under "page" -- TODO confirm the chunk shape.
  page =
    case chunk["page"] do
      missing when missing in [nil, false] -> "unknown"
      found -> found
    end

  IO.puts("Chunk #{idx} from page #{page}")
end)