This commit is contained in:
13
docs/snippets/elixir/advanced/chunk_page_mapping.exs
Normal file
13
docs/snippets/elixir/advanced/chunk_page_mapping.exs
Normal file
@@ -0,0 +1,13 @@
|
||||
# Example script: extract a PDF with chunking enabled and page-boundary
# tracking requested, then print which page each chunk is attributed to.

# Chunking options: enabled, with a 500-character cap per chunk.
chunking_opts = %{"enabled" => true, "max_characters" => 500}

config = %Kreuzberg.ExtractionConfig{
  chunking: chunking_opts,
  track_page_boundaries: true
}

# Strict match on the :ok tuple: any other return value raises a MatchError,
# which stops the script at this line.
{:ok, result} = Kreuzberg.extract_file("document.pdf", nil, config)

# Walk the chunks with 1-based numbering. A nil `result.chunks` is treated as
# an empty list, so nothing is printed in that case.
# NOTE(review): `chunk["page"]` assumes each chunk is a string-keyed map —
# confirm against the library's chunk representation.
(result.chunks || [])
|> Enum.with_index(1)
|> Enum.each(fn {chunk, position} ->
  # Fall back to a placeholder when the chunk carries no page value.
  source_page = chunk["page"] || "unknown"
  IO.puts("Chunk #{position} from page #{source_page}")
end)
|
||||
Reference in New Issue
Block a user