49 lines
1.6 KiB
Elixir
49 lines
1.6 KiB
Elixir
|
|
# This file is auto-generated by alef — DO NOT EDIT.
|
||
|
|
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||
|
|
# To regenerate: alef generate
|
||
|
|
# To verify freshness: alef verify --exit-code
|
||
|
|
# Issues & docs: https://github.com/kreuzberg-dev/alef
|
||
|
|
defmodule Kreuzberg.PageContent do
  @moduledoc """
  Content for a single page/slide.

  When page extraction is enabled, documents are split into per-page content
  with associated tables and images mapped to each page.

  ## Performance

  Uses Arc-wrapped tables and images for memory efficiency:
  - `Vec<Arc<Table>>` enables zero-copy sharing of table data
  - `Vec<Arc<ExtractedImage>>` enables zero-copy sharing of image data
  - Maintains exact JSON compatibility via custom Serialize/Deserialize

  This reduces memory overhead for documents with shared tables/images
  by avoiding redundant copies during serialization.
  """

  @typedoc "Content for a single page/slide."
  @type t :: %__MODULE__{
          page_number: non_neg_integer(),
          content: String.t() | nil,
          tables: [map()],
          image_indices: [non_neg_integer()],
          hierarchy: map() | nil,
          is_blank: boolean() | nil,
          layout_regions: [map()] | nil,
          speaker_notes: String.t() | nil,
          section_name: String.t() | nil,
          sheet_name: String.t() | nil
        }

  # Only `page_number`, `tables` and `image_indices` carry non-nil defaults
  # (0 and empty lists); every other field starts out as nil. Field order is
  # kept as-is because struct inspection follows definition order.
  defstruct page_number: 0,
            content: nil,
            tables: [],
            image_indices: [],
            hierarchy: nil,
            is_blank: nil,
            layout_regions: nil,
            speaker_notes: nil,
            section_name: nil,
            sheet_name: nil
end
|