// This file is auto-generated by alef — DO NOT EDIT. // alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75 // To regenerate: alef generate // To verify freshness: alef verify --exit-code // Issues & docs: https://github.com/kreuzberg-dev/alef package dev.kreuzberg; import java.util.List; import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.annotation.JsonInclude; import com.fasterxml.jackson.databind.annotation.JsonDeserialize; import com.fasterxml.jackson.databind.annotation.JsonPOJOBuilder; import org.jspecify.annotations.Nullable; /** * Content for a single page/slide. * * When page extraction is enabled, documents are split into per-page content * with associated tables and images mapped to each page. * * # Performance * * Uses Arc-wrapped tables and images for memory efficiency: * - {@code Vec<Arc<Table>>} enables zero-copy sharing of table data * - {@code Vec<Arc<ExtractedImage>>} enables zero-copy sharing of image data * - Maintains exact JSON compatibility via custom Serialize/Deserialize * * This reduces memory overhead for documents with shared tables/images * by avoiding redundant copies during serialization. */ @JsonInclude(JsonInclude.Include.NON_ABSENT) @JsonDeserialize(builder = PageContent.Builder.class) public record PageContent( /** * Page number (1-indexed) */ @JsonProperty("page_number") int pageNumber, /** * Text content for this page */ @JsonProperty("content") String content, /** * Tables found on this page (uses Arc for memory efficiency) * * Serializes as Table[] for JSON compatibility while maintaining * Arc semantics in-memory for zero-copy sharing. */ @Nullable @JsonProperty("tables") List tables, /** * Indices into {@code ExtractionResult.images} for images found on this page. * * Each value is a zero-based index into the top-level {@code images} collection. * Only populated when {@code extract_images = true} in the extraction config. */ @Nullable @JsonProperty("image_indices") List imageIndices, /** * Hierarchy information for the page (when hierarchy extraction is enabled) * * Contains text hierarchy levels (H1-H6) extracted from the page content. */ @Nullable @JsonProperty("hierarchy") PageHierarchy hierarchy, /** * Whether this page is blank (no meaningful text content) * * Determined during extraction based on text content analysis. * A page is blank if it has fewer than 3 non-whitespace characters * and contains no tables or images. */ @Nullable @JsonProperty("is_blank") Boolean isBlank, /** * Layout detection regions for this page (when layout detection is enabled). * * Contains detected layout regions with class, confidence, bounding box, * and area fraction. Only populated when layout detection is configured. */ @Nullable @JsonProperty("layout_regions") List layoutRegions, /** * Speaker notes for this slide (PPTX only). * * Contains the text from the slide's notes pane ({@code ppt/notesSlides/notesSlide{N}.xml}). * Only populated when the source is a PPTX file and notes are present. */ @Nullable @JsonProperty("speaker_notes") String speakerNotes, /** * Section name this slide belongs to (PPTX only). * * PowerPoint sections group slides into logical chapters ({@code <p:sectionLst>} in * {@code ppt/presentation.xml}). Only populated when the source is a PPTX file and * the slide belongs to a named section. */ @Nullable @JsonProperty("section_name") String sectionName, /** * Sheet name for this page (XLSX/ODS only). * * Each spreadsheet sheet maps to one {@code PageContent} entry. This field carries the * sheet's display name as it appears in the workbook. {@code None} for all non-spreadsheet * formats and for sheets with an empty name. */ @Nullable @JsonProperty("sheet_name") String sheetName ) { public static Builder builder() { return new Builder(); } // CPD-OFF @JsonPOJOBuilder(withPrefix = "with", buildMethodName = "build") public static final class Builder { @JsonProperty("page_number") private int pageNumber = 0; private String content = ""; private List
tables = null; @JsonProperty("image_indices") private List imageIndices = null; private PageHierarchy hierarchy = null; @JsonProperty("is_blank") private Boolean isBlank = null; @JsonProperty("layout_regions") private List layoutRegions = null; @JsonProperty("speaker_notes") private String speakerNotes = null; @JsonProperty("section_name") private String sectionName = null; @JsonProperty("sheet_name") private String sheetName = null; /** Sets the pageNumber field. */ @JsonProperty("page_number") public Builder withPageNumber(final int value) { this.pageNumber = value; return this; } /** Sets the content field. */ @JsonProperty("content") public Builder withContent(final String value) { this.content = value; return this; } /** Sets the tables field. */ @JsonProperty("tables") public Builder withTables(final @Nullable List
value) { this.tables = value; return this; } /** Sets the imageIndices field. */ @JsonProperty("image_indices") public Builder withImageIndices(final @Nullable List value) { this.imageIndices = value; return this; } /** Sets the hierarchy field. */ @JsonProperty("hierarchy") public Builder withHierarchy(final @Nullable PageHierarchy value) { this.hierarchy = value; return this; } /** Sets the isBlank field. */ @JsonProperty("is_blank") public Builder withIsBlank(final @Nullable boolean value) { this.isBlank = value; return this; } /** Sets the layoutRegions field. */ @JsonProperty("layout_regions") public Builder withLayoutRegions(final @Nullable List value) { this.layoutRegions = value; return this; } /** Sets the speakerNotes field. */ @JsonProperty("speaker_notes") public Builder withSpeakerNotes(final @Nullable String value) { this.speakerNotes = value; return this; } /** Sets the sectionName field. */ @JsonProperty("section_name") public Builder withSectionName(final @Nullable String value) { this.sectionName = value; return this; } /** Sets the sheetName field. */ @JsonProperty("sheet_name") public Builder withSheetName(final @Nullable String value) { this.sheetName = value; return this; } /** Builds the PageContent instance. */ public PageContent build() { return new PageContent( pageNumber, content, tables, imageIndices, hierarchy, isBlank, layoutRegions, speakerNotes, sectionName, sheetName ); } } // CPD-ON }