163 lines
5.3 KiB
Java
Generated
163 lines
5.3 KiB
Java
Generated
// This file is auto-generated by alef — DO NOT EDIT.
|
|
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
|
// To regenerate: alef generate
|
|
// To verify freshness: alef verify --exit-code
|
|
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
|
package dev.kreuzberg;
|
|
|
|
import java.util.List;
|
|
import com.fasterxml.jackson.annotation.JsonProperty;
|
|
import com.fasterxml.jackson.annotation.JsonInclude;
|
|
import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
|
|
import com.fasterxml.jackson.databind.annotation.JsonPOJOBuilder;
|
|
import org.jspecify.annotations.Nullable;
|
|
|
|
/**
|
|
* Metadata for individual page/slide/sheet.
|
|
*
|
|
* Captures per-page information including dimensions, content counts,
|
|
* and visibility state (for presentations).
|
|
*/
|
|
@JsonInclude(JsonInclude.Include.NON_ABSENT)
|
|
@JsonDeserialize(builder = PageInfo.Builder.class)
|
|
public record PageInfo(
|
|
/**
|
|
* Page number (1-indexed)
|
|
*/
|
|
@JsonProperty("number") int number,
|
|
/**
|
|
* Page title (usually for presentations)
|
|
*/
|
|
@Nullable @JsonProperty("title") String title,
|
|
/**
|
|
* Dimensions in points (PDF) or pixels (images): (width, height)
|
|
*/
|
|
@Nullable @JsonProperty("dimensions") List<Double> dimensions,
|
|
/**
|
|
* Number of images on this page
|
|
*/
|
|
@Nullable @JsonProperty("image_count") Integer imageCount,
|
|
/**
|
|
* Number of tables on this page
|
|
*/
|
|
@Nullable @JsonProperty("table_count") Integer tableCount,
|
|
/**
|
|
* Whether this page is hidden (e.g., in presentations)
|
|
*/
|
|
@Nullable @JsonProperty("hidden") Boolean hidden,
|
|
/**
|
|
* Whether this page is blank (no meaningful text, no images, no tables)
|
|
*
|
|
* A page is considered blank if it has fewer than 3 non-whitespace characters
|
|
* and contains no tables or images. This is useful for filtering out empty pages
|
|
* in scanned documents or PDFs with blank separator pages.
|
|
*/
|
|
@Nullable @JsonProperty("is_blank") Boolean isBlank,
|
|
/**
|
|
* Whether this page contains non-trivial vector graphics (paths, shapes, curves)
|
|
*
|
|
* Indicates the presence of vector-drawn content such as charts, diagrams,
|
|
* or geometric shapes (e.g., from Adobe InDesign, LaTeX TikZ). These are
|
|
* invisible to {@code ExtractionResult.images} since they are not embedded as raster
|
|
* XObjects. Set to {@code true} when path count exceeds a heuristic threshold,
|
|
* signaling that downstream consumers may want to rasterize the page to
|
|
* capture this content.
|
|
*
|
|
* Only populated for PDFs; {@code None} for other document types.
|
|
*/
|
|
@Nullable @JsonProperty("has_vector_graphics") Boolean hasVectorGraphics
|
|
) {
|
|
public static Builder builder() {
|
|
return new Builder();
|
|
}
|
|
|
|
// CPD-OFF
|
|
@JsonPOJOBuilder(withPrefix = "with", buildMethodName = "build")
|
|
public static final class Builder {
|
|
|
|
private int number = 0;
|
|
private String title = null;
|
|
private List<Double> dimensions = null;
|
|
@JsonProperty("image_count")
|
|
private Integer imageCount = null;
|
|
@JsonProperty("table_count")
|
|
private Integer tableCount = null;
|
|
private Boolean hidden = null;
|
|
@JsonProperty("is_blank")
|
|
private Boolean isBlank = null;
|
|
@JsonProperty("has_vector_graphics")
|
|
private Boolean hasVectorGraphics = null;
|
|
|
|
/** Sets the number field. */
|
|
@JsonProperty("number")
|
|
public Builder withNumber(final int value) {
|
|
this.number = value;
|
|
return this;
|
|
}
|
|
|
|
/** Sets the title field. */
|
|
@JsonProperty("title")
|
|
public Builder withTitle(final @Nullable String value) {
|
|
this.title = value;
|
|
return this;
|
|
}
|
|
|
|
/** Sets the dimensions field. */
|
|
@JsonProperty("dimensions")
|
|
public Builder withDimensions(final @Nullable List<Double> value) {
|
|
this.dimensions = value;
|
|
return this;
|
|
}
|
|
|
|
/** Sets the imageCount field. */
|
|
@JsonProperty("image_count")
|
|
public Builder withImageCount(final @Nullable int value) {
|
|
this.imageCount = value;
|
|
return this;
|
|
}
|
|
|
|
/** Sets the tableCount field. */
|
|
@JsonProperty("table_count")
|
|
public Builder withTableCount(final @Nullable int value) {
|
|
this.tableCount = value;
|
|
return this;
|
|
}
|
|
|
|
/** Sets the hidden field. */
|
|
@JsonProperty("hidden")
|
|
public Builder withHidden(final @Nullable boolean value) {
|
|
this.hidden = value;
|
|
return this;
|
|
}
|
|
|
|
/** Sets the isBlank field. */
|
|
@JsonProperty("is_blank")
|
|
public Builder withIsBlank(final @Nullable boolean value) {
|
|
this.isBlank = value;
|
|
return this;
|
|
}
|
|
|
|
/** Sets the hasVectorGraphics field. */
|
|
@JsonProperty("has_vector_graphics")
|
|
public Builder withHasVectorGraphics(final @Nullable Boolean value) {
|
|
this.hasVectorGraphics = value;
|
|
return this;
|
|
}
|
|
|
|
/** Builds the PageInfo instance. */
|
|
public PageInfo build() {
|
|
return new PageInfo(
|
|
number,
|
|
title,
|
|
dimensions,
|
|
imageCount,
|
|
tableCount,
|
|
hidden,
|
|
isBlank,
|
|
hasVectorGraphics
|
|
);
|
|
}
|
|
}
|
|
// CPD-ON
|
|
}
|