// This file is auto-generated by alef — DO NOT EDIT. // alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75 // To regenerate: alef generate // To verify freshness: alef verify --exit-code // Issues & docs: https://github.com/kreuzberg-dev/alef package dev.kreuzberg; import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.annotation.JsonInclude; import com.fasterxml.jackson.databind.annotation.JsonDeserialize; import com.fasterxml.jackson.databind.annotation.JsonSerialize; import com.fasterxml.jackson.databind.annotation.JsonPOJOBuilder; import org.jspecify.annotations.Nullable; /** * Extracted image from a document. * * Contains raw image data, metadata, and optional nested OCR results. * Raw bytes allow cross-language compatibility - users can convert to * PIL.Image (Python), Sharp (Node.js), or other formats as needed. */ @JsonInclude(JsonInclude.Include.NON_ABSENT) @JsonDeserialize(builder = ExtractedImage.Builder.class) public record ExtractedImage( /** * Raw image data (PNG, JPEG, WebP, etc. bytes). * Uses {@code bytes.Bytes} for cheap cloning of large buffers. */ @JsonSerialize(using = ByteArrayToIntArraySerializer.class) @JsonProperty("data") byte[] data, /** * Image format (e.g., "jpeg", "png", "webp") * Uses Cow<, str> to avoid allocation for static literals. */ @JsonProperty("format") String format, /** * Zero-indexed position of this image in the document/page */ @JsonProperty("image_index") int imageIndex, /** * Page/slide number where image was found (1-indexed) */ @Nullable @JsonProperty("page_number") Integer pageNumber, /** * Image width in pixels */ @Nullable @JsonProperty("width") Integer width, /** * Image height in pixels */ @Nullable @JsonProperty("height") Integer height, /** * Colorspace information (e.g., "RGB", "CMYK", "Gray") */ @Nullable @JsonProperty("colorspace") String colorspace, /** * Bits per color component (e.g., 8, 16) */ @Nullable @JsonProperty("bits_per_component") Integer bitsPerComponent, /** * Whether this image is a mask image */ @Nullable @JsonProperty("is_mask") Boolean isMask, /** * Optional description of the image */ @Nullable @JsonProperty("description") String description, /** * Nested OCR extraction result (if image was OCRed) * * When OCR is performed on this image, the result is embedded here * rather than in a separate collection, making the relationship explicit. */ @Nullable @JsonProperty("ocr_result") ExtractionResult ocrResult, /** * Bounding box of the image on the page (PDF coordinates: x0=left, y0=bottom, x1=right, y1=top). * Only populated for PDF-extracted images when position data is available from the PDF extractor. */ @Nullable @JsonProperty("bounding_box") BoundingBox boundingBox, /** * Original source path of the image within the document archive (e.g., "media/image1.png" in DOCX). * Used for rendering image references when the binary data is not extracted. */ @Nullable @JsonProperty("source_path") String sourcePath, /** * Heuristic classification of what this image likely depicts. * {@code None} if classification was disabled or inconclusive. */ @Nullable @JsonProperty("image_kind") ImageKind imageKind, /** * Confidence score for {@code image_kind}, in the range 0.0 to 1.0. */ @Nullable @JsonProperty("kind_confidence") Float kindConfidence, /** * Identifier shared across images that form a single logical figure * (e.g. all raster tiles of one technical drawing). {@code None} for singletons. */ @Nullable @JsonProperty("cluster_id") Integer clusterId ) { public static Builder builder() { return new Builder(); } // CPD-OFF @JsonPOJOBuilder(withPrefix = "with", buildMethodName = "build") public static final class Builder { private byte[] data = new byte[0]; private String format = ""; @JsonProperty("image_index") private int imageIndex = 0; @JsonProperty("page_number") private Integer pageNumber = null; private Integer width = null; private Integer height = null; private String colorspace = null; @JsonProperty("bits_per_component") private Integer bitsPerComponent = null; @JsonProperty("is_mask") private Boolean isMask = null; private String description = null; @JsonProperty("ocr_result") private ExtractionResult ocrResult = null; @JsonProperty("bounding_box") @Nullable private BoundingBox boundingBox = null; @JsonProperty("source_path") private String sourcePath = null; @JsonProperty("image_kind") @Nullable private ImageKind imageKind = null; @JsonProperty("kind_confidence") private Float kindConfidence = null; @JsonProperty("cluster_id") private Integer clusterId = null; /** Sets the data field. */ @JsonProperty("data") public Builder withData(final byte[] value) { this.data = value; return this; } /** Sets the format field. */ @JsonProperty("format") public Builder withFormat(final String value) { this.format = value; return this; } /** Sets the imageIndex field. */ @JsonProperty("image_index") public Builder withImageIndex(final int value) { this.imageIndex = value; return this; } /** Sets the pageNumber field. */ @JsonProperty("page_number") public Builder withPageNumber(final @Nullable int value) { this.pageNumber = value; return this; } /** Sets the width field. */ @JsonProperty("width") public Builder withWidth(final @Nullable int value) { this.width = value; return this; } /** Sets the height field. */ @JsonProperty("height") public Builder withHeight(final @Nullable int value) { this.height = value; return this; } /** Sets the colorspace field. */ @JsonProperty("colorspace") public Builder withColorspace(final @Nullable String value) { this.colorspace = value; return this; } /** Sets the bitsPerComponent field. */ @JsonProperty("bits_per_component") public Builder withBitsPerComponent(final @Nullable int value) { this.bitsPerComponent = value; return this; } /** Sets the isMask field. */ @JsonProperty("is_mask") public Builder withIsMask(final @Nullable Boolean value) { this.isMask = value; return this; } /** Sets the description field. */ @JsonProperty("description") public Builder withDescription(final @Nullable String value) { this.description = value; return this; } /** Sets the ocrResult field. */ @JsonProperty("ocr_result") public Builder withOcrResult(final @Nullable ExtractionResult value) { this.ocrResult = value; return this; } /** Sets the boundingBox field. */ @JsonProperty("bounding_box") public Builder withBoundingBox(final @Nullable BoundingBox value) { this.boundingBox = value; return this; } /** Sets the sourcePath field. */ @JsonProperty("source_path") public Builder withSourcePath(final @Nullable String value) { this.sourcePath = value; return this; } /** Sets the imageKind field. */ @JsonProperty("image_kind") public Builder withImageKind(final @Nullable ImageKind value) { this.imageKind = value; return this; } /** Sets the kindConfidence field. */ @JsonProperty("kind_confidence") public Builder withKindConfidence(final @Nullable Float value) { this.kindConfidence = value; return this; } /** Sets the clusterId field. */ @JsonProperty("cluster_id") public Builder withClusterId(final @Nullable Integer value) { this.clusterId = value; return this; } /** Builds the ExtractedImage instance. */ public ExtractedImage build() { return new ExtractedImage( data, format, imageIndex, pageNumber, width, height, colorspace, bitsPerComponent, isMask, description, ocrResult, boundingBox, sourcePath, imageKind, kindConfidence, clusterId ); } } // CPD-ON }