Files
fil/packages/java/dev/kreuzberg/ExtractedImage.java

270 lines
8.9 KiB
Java
Raw Normal View History

2026-06-01 23:40:55 +02:00
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
package dev.kreuzberg;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
import com.fasterxml.jackson.databind.annotation.JsonSerialize;
import com.fasterxml.jackson.databind.annotation.JsonPOJOBuilder;
import org.jspecify.annotations.Nullable;
/**
 * Extracted image from a document.
 *
 * <p>Contains raw image data, metadata, and optional nested OCR results.
 * Raw bytes allow cross-language compatibility - users can convert to
 * PIL.Image (Python), Sharp (Node.js), or other formats as needed.
 *
 * <p>JSON deserialization goes through {@link Builder}. The {@code data} array is stored
 * as given (no defensive copy), and because it is an array component the record's
 * generated {@code equals}/{@code hashCode} compare it by reference, not by content.
 */
@JsonInclude(JsonInclude.Include.NON_ABSENT)
@JsonDeserialize(builder = ExtractedImage.Builder.class)
public record ExtractedImage(
    /**
     * Raw image data (PNG, JPEG, WebP, etc. bytes).
     * The source model uses {@code bytes.Bytes} for cheap cloning of large buffers.
     */
    @JsonSerialize(using = ByteArrayToIntArraySerializer.class) @JsonProperty("data") byte[] data,
    /**
     * Image format (e.g., "jpeg", "png", "webp").
     * The source model uses a {@code Cow} string to avoid allocation for static literals.
     */
    @JsonProperty("format") String format,
    /**
     * Zero-indexed position of this image in the document/page.
     */
    @JsonProperty("image_index") int imageIndex,
    /**
     * Page/slide number where image was found (1-indexed).
     */
    @Nullable @JsonProperty("page_number") Integer pageNumber,
    /**
     * Image width in pixels.
     */
    @Nullable @JsonProperty("width") Integer width,
    /**
     * Image height in pixels.
     */
    @Nullable @JsonProperty("height") Integer height,
    /**
     * Colorspace information (e.g., "RGB", "CMYK", "Gray").
     */
    @Nullable @JsonProperty("colorspace") String colorspace,
    /**
     * Bits per color component (e.g., 8, 16).
     */
    @Nullable @JsonProperty("bits_per_component") Integer bitsPerComponent,
    /**
     * Whether this image is a mask image.
     */
    @Nullable @JsonProperty("is_mask") Boolean isMask,
    /**
     * Optional description of the image.
     */
    @Nullable @JsonProperty("description") String description,
    /**
     * Nested OCR extraction result (if image was OCRed).
     *
     * <p>When OCR is performed on this image, the result is embedded here
     * rather than in a separate collection, making the relationship explicit.
     */
    @Nullable @JsonProperty("ocr_result") ExtractionResult ocrResult,
    /**
     * Bounding box of the image on the page (PDF coordinates: x0=left, y0=bottom, x1=right, y1=top).
     * Only populated for PDF-extracted images when position data is available from the PDF extractor.
     */
    @Nullable @JsonProperty("bounding_box") BoundingBox boundingBox,
    /**
     * Original source path of the image within the document archive (e.g., "media/image1.png" in DOCX).
     * Used for rendering image references when the binary data is not extracted.
     */
    @Nullable @JsonProperty("source_path") String sourcePath,
    /**
     * Heuristic classification of what this image likely depicts.
     * {@code null} if classification was disabled or inconclusive.
     */
    @Nullable @JsonProperty("image_kind") ImageKind imageKind,
    /**
     * Confidence score for {@code image_kind}, in the range 0.0 to 1.0.
     */
    @Nullable @JsonProperty("kind_confidence") Float kindConfidence,
    /**
     * Identifier shared across images that form a single logical figure
     * (e.g. all raster tiles of one technical drawing). {@code null} for singletons.
     */
    @Nullable @JsonProperty("cluster_id") Integer clusterId
) {
    /**
     * Creates a builder pre-populated with defaults.
     *
     * @return a new {@link Builder}
     */
    public static Builder builder() {
        return new Builder();
    }

    // CPD-OFF
    /**
     * Mutable builder for {@link ExtractedImage}, also used by Jackson for deserialization
     * (setter prefix {@code with}, build method {@code build}).
     *
     * <p>Defaults: {@code data} is an empty array, {@code format} is the empty string,
     * {@code imageIndex} is {@code 0}, and every optional field is {@code null}.
     */
    @JsonPOJOBuilder(withPrefix = "with", buildMethodName = "build")
    public static final class Builder {
        private byte[] data = new byte[0];
        private String format = "";
        @JsonProperty("image_index")
        private int imageIndex = 0;
        @JsonProperty("page_number")
        private @Nullable Integer pageNumber = null;
        private @Nullable Integer width = null;
        private @Nullable Integer height = null;
        private @Nullable String colorspace = null;
        @JsonProperty("bits_per_component")
        private @Nullable Integer bitsPerComponent = null;
        @JsonProperty("is_mask")
        private @Nullable Boolean isMask = null;
        private @Nullable String description = null;
        @JsonProperty("ocr_result")
        private @Nullable ExtractionResult ocrResult = null;
        @JsonProperty("bounding_box")
        private @Nullable BoundingBox boundingBox = null;
        @JsonProperty("source_path")
        private @Nullable String sourcePath = null;
        @JsonProperty("image_kind")
        private @Nullable ImageKind imageKind = null;
        @JsonProperty("kind_confidence")
        private @Nullable Float kindConfidence = null;
        @JsonProperty("cluster_id")
        private @Nullable Integer clusterId = null;

        /**
         * Sets the data field.
         *
         * @param value raw image bytes; the array is stored as-is, not copied
         * @return this builder
         */
        @JsonProperty("data")
        public Builder withData(final byte[] value) {
            this.data = value;
            return this;
        }

        /**
         * Sets the format field.
         *
         * @param value image format name
         * @return this builder
         */
        @JsonProperty("format")
        public Builder withFormat(final String value) {
            this.format = value;
            return this;
        }

        /**
         * Sets the imageIndex field.
         *
         * @param value zero-indexed position of the image
         * @return this builder
         */
        @JsonProperty("image_index")
        public Builder withImageIndex(final int value) {
            this.imageIndex = value;
            return this;
        }

        /**
         * Sets the pageNumber field.
         *
         * <p>Takes the boxed type so that {@code null} (including an explicit JSON
         * {@code null}) stays {@code null} instead of being turned into {@code 0}.
         *
         * @param value page/slide number, or {@code null} if unknown
         * @return this builder
         */
        @JsonProperty("page_number")
        public Builder withPageNumber(final @Nullable Integer value) {
            this.pageNumber = value;
            return this;
        }

        /**
         * Sets the width field.
         *
         * <p>Takes the boxed type so that {@code null} (including an explicit JSON
         * {@code null}) stays {@code null} instead of being turned into {@code 0}.
         *
         * @param value width in pixels, or {@code null} if unknown
         * @return this builder
         */
        @JsonProperty("width")
        public Builder withWidth(final @Nullable Integer value) {
            this.width = value;
            return this;
        }

        /**
         * Sets the height field.
         *
         * <p>Takes the boxed type so that {@code null} (including an explicit JSON
         * {@code null}) stays {@code null} instead of being turned into {@code 0}.
         *
         * @param value height in pixels, or {@code null} if unknown
         * @return this builder
         */
        @JsonProperty("height")
        public Builder withHeight(final @Nullable Integer value) {
            this.height = value;
            return this;
        }

        /**
         * Sets the colorspace field.
         *
         * @param value colorspace name, or {@code null} if unknown
         * @return this builder
         */
        @JsonProperty("colorspace")
        public Builder withColorspace(final @Nullable String value) {
            this.colorspace = value;
            return this;
        }

        /**
         * Sets the bitsPerComponent field.
         *
         * <p>Takes the boxed type so that {@code null} (including an explicit JSON
         * {@code null}) stays {@code null} instead of being turned into {@code 0}.
         *
         * @param value bits per color component, or {@code null} if unknown
         * @return this builder
         */
        @JsonProperty("bits_per_component")
        public Builder withBitsPerComponent(final @Nullable Integer value) {
            this.bitsPerComponent = value;
            return this;
        }

        /**
         * Sets the isMask field.
         *
         * @param value whether the image is a mask, or {@code null} if unknown
         * @return this builder
         */
        @JsonProperty("is_mask")
        public Builder withIsMask(final @Nullable Boolean value) {
            this.isMask = value;
            return this;
        }

        /**
         * Sets the description field.
         *
         * @param value description of the image, or {@code null} for none
         * @return this builder
         */
        @JsonProperty("description")
        public Builder withDescription(final @Nullable String value) {
            this.description = value;
            return this;
        }

        /**
         * Sets the ocrResult field.
         *
         * @param value nested OCR result, or {@code null} for none
         * @return this builder
         */
        @JsonProperty("ocr_result")
        public Builder withOcrResult(final @Nullable ExtractionResult value) {
            this.ocrResult = value;
            return this;
        }

        /**
         * Sets the boundingBox field.
         *
         * @param value bounding box on the page, or {@code null} for none
         * @return this builder
         */
        @JsonProperty("bounding_box")
        public Builder withBoundingBox(final @Nullable BoundingBox value) {
            this.boundingBox = value;
            return this;
        }

        /**
         * Sets the sourcePath field.
         *
         * @param value source path within the document archive, or {@code null} for none
         * @return this builder
         */
        @JsonProperty("source_path")
        public Builder withSourcePath(final @Nullable String value) {
            this.sourcePath = value;
            return this;
        }

        /**
         * Sets the imageKind field.
         *
         * @param value image classification, or {@code null} for none
         * @return this builder
         */
        @JsonProperty("image_kind")
        public Builder withImageKind(final @Nullable ImageKind value) {
            this.imageKind = value;
            return this;
        }

        /**
         * Sets the kindConfidence field.
         *
         * @param value confidence score for the image kind, or {@code null} for none
         * @return this builder
         */
        @JsonProperty("kind_confidence")
        public Builder withKindConfidence(final @Nullable Float value) {
            this.kindConfidence = value;
            return this;
        }

        /**
         * Sets the clusterId field.
         *
         * @param value cluster identifier, or {@code null} for none
         * @return this builder
         */
        @JsonProperty("cluster_id")
        public Builder withClusterId(final @Nullable Integer value) {
            this.clusterId = value;
            return this;
        }

        /**
         * Builds the ExtractedImage instance from the current builder state.
         *
         * @return a new {@link ExtractedImage}
         */
        public ExtractedImage build() {
            return new ExtractedImage(
                data,
                format,
                imageIndex,
                pageNumber,
                width,
                height,
                colorspace,
                bitsPerComponent,
                isMask,
                description,
                ocrResult,
                boundingBox,
                sourcePath,
                imageKind,
                kindConfidence,
                clusterId
            );
        }
    }
    // CPD-ON
}