// This file is auto-generated by alef — DO NOT EDIT. // alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75 // To regenerate: alef generate // To verify freshness: alef verify --exit-code // Issues & docs: https://github.com/kreuzberg-dev/alef package dev.kreuzberg; import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.annotation.JsonInclude; import com.fasterxml.jackson.databind.annotation.JsonDeserialize; import com.fasterxml.jackson.databind.annotation.JsonPOJOBuilder; import org.jspecify.annotations.Nullable; /** * Image extraction configuration. */ @JsonInclude(JsonInclude.Include.NON_ABSENT) @JsonDeserialize(builder = ImageExtractionConfig.Builder.class) public record ImageExtractionConfig( /** * Extract images from documents */ @Nullable @JsonProperty("extract_images") Boolean extractImages, /** * Target DPI for image normalization */ @Nullable @JsonProperty("target_dpi") Integer targetDpi, /** * Maximum dimension for images (width or height) */ @Nullable @JsonProperty("max_image_dimension") Integer maxImageDimension, /** * Whether to inject image reference placeholders into markdown output. * When {@code true} (default), image references like {@code ![Image 1](embedded:p1_i0)} * are appended to the markdown. Set to {@code false} to extract images as data * without polluting the markdown output. */ @Nullable @JsonProperty("inject_placeholders") Boolean injectPlaceholders, /** * Automatically adjust DPI based on image content */ @Nullable @JsonProperty("auto_adjust_dpi") Boolean autoAdjustDpi, /** * Minimum DPI threshold */ @Nullable @JsonProperty("min_dpi") Integer minDpi, /** * Maximum DPI threshold */ @Nullable @JsonProperty("max_dpi") Integer maxDpi, /** * Maximum number of image objects to extract per PDF page. * * Some PDFs (e.g. technical diagrams stored as thousands of raster fragments) * can trigger extremely long or indefinite extraction times when every image * object on a dense page is decoded individually via the PDF extractor. Setting this * limit causes kreuzberg to stop collecting individual images once the count * per page reaches the cap and emit a warning instead. * * {@code None} (default) means no limit — all images are extracted. */ @Nullable @JsonProperty("max_images_per_page") Integer maxImagesPerPage, /** * When {@code true} (default), extracted images are classified by kind and grouped * into clusters where they appear to belong to one figure. */ @Nullable @JsonProperty("classify") Boolean classify, /** * When {@code true}, full-page renders produced during OCR preprocessing are captured * and returned as {@code ImageKind.PageRaster} entries in {@code ExtractionResult.images}. * * **PDF + OCR only.** No rasters are captured for non-PDF inputs or when the * document-level OCR bypass is active (whole-document backend). When OCR is * enabled and this flag is set but the active backend skips per-page rendering, * a {@code ProcessingWarning} is emitted in {@code ExtractionResult.processing_warnings}. * * Defaults to {@code false}. Enable when downstream consumers need page thumbnails * (e.g. citation previews, visual grounding). */ @Nullable @JsonProperty("include_page_rasters") Boolean includePageRasters, /** * Run OCR on extracted images and include the recognized text in the document content. * * When {@code true} (default) and {@code ExtractionConfig.ocr} is configured, extracted images * are processed with the configured OCR backend. Set to {@code false} to extract images * without OCR processing, even when OCR is enabled. */ @Nullable @JsonProperty("run_ocr_on_images") Boolean runOcrOnImages, /** * When {@code true}, image OCR results are rendered as plain text without the * {@code ![...](...)} markdown placeholder. Only takes effect when {@code run_ocr_on_images} * is also {@code true}. */ @Nullable @JsonProperty("ocr_text_only") Boolean ocrTextOnly, /** * When {@code true} and {@code ocr_text_only} is {@code false}, append the OCR text after * the image placeholder in the rendered output. */ @Nullable @JsonProperty("append_ocr_text") Boolean appendOcrText ) { public static Builder builder() { return new Builder(); } public ImageExtractionConfig{ if (targetDpi == null) targetDpi = 300; if (maxImageDimension == null) maxImageDimension = 4096; if (minDpi == null) minDpi = 72; if (maxDpi == null) maxDpi = 600; } // CPD-OFF @JsonPOJOBuilder(withPrefix = "with", buildMethodName = "build") public static final class Builder { @JsonProperty("extract_images") private Boolean extractImages = null; @JsonProperty("target_dpi") private Integer targetDpi = null; @JsonProperty("max_image_dimension") private Integer maxImageDimension = null; @JsonProperty("inject_placeholders") private Boolean injectPlaceholders = null; @JsonProperty("auto_adjust_dpi") private Boolean autoAdjustDpi = null; @JsonProperty("min_dpi") private Integer minDpi = null; @JsonProperty("max_dpi") private Integer maxDpi = null; @JsonProperty("max_images_per_page") private Integer maxImagesPerPage = null; private Boolean classify = null; @JsonProperty("include_page_rasters") private Boolean includePageRasters = null; @JsonProperty("run_ocr_on_images") private Boolean runOcrOnImages = null; @JsonProperty("ocr_text_only") private Boolean ocrTextOnly = null; @JsonProperty("append_ocr_text") private Boolean appendOcrText = null; /** Sets the extractImages field. */ @JsonProperty("extract_images") public Builder withExtractImages(final @Nullable Boolean value) { this.extractImages = value; return this; } /** Sets the targetDpi field. */ @JsonProperty("target_dpi") public Builder withTargetDpi(final @Nullable Integer value) { this.targetDpi = value; return this; } /** Sets the maxImageDimension field. */ @JsonProperty("max_image_dimension") public Builder withMaxImageDimension(final @Nullable Integer value) { this.maxImageDimension = value; return this; } /** Sets the injectPlaceholders field. */ @JsonProperty("inject_placeholders") public Builder withInjectPlaceholders(final @Nullable Boolean value) { this.injectPlaceholders = value; return this; } /** Sets the autoAdjustDpi field. */ @JsonProperty("auto_adjust_dpi") public Builder withAutoAdjustDpi(final @Nullable Boolean value) { this.autoAdjustDpi = value; return this; } /** Sets the minDpi field. */ @JsonProperty("min_dpi") public Builder withMinDpi(final @Nullable Integer value) { this.minDpi = value; return this; } /** Sets the maxDpi field. */ @JsonProperty("max_dpi") public Builder withMaxDpi(final @Nullable Integer value) { this.maxDpi = value; return this; } /** Sets the maxImagesPerPage field. */ @JsonProperty("max_images_per_page") public Builder withMaxImagesPerPage(final @Nullable Integer value) { this.maxImagesPerPage = value; return this; } /** Sets the classify field. */ @JsonProperty("classify") public Builder withClassify(final @Nullable Boolean value) { this.classify = value; return this; } /** Sets the includePageRasters field. */ @JsonProperty("include_page_rasters") public Builder withIncludePageRasters(final @Nullable Boolean value) { this.includePageRasters = value; return this; } /** Sets the runOcrOnImages field. */ @JsonProperty("run_ocr_on_images") public Builder withRunOcrOnImages(final @Nullable Boolean value) { this.runOcrOnImages = value; return this; } /** Sets the ocrTextOnly field. */ @JsonProperty("ocr_text_only") public Builder withOcrTextOnly(final @Nullable Boolean value) { this.ocrTextOnly = value; return this; } /** Sets the appendOcrText field. */ @JsonProperty("append_ocr_text") public Builder withAppendOcrText(final @Nullable Boolean value) { this.appendOcrText = value; return this; } /** Builds the ImageExtractionConfig instance. */ public ImageExtractionConfig build() { return new ImageExtractionConfig( extractImages, targetDpi, maxImageDimension, injectPlaceholders, autoAdjustDpi, minDpi, maxDpi, maxImagesPerPage, classify, includePageRasters, runOcrOnImages, ocrTextOnly, appendOcrText ); } } // CPD-ON public static ImageExtractionConfig defaultInstance() { throw new UnsupportedOperationException("defaultInstance is not yet bridged via JNI; use the Builder instead."); } }