// This file is auto-generated by alef — DO NOT EDIT. // alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75 // To regenerate: alef generate // To verify freshness: alef verify --exit-code // Issues & docs: https://github.com/kreuzberg-dev/alef package dev.kreuzberg; import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.annotation.JsonInclude; import com.fasterxml.jackson.databind.annotation.JsonDeserialize; import com.fasterxml.jackson.databind.annotation.JsonPOJOBuilder; import org.jspecify.annotations.Nullable; /** * Configuration for PaddleOCR backend. * * Configures PaddleOCR text detection and recognition with multi-language support. * Uses a builder pattern for convenient configuration. */ @JsonInclude(JsonInclude.Include.NON_ABSENT) @JsonDeserialize(builder = PaddleOcrConfig.Builder.class) public record PaddleOcrConfig( /** * Language code (e.g., "en", "ch", "jpn", "kor", "deu", "fra") */ @JsonProperty("language") String language, /** * Optional custom cache directory for model files */ @JsonProperty("cache_dir") java.nio.file.@Nullable Path cacheDir, /** * Enable angle classification for rotated text (default: false). * Can misfire on short text regions, rotating crops incorrectly before recognition. */ @JsonProperty("use_angle_cls") boolean useAngleCls, /** * Enable table structure detection (default: false) */ @JsonProperty("enable_table_detection") boolean enableTableDetection, /** * Database threshold for text detection (default: 0.3) * Range: 0.0-1.0, higher values require more confident detections */ @JsonProperty("det_db_thresh") float detDbThresh, /** * Box threshold for text bounding box refinement (default: 0.5) * Range: 0.0-1.0 */ @JsonProperty("det_db_box_thresh") float detDbBoxThresh, /** * Unclip ratio for expanding text bounding boxes (default: 1.6) * Controls the expansion of detected text regions */ @JsonProperty("det_db_unclip_ratio") float detDbUnclipRatio, /** * Maximum side length for detection image (default: 960) * Larger images may be resized to this limit for faster inference */ @JsonProperty("det_limit_side_len") int detLimitSideLen, /** * Batch size for recognition inference (default: 6) * Number of text regions to process simultaneously */ @JsonProperty("rec_batch_num") int recBatchNum, /** * Padding in pixels added around the image before detection (default: 10). * Large values can include surrounding content like table gridlines. */ @JsonProperty("padding") int padding, /** * Minimum recognition confidence score for text lines (default: 0.5). * Text regions with recognition confidence below this threshold are discarded. * Matches PaddleOCR Python's {@code drop_score} parameter. * Range: 0.0-1.0 */ @JsonProperty("drop_score") float dropScore, /** * Model tier controlling detection/recognition model size and accuracy trade-off. * - {@code "mobile"} (default): Lightweight models (~4.5MB detection, ~16.5MB recognition), fast download and inference * - {@code "server"}: Large, high-accuracy models (~88MB detection, ~84MB recognition), best for GPU or complex documents */ @JsonProperty("model_tier") String modelTier ) { public static Builder builder() { return new Builder(); } // CPD-OFF @JsonPOJOBuilder(withPrefix = "with", buildMethodName = "build") public static final class Builder { private String language = ""; @JsonProperty("cache_dir") private java.nio.file.Path cacheDir = null; @JsonProperty("use_angle_cls") private boolean useAngleCls = false; @JsonProperty("enable_table_detection") private boolean enableTableDetection = false; @JsonProperty("det_db_thresh") private float detDbThresh = 0.0f; @JsonProperty("det_db_box_thresh") private float detDbBoxThresh = 0.0f; @JsonProperty("det_db_unclip_ratio") private float detDbUnclipRatio = 0.0f; @JsonProperty("det_limit_side_len") private int detLimitSideLen = 0; @JsonProperty("rec_batch_num") private int recBatchNum = 0; private int padding = 0; @JsonProperty("drop_score") private float dropScore = 0.0f; @JsonProperty("model_tier") private String modelTier = ""; /** Sets the language field. */ @JsonProperty("language") public Builder withLanguage(final String value) { this.language = value; return this; } /** Sets the cacheDir field. */ @JsonProperty("cache_dir") public Builder withCacheDir(final java.nio.file.@Nullable Path value) { this.cacheDir = value; return this; } /** Sets the useAngleCls field. */ @JsonProperty("use_angle_cls") public Builder withUseAngleCls(final boolean value) { this.useAngleCls = value; return this; } /** Sets the enableTableDetection field. */ @JsonProperty("enable_table_detection") public Builder withEnableTableDetection(final boolean value) { this.enableTableDetection = value; return this; } /** Sets the detDbThresh field. */ @JsonProperty("det_db_thresh") public Builder withDetDbThresh(final float value) { this.detDbThresh = value; return this; } /** Sets the detDbBoxThresh field. */ @JsonProperty("det_db_box_thresh") public Builder withDetDbBoxThresh(final float value) { this.detDbBoxThresh = value; return this; } /** Sets the detDbUnclipRatio field. */ @JsonProperty("det_db_unclip_ratio") public Builder withDetDbUnclipRatio(final float value) { this.detDbUnclipRatio = value; return this; } /** Sets the detLimitSideLen field. */ @JsonProperty("det_limit_side_len") public Builder withDetLimitSideLen(final int value) { this.detLimitSideLen = value; return this; } /** Sets the recBatchNum field. */ @JsonProperty("rec_batch_num") public Builder withRecBatchNum(final int value) { this.recBatchNum = value; return this; } /** Sets the padding field. */ @JsonProperty("padding") public Builder withPadding(final int value) { this.padding = value; return this; } /** Sets the dropScore field. */ @JsonProperty("drop_score") public Builder withDropScore(final float value) { this.dropScore = value; return this; } /** Sets the modelTier field. */ @JsonProperty("model_tier") public Builder withModelTier(final String value) { this.modelTier = value; return this; } /** Builds the PaddleOcrConfig instance. */ public PaddleOcrConfig build() { return new PaddleOcrConfig( language, cacheDir, useAngleCls, enableTableDetection, detDbThresh, detDbBoxThresh, detDbUnclipRatio, detLimitSideLen, recBatchNum, padding, dropScore, modelTier ); } } // CPD-ON /** * Sets a custom cache directory for model files. * {@literal @}param path Path to cache directory */ public PaddleOcrConfig withCacheDir(java.nio.file.Path path) { throw new UnsupportedOperationException("withCacheDir is not yet bridged via JNI; reconstruct via Builder."); } /** * Enables or disables table structure detection. * {@literal @}param enable Whether to enable table detection */ public PaddleOcrConfig withTableDetection(boolean enable) { throw new UnsupportedOperationException("withTableDetection is not yet bridged via JNI; reconstruct via Builder."); } /** * Enables or disables angle classification for rotated text. * {@literal @}param enable Whether to enable angle classification */ public PaddleOcrConfig withAngleCls(boolean enable) { throw new UnsupportedOperationException("withAngleCls is not yet bridged via JNI; reconstruct via Builder."); } /** * Sets the database threshold for text detection. * {@literal @}param threshold Detection threshold (0.0-1.0) */ public PaddleOcrConfig withDetDbThresh(float threshold) { throw new UnsupportedOperationException("withDetDbThresh is not yet bridged via JNI; reconstruct via Builder."); } /** * Sets the box threshold for text bounding box refinement. * {@literal @}param threshold Box threshold (0.0-1.0) */ public PaddleOcrConfig withDetDbBoxThresh(float threshold) { throw new UnsupportedOperationException("withDetDbBoxThresh is not yet bridged via JNI; reconstruct via Builder."); } /** * Sets the unclip ratio for expanding text bounding boxes. * {@literal @}param ratio Unclip ratio (typically 1.5-2.0) */ public PaddleOcrConfig withDetDbUnclipRatio(float ratio) { throw new UnsupportedOperationException("withDetDbUnclipRatio is not yet bridged via JNI; reconstruct via Builder."); } /** * Sets the maximum side length for detection images. * {@literal @}param length Maximum side length in pixels */ public PaddleOcrConfig withDetLimitSideLen(int length) { throw new UnsupportedOperationException("withDetLimitSideLen is not yet bridged via JNI; reconstruct via Builder."); } /** * Sets the batch size for recognition inference. * {@literal @}param batch_size Number of text regions to process simultaneously */ public PaddleOcrConfig withRecBatchNum(int batchSize) { throw new UnsupportedOperationException("withRecBatchNum is not yet bridged via JNI; reconstruct via Builder."); } /** * Sets the minimum recognition confidence threshold. * {@literal @}param score Minimum confidence (0.0-1.0), text below this is dropped */ public PaddleOcrConfig withDropScore(float score) { throw new UnsupportedOperationException("withDropScore is not yet bridged via JNI; reconstruct via Builder."); } /** * Sets padding in pixels added around images before detection. * {@literal @}param padding Padding in pixels (0-100) */ public PaddleOcrConfig withPadding(int padding) { throw new UnsupportedOperationException("withPadding is not yet bridged via JNI; reconstruct via Builder."); } /** * Sets the model tier controlling detection/recognition model size. * {@literal @}param tier {@code "mobile"} (default, lightweight, faster) or {@code "server"} (high accuracy, GPU/complex documents) */ public PaddleOcrConfig withModelTier(String tier) { throw new UnsupportedOperationException("withModelTier is not yet bridged via JNI; reconstruct via Builder."); } /** * Creates a default configuration with English language support. */ public static PaddleOcrConfig defaultInstance() { throw new UnsupportedOperationException("defaultInstance is not yet bridged via JNI; use the Builder instead."); } }