Files
fil/packages/java/dev/kreuzberg/PaddleOcrConfig.java
Henrik Jess Nielsen b4c07d3693
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s
Nomad changes
2026-06-01 23:40:55 +02:00

302 lines
11 KiB
Java
Generated

// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
package dev.kreuzberg;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
import com.fasterxml.jackson.databind.annotation.JsonPOJOBuilder;
import org.jspecify.annotations.Nullable;
/**
 * Configuration for PaddleOCR backend.
 *
 * <p>Configures PaddleOCR text detection and recognition with multi-language support.
 * Instances are immutable: create one with {@link #builder()} or {@link #defaultInstance()},
 * then derive modified copies with the {@code with*} methods, each of which returns a new
 * instance and leaves the receiver untouched.
 *
 * <p>No range validation is performed here; values are carried as given.
 *
 * @param language Language code (e.g., "en", "ch", "jpn", "kor", "deu", "fra")
 * @param cacheDir Optional custom cache directory for model files
 * @param useAngleCls Enable angle classification for rotated text (default: false).
 *     Can misfire on short text regions, rotating crops incorrectly before recognition.
 * @param enableTableDetection Enable table structure detection (default: false)
 * @param detDbThresh Database threshold for text detection (default: 0.3).
 *     Range: 0.0-1.0, higher values require more confident detections
 * @param detDbBoxThresh Box threshold for text bounding box refinement (default: 0.5).
 *     Range: 0.0-1.0
 * @param detDbUnclipRatio Unclip ratio for expanding text bounding boxes (default: 1.6).
 *     Controls the expansion of detected text regions
 * @param detLimitSideLen Maximum side length for detection image (default: 960).
 *     Larger images may be resized to this limit for faster inference
 * @param recBatchNum Batch size for recognition inference (default: 6).
 *     Number of text regions to process simultaneously
 * @param padding Padding in pixels added around the image before detection (default: 10).
 *     Large values can include surrounding content like table gridlines.
 * @param dropScore Minimum recognition confidence score for text lines (default: 0.5).
 *     Text regions with recognition confidence below this threshold are discarded.
 *     Matches PaddleOCR Python's {@code drop_score} parameter. Range: 0.0-1.0
 * @param modelTier Model tier controlling detection/recognition model size and accuracy
 *     trade-off: {@code "mobile"} (default) selects lightweight models (~4.5MB detection,
 *     ~16.5MB recognition) with fast download and inference; {@code "server"} selects large,
 *     high-accuracy models (~88MB detection, ~84MB recognition), best for GPU or complex
 *     documents
 */
@JsonInclude(JsonInclude.Include.NON_ABSENT)
@JsonDeserialize(builder = PaddleOcrConfig.Builder.class)
public record PaddleOcrConfig(
    @JsonProperty("language") String language,
    @JsonProperty("cache_dir") java.nio.file.@Nullable Path cacheDir,
    @JsonProperty("use_angle_cls") boolean useAngleCls,
    @JsonProperty("enable_table_detection") boolean enableTableDetection,
    @JsonProperty("det_db_thresh") float detDbThresh,
    @JsonProperty("det_db_box_thresh") float detDbBoxThresh,
    @JsonProperty("det_db_unclip_ratio") float detDbUnclipRatio,
    @JsonProperty("det_limit_side_len") int detLimitSideLen,
    @JsonProperty("rec_batch_num") int recBatchNum,
    @JsonProperty("padding") int padding,
    @JsonProperty("drop_score") float dropScore,
    @JsonProperty("model_tier") String modelTier
) {
    /**
     * Creates a builder pre-populated with the documented default values.
     *
     * @return a fresh {@link Builder}
     */
    public static Builder builder() {
        return new Builder();
    }

    // CPD-OFF
    /**
     * Mutable builder for {@link PaddleOcrConfig}, also used by Jackson for deserialization.
     *
     * <p>Every field starts at the default documented on the corresponding record component,
     * so {@code builder().build()} and JSON payloads that omit a key yield the documented
     * defaults instead of zeroed thresholds, sizes and empty strings.
     */
    @JsonPOJOBuilder(withPrefix = "with", buildMethodName = "build")
    public static final class Builder {
        private static final String DEFAULT_LANGUAGE = "en";
        private static final float DEFAULT_DET_DB_THRESH = 0.3f;
        private static final float DEFAULT_DET_DB_BOX_THRESH = 0.5f;
        private static final float DEFAULT_DET_DB_UNCLIP_RATIO = 1.6f;
        private static final int DEFAULT_DET_LIMIT_SIDE_LEN = 960;
        private static final int DEFAULT_REC_BATCH_NUM = 6;
        private static final int DEFAULT_PADDING = 10;
        private static final float DEFAULT_DROP_SCORE = 0.5f;
        private static final String DEFAULT_MODEL_TIER = "mobile";

        @JsonProperty("language")
        private String language = DEFAULT_LANGUAGE;
        @JsonProperty("cache_dir")
        private java.nio.file.@Nullable Path cacheDir = null;
        @JsonProperty("use_angle_cls")
        private boolean useAngleCls = false;
        @JsonProperty("enable_table_detection")
        private boolean enableTableDetection = false;
        @JsonProperty("det_db_thresh")
        private float detDbThresh = DEFAULT_DET_DB_THRESH;
        @JsonProperty("det_db_box_thresh")
        private float detDbBoxThresh = DEFAULT_DET_DB_BOX_THRESH;
        @JsonProperty("det_db_unclip_ratio")
        private float detDbUnclipRatio = DEFAULT_DET_DB_UNCLIP_RATIO;
        @JsonProperty("det_limit_side_len")
        private int detLimitSideLen = DEFAULT_DET_LIMIT_SIDE_LEN;
        @JsonProperty("rec_batch_num")
        private int recBatchNum = DEFAULT_REC_BATCH_NUM;
        @JsonProperty("padding")
        private int padding = DEFAULT_PADDING;
        @JsonProperty("drop_score")
        private float dropScore = DEFAULT_DROP_SCORE;
        @JsonProperty("model_tier")
        private String modelTier = DEFAULT_MODEL_TIER;

        /** Sets the language field. */
        @JsonProperty("language")
        public Builder withLanguage(final String value) {
            this.language = value;
            return this;
        }

        /** Sets the cacheDir field; {@code null} means no custom directory. */
        @JsonProperty("cache_dir")
        public Builder withCacheDir(final java.nio.file.@Nullable Path value) {
            this.cacheDir = value;
            return this;
        }

        /** Sets the useAngleCls field. */
        @JsonProperty("use_angle_cls")
        public Builder withUseAngleCls(final boolean value) {
            this.useAngleCls = value;
            return this;
        }

        /** Sets the enableTableDetection field. */
        @JsonProperty("enable_table_detection")
        public Builder withEnableTableDetection(final boolean value) {
            this.enableTableDetection = value;
            return this;
        }

        /** Sets the detDbThresh field. */
        @JsonProperty("det_db_thresh")
        public Builder withDetDbThresh(final float value) {
            this.detDbThresh = value;
            return this;
        }

        /** Sets the detDbBoxThresh field. */
        @JsonProperty("det_db_box_thresh")
        public Builder withDetDbBoxThresh(final float value) {
            this.detDbBoxThresh = value;
            return this;
        }

        /** Sets the detDbUnclipRatio field. */
        @JsonProperty("det_db_unclip_ratio")
        public Builder withDetDbUnclipRatio(final float value) {
            this.detDbUnclipRatio = value;
            return this;
        }

        /** Sets the detLimitSideLen field. */
        @JsonProperty("det_limit_side_len")
        public Builder withDetLimitSideLen(final int value) {
            this.detLimitSideLen = value;
            return this;
        }

        /** Sets the recBatchNum field. */
        @JsonProperty("rec_batch_num")
        public Builder withRecBatchNum(final int value) {
            this.recBatchNum = value;
            return this;
        }

        /** Sets the padding field. */
        @JsonProperty("padding")
        public Builder withPadding(final int value) {
            this.padding = value;
            return this;
        }

        /** Sets the dropScore field. */
        @JsonProperty("drop_score")
        public Builder withDropScore(final float value) {
            this.dropScore = value;
            return this;
        }

        /** Sets the modelTier field. */
        @JsonProperty("model_tier")
        public Builder withModelTier(final String value) {
            this.modelTier = value;
            return this;
        }

        /**
         * Builds the PaddleOcrConfig instance from the current builder state.
         *
         * @return a new immutable configuration
         */
        public PaddleOcrConfig build() {
            return new PaddleOcrConfig(
                language,
                cacheDir,
                useAngleCls,
                enableTableDetection,
                detDbThresh,
                detDbBoxThresh,
                detDbUnclipRatio,
                detLimitSideLen,
                recBatchNum,
                padding,
                dropScore,
                modelTier
            );
        }
    }
    // CPD-ON

    /**
     * Returns a builder seeded with every value of this instance, so that a single
     * field can be replaced without repeating the other eleven.
     */
    private Builder toBuilder() {
        return new Builder()
            .withLanguage(language)
            .withCacheDir(cacheDir)
            .withUseAngleCls(useAngleCls)
            .withEnableTableDetection(enableTableDetection)
            .withDetDbThresh(detDbThresh)
            .withDetDbBoxThresh(detDbBoxThresh)
            .withDetDbUnclipRatio(detDbUnclipRatio)
            .withDetLimitSideLen(detLimitSideLen)
            .withRecBatchNum(recBatchNum)
            .withPadding(padding)
            .withDropScore(dropScore)
            .withModelTier(modelTier);
    }

    /**
     * Returns a copy of this configuration with a custom cache directory for model files.
     *
     * @param path Path to cache directory; {@code null} removes the custom directory
     * @return a new configuration; this instance is unchanged
     */
    public PaddleOcrConfig withCacheDir(java.nio.file.Path path) {
        return toBuilder().withCacheDir(path).build();
    }

    /**
     * Returns a copy of this configuration with table structure detection enabled or disabled.
     *
     * @param enable Whether to enable table detection
     * @return a new configuration; this instance is unchanged
     */
    public PaddleOcrConfig withTableDetection(boolean enable) {
        return toBuilder().withEnableTableDetection(enable).build();
    }

    /**
     * Returns a copy of this configuration with angle classification for rotated text
     * enabled or disabled.
     *
     * @param enable Whether to enable angle classification
     * @return a new configuration; this instance is unchanged
     */
    public PaddleOcrConfig withAngleCls(boolean enable) {
        return toBuilder().withUseAngleCls(enable).build();
    }

    /**
     * Returns a copy of this configuration with the given database threshold for text detection.
     *
     * @param threshold Detection threshold (0.0-1.0)
     * @return a new configuration; this instance is unchanged
     */
    public PaddleOcrConfig withDetDbThresh(float threshold) {
        return toBuilder().withDetDbThresh(threshold).build();
    }

    /**
     * Returns a copy of this configuration with the given box threshold for text bounding
     * box refinement.
     *
     * @param threshold Box threshold (0.0-1.0)
     * @return a new configuration; this instance is unchanged
     */
    public PaddleOcrConfig withDetDbBoxThresh(float threshold) {
        return toBuilder().withDetDbBoxThresh(threshold).build();
    }

    /**
     * Returns a copy of this configuration with the given unclip ratio for expanding text
     * bounding boxes.
     *
     * @param ratio Unclip ratio (typically 1.5-2.0)
     * @return a new configuration; this instance is unchanged
     */
    public PaddleOcrConfig withDetDbUnclipRatio(float ratio) {
        return toBuilder().withDetDbUnclipRatio(ratio).build();
    }

    /**
     * Returns a copy of this configuration with the given maximum side length for
     * detection images.
     *
     * @param length Maximum side length in pixels
     * @return a new configuration; this instance is unchanged
     */
    public PaddleOcrConfig withDetLimitSideLen(int length) {
        return toBuilder().withDetLimitSideLen(length).build();
    }

    /**
     * Returns a copy of this configuration with the given batch size for recognition inference.
     *
     * @param batchSize Number of text regions to process simultaneously
     * @return a new configuration; this instance is unchanged
     */
    public PaddleOcrConfig withRecBatchNum(int batchSize) {
        return toBuilder().withRecBatchNum(batchSize).build();
    }

    /**
     * Returns a copy of this configuration with the given minimum recognition confidence
     * threshold.
     *
     * @param score Minimum confidence (0.0-1.0), text below this is dropped
     * @return a new configuration; this instance is unchanged
     */
    public PaddleOcrConfig withDropScore(float score) {
        return toBuilder().withDropScore(score).build();
    }

    /**
     * Returns a copy of this configuration with the given padding added around images
     * before detection.
     *
     * @param padding Padding in pixels (0-100)
     * @return a new configuration; this instance is unchanged
     */
    public PaddleOcrConfig withPadding(int padding) {
        return toBuilder().withPadding(padding).build();
    }

    /**
     * Returns a copy of this configuration with the given model tier controlling
     * detection/recognition model size.
     *
     * @param tier {@code "mobile"} (default, lightweight, faster) or {@code "server"}
     *     (high accuracy, GPU/complex documents)
     * @return a new configuration; this instance is unchanged
     */
    public PaddleOcrConfig withModelTier(String tier) {
        return toBuilder().withModelTier(tier).build();
    }

    /**
     * Creates a default configuration with English language support.
     *
     * @return a configuration holding the builder's default for every field
     */
    public static PaddleOcrConfig defaultInstance() {
        return builder().build();
    }
}