Files
fil/packages/java/dev/kreuzberg/OcrConfig.java

299 lines
11 KiB
Java
Raw Normal View History

2026-06-01 23:40:55 +02:00
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
package dev.kreuzberg;
import java.util.Map;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
import com.fasterxml.jackson.databind.annotation.JsonPOJOBuilder;
import org.jspecify.annotations.Nullable;
/**
* OCR configuration.
*/
@JsonInclude(JsonInclude.Include.NON_ABSENT)
@JsonDeserialize(builder = OcrConfig.Builder.class)
public record OcrConfig(
/**
* Whether OCR is enabled.
*
* Setting {@code enabled: false} is a shorthand for {@code disable_ocr: true} on the parent
* ExtractionConfig(crate.core.config.ExtractionConfig). Images return
* metadata only; PDFs use native text extraction without OCR fallback.
*
* Defaults to {@code true}. When {@code false}, all other OCR settings are ignored.
*/
@Nullable @JsonProperty("enabled") Boolean enabled,
/**
* OCR backend: tesseract, easyocr, paddleocr
*/
@Nullable @JsonProperty("backend") String backend,
/**
* Language code (e.g., "eng", "deu")
*/
@Nullable @JsonProperty("language") String language,
/**
* Tesseract-specific configuration (optional)
*/
@Nullable @JsonProperty("tesseract_config") TesseractConfig tesseractConfig,
/**
* Output format for OCR results (optional, for format conversion)
*/
@Nullable @JsonProperty("output_format") OutputFormat outputFormat,
/**
* PaddleOCR-specific configuration (optional, JSON passthrough)
*/
@Nullable @JsonProperty("paddle_ocr_config") JsonNode paddleOcrConfig,
/**
* Arbitrary per-call options passed through to the backend unchanged.
*
* Custom OCR backends and built-in backends that support runtime tuning
* can read this value and deserialize the keys they care about. Keys
* unknown to the backend are silently ignored.
*
* This is the recommended extension point for per-call parameters that
* are not covered by the typed fields above (e.g. mode switching,
* preprocessing flags, inference batch size).
*
* **Scope:** when {@code pipeline} is {@code None}, this value is propagated to the
* primary stage of the auto-constructed pipeline. When {@code pipeline} is
* explicitly set, this field has **no effect** the caller must set
* {@code OcrPipelineStage.backend_options} directly on the relevant stage(s)
* instead.
*
* Example:
* {@code }{@code json}
* { "mode": "fast", "enable_layout": true, "timeout_ms": 5000 }
*
*/
@Nullable @JsonProperty("backend_options") JsonNode backendOptions,
/**
* OCR element extraction configuration
*/
@Nullable @JsonProperty("element_config") OcrElementConfig elementConfig,
/**
* Quality thresholds for the native-text-to-OCR fallback decision.
* When null, uses compiled defaults (matching previous hardcoded behavior).
*/
@Nullable @JsonProperty("quality_thresholds") OcrQualityThresholds qualityThresholds,
/**
* Multi-backend OCR pipeline configuration. When set, enables weighted
* fallback across multiple OCR backends based on output quality.
* When null, uses the single {@code backend} field (same as today).
*/
@Nullable @JsonProperty("pipeline") OcrPipelineConfig pipeline,
/**
* Enable automatic page rotation based on orientation detection.
*
* When enabled, uses Tesseract's {@code DetectOrientationScript()} to detect
* page orientation (0/90/180/270 degrees) before OCR. If the page is
* rotated with high confidence, the image is corrected before recognition.
* This is critical for handling rotated scanned documents.
*/
@Nullable @JsonProperty("auto_rotate") Boolean autoRotate,
/**
* VLM (Vision Language Model) OCR configuration.
*
* Required when {@code backend} is {@code "vlm"}. Uses liter-llm to send page
* images to a vision model for text extraction.
*/
@Nullable @JsonProperty("vlm_config") LlmConfig vlmConfig,
/**
* Custom Jinja2 prompt template for VLM OCR.
*
* When {@code None}, uses the default template. Available variables:
* - {@code {{ language }}} The document language code (e.g., "eng", "deu").
*/
@Nullable @JsonProperty("vlm_prompt") String vlmPrompt,
/**
* Hardware acceleration for ONNX Runtime models (e.g. PaddleOCR, layout detection).
*
* Not user-configurable via config files injected at runtime from
* {@code ExtractionConfig.acceleration} before each {@code process_image} call.
*/
@Nullable @JsonProperty("acceleration") AccelerationConfig acceleration,
/**
* Caller-supplied Tesseract {@code traineddata} bytes per language code.
*
* Primary use case is the WASM build, which has no filesystem and cannot
* download tessdata at runtime. Native builds typically rely on
* {@code TessdataManager} and ignore this field. When present, the WASM
* Tesseract backend prefers these bytes over its compile-time-bundled
* English data.
*
* Skipped by serde to keep config files small supply via the typed API
* at runtime.
*/
@Nullable @JsonProperty("tessdata_bytes") Map<String, byte[]> tessdataBytes
) {
public static Builder builder() {
return new Builder();
}
// CPD-OFF
@JsonPOJOBuilder(withPrefix = "with", buildMethodName = "build")
public static final class Builder {
private Boolean enabled = null;
private String backend = null;
private String language = null;
@JsonProperty("tesseract_config")
@Nullable private TesseractConfig tesseractConfig = null;
@JsonProperty("output_format")
@Nullable private OutputFormat outputFormat = null;
@JsonProperty("paddle_ocr_config")
private JsonNode paddleOcrConfig = null;
@JsonProperty("backend_options")
private JsonNode backendOptions = null;
@JsonProperty("element_config")
@Nullable private OcrElementConfig elementConfig = null;
@JsonProperty("quality_thresholds")
@Nullable private OcrQualityThresholds qualityThresholds = null;
@Nullable private OcrPipelineConfig pipeline = null;
@JsonProperty("auto_rotate")
private Boolean autoRotate = null;
@JsonProperty("vlm_config")
@Nullable private LlmConfig vlmConfig = null;
@JsonProperty("vlm_prompt")
private String vlmPrompt = null;
private AccelerationConfig acceleration = null;
@JsonProperty("tessdata_bytes")
private Map<String, byte[]> tessdataBytes = null;
/** Sets the enabled field. */
@JsonProperty("enabled")
public Builder withEnabled(final @Nullable Boolean value) {
this.enabled = value;
return this;
}
/** Sets the backend field. */
@JsonProperty("backend")
public Builder withBackend(final @Nullable String value) {
this.backend = value;
return this;
}
/** Sets the language field. */
@JsonProperty("language")
public Builder withLanguage(final @Nullable String value) {
this.language = value;
return this;
}
/** Sets the tesseractConfig field. */
@JsonProperty("tesseract_config")
public Builder withTesseractConfig(final @Nullable TesseractConfig value) {
this.tesseractConfig = value;
return this;
}
/** Sets the outputFormat field. */
@JsonProperty("output_format")
public Builder withOutputFormat(final @Nullable OutputFormat value) {
this.outputFormat = value;
return this;
}
/** Sets the paddleOcrConfig field. */
@JsonProperty("paddle_ocr_config")
public Builder withPaddleOcrConfig(final @Nullable JsonNode value) {
this.paddleOcrConfig = value;
return this;
}
/** Sets the backendOptions field. */
@JsonProperty("backend_options")
public Builder withBackendOptions(final @Nullable JsonNode value) {
this.backendOptions = value;
return this;
}
/** Sets the elementConfig field. */
@JsonProperty("element_config")
public Builder withElementConfig(final @Nullable OcrElementConfig value) {
this.elementConfig = value;
return this;
}
/** Sets the qualityThresholds field. */
@JsonProperty("quality_thresholds")
public Builder withQualityThresholds(final @Nullable OcrQualityThresholds value) {
this.qualityThresholds = value;
return this;
}
/** Sets the pipeline field. */
@JsonProperty("pipeline")
public Builder withPipeline(final @Nullable OcrPipelineConfig value) {
this.pipeline = value;
return this;
}
/** Sets the autoRotate field. */
@JsonProperty("auto_rotate")
public Builder withAutoRotate(final @Nullable Boolean value) {
this.autoRotate = value;
return this;
}
/** Sets the vlmConfig field. */
@JsonProperty("vlm_config")
public Builder withVlmConfig(final @Nullable LlmConfig value) {
this.vlmConfig = value;
return this;
}
/** Sets the vlmPrompt field. */
@JsonProperty("vlm_prompt")
public Builder withVlmPrompt(final @Nullable String value) {
this.vlmPrompt = value;
return this;
}
/** Sets the acceleration field. */
@JsonProperty("acceleration")
public Builder withAcceleration(final @Nullable AccelerationConfig value) {
this.acceleration = value;
return this;
}
/** Sets the tessdataBytes field. */
@JsonProperty("tessdata_bytes")
public Builder withTessdataBytes(final @Nullable Map<String, byte[]> value) {
this.tessdataBytes = value;
return this;
}
/** Builds the OcrConfig instance. */
public OcrConfig build() {
return new OcrConfig(
enabled,
backend,
language,
tesseractConfig,
outputFormat,
paddleOcrConfig,
backendOptions,
elementConfig,
qualityThresholds,
pipeline,
autoRotate,
vlmConfig,
vlmPrompt,
acceleration,
tessdataBytes
);
}
}
// CPD-ON
public static OcrConfig defaultInstance() {
throw new UnsupportedOperationException("defaultInstance is not yet bridged via JNI; use the Builder instead.");
}
}