299 lines
11 KiB
Java
Generated
299 lines
11 KiB
Java
Generated
// This file is auto-generated by alef — DO NOT EDIT.
|
|
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
|
// To regenerate: alef generate
|
|
// To verify freshness: alef verify --exit-code
|
|
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
|
package dev.kreuzberg;
|
|
|
|
import java.util.Map;
|
|
import com.fasterxml.jackson.databind.JsonNode;
|
|
import com.fasterxml.jackson.annotation.JsonProperty;
|
|
import com.fasterxml.jackson.annotation.JsonInclude;
|
|
import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
|
|
import com.fasterxml.jackson.databind.annotation.JsonPOJOBuilder;
|
|
import org.jspecify.annotations.Nullable;
|
|
|
|
/**
|
|
* OCR configuration.
|
|
*/
|
|
@JsonInclude(JsonInclude.Include.NON_ABSENT)
|
|
@JsonDeserialize(builder = OcrConfig.Builder.class)
|
|
public record OcrConfig(
|
|
/**
|
|
* Whether OCR is enabled.
|
|
*
|
|
* Setting {@code enabled: false} is a shorthand for {@code disable_ocr: true} on the parent
|
|
* ExtractionConfig(crate.core.config.ExtractionConfig). Images return
|
|
* metadata only; PDFs use native text extraction without OCR fallback.
|
|
*
|
|
* Defaults to {@code true}. When {@code false}, all other OCR settings are ignored.
|
|
*/
|
|
@Nullable @JsonProperty("enabled") Boolean enabled,
|
|
/**
|
|
* OCR backend: tesseract, easyocr, paddleocr
|
|
*/
|
|
@Nullable @JsonProperty("backend") String backend,
|
|
/**
|
|
* Language code (e.g., "eng", "deu")
|
|
*/
|
|
@Nullable @JsonProperty("language") String language,
|
|
/**
|
|
* Tesseract-specific configuration (optional)
|
|
*/
|
|
@Nullable @JsonProperty("tesseract_config") TesseractConfig tesseractConfig,
|
|
/**
|
|
* Output format for OCR results (optional, for format conversion)
|
|
*/
|
|
@Nullable @JsonProperty("output_format") OutputFormat outputFormat,
|
|
/**
|
|
* PaddleOCR-specific configuration (optional, JSON passthrough)
|
|
*/
|
|
@Nullable @JsonProperty("paddle_ocr_config") JsonNode paddleOcrConfig,
|
|
/**
|
|
* Arbitrary per-call options passed through to the backend unchanged.
|
|
*
|
|
* Custom OCR backends and built-in backends that support runtime tuning
|
|
* can read this value and deserialize the keys they care about. Keys
|
|
* unknown to the backend are silently ignored.
|
|
*
|
|
* This is the recommended extension point for per-call parameters that
|
|
* are not covered by the typed fields above (e.g. mode switching,
|
|
* preprocessing flags, inference batch size).
|
|
*
|
|
* **Scope:** when {@code pipeline} is {@code None}, this value is propagated to the
|
|
* primary stage of the auto-constructed pipeline. When {@code pipeline} is
|
|
* explicitly set, this field has **no effect** — the caller must set
|
|
* {@code OcrPipelineStage.backend_options} directly on the relevant stage(s)
|
|
* instead.
|
|
*
|
|
* Example:
|
|
* {@code }{@code json}
|
|
* { "mode": "fast", "enable_layout": true, "timeout_ms": 5000 }
|
|
*
|
|
*/
|
|
@Nullable @JsonProperty("backend_options") JsonNode backendOptions,
|
|
/**
|
|
* OCR element extraction configuration
|
|
*/
|
|
@Nullable @JsonProperty("element_config") OcrElementConfig elementConfig,
|
|
/**
|
|
* Quality thresholds for the native-text-to-OCR fallback decision.
|
|
* When null, uses compiled defaults (matching previous hardcoded behavior).
|
|
*/
|
|
@Nullable @JsonProperty("quality_thresholds") OcrQualityThresholds qualityThresholds,
|
|
/**
|
|
* Multi-backend OCR pipeline configuration. When set, enables weighted
|
|
* fallback across multiple OCR backends based on output quality.
|
|
* When null, uses the single {@code backend} field (same as today).
|
|
*/
|
|
@Nullable @JsonProperty("pipeline") OcrPipelineConfig pipeline,
|
|
/**
|
|
* Enable automatic page rotation based on orientation detection.
|
|
*
|
|
* When enabled, uses Tesseract's {@code DetectOrientationScript()} to detect
|
|
* page orientation (0/90/180/270 degrees) before OCR. If the page is
|
|
* rotated with high confidence, the image is corrected before recognition.
|
|
* This is critical for handling rotated scanned documents.
|
|
*/
|
|
@Nullable @JsonProperty("auto_rotate") Boolean autoRotate,
|
|
/**
|
|
* VLM (Vision Language Model) OCR configuration.
|
|
*
|
|
* Required when {@code backend} is {@code "vlm"}. Uses liter-llm to send page
|
|
* images to a vision model for text extraction.
|
|
*/
|
|
@Nullable @JsonProperty("vlm_config") LlmConfig vlmConfig,
|
|
/**
|
|
* Custom Jinja2 prompt template for VLM OCR.
|
|
*
|
|
* When {@code None}, uses the default template. Available variables:
|
|
* - {@code {{ language }}} — The document language code (e.g., "eng", "deu").
|
|
*/
|
|
@Nullable @JsonProperty("vlm_prompt") String vlmPrompt,
|
|
/**
|
|
* Hardware acceleration for ONNX Runtime models (e.g. PaddleOCR, layout detection).
|
|
*
|
|
* Not user-configurable via config files — injected at runtime from
|
|
* {@code ExtractionConfig.acceleration} before each {@code process_image} call.
|
|
*/
|
|
@Nullable @JsonProperty("acceleration") AccelerationConfig acceleration,
|
|
/**
|
|
* Caller-supplied Tesseract {@code traineddata} bytes per language code.
|
|
*
|
|
* Primary use case is the WASM build, which has no filesystem and cannot
|
|
* download tessdata at runtime. Native builds typically rely on
|
|
* {@code TessdataManager} and ignore this field. When present, the WASM
|
|
* Tesseract backend prefers these bytes over its compile-time-bundled
|
|
* English data.
|
|
*
|
|
* Skipped by serde to keep config files small — supply via the typed API
|
|
* at runtime.
|
|
*/
|
|
@Nullable @JsonProperty("tessdata_bytes") Map<String, byte[]> tessdataBytes
|
|
) {
|
|
public static Builder builder() {
|
|
return new Builder();
|
|
}
|
|
|
|
// CPD-OFF
|
|
@JsonPOJOBuilder(withPrefix = "with", buildMethodName = "build")
|
|
public static final class Builder {
|
|
|
|
private Boolean enabled = null;
|
|
private String backend = null;
|
|
private String language = null;
|
|
@JsonProperty("tesseract_config")
|
|
@Nullable private TesseractConfig tesseractConfig = null;
|
|
@JsonProperty("output_format")
|
|
@Nullable private OutputFormat outputFormat = null;
|
|
@JsonProperty("paddle_ocr_config")
|
|
private JsonNode paddleOcrConfig = null;
|
|
@JsonProperty("backend_options")
|
|
private JsonNode backendOptions = null;
|
|
@JsonProperty("element_config")
|
|
@Nullable private OcrElementConfig elementConfig = null;
|
|
@JsonProperty("quality_thresholds")
|
|
@Nullable private OcrQualityThresholds qualityThresholds = null;
|
|
@Nullable private OcrPipelineConfig pipeline = null;
|
|
@JsonProperty("auto_rotate")
|
|
private Boolean autoRotate = null;
|
|
@JsonProperty("vlm_config")
|
|
@Nullable private LlmConfig vlmConfig = null;
|
|
@JsonProperty("vlm_prompt")
|
|
private String vlmPrompt = null;
|
|
private AccelerationConfig acceleration = null;
|
|
@JsonProperty("tessdata_bytes")
|
|
private Map<String, byte[]> tessdataBytes = null;
|
|
|
|
/** Sets the enabled field. */
|
|
@JsonProperty("enabled")
|
|
public Builder withEnabled(final @Nullable Boolean value) {
|
|
this.enabled = value;
|
|
return this;
|
|
}
|
|
|
|
/** Sets the backend field. */
|
|
@JsonProperty("backend")
|
|
public Builder withBackend(final @Nullable String value) {
|
|
this.backend = value;
|
|
return this;
|
|
}
|
|
|
|
/** Sets the language field. */
|
|
@JsonProperty("language")
|
|
public Builder withLanguage(final @Nullable String value) {
|
|
this.language = value;
|
|
return this;
|
|
}
|
|
|
|
/** Sets the tesseractConfig field. */
|
|
@JsonProperty("tesseract_config")
|
|
public Builder withTesseractConfig(final @Nullable TesseractConfig value) {
|
|
this.tesseractConfig = value;
|
|
return this;
|
|
}
|
|
|
|
/** Sets the outputFormat field. */
|
|
@JsonProperty("output_format")
|
|
public Builder withOutputFormat(final @Nullable OutputFormat value) {
|
|
this.outputFormat = value;
|
|
return this;
|
|
}
|
|
|
|
/** Sets the paddleOcrConfig field. */
|
|
@JsonProperty("paddle_ocr_config")
|
|
public Builder withPaddleOcrConfig(final @Nullable JsonNode value) {
|
|
this.paddleOcrConfig = value;
|
|
return this;
|
|
}
|
|
|
|
/** Sets the backendOptions field. */
|
|
@JsonProperty("backend_options")
|
|
public Builder withBackendOptions(final @Nullable JsonNode value) {
|
|
this.backendOptions = value;
|
|
return this;
|
|
}
|
|
|
|
/** Sets the elementConfig field. */
|
|
@JsonProperty("element_config")
|
|
public Builder withElementConfig(final @Nullable OcrElementConfig value) {
|
|
this.elementConfig = value;
|
|
return this;
|
|
}
|
|
|
|
/** Sets the qualityThresholds field. */
|
|
@JsonProperty("quality_thresholds")
|
|
public Builder withQualityThresholds(final @Nullable OcrQualityThresholds value) {
|
|
this.qualityThresholds = value;
|
|
return this;
|
|
}
|
|
|
|
/** Sets the pipeline field. */
|
|
@JsonProperty("pipeline")
|
|
public Builder withPipeline(final @Nullable OcrPipelineConfig value) {
|
|
this.pipeline = value;
|
|
return this;
|
|
}
|
|
|
|
/** Sets the autoRotate field. */
|
|
@JsonProperty("auto_rotate")
|
|
public Builder withAutoRotate(final @Nullable Boolean value) {
|
|
this.autoRotate = value;
|
|
return this;
|
|
}
|
|
|
|
/** Sets the vlmConfig field. */
|
|
@JsonProperty("vlm_config")
|
|
public Builder withVlmConfig(final @Nullable LlmConfig value) {
|
|
this.vlmConfig = value;
|
|
return this;
|
|
}
|
|
|
|
/** Sets the vlmPrompt field. */
|
|
@JsonProperty("vlm_prompt")
|
|
public Builder withVlmPrompt(final @Nullable String value) {
|
|
this.vlmPrompt = value;
|
|
return this;
|
|
}
|
|
|
|
/** Sets the acceleration field. */
|
|
@JsonProperty("acceleration")
|
|
public Builder withAcceleration(final @Nullable AccelerationConfig value) {
|
|
this.acceleration = value;
|
|
return this;
|
|
}
|
|
|
|
/** Sets the tessdataBytes field. */
|
|
@JsonProperty("tessdata_bytes")
|
|
public Builder withTessdataBytes(final @Nullable Map<String, byte[]> value) {
|
|
this.tessdataBytes = value;
|
|
return this;
|
|
}
|
|
|
|
/** Builds the OcrConfig instance. */
|
|
public OcrConfig build() {
|
|
return new OcrConfig(
|
|
enabled,
|
|
backend,
|
|
language,
|
|
tesseractConfig,
|
|
outputFormat,
|
|
paddleOcrConfig,
|
|
backendOptions,
|
|
elementConfig,
|
|
qualityThresholds,
|
|
pipeline,
|
|
autoRotate,
|
|
vlmConfig,
|
|
vlmPrompt,
|
|
acceleration,
|
|
tessdataBytes
|
|
);
|
|
}
|
|
}
|
|
// CPD-ON
|
|
public static OcrConfig defaultInstance() {
|
|
throw new UnsupportedOperationException("defaultInstance is not yet bridged via JNI; use the Builder instead.");
|
|
}
|
|
}
|