This commit is contained in:
151
packages/java/dev/kreuzberg/OcrElement.java
generated
Normal file
151
packages/java/dev/kreuzberg/OcrElement.java
generated
Normal file
@@ -0,0 +1,151 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
package dev.kreuzberg;
|
||||
|
||||
import java.util.Map;
|
||||
import com.fasterxml.jackson.databind.JsonNode;
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
import com.fasterxml.jackson.annotation.JsonInclude;
|
||||
import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
|
||||
import com.fasterxml.jackson.databind.annotation.JsonPOJOBuilder;
|
||||
import org.jspecify.annotations.Nullable;
|
||||
|
||||
/**
|
||||
* A unified OCR element representing detected text with full metadata.
|
||||
*
|
||||
* This is the primary type for structured OCR output, preserving all information
|
||||
* from both Tesseract and PaddleOCR backends.
|
||||
*/
|
||||
@JsonInclude(JsonInclude.Include.NON_ABSENT)
|
||||
@JsonDeserialize(builder = OcrElement.Builder.class)
|
||||
public record OcrElement(
|
||||
/**
|
||||
* The recognized text content.
|
||||
*/
|
||||
@JsonProperty("text") String text,
|
||||
/**
|
||||
* Bounding geometry (rectangle or quadrilateral).
|
||||
*/
|
||||
@JsonProperty("geometry") OcrBoundingGeometry geometry,
|
||||
/**
|
||||
* Confidence scores for detection and recognition.
|
||||
*/
|
||||
@JsonProperty("confidence") OcrConfidence confidence,
|
||||
/**
|
||||
* Hierarchical level (word, line, block, page).
|
||||
*/
|
||||
@Nullable @JsonProperty("level") OcrElementLevel level,
|
||||
/**
|
||||
* Rotation information (if detected).
|
||||
*/
|
||||
@Nullable @JsonProperty("rotation") OcrRotation rotation,
|
||||
/**
|
||||
* Page number (1-indexed).
|
||||
*/
|
||||
@Nullable @JsonProperty("page_number") Integer pageNumber,
|
||||
/**
|
||||
* Parent element ID for hierarchical relationships.
|
||||
*
|
||||
* Only used for Tesseract output which has word -> line -> block hierarchy.
|
||||
*/
|
||||
@Nullable @JsonProperty("parent_id") String parentId,
|
||||
/**
|
||||
* Backend-specific metadata that doesn't fit the unified schema.
|
||||
*/
|
||||
@Nullable @JsonProperty("backend_metadata") Map<String, JsonNode> backendMetadata
|
||||
) {
|
||||
public static Builder builder() {
|
||||
return new Builder();
|
||||
}
|
||||
|
||||
// CPD-OFF
|
||||
@JsonPOJOBuilder(withPrefix = "with", buildMethodName = "build")
|
||||
public static final class Builder {
|
||||
|
||||
private String text = "";
|
||||
private OcrBoundingGeometry geometry = null;
|
||||
private OcrConfidence confidence = null;
|
||||
@Nullable private OcrElementLevel level = OcrElementLevel.Line;
|
||||
private OcrRotation rotation = null;
|
||||
@JsonProperty("page_number")
|
||||
private Integer pageNumber = null;
|
||||
@JsonProperty("parent_id")
|
||||
private String parentId = null;
|
||||
@JsonProperty("backend_metadata")
|
||||
private Map<String, JsonNode> backendMetadata = null;
|
||||
|
||||
/** Sets the text field. */
|
||||
@JsonProperty("text")
|
||||
public Builder withText(final String value) {
|
||||
this.text = value;
|
||||
return this;
|
||||
}
|
||||
|
||||
/** Sets the geometry field. */
|
||||
@JsonProperty("geometry")
|
||||
public Builder withGeometry(final OcrBoundingGeometry value) {
|
||||
this.geometry = value;
|
||||
return this;
|
||||
}
|
||||
|
||||
/** Sets the confidence field. */
|
||||
@JsonProperty("confidence")
|
||||
public Builder withConfidence(final OcrConfidence value) {
|
||||
this.confidence = value;
|
||||
return this;
|
||||
}
|
||||
|
||||
/** Sets the level field. */
|
||||
@JsonProperty("level")
|
||||
public Builder withLevel(final @Nullable OcrElementLevel value) {
|
||||
this.level = value;
|
||||
return this;
|
||||
}
|
||||
|
||||
/** Sets the rotation field. */
|
||||
@JsonProperty("rotation")
|
||||
public Builder withRotation(final @Nullable OcrRotation value) {
|
||||
this.rotation = value;
|
||||
return this;
|
||||
}
|
||||
|
||||
/** Sets the pageNumber field. */
|
||||
@JsonProperty("page_number")
|
||||
public Builder withPageNumber(final @Nullable Integer value) {
|
||||
this.pageNumber = value;
|
||||
return this;
|
||||
}
|
||||
|
||||
/** Sets the parentId field. */
|
||||
@JsonProperty("parent_id")
|
||||
public Builder withParentId(final @Nullable String value) {
|
||||
this.parentId = value;
|
||||
return this;
|
||||
}
|
||||
|
||||
/** Sets the backendMetadata field. */
|
||||
@JsonProperty("backend_metadata")
|
||||
public Builder withBackendMetadata(final @Nullable Map<String, JsonNode> value) {
|
||||
this.backendMetadata = value;
|
||||
return this;
|
||||
}
|
||||
|
||||
/** Builds the OcrElement instance. */
|
||||
public OcrElement build() {
|
||||
return new OcrElement(
|
||||
text,
|
||||
geometry,
|
||||
confidence,
|
||||
level,
|
||||
rotation,
|
||||
pageNumber,
|
||||
parentId,
|
||||
backendMetadata
|
||||
);
|
||||
}
|
||||
}
|
||||
// CPD-ON
|
||||
}
|
||||
Reference in New Issue
Block a user