180 lines
5.6 KiB
Java
180 lines
5.6 KiB
Java
|
|
// This file is auto-generated by alef — DO NOT EDIT.
|
||
|
|
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||
|
|
// To regenerate: alef generate
|
||
|
|
// To verify freshness: alef verify --exit-code
|
||
|
|
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||
|
|
package dev.kreuzberg;
|
||
|
|
|
||
|
|
import java.util.List;
|
||
|
|
import java.util.Map;
|
||
|
|
import com.fasterxml.jackson.annotation.JsonProperty;
|
||
|
|
import com.fasterxml.jackson.annotation.JsonInclude;
|
||
|
|
import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
|
||
|
|
import com.fasterxml.jackson.databind.annotation.JsonPOJOBuilder;
|
||
|
|
import org.jspecify.annotations.Nullable;
|
||
|
|
|
||
|
|
/**
|
||
|
|
* A single node in the document tree.
|
||
|
|
*
|
||
|
|
* Each node has deterministic {@code id}, typed {@code content}, optional {@code parent}/{@code children}
|
||
|
|
* for tree structure, and metadata like page number, bounding box, and content layer.
|
||
|
|
*/
|
||
|
|
@JsonInclude(JsonInclude.Include.NON_ABSENT)
|
||
|
|
@JsonDeserialize(builder = DocumentNode.Builder.class)
|
||
|
|
public record DocumentNode(
|
||
|
|
/**
|
||
|
|
* Deterministic identifier (hash of content + position).
|
||
|
|
*/
|
||
|
|
@JsonProperty("id") String id,
|
||
|
|
/**
|
||
|
|
* Node content — tagged enum, type-specific data only.
|
||
|
|
*/
|
||
|
|
@JsonProperty("content") NodeContent content,
|
||
|
|
/**
|
||
|
|
* Parent node index ({@code None} = root-level node).
|
||
|
|
*/
|
||
|
|
@Nullable @JsonProperty("parent") Integer parent,
|
||
|
|
/**
|
||
|
|
* Child node indices in reading order.
|
||
|
|
*/
|
||
|
|
@Nullable @JsonProperty("children") List<Integer> children,
|
||
|
|
/**
|
||
|
|
* Content layer classification.
|
||
|
|
*/
|
||
|
|
@Nullable @JsonProperty("content_layer") ContentLayer contentLayer,
|
||
|
|
/**
|
||
|
|
* Page number where this node starts (1-indexed).
|
||
|
|
*/
|
||
|
|
@Nullable @JsonProperty("page") Integer page,
|
||
|
|
/**
|
||
|
|
* Page number where this node ends (for multi-page tables/sections).
|
||
|
|
*/
|
||
|
|
@Nullable @JsonProperty("page_end") Integer pageEnd,
|
||
|
|
/**
|
||
|
|
* Bounding box in document coordinates.
|
||
|
|
*/
|
||
|
|
@Nullable @JsonProperty("bbox") BoundingBox bbox,
|
||
|
|
/**
|
||
|
|
* Inline annotations (formatting, links) on this node's text content.
|
||
|
|
*
|
||
|
|
* Only meaningful for text-carrying nodes; empty for containers.
|
||
|
|
*/
|
||
|
|
@Nullable @JsonProperty("annotations") List<TextAnnotation> annotations,
|
||
|
|
/**
|
||
|
|
* Format-specific key-value attributes.
|
||
|
|
*
|
||
|
|
* Extensible bag for miscellaneous data without a dedicated typed field: CSS classes,
|
||
|
|
* LaTeX environment names, Excel cell formulas, slide layout names, etc.
|
||
|
|
*/
|
||
|
|
@Nullable @JsonProperty("attributes") Map<String, String> attributes
|
||
|
|
) {
|
||
|
|
public static Builder builder() {
|
||
|
|
return new Builder();
|
||
|
|
}
|
||
|
|
|
||
|
|
// CPD-OFF
|
||
|
|
@JsonPOJOBuilder(withPrefix = "with", buildMethodName = "build")
|
||
|
|
public static final class Builder {
|
||
|
|
|
||
|
|
private String id = "";
|
||
|
|
private NodeContent content = null;
|
||
|
|
private Integer parent = null;
|
||
|
|
private List<Integer> children = null;
|
||
|
|
@JsonProperty("content_layer")
|
||
|
|
@Nullable private ContentLayer contentLayer = ContentLayer.Body;
|
||
|
|
private Integer page = null;
|
||
|
|
@JsonProperty("page_end")
|
||
|
|
private Integer pageEnd = null;
|
||
|
|
private BoundingBox bbox = null;
|
||
|
|
private List<TextAnnotation> annotations = null;
|
||
|
|
private Map<String, String> attributes = null;
|
||
|
|
|
||
|
|
/** Sets the id field. */
|
||
|
|
@JsonProperty("id")
|
||
|
|
public Builder withId(final String value) {
|
||
|
|
this.id = value;
|
||
|
|
return this;
|
||
|
|
}
|
||
|
|
|
||
|
|
/** Sets the content field. */
|
||
|
|
@JsonProperty("content")
|
||
|
|
public Builder withContent(final NodeContent value) {
|
||
|
|
this.content = value;
|
||
|
|
return this;
|
||
|
|
}
|
||
|
|
|
||
|
|
/** Sets the parent field. */
|
||
|
|
@JsonProperty("parent")
|
||
|
|
public Builder withParent(final @Nullable int value) {
|
||
|
|
this.parent = value;
|
||
|
|
return this;
|
||
|
|
}
|
||
|
|
|
||
|
|
/** Sets the children field. */
|
||
|
|
@JsonProperty("children")
|
||
|
|
public Builder withChildren(final @Nullable List<Integer> value) {
|
||
|
|
this.children = value;
|
||
|
|
return this;
|
||
|
|
}
|
||
|
|
|
||
|
|
/** Sets the contentLayer field. */
|
||
|
|
@JsonProperty("content_layer")
|
||
|
|
public Builder withContentLayer(final @Nullable ContentLayer value) {
|
||
|
|
this.contentLayer = value;
|
||
|
|
return this;
|
||
|
|
}
|
||
|
|
|
||
|
|
/** Sets the page field. */
|
||
|
|
@JsonProperty("page")
|
||
|
|
public Builder withPage(final @Nullable int value) {
|
||
|
|
this.page = value;
|
||
|
|
return this;
|
||
|
|
}
|
||
|
|
|
||
|
|
/** Sets the pageEnd field. */
|
||
|
|
@JsonProperty("page_end")
|
||
|
|
public Builder withPageEnd(final @Nullable int value) {
|
||
|
|
this.pageEnd = value;
|
||
|
|
return this;
|
||
|
|
}
|
||
|
|
|
||
|
|
/** Sets the bbox field. */
|
||
|
|
@JsonProperty("bbox")
|
||
|
|
public Builder withBbox(final @Nullable BoundingBox value) {
|
||
|
|
this.bbox = value;
|
||
|
|
return this;
|
||
|
|
}
|
||
|
|
|
||
|
|
/** Sets the annotations field. */
|
||
|
|
@JsonProperty("annotations")
|
||
|
|
public Builder withAnnotations(final @Nullable List<TextAnnotation> value) {
|
||
|
|
this.annotations = value;
|
||
|
|
return this;
|
||
|
|
}
|
||
|
|
|
||
|
|
/** Sets the attributes field. */
|
||
|
|
@JsonProperty("attributes")
|
||
|
|
public Builder withAttributes(final @Nullable Map<String, String> value) {
|
||
|
|
this.attributes = value;
|
||
|
|
return this;
|
||
|
|
}
|
||
|
|
|
||
|
|
/** Builds the DocumentNode instance. */
|
||
|
|
public DocumentNode build() {
|
||
|
|
return new DocumentNode(
|
||
|
|
id,
|
||
|
|
content,
|
||
|
|
parent,
|
||
|
|
children,
|
||
|
|
contentLayer,
|
||
|
|
page,
|
||
|
|
pageEnd,
|
||
|
|
bbox,
|
||
|
|
annotations,
|
||
|
|
attributes
|
||
|
|
);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
// CPD-ON
|
||
|
|
}
|