// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||
|
|
package dev.kreuzberg;
|
||
|
|
|
||
|
|
import java.util.List;
|
||
|
|
import com.fasterxml.jackson.annotation.JsonProperty;
|
||
|
|
import com.fasterxml.jackson.annotation.JsonInclude;
|
||
|
|
import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
|
||
|
|
import com.fasterxml.jackson.databind.annotation.JsonPOJOBuilder;
|
||
|
|
import org.jspecify.annotations.Nullable;
|
||
|
|
|
||
|
|
/**
|
||
|
|
* Top-level structured document representation.
|
||
|
|
*
|
||
|
|
* A flat array of nodes with index-based parent/child references forming a tree.
|
||
|
|
* Root-level nodes have {@code parent: None}. Use {@code body_roots()} and {@code furniture_roots()}
|
||
|
|
* to iterate over top-level content by layer.
|
||
|
|
*
|
||
|
|
* # Validation
|
||
|
|
*
|
||
|
|
* Call {@code validate()} after construction to verify all node indices are in bounds
|
||
|
|
* and parent-child relationships are bidirectionally consistent.
|
||
|
|
*/
|
||
|
|
@JsonInclude(JsonInclude.Include.NON_ABSENT)
|
||
|
|
@JsonDeserialize(builder = DocumentStructure.Builder.class)
|
||
|
|
public record DocumentStructure(
|
||
|
|
/**
|
||
|
|
* All nodes in document/reading order.
|
||
|
|
*/
|
||
|
|
@JsonProperty("nodes") List<DocumentNode> nodes,
|
||
|
|
/**
|
||
|
|
* Origin format identifier (e.g. "docx", "pptx", "html", "pdf").
|
||
|
|
*
|
||
|
|
* Allows renderers to apply format-aware heuristics when converting
|
||
|
|
* the document tree to output formats.
|
||
|
|
*/
|
||
|
|
@Nullable @JsonProperty("source_format") String sourceFormat,
|
||
|
|
/**
|
||
|
|
* Resolved relationships between nodes (footnote refs, citations, anchor links, etc.).
|
||
|
|
*
|
||
|
|
* Populated during derivation from the internal document representation.
|
||
|
|
* Empty when no relationships are detected.
|
||
|
|
*/
|
||
|
|
@Nullable @JsonProperty("relationships") List<DocumentRelationship> relationships,
|
||
|
|
/**
|
||
|
|
* Sorted, deduplicated list of node type names present in this document.
|
||
|
|
*
|
||
|
|
* Each value is the snake_case {@code node_type} tag of the corresponding
|
||
|
|
* NodeContent variant (e.g. {@code "paragraph"}, {@code "heading"}, {@code "table"}, …).
|
||
|
|
*
|
||
|
|
* Computed from nodes via DocumentStructure.finalize_node_types.
|
||
|
|
* Empty until that method is called (internal construction paths call it
|
||
|
|
* at the end of derivation).
|
||
|
|
*/
|
||
|
|
@Nullable @JsonProperty("node_types") List<String> nodeTypes
|
||
|
|
) {
|
||
|
|
public static Builder builder() {
|
||
|
|
return new Builder();
|
||
|
|
}
|
||
|
|
|
||
|
|
// CPD-OFF
|
||
|
|
@JsonPOJOBuilder(withPrefix = "with", buildMethodName = "build")
|
||
|
|
public static final class Builder {
|
||
|
|
|
||
|
|
private List<DocumentNode> nodes = List.of();
|
||
|
|
@JsonProperty("source_format")
|
||
|
|
private String sourceFormat = null;
|
||
|
|
private List<DocumentRelationship> relationships = null;
|
||
|
|
@JsonProperty("node_types")
|
||
|
|
private List<String> nodeTypes = null;
|
||
|
|
|
||
|
|
/** Sets the nodes field. */
|
||
|
|
@JsonProperty("nodes")
|
||
|
|
public Builder withNodes(final List<DocumentNode> value) {
|
||
|
|
this.nodes = value;
|
||
|
|
return this;
|
||
|
|
}
|
||
|
|
|
||
|
|
/** Sets the sourceFormat field. */
|
||
|
|
@JsonProperty("source_format")
|
||
|
|
public Builder withSourceFormat(final @Nullable String value) {
|
||
|
|
this.sourceFormat = value;
|
||
|
|
return this;
|
||
|
|
}
|
||
|
|
|
||
|
|
/** Sets the relationships field. */
|
||
|
|
@JsonProperty("relationships")
|
||
|
|
public Builder withRelationships(final @Nullable List<DocumentRelationship> value) {
|
||
|
|
this.relationships = value;
|
||
|
|
return this;
|
||
|
|
}
|
||
|
|
|
||
|
|
/** Sets the nodeTypes field. */
|
||
|
|
@JsonProperty("node_types")
|
||
|
|
public Builder withNodeTypes(final @Nullable List<String> value) {
|
||
|
|
this.nodeTypes = value;
|
||
|
|
return this;
|
||
|
|
}
|
||
|
|
|
||
|
|
/** Builds the DocumentStructure instance. */
|
||
|
|
public DocumentStructure build() {
|
||
|
|
return new DocumentStructure(
|
||
|
|
nodes,
|
||
|
|
sourceFormat,
|
||
|
|
relationships,
|
||
|
|
nodeTypes
|
||
|
|
);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
// CPD-ON
|
||
|
|
public static DocumentStructure defaultInstance() {
|
||
|
|
throw new UnsupportedOperationException("defaultInstance is not yet bridged via JNI; use the Builder instead.");
|
||
|
|
}
|
||
|
|
}
|