Files
fil/packages/java/dev/kreuzberg/DocumentStructure.java

118 lines
4.2 KiB
Java
Raw Permalink Normal View History

2026-06-01 23:40:55 +02:00
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
package dev.kreuzberg;
import java.util.List;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
import com.fasterxml.jackson.databind.annotation.JsonPOJOBuilder;
import org.jspecify.annotations.Nullable;
/**
 * Top-level structured document representation.
 *
 * <p>A flat list of nodes with index-based parent/child references forming a tree.
 * Root-level nodes have no parent ({@code parent: None} in the upstream definition).
 *
 * <p>The upstream documentation this file is generated from also refers to
 * {@code body_roots()}, {@code furniture_roots()}, {@code validate()} and
 * {@code finalize_node_types}. None of these is declared on this Java record, so no
 * index or parent/child consistency checks are performed here.
 *
 * <p>Instances are normally created through {@link #builder()}, which is also the builder
 * Jackson is configured to use for deserialization. With {@code JsonInclude.Include.NON_ABSENT},
 * {@code null} components are left out of the serialized JSON.
 *
 * @param nodes all nodes in document/reading order; a {@code null} argument is replaced by
 *     an empty list, so the accessor never returns {@code null}
 * @param sourceFormat origin format identifier (e.g. "docx", "pptx", "html", "pdf"), or
 *     {@code null}; allows renderers to apply format-aware heuristics when converting the
 *     document tree to output formats
 * @param relationships resolved relationships between nodes (footnote refs, citations,
 *     anchor links, etc.), populated during derivation from the internal document
 *     representation; may be {@code null} or empty when there are none
 * @param nodeTypes node type names present in this document, each being the snake_case
 *     {@code node_type} tag of the corresponding NodeContent variant (e.g.
 *     {@code "paragraph"}, {@code "heading"}, {@code "table"}); described upstream as sorted
 *     and deduplicated, which this record does not enforce; may be {@code null}
 */
@JsonInclude(JsonInclude.Include.NON_ABSENT)
@JsonDeserialize(builder = DocumentStructure.Builder.class)
public record DocumentStructure(
    @JsonProperty("nodes") List<DocumentNode> nodes,
    @Nullable @JsonProperty("source_format") String sourceFormat,
    @Nullable @JsonProperty("relationships") List<DocumentRelationship> relationships,
    @Nullable @JsonProperty("node_types") List<String> nodeTypes
) {
    /**
     * Normalizes the only non-nullable component.
     *
     * <p>{@code nodes} is not declared {@code @Nullable}, but a caller (or a JSON payload
     * with {@code "nodes": null} routed through the builder) can still hand in {@code null}.
     * It is replaced with an empty list instead of being stored, so the accessor honours
     * its declared nullness. The list is otherwise stored as given (no copy is made).
     */
    public DocumentStructure {
        if (nodes == null) {
            nodes = List.of();
        }
    }

    /**
     * Creates a builder with an empty node list and all optional fields unset.
     *
     * @return a new {@link Builder}
     */
    public static Builder builder() {
        return new Builder();
    }

    // CPD-OFF
    /**
     * Mutable builder for {@link DocumentStructure}; also the Jackson deserialization entry
     * point (setter prefix {@code with}, build method {@code build}).
     */
    @JsonPOJOBuilder(withPrefix = "with", buildMethodName = "build")
    public static final class Builder {
        private List<DocumentNode> nodes = List.of();
        @JsonProperty("source_format")
        private String sourceFormat = null;
        private List<DocumentRelationship> relationships = null;
        @JsonProperty("node_types")
        private List<String> nodeTypes = null;

        /**
         * Sets the nodes field.
         *
         * @param value the node list; a {@code null} value ends up as an empty list in the
         *     built instance
         * @return this builder
         */
        @JsonProperty("nodes")
        public Builder withNodes(final List<DocumentNode> value) {
            this.nodes = value;
            return this;
        }

        /**
         * Sets the sourceFormat field.
         *
         * @param value the origin format identifier, or {@code null} to leave it unset
         * @return this builder
         */
        @JsonProperty("source_format")
        public Builder withSourceFormat(final @Nullable String value) {
            this.sourceFormat = value;
            return this;
        }

        /**
         * Sets the relationships field.
         *
         * @param value the relationship list, or {@code null} to leave it unset
         * @return this builder
         */
        @JsonProperty("relationships")
        public Builder withRelationships(final @Nullable List<DocumentRelationship> value) {
            this.relationships = value;
            return this;
        }

        /**
         * Sets the nodeTypes field.
         *
         * @param value the node type names, or {@code null} to leave it unset
         * @return this builder
         */
        @JsonProperty("node_types")
        public Builder withNodeTypes(final @Nullable List<String> value) {
            this.nodeTypes = value;
            return this;
        }

        /**
         * Builds the DocumentStructure instance from the values set so far.
         *
         * @return a new {@link DocumentStructure}
         */
        public DocumentStructure build() {
            return new DocumentStructure(
                nodes,
                sourceFormat,
                relationships,
                nodeTypes
            );
        }
    }
    // CPD-ON

    /**
     * Placeholder for a default instance; not implemented.
     *
     * @return never returns normally
     * @throws UnsupportedOperationException always; use {@link #builder()} instead
     */
    public static DocumentStructure defaultInstance() {
        throw new UnsupportedOperationException("defaultInstance is not yet bridged via JNI; use the Builder instead.");
    }
}