Files
fil/packages/java/dev/kreuzberg/ChunkMetadata.java

178 lines
5.9 KiB
Java
Raw Normal View History

2026-06-01 23:40:55 +02:00
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
package dev.kreuzberg;
import java.util.List;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
import com.fasterxml.jackson.databind.annotation.JsonPOJOBuilder;
import org.jspecify.annotations.Nullable;
/**
 * Metadata about a chunk's position in the original document.
 *
 * <p>Serialized with {@code JsonInclude.Include.NON_ABSENT}, so components whose value is
 * {@code null} are left out of the JSON output. Deserialization is routed through
 * {@link Builder}.
 *
 * @param byteStart byte offset where this chunk starts in the original text (UTF-8 valid
 *     boundary)
 * @param byteEnd byte offset where this chunk ends in the original text (UTF-8 valid boundary)
 * @param tokenCount number of tokens in this chunk, if available; calculated by the embedding
 *     model's tokenizer if embeddings are enabled
 * @param chunkIndex zero-based index of this chunk in the document
 * @param totalChunks total number of chunks in the document
 * @param firstPage first page number this chunk spans (1-indexed); only populated when page
 *     tracking is enabled in extraction configuration
 * @param lastPage last page number this chunk spans (1-indexed, equal to {@code first_page} for
 *     single-page chunks); only populated when page tracking is enabled in extraction
 *     configuration
 * @param headingContext heading hierarchy this chunk falls under when the Markdown chunker is
 *     used; only populated when {@code ChunkerType.Markdown} is used
 * @param imageIndices zero-based indices into {@code ExtractionResult.images} for every image
 *     whose {@code page_number} falls within {@code [first_page, last_page]}; empty when image
 *     extraction is disabled or the chunk spans no pages with images
 */
@JsonInclude(JsonInclude.Include.NON_ABSENT)
@JsonDeserialize(builder = ChunkMetadata.Builder.class)
public record ChunkMetadata(
    @JsonProperty("byte_start") long byteStart,
    @JsonProperty("byte_end") long byteEnd,
    @Nullable @JsonProperty("token_count") Long tokenCount,
    @JsonProperty("chunk_index") long chunkIndex,
    @JsonProperty("total_chunks") long totalChunks,
    @Nullable @JsonProperty("first_page") Integer firstPage,
    @Nullable @JsonProperty("last_page") Integer lastPage,
    @Nullable @JsonProperty("heading_context") HeadingContext headingContext,
    @Nullable @JsonProperty("image_indices") List<Integer> imageIndices
) {
    /**
     * Creates a builder whose numeric components start at {@code 0} and whose nullable
     * components start at {@code null}.
     *
     * @return a fresh, independent builder
     */
    public static Builder builder() {
        return new Builder();
    }

    // CPD-OFF
    /**
     * Mutable builder for {@link ChunkMetadata}; also the Jackson deserialization entry point
     * (setter prefix {@code with}, terminal method {@code build}).
     *
     * <p>For the nullable boxed components ({@code tokenCount}, {@code firstPage},
     * {@code lastPage}) the JSON binding lives on the boxed overload. A primitive-typed setter
     * cannot represent an explicit JSON {@code null}: Jackson would coerce it to {@code 0} or
     * reject it, depending on {@code FAIL_ON_NULL_FOR_PRIMITIVES}. The primitive overloads are
     * kept, without a JSON binding, purely so existing call sites keep compiling.
     *
     * <p>NOTE(review): the file header says this source is generated by alef; the same change
     * should be made in the generator template so it survives regeneration.
     */
    @JsonPOJOBuilder(withPrefix = "with", buildMethodName = "build")
    public static final class Builder {
        @JsonProperty("byte_start")
        private long byteStart = 0;
        @JsonProperty("byte_end")
        private long byteEnd = 0;
        @JsonProperty("token_count")
        @Nullable private Long tokenCount = null;
        @JsonProperty("chunk_index")
        private long chunkIndex = 0;
        @JsonProperty("total_chunks")
        private long totalChunks = 0;
        @JsonProperty("first_page")
        @Nullable private Integer firstPage = null;
        @JsonProperty("last_page")
        @Nullable private Integer lastPage = null;
        @JsonProperty("heading_context")
        @Nullable private HeadingContext headingContext = null;
        @JsonProperty("image_indices")
        @Nullable private List<Integer> imageIndices = null;

        /**
         * Sets the byteStart field.
         *
         * @param value byte offset where the chunk starts
         * @return this builder
         */
        @JsonProperty("byte_start")
        public Builder withByteStart(final long value) {
            this.byteStart = value;
            return this;
        }

        /**
         * Sets the byteEnd field.
         *
         * @param value byte offset where the chunk ends
         * @return this builder
         */
        @JsonProperty("byte_end")
        public Builder withByteEnd(final long value) {
            this.byteEnd = value;
            return this;
        }

        /**
         * Sets the tokenCount field; this is the overload bound to {@code token_count} in JSON.
         *
         * @param value token count, or {@code null} when it is not available
         * @return this builder
         */
        @JsonProperty("token_count")
        public Builder withTokenCount(final @Nullable Long value) {
            this.tokenCount = value;
            return this;
        }

        /**
         * Sets the tokenCount field from a primitive value.
         *
         * <p>Deliberately carries no {@code @JsonProperty}: the explicitly named boxed overload
         * is the one Jackson should use.
         *
         * @param value token count
         * @return this builder
         */
        public Builder withTokenCount(final long value) {
            this.tokenCount = value;
            return this;
        }

        /**
         * Sets the chunkIndex field.
         *
         * @param value zero-based index of the chunk
         * @return this builder
         */
        @JsonProperty("chunk_index")
        public Builder withChunkIndex(final long value) {
            this.chunkIndex = value;
            return this;
        }

        /**
         * Sets the totalChunks field.
         *
         * @param value total number of chunks
         * @return this builder
         */
        @JsonProperty("total_chunks")
        public Builder withTotalChunks(final long value) {
            this.totalChunks = value;
            return this;
        }

        /**
         * Sets the firstPage field; this is the overload bound to {@code first_page} in JSON.
         *
         * @param value first page number, or {@code null} when it is not populated
         * @return this builder
         */
        @JsonProperty("first_page")
        public Builder withFirstPage(final @Nullable Integer value) {
            this.firstPage = value;
            return this;
        }

        /**
         * Sets the firstPage field from a primitive value (no JSON binding; see the boxed
         * overload).
         *
         * @param value first page number
         * @return this builder
         */
        public Builder withFirstPage(final int value) {
            this.firstPage = value;
            return this;
        }

        /**
         * Sets the lastPage field; this is the overload bound to {@code last_page} in JSON.
         *
         * @param value last page number, or {@code null} when it is not populated
         * @return this builder
         */
        @JsonProperty("last_page")
        public Builder withLastPage(final @Nullable Integer value) {
            this.lastPage = value;
            return this;
        }

        /**
         * Sets the lastPage field from a primitive value (no JSON binding; see the boxed
         * overload).
         *
         * @param value last page number
         * @return this builder
         */
        public Builder withLastPage(final int value) {
            this.lastPage = value;
            return this;
        }

        /**
         * Sets the headingContext field.
         *
         * @param value heading context, or {@code null} when there is none
         * @return this builder
         */
        @JsonProperty("heading_context")
        public Builder withHeadingContext(final @Nullable HeadingContext value) {
            this.headingContext = value;
            return this;
        }

        /**
         * Sets the imageIndices field. The list reference is stored as given, not copied.
         *
         * @param value image indices, or {@code null} when there are none
         * @return this builder
         */
        @JsonProperty("image_indices")
        public Builder withImageIndices(final @Nullable List<Integer> value) {
            this.imageIndices = value;
            return this;
        }

        /**
         * Builds the ChunkMetadata instance from the values currently held by this builder.
         *
         * @return a new {@link ChunkMetadata}
         */
        public ChunkMetadata build() {
            return new ChunkMetadata(
                byteStart,
                byteEnd,
                tokenCount,
                chunkIndex,
                totalChunks,
                firstPage,
                lastPage,
                headingContext,
                imageIndices
            );
        }
    }
    // CPD-ON
}