This commit is contained in:
356
packages/java/dev/kreuzberg/Metadata.java
generated
Normal file
356
packages/java/dev/kreuzberg/Metadata.java
generated
Normal file
@@ -0,0 +1,356 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
package dev.kreuzberg;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import com.fasterxml.jackson.databind.JsonNode;
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
import com.fasterxml.jackson.annotation.JsonInclude;
|
||||
import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
|
||||
import com.fasterxml.jackson.databind.annotation.JsonPOJOBuilder;
|
||||
import org.jspecify.annotations.Nullable;
|
||||
|
||||
/**
|
||||
* Extraction result metadata.
|
||||
*
|
||||
* Contains common fields applicable to all formats, format-specific metadata
|
||||
* via a discriminated union, and additional custom fields from postprocessors.
|
||||
*/
|
||||
@JsonInclude(JsonInclude.Include.NON_ABSENT)
|
||||
@JsonDeserialize(builder = Metadata.Builder.class)
|
||||
public record Metadata(
|
||||
/**
|
||||
* Document title
|
||||
*/
|
||||
@Nullable @JsonProperty("title") String title,
|
||||
/**
|
||||
* Document subject or description
|
||||
*/
|
||||
@Nullable @JsonProperty("subject") String subject,
|
||||
/**
|
||||
* Primary author(s) - always Vec for consistency
|
||||
*/
|
||||
@Nullable @JsonProperty("authors") List<String> authors,
|
||||
/**
|
||||
* Keywords/tags - always Vec for consistency
|
||||
*/
|
||||
@Nullable @JsonProperty("keywords") List<String> keywords,
|
||||
/**
|
||||
* Primary language (ISO 639 code)
|
||||
*/
|
||||
@Nullable @JsonProperty("language") String language,
|
||||
/**
|
||||
* Creation timestamp (ISO 8601 format)
|
||||
*/
|
||||
@Nullable @JsonProperty("created_at") String createdAt,
|
||||
/**
|
||||
* Last modification timestamp (ISO 8601 format)
|
||||
*/
|
||||
@Nullable @JsonProperty("modified_at") String modifiedAt,
|
||||
/**
|
||||
* User who created the document
|
||||
*/
|
||||
@Nullable @JsonProperty("created_by") String createdBy,
|
||||
/**
|
||||
* User who last modified the document
|
||||
*/
|
||||
@Nullable @JsonProperty("modified_by") String modifiedBy,
|
||||
/**
|
||||
* Page/slide/sheet structure with boundaries
|
||||
*/
|
||||
@Nullable @JsonProperty("pages") PageStructure pages,
|
||||
/**
|
||||
* Format-specific metadata (discriminated union)
|
||||
*
|
||||
* Contains detailed metadata specific to the document format.
|
||||
* Serialized as a nested {@code "format"} object with a {@code format_type} discriminator field.
|
||||
*/
|
||||
@JsonDeserialize(using = FormatMetadataDeserializer.class) @Nullable @JsonProperty("format") FormatMetadata format,
|
||||
/**
|
||||
* Image preprocessing metadata (when OCR preprocessing was applied)
|
||||
*/
|
||||
@Nullable @JsonProperty("image_preprocessing") ImagePreprocessingMetadata imagePreprocessing,
|
||||
/**
|
||||
* JSON schema (for structured data extraction)
|
||||
*/
|
||||
@Nullable @JsonProperty("json_schema") JsonNode jsonSchema,
|
||||
/**
|
||||
* Error metadata (for batch operations)
|
||||
*/
|
||||
@Nullable @JsonProperty("error") ErrorMetadata error,
|
||||
/**
|
||||
* Extraction duration in milliseconds (for benchmarking).
|
||||
*
|
||||
* This field is populated by batch extraction to provide per-file timing
|
||||
* information. It's {@code None} for single-file extraction (which uses external timing).
|
||||
*/
|
||||
@Nullable @JsonProperty("extraction_duration_ms") Long extractionDurationMs,
|
||||
/**
|
||||
* Document category (from frontmatter or classification).
|
||||
*/
|
||||
@Nullable @JsonProperty("category") String category,
|
||||
/**
|
||||
* Document tags (from frontmatter).
|
||||
*/
|
||||
@Nullable @JsonProperty("tags") List<String> tags,
|
||||
/**
|
||||
* Document version string (from frontmatter).
|
||||
*/
|
||||
@Nullable @JsonProperty("document_version") String documentVersion,
|
||||
/**
|
||||
* Abstract or summary text (from frontmatter).
|
||||
*/
|
||||
@Nullable @JsonProperty("abstract_text") String abstractText,
|
||||
/**
|
||||
* Output format identifier (e.g., "markdown", "html", "text").
|
||||
*
|
||||
* Set by the output format pipeline stage when format conversion is applied.
|
||||
* Previously stored in {@code metadata.additional["output_format"]}.
|
||||
*/
|
||||
@Nullable @JsonProperty("output_format") String outputFormat,
|
||||
/**
|
||||
* Whether OCR was used during extraction.
|
||||
*
|
||||
* Set to {@code true} whenever the extraction pipeline ran an OCR backend
|
||||
* (Tesseract, PaddleOCR, VLM, etc.) and used that output as the primary
|
||||
* or fallback text. {@code false} means native text extraction was used exclusively.
|
||||
*/
|
||||
@Nullable @JsonProperty("ocr_used") Boolean ocrUsed,
|
||||
/**
|
||||
* Additional custom fields from postprocessors.
|
||||
*
|
||||
* Serialized as a nested {@code "additional"} object (not flattened at root level).
|
||||
* Uses {@code Cow<'static, str>} keys so static string keys avoid allocation.
|
||||
*/
|
||||
@Nullable @JsonProperty("additional") Map<String, JsonNode> additional
|
||||
) {
|
||||
public static Builder builder() {
|
||||
return new Builder();
|
||||
}
|
||||
|
||||
// CPD-OFF
|
||||
@JsonPOJOBuilder(withPrefix = "with", buildMethodName = "build")
|
||||
public static final class Builder {
|
||||
|
||||
private String title = null;
|
||||
private String subject = null;
|
||||
private List<String> authors = null;
|
||||
private List<String> keywords = null;
|
||||
private String language = null;
|
||||
@JsonProperty("created_at")
|
||||
private String createdAt = null;
|
||||
@JsonProperty("modified_at")
|
||||
private String modifiedAt = null;
|
||||
@JsonProperty("created_by")
|
||||
private String createdBy = null;
|
||||
@JsonProperty("modified_by")
|
||||
private String modifiedBy = null;
|
||||
private PageStructure pages = null;
|
||||
private FormatMetadata format = null;
|
||||
@JsonProperty("image_preprocessing")
|
||||
private ImagePreprocessingMetadata imagePreprocessing = null;
|
||||
@JsonProperty("json_schema")
|
||||
private JsonNode jsonSchema = null;
|
||||
private ErrorMetadata error = null;
|
||||
@JsonProperty("extraction_duration_ms")
|
||||
private Long extractionDurationMs = null;
|
||||
private String category = null;
|
||||
private List<String> tags = null;
|
||||
@JsonProperty("document_version")
|
||||
private String documentVersion = null;
|
||||
@JsonProperty("abstract_text")
|
||||
private String abstractText = null;
|
||||
@JsonProperty("output_format")
|
||||
private String outputFormat = null;
|
||||
@JsonProperty("ocr_used")
|
||||
private Boolean ocrUsed = null;
|
||||
private Map<String, JsonNode> additional = null;
|
||||
|
||||
/** Sets the title field. */
|
||||
@JsonProperty("title")
|
||||
public Builder withTitle(final @Nullable String value) {
|
||||
this.title = value;
|
||||
return this;
|
||||
}
|
||||
|
||||
/** Sets the subject field. */
|
||||
@JsonProperty("subject")
|
||||
public Builder withSubject(final @Nullable String value) {
|
||||
this.subject = value;
|
||||
return this;
|
||||
}
|
||||
|
||||
/** Sets the authors field. */
|
||||
@JsonProperty("authors")
|
||||
public Builder withAuthors(final @Nullable List<String> value) {
|
||||
this.authors = value;
|
||||
return this;
|
||||
}
|
||||
|
||||
/** Sets the keywords field. */
|
||||
@JsonProperty("keywords")
|
||||
public Builder withKeywords(final @Nullable List<String> value) {
|
||||
this.keywords = value;
|
||||
return this;
|
||||
}
|
||||
|
||||
/** Sets the language field. */
|
||||
@JsonProperty("language")
|
||||
public Builder withLanguage(final @Nullable String value) {
|
||||
this.language = value;
|
||||
return this;
|
||||
}
|
||||
|
||||
/** Sets the createdAt field. */
|
||||
@JsonProperty("created_at")
|
||||
public Builder withCreatedAt(final @Nullable String value) {
|
||||
this.createdAt = value;
|
||||
return this;
|
||||
}
|
||||
|
||||
/** Sets the modifiedAt field. */
|
||||
@JsonProperty("modified_at")
|
||||
public Builder withModifiedAt(final @Nullable String value) {
|
||||
this.modifiedAt = value;
|
||||
return this;
|
||||
}
|
||||
|
||||
/** Sets the createdBy field. */
|
||||
@JsonProperty("created_by")
|
||||
public Builder withCreatedBy(final @Nullable String value) {
|
||||
this.createdBy = value;
|
||||
return this;
|
||||
}
|
||||
|
||||
/** Sets the modifiedBy field. */
|
||||
@JsonProperty("modified_by")
|
||||
public Builder withModifiedBy(final @Nullable String value) {
|
||||
this.modifiedBy = value;
|
||||
return this;
|
||||
}
|
||||
|
||||
/** Sets the pages field. */
|
||||
@JsonProperty("pages")
|
||||
public Builder withPages(final @Nullable PageStructure value) {
|
||||
this.pages = value;
|
||||
return this;
|
||||
}
|
||||
|
||||
/** Sets the format field. */
|
||||
@JsonProperty("format")
|
||||
public Builder withFormat(final @Nullable FormatMetadata value) {
|
||||
this.format = value;
|
||||
return this;
|
||||
}
|
||||
|
||||
/** Sets the imagePreprocessing field. */
|
||||
@JsonProperty("image_preprocessing")
|
||||
public Builder withImagePreprocessing(final @Nullable ImagePreprocessingMetadata value) {
|
||||
this.imagePreprocessing = value;
|
||||
return this;
|
||||
}
|
||||
|
||||
/** Sets the jsonSchema field. */
|
||||
@JsonProperty("json_schema")
|
||||
public Builder withJsonSchema(final @Nullable JsonNode value) {
|
||||
this.jsonSchema = value;
|
||||
return this;
|
||||
}
|
||||
|
||||
/** Sets the error field. */
|
||||
@JsonProperty("error")
|
||||
public Builder withError(final @Nullable ErrorMetadata value) {
|
||||
this.error = value;
|
||||
return this;
|
||||
}
|
||||
|
||||
/** Sets the extractionDurationMs field. */
|
||||
@JsonProperty("extraction_duration_ms")
|
||||
public Builder withExtractionDurationMs(final @Nullable long value) {
|
||||
this.extractionDurationMs = value;
|
||||
return this;
|
||||
}
|
||||
|
||||
/** Sets the category field. */
|
||||
@JsonProperty("category")
|
||||
public Builder withCategory(final @Nullable String value) {
|
||||
this.category = value;
|
||||
return this;
|
||||
}
|
||||
|
||||
/** Sets the tags field. */
|
||||
@JsonProperty("tags")
|
||||
public Builder withTags(final @Nullable List<String> value) {
|
||||
this.tags = value;
|
||||
return this;
|
||||
}
|
||||
|
||||
/** Sets the documentVersion field. */
|
||||
@JsonProperty("document_version")
|
||||
public Builder withDocumentVersion(final @Nullable String value) {
|
||||
this.documentVersion = value;
|
||||
return this;
|
||||
}
|
||||
|
||||
/** Sets the abstractText field. */
|
||||
@JsonProperty("abstract_text")
|
||||
public Builder withAbstractText(final @Nullable String value) {
|
||||
this.abstractText = value;
|
||||
return this;
|
||||
}
|
||||
|
||||
/** Sets the outputFormat field. */
|
||||
@JsonProperty("output_format")
|
||||
public Builder withOutputFormat(final @Nullable String value) {
|
||||
this.outputFormat = value;
|
||||
return this;
|
||||
}
|
||||
|
||||
/** Sets the ocrUsed field. */
|
||||
@JsonProperty("ocr_used")
|
||||
public Builder withOcrUsed(final @Nullable Boolean value) {
|
||||
this.ocrUsed = value;
|
||||
return this;
|
||||
}
|
||||
|
||||
/** Sets the additional field. */
|
||||
@JsonProperty("additional")
|
||||
public Builder withAdditional(final @Nullable Map<String, JsonNode> value) {
|
||||
this.additional = value;
|
||||
return this;
|
||||
}
|
||||
|
||||
/** Builds the Metadata instance. */
|
||||
public Metadata build() {
|
||||
return new Metadata(
|
||||
title,
|
||||
subject,
|
||||
authors,
|
||||
keywords,
|
||||
language,
|
||||
createdAt,
|
||||
modifiedAt,
|
||||
createdBy,
|
||||
modifiedBy,
|
||||
pages,
|
||||
format,
|
||||
imagePreprocessing,
|
||||
jsonSchema,
|
||||
error,
|
||||
extractionDurationMs,
|
||||
category,
|
||||
tags,
|
||||
documentVersion,
|
||||
abstractText,
|
||||
outputFormat,
|
||||
ocrUsed,
|
||||
additional
|
||||
);
|
||||
}
|
||||
}
|
||||
// CPD-ON
|
||||
}
|
||||
Reference in New Issue
Block a user