Files

357 lines
12 KiB
Java
Raw Permalink Normal View History

2026-06-01 23:40:55 +02:00
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
package dev.kreuzberg;
import java.util.List;
import java.util.Map;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
import com.fasterxml.jackson.databind.annotation.JsonPOJOBuilder;
import org.jspecify.annotations.Nullable;
/**
* Extraction result metadata.
*
* Contains common fields applicable to all formats, format-specific metadata
* via a discriminated union, and additional custom fields from postprocessors.
*/
@JsonInclude(JsonInclude.Include.NON_ABSENT)
@JsonDeserialize(builder = Metadata.Builder.class)
public record Metadata(
/**
* Document title
*/
@Nullable @JsonProperty("title") String title,
/**
* Document subject or description
*/
@Nullable @JsonProperty("subject") String subject,
/**
* Primary author(s) - always Vec for consistency
*/
@Nullable @JsonProperty("authors") List<String> authors,
/**
* Keywords/tags - always Vec for consistency
*/
@Nullable @JsonProperty("keywords") List<String> keywords,
/**
* Primary language (ISO 639 code)
*/
@Nullable @JsonProperty("language") String language,
/**
* Creation timestamp (ISO 8601 format)
*/
@Nullable @JsonProperty("created_at") String createdAt,
/**
* Last modification timestamp (ISO 8601 format)
*/
@Nullable @JsonProperty("modified_at") String modifiedAt,
/**
* User who created the document
*/
@Nullable @JsonProperty("created_by") String createdBy,
/**
* User who last modified the document
*/
@Nullable @JsonProperty("modified_by") String modifiedBy,
/**
* Page/slide/sheet structure with boundaries
*/
@Nullable @JsonProperty("pages") PageStructure pages,
/**
* Format-specific metadata (discriminated union)
*
* Contains detailed metadata specific to the document format.
* Serialized as a nested {@code "format"} object with a {@code format_type} discriminator field.
*/
@JsonDeserialize(using = FormatMetadataDeserializer.class) @Nullable @JsonProperty("format") FormatMetadata format,
/**
* Image preprocessing metadata (when OCR preprocessing was applied)
*/
@Nullable @JsonProperty("image_preprocessing") ImagePreprocessingMetadata imagePreprocessing,
/**
* JSON schema (for structured data extraction)
*/
@Nullable @JsonProperty("json_schema") JsonNode jsonSchema,
/**
* Error metadata (for batch operations)
*/
@Nullable @JsonProperty("error") ErrorMetadata error,
/**
* Extraction duration in milliseconds (for benchmarking).
*
* This field is populated by batch extraction to provide per-file timing
* information. It's {@code None} for single-file extraction (which uses external timing).
*/
@Nullable @JsonProperty("extraction_duration_ms") Long extractionDurationMs,
/**
* Document category (from frontmatter or classification).
*/
@Nullable @JsonProperty("category") String category,
/**
* Document tags (from frontmatter).
*/
@Nullable @JsonProperty("tags") List<String> tags,
/**
* Document version string (from frontmatter).
*/
@Nullable @JsonProperty("document_version") String documentVersion,
/**
* Abstract or summary text (from frontmatter).
*/
@Nullable @JsonProperty("abstract_text") String abstractText,
/**
* Output format identifier (e.g., "markdown", "html", "text").
*
* Set by the output format pipeline stage when format conversion is applied.
* Previously stored in {@code metadata.additional["output_format"]}.
*/
@Nullable @JsonProperty("output_format") String outputFormat,
/**
* Whether OCR was used during extraction.
*
* Set to {@code true} whenever the extraction pipeline ran an OCR backend
* (Tesseract, PaddleOCR, VLM, etc.) and used that output as the primary
* or fallback text. {@code false} means native text extraction was used exclusively.
*/
@Nullable @JsonProperty("ocr_used") Boolean ocrUsed,
/**
* Additional custom fields from postprocessors.
*
* Serialized as a nested {@code "additional"} object (not flattened at root level).
* Uses {@code Cow&lt;'static, str&gt;} keys so static string keys avoid allocation.
*/
@Nullable @JsonProperty("additional") Map<String, JsonNode> additional
) {
public static Builder builder() {
return new Builder();
}
// CPD-OFF
@JsonPOJOBuilder(withPrefix = "with", buildMethodName = "build")
public static final class Builder {
private String title = null;
private String subject = null;
private List<String> authors = null;
private List<String> keywords = null;
private String language = null;
@JsonProperty("created_at")
private String createdAt = null;
@JsonProperty("modified_at")
private String modifiedAt = null;
@JsonProperty("created_by")
private String createdBy = null;
@JsonProperty("modified_by")
private String modifiedBy = null;
private PageStructure pages = null;
private FormatMetadata format = null;
@JsonProperty("image_preprocessing")
private ImagePreprocessingMetadata imagePreprocessing = null;
@JsonProperty("json_schema")
private JsonNode jsonSchema = null;
private ErrorMetadata error = null;
@JsonProperty("extraction_duration_ms")
private Long extractionDurationMs = null;
private String category = null;
private List<String> tags = null;
@JsonProperty("document_version")
private String documentVersion = null;
@JsonProperty("abstract_text")
private String abstractText = null;
@JsonProperty("output_format")
private String outputFormat = null;
@JsonProperty("ocr_used")
private Boolean ocrUsed = null;
private Map<String, JsonNode> additional = null;
/** Sets the title field. */
@JsonProperty("title")
public Builder withTitle(final @Nullable String value) {
this.title = value;
return this;
}
/** Sets the subject field. */
@JsonProperty("subject")
public Builder withSubject(final @Nullable String value) {
this.subject = value;
return this;
}
/** Sets the authors field. */
@JsonProperty("authors")
public Builder withAuthors(final @Nullable List<String> value) {
this.authors = value;
return this;
}
/** Sets the keywords field. */
@JsonProperty("keywords")
public Builder withKeywords(final @Nullable List<String> value) {
this.keywords = value;
return this;
}
/** Sets the language field. */
@JsonProperty("language")
public Builder withLanguage(final @Nullable String value) {
this.language = value;
return this;
}
/** Sets the createdAt field. */
@JsonProperty("created_at")
public Builder withCreatedAt(final @Nullable String value) {
this.createdAt = value;
return this;
}
/** Sets the modifiedAt field. */
@JsonProperty("modified_at")
public Builder withModifiedAt(final @Nullable String value) {
this.modifiedAt = value;
return this;
}
/** Sets the createdBy field. */
@JsonProperty("created_by")
public Builder withCreatedBy(final @Nullable String value) {
this.createdBy = value;
return this;
}
/** Sets the modifiedBy field. */
@JsonProperty("modified_by")
public Builder withModifiedBy(final @Nullable String value) {
this.modifiedBy = value;
return this;
}
/** Sets the pages field. */
@JsonProperty("pages")
public Builder withPages(final @Nullable PageStructure value) {
this.pages = value;
return this;
}
/** Sets the format field. */
@JsonProperty("format")
public Builder withFormat(final @Nullable FormatMetadata value) {
this.format = value;
return this;
}
/** Sets the imagePreprocessing field. */
@JsonProperty("image_preprocessing")
public Builder withImagePreprocessing(final @Nullable ImagePreprocessingMetadata value) {
this.imagePreprocessing = value;
return this;
}
/** Sets the jsonSchema field. */
@JsonProperty("json_schema")
public Builder withJsonSchema(final @Nullable JsonNode value) {
this.jsonSchema = value;
return this;
}
/** Sets the error field. */
@JsonProperty("error")
public Builder withError(final @Nullable ErrorMetadata value) {
this.error = value;
return this;
}
/** Sets the extractionDurationMs field. */
@JsonProperty("extraction_duration_ms")
public Builder withExtractionDurationMs(final @Nullable long value) {
this.extractionDurationMs = value;
return this;
}
/** Sets the category field. */
@JsonProperty("category")
public Builder withCategory(final @Nullable String value) {
this.category = value;
return this;
}
/** Sets the tags field. */
@JsonProperty("tags")
public Builder withTags(final @Nullable List<String> value) {
this.tags = value;
return this;
}
/** Sets the documentVersion field. */
@JsonProperty("document_version")
public Builder withDocumentVersion(final @Nullable String value) {
this.documentVersion = value;
return this;
}
/** Sets the abstractText field. */
@JsonProperty("abstract_text")
public Builder withAbstractText(final @Nullable String value) {
this.abstractText = value;
return this;
}
/** Sets the outputFormat field. */
@JsonProperty("output_format")
public Builder withOutputFormat(final @Nullable String value) {
this.outputFormat = value;
return this;
}
/** Sets the ocrUsed field. */
@JsonProperty("ocr_used")
public Builder withOcrUsed(final @Nullable Boolean value) {
this.ocrUsed = value;
return this;
}
/** Sets the additional field. */
@JsonProperty("additional")
public Builder withAdditional(final @Nullable Map<String, JsonNode> value) {
this.additional = value;
return this;
}
/** Builds the Metadata instance. */
public Metadata build() {
return new Metadata(
title,
subject,
authors,
keywords,
language,
createdAt,
modifiedAt,
createdBy,
modifiedBy,
pages,
format,
imagePreprocessing,
jsonSchema,
error,
extractionDurationMs,
category,
tags,
documentVersion,
abstractText,
outputFormat,
ocrUsed,
additional
);
}
}
// CPD-ON
}