// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
package dev.kreuzberg;

import java.util.List;
import java.util.Map;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
import com.fasterxml.jackson.databind.annotation.JsonPOJOBuilder;
import org.jspecify.annotations.Nullable;

/**
 * Extraction result metadata.
 *
 * <p>Contains common fields applicable to all formats, format-specific metadata
 * via a discriminated union, and additional custom fields from postprocessors.
 *
 * <p>Every component is nullable. Instances are created either directly through the
 * canonical constructor or through {@link #builder()}; JSON deserialization is routed
 * through {@link Builder}.
 *
 * <p>NOTE(review): the {@code List} and {@code Map} components are declared as raw types
 * here; their element types are not visible in this file — confirm against the generator
 * before parameterizing them.
 *
 * @param title Document title
 * @param subject Document subject or description
 * @param authors Primary author(s) - always a list for consistency
 * @param keywords Keywords/tags - always a list for consistency
 * @param language Primary language (ISO 639 code)
 * @param createdAt Creation timestamp (ISO 8601 format)
 * @param modifiedAt Last modification timestamp (ISO 8601 format)
 * @param createdBy User who created the document
 * @param modifiedBy User who last modified the document
 * @param pages Page/slide/sheet structure with boundaries
 * @param format Format-specific metadata (discriminated union). Contains detailed metadata
 *     specific to the document format. Serialized as a nested {@code "format"} object with a
 *     {@code format_type} discriminator field.
 * @param imagePreprocessing Image preprocessing metadata (when OCR preprocessing was applied)
 * @param jsonSchema JSON schema (for structured data extraction)
 * @param error Error metadata (for batch operations)
 * @param extractionDurationMs Extraction duration in milliseconds (for benchmarking). This
 *     field is populated by batch extraction to provide per-file timing information. It's
 *     {@code None} for single-file extraction (which uses external timing).
 * @param category Document category (from frontmatter or classification).
 * @param tags Document tags (from frontmatter).
 * @param documentVersion Document version string (from frontmatter).
 * @param abstractText Abstract or summary text (from frontmatter).
 * @param outputFormat Output format identifier (e.g., "markdown", "html", "text"). Set by the
 *     output format pipeline stage when format conversion is applied. Previously stored in
 *     {@code metadata.additional["output_format"]}.
 * @param ocrUsed Whether OCR was used during extraction. Set to {@code true} whenever the
 *     extraction pipeline ran an OCR backend (Tesseract, PaddleOCR, VLM, etc.) and used that
 *     output as the primary or fallback text. {@code false} means native text extraction was
 *     used exclusively.
 * @param additional Additional custom fields from postprocessors. Serialized as a nested
 *     {@code "additional"} object (not flattened at root level). Uses
 *     {@code Cow<'static, str>} keys so static string keys avoid allocation.
 */
@JsonInclude(JsonInclude.Include.NON_ABSENT)
@JsonDeserialize(builder = Metadata.Builder.class)
public record Metadata(
    @Nullable @JsonProperty("title") String title,
    @Nullable @JsonProperty("subject") String subject,
    @Nullable @JsonProperty("authors") List authors,
    @Nullable @JsonProperty("keywords") List keywords,
    @Nullable @JsonProperty("language") String language,
    @Nullable @JsonProperty("created_at") String createdAt,
    @Nullable @JsonProperty("modified_at") String modifiedAt,
    @Nullable @JsonProperty("created_by") String createdBy,
    @Nullable @JsonProperty("modified_by") String modifiedBy,
    @Nullable @JsonProperty("pages") PageStructure pages,
    @JsonDeserialize(using = FormatMetadataDeserializer.class)
    @Nullable @JsonProperty("format") FormatMetadata format,
    @Nullable @JsonProperty("image_preprocessing") ImagePreprocessingMetadata imagePreprocessing,
    @Nullable @JsonProperty("json_schema") JsonNode jsonSchema,
    @Nullable @JsonProperty("error") ErrorMetadata error,
    @Nullable @JsonProperty("extraction_duration_ms") Long extractionDurationMs,
    @Nullable @JsonProperty("category") String category,
    @Nullable @JsonProperty("tags") List tags,
    @Nullable @JsonProperty("document_version") String documentVersion,
    @Nullable @JsonProperty("abstract_text") String abstractText,
    @Nullable @JsonProperty("output_format") String outputFormat,
    @Nullable @JsonProperty("ocr_used") Boolean ocrUsed,
    @Nullable @JsonProperty("additional") Map additional
) {

    /**
     * Creates an empty builder in which every field starts out as {@code null}.
     *
     * @return a new {@link Builder}
     */
    public static Builder builder() {
        return new Builder();
    }

    // CPD-OFF

    /**
     * Mutable builder for {@link Metadata}.
     *
     * <p>Registered as the Jackson POJO builder for the record, so the {@code with*} methods
     * double as JSON property setters. Unset fields stay {@code null}.
     */
    @JsonPOJOBuilder(withPrefix = "with", buildMethodName = "build")
    public static final class Builder {

        // Reference-typed fields default to null, so no explicit initializers are needed.
        private String title;
        private String subject;
        private List authors;
        private List keywords;
        private String language;
        @JsonProperty("created_at")
        private String createdAt;
        @JsonProperty("modified_at")
        private String modifiedAt;
        @JsonProperty("created_by")
        private String createdBy;
        @JsonProperty("modified_by")
        private String modifiedBy;
        private PageStructure pages;
        private FormatMetadata format;
        @JsonProperty("image_preprocessing")
        private ImagePreprocessingMetadata imagePreprocessing;
        @JsonProperty("json_schema")
        private JsonNode jsonSchema;
        private ErrorMetadata error;
        @JsonProperty("extraction_duration_ms")
        private Long extractionDurationMs;
        private String category;
        private List tags;
        @JsonProperty("document_version")
        private String documentVersion;
        @JsonProperty("abstract_text")
        private String abstractText;
        @JsonProperty("output_format")
        private String outputFormat;
        @JsonProperty("ocr_used")
        private Boolean ocrUsed;
        private Map additional;

        /** Sets the title field and returns this builder. */
        @JsonProperty("title")
        public Builder withTitle(final @Nullable String value) {
            this.title = value;
            return this;
        }

        /** Sets the subject field and returns this builder. */
        @JsonProperty("subject")
        public Builder withSubject(final @Nullable String value) {
            this.subject = value;
            return this;
        }

        /** Sets the authors field and returns this builder. */
        @JsonProperty("authors")
        public Builder withAuthors(final @Nullable List value) {
            this.authors = value;
            return this;
        }

        /** Sets the keywords field and returns this builder. */
        @JsonProperty("keywords")
        public Builder withKeywords(final @Nullable List value) {
            this.keywords = value;
            return this;
        }

        /** Sets the language field and returns this builder. */
        @JsonProperty("language")
        public Builder withLanguage(final @Nullable String value) {
            this.language = value;
            return this;
        }

        /** Sets the createdAt field and returns this builder. */
        @JsonProperty("created_at")
        public Builder withCreatedAt(final @Nullable String value) {
            this.createdAt = value;
            return this;
        }

        /** Sets the modifiedAt field and returns this builder. */
        @JsonProperty("modified_at")
        public Builder withModifiedAt(final @Nullable String value) {
            this.modifiedAt = value;
            return this;
        }

        /** Sets the createdBy field and returns this builder. */
        @JsonProperty("created_by")
        public Builder withCreatedBy(final @Nullable String value) {
            this.createdBy = value;
            return this;
        }

        /** Sets the modifiedBy field and returns this builder. */
        @JsonProperty("modified_by")
        public Builder withModifiedBy(final @Nullable String value) {
            this.modifiedBy = value;
            return this;
        }

        /** Sets the pages field and returns this builder. */
        @JsonProperty("pages")
        public Builder withPages(final @Nullable PageStructure value) {
            this.pages = value;
            return this;
        }

        /**
         * Sets the format field and returns this builder.
         *
         * <p>The custom deserializer is repeated here because JSON deserialization of
         * {@link Metadata} goes through this builder, so Jackson reads property annotations
         * from the builder's methods rather than from the record components.
         */
        @JsonDeserialize(using = FormatMetadataDeserializer.class)
        @JsonProperty("format")
        public Builder withFormat(final @Nullable FormatMetadata value) {
            this.format = value;
            return this;
        }

        /** Sets the imagePreprocessing field and returns this builder. */
        @JsonProperty("image_preprocessing")
        public Builder withImagePreprocessing(final @Nullable ImagePreprocessingMetadata value) {
            this.imagePreprocessing = value;
            return this;
        }

        /** Sets the jsonSchema field and returns this builder. */
        @JsonProperty("json_schema")
        public Builder withJsonSchema(final @Nullable JsonNode value) {
            this.jsonSchema = value;
            return this;
        }

        /** Sets the error field and returns this builder. */
        @JsonProperty("error")
        public Builder withError(final @Nullable ErrorMetadata value) {
            this.error = value;
            return this;
        }

        /**
         * Sets the extractionDurationMs field and returns this builder.
         *
         * <p>Takes the boxed {@code Long} so that {@code null} (no timing recorded) can be
         * passed and is preserved as {@code null}; a primitive parameter could not represent
         * it, and an explicit JSON {@code null} would be turned into {@code 0} or rejected
         * depending on Jackson's {@code FAIL_ON_NULL_FOR_PRIMITIVES} setting. Callers passing a
         * primitive {@code long} continue to compile via autoboxing.
         */
        @JsonProperty("extraction_duration_ms")
        public Builder withExtractionDurationMs(final @Nullable Long value) {
            this.extractionDurationMs = value;
            return this;
        }

        /** Sets the category field and returns this builder. */
        @JsonProperty("category")
        public Builder withCategory(final @Nullable String value) {
            this.category = value;
            return this;
        }

        /** Sets the tags field and returns this builder. */
        @JsonProperty("tags")
        public Builder withTags(final @Nullable List value) {
            this.tags = value;
            return this;
        }

        /** Sets the documentVersion field and returns this builder. */
        @JsonProperty("document_version")
        public Builder withDocumentVersion(final @Nullable String value) {
            this.documentVersion = value;
            return this;
        }

        /** Sets the abstractText field and returns this builder. */
        @JsonProperty("abstract_text")
        public Builder withAbstractText(final @Nullable String value) {
            this.abstractText = value;
            return this;
        }

        /** Sets the outputFormat field and returns this builder. */
        @JsonProperty("output_format")
        public Builder withOutputFormat(final @Nullable String value) {
            this.outputFormat = value;
            return this;
        }

        /** Sets the ocrUsed field and returns this builder. */
        @JsonProperty("ocr_used")
        public Builder withOcrUsed(final @Nullable Boolean value) {
            this.ocrUsed = value;
            return this;
        }

        /** Sets the additional field and returns this builder. */
        @JsonProperty("additional")
        public Builder withAdditional(final @Nullable Map value) {
            this.additional = value;
            return this;
        }

        /**
         * Builds the Metadata instance from the values set so far.
         *
         * @return a new {@link Metadata}; fields never set on this builder are {@code null}
         */
        public Metadata build() {
            return new Metadata(
                title,
                subject,
                authors,
                keywords,
                language,
                createdAt,
                modifiedAt,
                createdBy,
                modifiedBy,
                pages,
                format,
                imagePreprocessing,
                jsonSchema,
                error,
                extractionDurationMs,
                category,
                tags,
                documentVersion,
                abstractText,
                outputFormat,
                ocrUsed,
                additional
            );
        }
    }
    // CPD-ON
}