Nomad changes
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s

This commit is contained in:
Henrik Jess Nielsen
2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions

View File

@@ -0,0 +1,211 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
package dev.kreuzberg;
import java.util.List;
import java.util.Map;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
import com.fasterxml.jackson.databind.annotation.JsonPOJOBuilder;
import org.jspecify.annotations.Nullable;
/**
 * PowerPoint (PPTX) extraction result.
 *
 * <p>Contains extracted slide content, metadata, and embedded images/tables.
 *
 * <p>JSON uses the snake_case names given by the {@code @JsonProperty} annotations, and
 * deserialization goes through {@link Builder}. A {@code null} {@code content} or {@code images}
 * (for example passed explicitly to the builder or to the canonical constructor) is replaced by
 * the builder default ({@code ""} / empty list), so those two components are never {@code null}.
 *
 * <p>NOTE(review): {@code metadata} is not marked {@code @Nullable}, yet the builder defaults it
 * to {@code null}; it is left untouched here because no default instance is available in this
 * file. Confirm whether callers may rely on it being non-null.
 *
 * @param content extracted text content from all slides; never {@code null}
 * @param metadata presentation metadata
 * @param slideCount total number of slides
 * @param imageCount total number of embedded images
 * @param tableCount total number of tables
 * @param images extracted images from the presentation; never {@code null}
 * @param pageStructure slide structure with boundaries (when page tracking is enabled)
 * @param pageContents per-slide content (when page tracking is enabled)
 * @param document structured document representation
 * @param hyperlinks hyperlinks discovered in slides as (url, optional_label) pairs.
 *     NOTE(review): this binding exposes them as plain strings; how a pair is encoded into one
 *     string is not visible here and should be confirmed against the generator.
 * @param officeMetadata Office metadata extracted from docProps/core.xml and docProps/app.xml.
 *     Contains keys like "title", "author", "created_by", "subject", "keywords",
 *     "modified_by", "created_at", "modified_at", etc.
 * @param revisions slide comments as revisions. Each {@code <p:cm>} element in
 *     {@code ppt/comments/comment{N}.xml} becomes a {@code DocumentRevision { kind: Comment }}
 *     with author (resolved from {@code ppt/commentAuthors.xml}), ISO-8601 timestamp, and
 *     {@code RevisionAnchor.Slide { index }}. Absent ({@code None} in the upstream description)
 *     when no comment XML parts exist.
 */
@JsonInclude(JsonInclude.Include.NON_ABSENT)
@JsonDeserialize(builder = PptxExtractionResult.Builder.class)
public record PptxExtractionResult(
    @JsonProperty("content") String content,
    @JsonProperty("metadata") PptxMetadata metadata,
    @JsonProperty("slide_count") long slideCount,
    @JsonProperty("image_count") long imageCount,
    @JsonProperty("table_count") long tableCount,
    @JsonProperty("images") List<ExtractedImage> images,
    @Nullable @JsonProperty("page_structure") PageStructure pageStructure,
    @Nullable @JsonProperty("page_contents") List<PageContent> pageContents,
    @Nullable @JsonProperty("document") DocumentStructure document,
    @Nullable @JsonProperty("hyperlinks") List<String> hyperlinks,
    @Nullable @JsonProperty("office_metadata") Map<String, String> officeMetadata,
    @Nullable @JsonProperty("revisions") List<DocumentRevision> revisions
) {
    /**
     * Normalizes the components that are not declared {@code @Nullable} but have an obvious
     * empty value, mirroring the defaults used by {@link Builder}.
     */
    public PptxExtractionResult {
        // A null here would contradict the non-@Nullable declaration; fall back to the same
        // defaults the builder starts with instead of propagating null to accessor callers.
        if (content == null) {
            content = "";
        }
        if (images == null) {
            images = List.of();
        }
    }

    /**
     * Creates a new builder pre-populated with the default value of every component.
     *
     * @return a fresh {@link Builder}
     */
    public static Builder builder() {
        return new Builder();
    }

    // CPD-OFF
    /**
     * Mutable builder for {@link PptxExtractionResult}, also used as the deserialization target
     * (setter prefix {@code with}, build method {@code build}).
     *
     * <p>Defaults: empty {@code content}, zero counts, empty {@code images}, and {@code null}
     * for every other component.
     */
    @JsonPOJOBuilder(withPrefix = "with", buildMethodName = "build")
    public static final class Builder {
        private String content = "";
        private PptxMetadata metadata = null;
        @JsonProperty("slide_count")
        private long slideCount = 0;
        @JsonProperty("image_count")
        private long imageCount = 0;
        @JsonProperty("table_count")
        private long tableCount = 0;
        private List<ExtractedImage> images = List.of();
        @JsonProperty("page_structure")
        private PageStructure pageStructure = null;
        @JsonProperty("page_contents")
        private List<PageContent> pageContents = null;
        private DocumentStructure document = null;
        private List<String> hyperlinks = null;
        @JsonProperty("office_metadata")
        private Map<String, String> officeMetadata = null;
        private List<DocumentRevision> revisions = null;

        /**
         * Sets the content field.
         *
         * @param value extracted text; a {@code null} becomes {@code ""} when the record is built
         * @return this builder
         */
        @JsonProperty("content")
        public Builder withContent(final String value) {
            this.content = value;
            return this;
        }

        /**
         * Sets the metadata field.
         *
         * @param value presentation metadata
         * @return this builder
         */
        @JsonProperty("metadata")
        public Builder withMetadata(final PptxMetadata value) {
            this.metadata = value;
            return this;
        }

        /**
         * Sets the slideCount field.
         *
         * @param value total number of slides
         * @return this builder
         */
        @JsonProperty("slide_count")
        public Builder withSlideCount(final long value) {
            this.slideCount = value;
            return this;
        }

        /**
         * Sets the imageCount field.
         *
         * @param value total number of embedded images
         * @return this builder
         */
        @JsonProperty("image_count")
        public Builder withImageCount(final long value) {
            this.imageCount = value;
            return this;
        }

        /**
         * Sets the tableCount field.
         *
         * @param value total number of tables
         * @return this builder
         */
        @JsonProperty("table_count")
        public Builder withTableCount(final long value) {
            this.tableCount = value;
            return this;
        }

        /**
         * Sets the images field.
         *
         * @param value extracted images; a {@code null} becomes an empty list when the record
         *     is built
         * @return this builder
         */
        @JsonProperty("images")
        public Builder withImages(final List<ExtractedImage> value) {
            this.images = value;
            return this;
        }

        /**
         * Sets the pageStructure field.
         *
         * @param value slide structure, or {@code null} when unavailable
         * @return this builder
         */
        @JsonProperty("page_structure")
        public Builder withPageStructure(final @Nullable PageStructure value) {
            this.pageStructure = value;
            return this;
        }

        /**
         * Sets the pageContents field.
         *
         * @param value per-slide content, or {@code null} when unavailable
         * @return this builder
         */
        @JsonProperty("page_contents")
        public Builder withPageContents(final @Nullable List<PageContent> value) {
            this.pageContents = value;
            return this;
        }

        /**
         * Sets the document field.
         *
         * @param value structured document representation, or {@code null} when unavailable
         * @return this builder
         */
        @JsonProperty("document")
        public Builder withDocument(final @Nullable DocumentStructure value) {
            this.document = value;
            return this;
        }

        /**
         * Sets the hyperlinks field.
         *
         * @param value hyperlinks discovered in slides, or {@code null} when unavailable
         * @return this builder
         */
        @JsonProperty("hyperlinks")
        public Builder withHyperlinks(final @Nullable List<String> value) {
            this.hyperlinks = value;
            return this;
        }

        /**
         * Sets the officeMetadata field.
         *
         * @param value Office metadata key/value pairs, or {@code null} when unavailable
         * @return this builder
         */
        @JsonProperty("office_metadata")
        public Builder withOfficeMetadata(final @Nullable Map<String, String> value) {
            this.officeMetadata = value;
            return this;
        }

        /**
         * Sets the revisions field.
         *
         * @param value slide comments as revisions, or {@code null} when unavailable
         * @return this builder
         */
        @JsonProperty("revisions")
        public Builder withRevisions(final @Nullable List<DocumentRevision> value) {
            this.revisions = value;
            return this;
        }

        /**
         * Builds the PptxExtractionResult instance from the values currently held by this
         * builder.
         *
         * @return a new {@link PptxExtractionResult}
         */
        public PptxExtractionResult build() {
            return new PptxExtractionResult(
                content,
                metadata,
                slideCount,
                imageCount,
                tableCount,
                images,
                pageStructure,
                pageContents,
                document,
                hyperlinks,
                officeMetadata,
                revisions
            );
        }
    }
    // CPD-ON
}