212 lines
6.8 KiB
Java
212 lines
6.8 KiB
Java
|
|
// This file is auto-generated by alef — DO NOT EDIT.
|
||
|
|
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||
|
|
// To regenerate: alef generate
|
||
|
|
// To verify freshness: alef verify --exit-code
|
||
|
|
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||
|
|
package dev.kreuzberg;
|
||
|
|
|
||
|
|
import java.util.List;
|
||
|
|
import java.util.Map;
|
||
|
|
import com.fasterxml.jackson.annotation.JsonProperty;
|
||
|
|
import com.fasterxml.jackson.annotation.JsonInclude;
|
||
|
|
import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
|
||
|
|
import com.fasterxml.jackson.databind.annotation.JsonPOJOBuilder;
|
||
|
|
import org.jspecify.annotations.Nullable;
|
||
|
|
|
||
|
|
/**
|
||
|
|
* PowerPoint (PPTX) extraction result.
|
||
|
|
*
|
||
|
|
* Contains extracted slide content, metadata, and embedded images/tables.
|
||
|
|
*/
|
||
|
|
@JsonInclude(JsonInclude.Include.NON_ABSENT)
|
||
|
|
@JsonDeserialize(builder = PptxExtractionResult.Builder.class)
|
||
|
|
public record PptxExtractionResult(
|
||
|
|
/**
|
||
|
|
* Extracted text content from all slides
|
||
|
|
*/
|
||
|
|
@JsonProperty("content") String content,
|
||
|
|
/**
|
||
|
|
* Presentation metadata
|
||
|
|
*/
|
||
|
|
@JsonProperty("metadata") PptxMetadata metadata,
|
||
|
|
/**
|
||
|
|
* Total number of slides
|
||
|
|
*/
|
||
|
|
@JsonProperty("slide_count") long slideCount,
|
||
|
|
/**
|
||
|
|
* Total number of embedded images
|
||
|
|
*/
|
||
|
|
@JsonProperty("image_count") long imageCount,
|
||
|
|
/**
|
||
|
|
* Total number of tables
|
||
|
|
*/
|
||
|
|
@JsonProperty("table_count") long tableCount,
|
||
|
|
/**
|
||
|
|
* Extracted images from the presentation
|
||
|
|
*/
|
||
|
|
@JsonProperty("images") List<ExtractedImage> images,
|
||
|
|
/**
|
||
|
|
* Slide structure with boundaries (when page tracking is enabled)
|
||
|
|
*/
|
||
|
|
@Nullable @JsonProperty("page_structure") PageStructure pageStructure,
|
||
|
|
/**
|
||
|
|
* Per-slide content (when page tracking is enabled)
|
||
|
|
*/
|
||
|
|
@Nullable @JsonProperty("page_contents") List<PageContent> pageContents,
|
||
|
|
/**
|
||
|
|
* Structured document representation
|
||
|
|
*/
|
||
|
|
@Nullable @JsonProperty("document") DocumentStructure document,
|
||
|
|
/**
|
||
|
|
* Hyperlinks discovered in slides as (url, optional_label) pairs.
|
||
|
|
*/
|
||
|
|
@Nullable @JsonProperty("hyperlinks") List<String> hyperlinks,
|
||
|
|
/**
|
||
|
|
* Office metadata extracted from docProps/core.xml and docProps/app.xml.
|
||
|
|
*
|
||
|
|
* Contains keys like "title", "author", "created_by", "subject", "keywords",
|
||
|
|
* "modified_by", "created_at", "modified_at", etc.
|
||
|
|
*/
|
||
|
|
@Nullable @JsonProperty("office_metadata") Map<String, String> officeMetadata,
|
||
|
|
/**
|
||
|
|
* Slide comments as revisions.
|
||
|
|
*
|
||
|
|
* Each {@code <p:cm>} element in {@code ppt/comments/comment{N}.xml} becomes a
|
||
|
|
* {@code DocumentRevision { kind: Comment }} with author (resolved from
|
||
|
|
* {@code ppt/commentAuthors.xml}), ISO-8601 timestamp, and
|
||
|
|
* {@code RevisionAnchor.Slide { index }}. {@code None} when no comment XML parts exist.
|
||
|
|
*/
|
||
|
|
@Nullable @JsonProperty("revisions") List<DocumentRevision> revisions
|
||
|
|
) {
|
||
|
|
public static Builder builder() {
|
||
|
|
return new Builder();
|
||
|
|
}
|
||
|
|
|
||
|
|
// CPD-OFF
|
||
|
|
@JsonPOJOBuilder(withPrefix = "with", buildMethodName = "build")
|
||
|
|
public static final class Builder {
|
||
|
|
|
||
|
|
private String content = "";
|
||
|
|
private PptxMetadata metadata = null;
|
||
|
|
@JsonProperty("slide_count")
|
||
|
|
private long slideCount = 0;
|
||
|
|
@JsonProperty("image_count")
|
||
|
|
private long imageCount = 0;
|
||
|
|
@JsonProperty("table_count")
|
||
|
|
private long tableCount = 0;
|
||
|
|
private List<ExtractedImage> images = List.of();
|
||
|
|
@JsonProperty("page_structure")
|
||
|
|
private PageStructure pageStructure = null;
|
||
|
|
@JsonProperty("page_contents")
|
||
|
|
private List<PageContent> pageContents = null;
|
||
|
|
private DocumentStructure document = null;
|
||
|
|
private List<String> hyperlinks = null;
|
||
|
|
@JsonProperty("office_metadata")
|
||
|
|
private Map<String, String> officeMetadata = null;
|
||
|
|
private List<DocumentRevision> revisions = null;
|
||
|
|
|
||
|
|
/** Sets the content field. */
|
||
|
|
@JsonProperty("content")
|
||
|
|
public Builder withContent(final String value) {
|
||
|
|
this.content = value;
|
||
|
|
return this;
|
||
|
|
}
|
||
|
|
|
||
|
|
/** Sets the metadata field. */
|
||
|
|
@JsonProperty("metadata")
|
||
|
|
public Builder withMetadata(final PptxMetadata value) {
|
||
|
|
this.metadata = value;
|
||
|
|
return this;
|
||
|
|
}
|
||
|
|
|
||
|
|
/** Sets the slideCount field. */
|
||
|
|
@JsonProperty("slide_count")
|
||
|
|
public Builder withSlideCount(final long value) {
|
||
|
|
this.slideCount = value;
|
||
|
|
return this;
|
||
|
|
}
|
||
|
|
|
||
|
|
/** Sets the imageCount field. */
|
||
|
|
@JsonProperty("image_count")
|
||
|
|
public Builder withImageCount(final long value) {
|
||
|
|
this.imageCount = value;
|
||
|
|
return this;
|
||
|
|
}
|
||
|
|
|
||
|
|
/** Sets the tableCount field. */
|
||
|
|
@JsonProperty("table_count")
|
||
|
|
public Builder withTableCount(final long value) {
|
||
|
|
this.tableCount = value;
|
||
|
|
return this;
|
||
|
|
}
|
||
|
|
|
||
|
|
/** Sets the images field. */
|
||
|
|
@JsonProperty("images")
|
||
|
|
public Builder withImages(final List<ExtractedImage> value) {
|
||
|
|
this.images = value;
|
||
|
|
return this;
|
||
|
|
}
|
||
|
|
|
||
|
|
/** Sets the pageStructure field. */
|
||
|
|
@JsonProperty("page_structure")
|
||
|
|
public Builder withPageStructure(final @Nullable PageStructure value) {
|
||
|
|
this.pageStructure = value;
|
||
|
|
return this;
|
||
|
|
}
|
||
|
|
|
||
|
|
/** Sets the pageContents field. */
|
||
|
|
@JsonProperty("page_contents")
|
||
|
|
public Builder withPageContents(final @Nullable List<PageContent> value) {
|
||
|
|
this.pageContents = value;
|
||
|
|
return this;
|
||
|
|
}
|
||
|
|
|
||
|
|
/** Sets the document field. */
|
||
|
|
@JsonProperty("document")
|
||
|
|
public Builder withDocument(final @Nullable DocumentStructure value) {
|
||
|
|
this.document = value;
|
||
|
|
return this;
|
||
|
|
}
|
||
|
|
|
||
|
|
/** Sets the hyperlinks field. */
|
||
|
|
@JsonProperty("hyperlinks")
|
||
|
|
public Builder withHyperlinks(final @Nullable List<String> value) {
|
||
|
|
this.hyperlinks = value;
|
||
|
|
return this;
|
||
|
|
}
|
||
|
|
|
||
|
|
/** Sets the officeMetadata field. */
|
||
|
|
@JsonProperty("office_metadata")
|
||
|
|
public Builder withOfficeMetadata(final @Nullable Map<String, String> value) {
|
||
|
|
this.officeMetadata = value;
|
||
|
|
return this;
|
||
|
|
}
|
||
|
|
|
||
|
|
/** Sets the revisions field. */
|
||
|
|
@JsonProperty("revisions")
|
||
|
|
public Builder withRevisions(final @Nullable List<DocumentRevision> value) {
|
||
|
|
this.revisions = value;
|
||
|
|
return this;
|
||
|
|
}
|
||
|
|
|
||
|
|
/** Builds the PptxExtractionResult instance. */
|
||
|
|
public PptxExtractionResult build() {
|
||
|
|
return new PptxExtractionResult(
|
||
|
|
content,
|
||
|
|
metadata,
|
||
|
|
slideCount,
|
||
|
|
imageCount,
|
||
|
|
tableCount,
|
||
|
|
images,
|
||
|
|
pageStructure,
|
||
|
|
pageContents,
|
||
|
|
document,
|
||
|
|
hyperlinks,
|
||
|
|
officeMetadata,
|
||
|
|
revisions
|
||
|
|
);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
// CPD-ON
|
||
|
|
}
|