// This file is auto-generated by alef — DO NOT EDIT. // alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75 // To regenerate: alef generate // To verify freshness: alef verify --exit-code // Issues & docs: https://github.com/kreuzberg-dev/alef package dev.kreuzberg; import java.util.List; import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.annotation.JsonInclude; import com.fasterxml.jackson.databind.annotation.JsonDeserialize; import com.fasterxml.jackson.databind.annotation.JsonPOJOBuilder; import org.jspecify.annotations.Nullable; /** * PDF-specific configuration. */ @JsonInclude(JsonInclude.Include.NON_ABSENT) @JsonDeserialize(builder = PdfConfig.Builder.class) public record PdfConfig( /** * Extract images from PDF */ @Nullable @JsonProperty("extract_images") Boolean extractImages, /** * Extract tables from PDF. * * When {@code true} (default), runs pdf_oxide's native grid detector and, if it * finds nothing, falls back to the heuristic text-layer reconstruction in * {@code pdf.oxide.table.extract_tables_heuristic}. Set to {@code false} to skip * both passes — {@code tables} will then be empty in the result. */ @Nullable @JsonProperty("extract_tables") Boolean extractTables, /** * List of passwords to try when opening encrypted PDFs */ @Nullable @JsonProperty("passwords") List passwords, /** * Extract PDF metadata */ @Nullable @JsonProperty("extract_metadata") Boolean extractMetadata, /** * Hierarchy extraction configuration (null = hierarchy extraction disabled) */ @Nullable @JsonProperty("hierarchy") HierarchyConfig hierarchy, /** * Extract PDF annotations (text notes, highlights, links, stamps). * Default: false */ @Nullable @JsonProperty("extract_annotations") Boolean extractAnnotations, /** * Top margin fraction (0.0–1.0) of page height to exclude headers/running heads. * Default: 0.06 (6%) */ @Nullable @JsonProperty("top_margin_fraction") Float topMarginFraction, /** * Bottom margin fraction (0.0–1.0) of page height to exclude footers/page numbers. * Default: 0.05 (5%) */ @Nullable @JsonProperty("bottom_margin_fraction") Float bottomMarginFraction, /** * Allow single-column pseudo tables in extraction results. * * By default, tables with fewer than 2 columns (layout-guided) or 3 columns * (heuristic) are rejected. When {@code true}, the minimum column count is relaxed * to 1, allowing single-column structured data (glossaries, itemized lists) * to be emitted as tables. Other quality filters (density, sparsity, prose * detection) still apply. */ @Nullable @JsonProperty("allow_single_column_tables") Boolean allowSingleColumnTables, /** * Perform OCR on inline images extracted from PDF pages and attach the * recognized text to each {@code ExtractedImage.ocr_result}. Requires Tesseract * to be available; if {@code ExtractionConfig.ocr} is {@code None} the extractor * falls back to {@code TesseractConfig.default()}. Per-image failures degrade * gracefully (the image is returned without OCR text rather than failing * the whole extraction). Default: {@code false}. */ @Nullable @JsonProperty("ocr_inline_images") Boolean ocrInlineImages ) { public static Builder builder() { return new Builder(); } // CPD-OFF @JsonPOJOBuilder(withPrefix = "with", buildMethodName = "build") public static final class Builder { @JsonProperty("extract_images") private Boolean extractImages = null; @JsonProperty("extract_tables") private Boolean extractTables = null; private List passwords = null; @JsonProperty("extract_metadata") private Boolean extractMetadata = null; @Nullable private HierarchyConfig hierarchy = null; @JsonProperty("extract_annotations") private Boolean extractAnnotations = null; @JsonProperty("top_margin_fraction") private Float topMarginFraction = null; @JsonProperty("bottom_margin_fraction") private Float bottomMarginFraction = null; @JsonProperty("allow_single_column_tables") private Boolean allowSingleColumnTables = null; @JsonProperty("ocr_inline_images") private Boolean ocrInlineImages = null; /** Sets the extractImages field. */ @JsonProperty("extract_images") public Builder withExtractImages(final @Nullable Boolean value) { this.extractImages = value; return this; } /** Sets the extractTables field. */ @JsonProperty("extract_tables") public Builder withExtractTables(final @Nullable Boolean value) { this.extractTables = value; return this; } /** Sets the passwords field. */ @JsonProperty("passwords") public Builder withPasswords(final @Nullable List value) { this.passwords = value; return this; } /** Sets the extractMetadata field. */ @JsonProperty("extract_metadata") public Builder withExtractMetadata(final @Nullable Boolean value) { this.extractMetadata = value; return this; } /** Sets the hierarchy field. */ @JsonProperty("hierarchy") public Builder withHierarchy(final @Nullable HierarchyConfig value) { this.hierarchy = value; return this; } /** Sets the extractAnnotations field. */ @JsonProperty("extract_annotations") public Builder withExtractAnnotations(final @Nullable Boolean value) { this.extractAnnotations = value; return this; } /** Sets the topMarginFraction field. */ @JsonProperty("top_margin_fraction") public Builder withTopMarginFraction(final @Nullable Float value) { this.topMarginFraction = value; return this; } /** Sets the bottomMarginFraction field. */ @JsonProperty("bottom_margin_fraction") public Builder withBottomMarginFraction(final @Nullable Float value) { this.bottomMarginFraction = value; return this; } /** Sets the allowSingleColumnTables field. */ @JsonProperty("allow_single_column_tables") public Builder withAllowSingleColumnTables(final @Nullable Boolean value) { this.allowSingleColumnTables = value; return this; } /** Sets the ocrInlineImages field. */ @JsonProperty("ocr_inline_images") public Builder withOcrInlineImages(final @Nullable Boolean value) { this.ocrInlineImages = value; return this; } /** Builds the PdfConfig instance. */ public PdfConfig build() { return new PdfConfig( extractImages, extractTables, passwords, extractMetadata, hierarchy, extractAnnotations, topMarginFraction, bottomMarginFraction, allowSingleColumnTables, ocrInlineImages ); } } // CPD-ON public static PdfConfig defaultInstance() { throw new UnsupportedOperationException("defaultInstance is not yet bridged via JNI; use the Builder instead."); } }