Files
fil/packages/java/dev/kreuzberg/PdfConfig.java
Henrik Jess Nielsen b4c07d3693
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s
Nomad changes
2026-06-01 23:40:55 +02:00

199 lines
7.4 KiB
Java
Generated
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
package dev.kreuzberg;
import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
import com.fasterxml.jackson.databind.annotation.JsonPOJOBuilder;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.jspecify.annotations.Nullable;
/**
 * PDF-specific configuration.
 *
 * <p>Every component is nullable. Because the record is annotated with
 * {@code @JsonInclude(NON_ABSENT)}, components that are {@code null} are left out of the
 * serialized JSON. Jackson deserializes this type through {@link Builder}.
 *
 * <p>The {@code passwords} list is copied on construction, so later changes to the list that
 * was passed in do not affect this record, and the list returned by {@link #passwords()} is
 * unmodifiable.
 *
 * @param extractImages Extract images from PDF.
 * @param extractTables Extract tables from PDF. When {@code true} (default), runs pdf_oxide's
 *     native grid detector and, if it finds nothing, falls back to the heuristic text-layer
 *     reconstruction in {@code pdf.oxide.table.extract_tables_heuristic}. Set to {@code false}
 *     to skip both passes; {@code tables} will then be empty in the result.
 * @param passwords List of passwords to try when opening encrypted PDFs.
 * @param extractMetadata Extract PDF metadata.
 * @param hierarchy Hierarchy extraction configuration ({@code null} = hierarchy extraction
 *     disabled).
 * @param extractAnnotations Extract PDF annotations (text notes, highlights, links, stamps).
 *     Default: {@code false}.
 * @param topMarginFraction Top margin fraction (0.0-1.0) of page height to exclude
 *     headers/running heads. Default: 0.06 (6%).
 * @param bottomMarginFraction Bottom margin fraction (0.0-1.0) of page height to exclude
 *     footers/page numbers. Default: 0.05 (5%).
 * @param allowSingleColumnTables Allow single-column pseudo tables in extraction results. By
 *     default, tables with fewer than 2 columns (layout-guided) or 3 columns (heuristic) are
 *     rejected. When {@code true}, the minimum column count is relaxed to 1, allowing
 *     single-column structured data (glossaries, itemized lists) to be emitted as tables. Other
 *     quality filters (density, sparsity, prose detection) still apply.
 * @param ocrInlineImages Perform OCR on inline images extracted from PDF pages and attach the
 *     recognized text to each {@code ExtractedImage.ocr_result}. Requires Tesseract to be
 *     available; if {@code ExtractionConfig.ocr} is {@code None} (unset) the extractor falls
 *     back to {@code TesseractConfig.default()}. Per-image failures degrade gracefully (the
 *     image is returned without OCR text rather than failing the whole extraction). Default:
 *     {@code false}.
 */
@JsonInclude(JsonInclude.Include.NON_ABSENT)
@JsonDeserialize(builder = PdfConfig.Builder.class)
public record PdfConfig(
    @Nullable @JsonProperty("extract_images") Boolean extractImages,
    @Nullable @JsonProperty("extract_tables") Boolean extractTables,
    @Nullable @JsonProperty("passwords") List<String> passwords,
    @Nullable @JsonProperty("extract_metadata") Boolean extractMetadata,
    @Nullable @JsonProperty("hierarchy") HierarchyConfig hierarchy,
    @Nullable @JsonProperty("extract_annotations") Boolean extractAnnotations,
    @Nullable @JsonProperty("top_margin_fraction") Float topMarginFraction,
    @Nullable @JsonProperty("bottom_margin_fraction") Float bottomMarginFraction,
    @Nullable @JsonProperty("allow_single_column_tables") Boolean allowSingleColumnTables,
    @Nullable @JsonProperty("ocr_inline_images") Boolean ocrInlineImages
) {

    /**
     * Takes a private, unmodifiable snapshot of {@code passwords}.
     *
     * <p>Without the copy the record would share the caller's list, so mutating that list
     * afterwards would silently change this record, including its {@code equals} and
     * {@code hashCode} results. A {@code null} list stays {@code null}, and {@code null}
     * elements are kept as they are.
     */
    public PdfConfig {
        if (passwords != null) {
            passwords = Collections.unmodifiableList(new ArrayList<>(passwords));
        }
    }

    /**
     * Creates a builder in which every field starts out as {@code null}.
     *
     * @return a new, empty builder
     */
    public static Builder builder() {
        return new Builder();
    }

    // CPD-OFF
    /**
     * Mutable builder for {@link PdfConfig}; also the entry point Jackson uses when
     * deserializing. Fields that are never set remain {@code null}.
     */
    @JsonPOJOBuilder(withPrefix = "with", buildMethodName = "build")
    public static final class Builder {
        @JsonProperty("extract_images")
        private @Nullable Boolean extractImages;

        @JsonProperty("extract_tables")
        private @Nullable Boolean extractTables;

        @JsonProperty("passwords")
        private @Nullable List<String> passwords;

        @JsonProperty("extract_metadata")
        private @Nullable Boolean extractMetadata;

        @JsonProperty("hierarchy")
        private @Nullable HierarchyConfig hierarchy;

        @JsonProperty("extract_annotations")
        private @Nullable Boolean extractAnnotations;

        @JsonProperty("top_margin_fraction")
        private @Nullable Float topMarginFraction;

        @JsonProperty("bottom_margin_fraction")
        private @Nullable Float bottomMarginFraction;

        @JsonProperty("allow_single_column_tables")
        private @Nullable Boolean allowSingleColumnTables;

        @JsonProperty("ocr_inline_images")
        private @Nullable Boolean ocrInlineImages;

        /**
         * Sets the {@code extractImages} field.
         *
         * @param value the new value, or {@code null} to clear it
         * @return this builder
         */
        @JsonProperty("extract_images")
        public Builder withExtractImages(final @Nullable Boolean value) {
            this.extractImages = value;
            return this;
        }

        /**
         * Sets the {@code extractTables} field.
         *
         * @param value the new value, or {@code null} to clear it
         * @return this builder
         */
        @JsonProperty("extract_tables")
        public Builder withExtractTables(final @Nullable Boolean value) {
            this.extractTables = value;
            return this;
        }

        /**
         * Sets the {@code passwords} field. The list is stored by reference here and copied
         * when {@link #build()} constructs the record.
         *
         * @param value the new value, or {@code null} to clear it
         * @return this builder
         */
        @JsonProperty("passwords")
        public Builder withPasswords(final @Nullable List<String> value) {
            this.passwords = value;
            return this;
        }

        /**
         * Sets the {@code extractMetadata} field.
         *
         * @param value the new value, or {@code null} to clear it
         * @return this builder
         */
        @JsonProperty("extract_metadata")
        public Builder withExtractMetadata(final @Nullable Boolean value) {
            this.extractMetadata = value;
            return this;
        }

        /**
         * Sets the {@code hierarchy} field.
         *
         * @param value the new value, or {@code null} to clear it
         * @return this builder
         */
        @JsonProperty("hierarchy")
        public Builder withHierarchy(final @Nullable HierarchyConfig value) {
            this.hierarchy = value;
            return this;
        }

        /**
         * Sets the {@code extractAnnotations} field.
         *
         * @param value the new value, or {@code null} to clear it
         * @return this builder
         */
        @JsonProperty("extract_annotations")
        public Builder withExtractAnnotations(final @Nullable Boolean value) {
            this.extractAnnotations = value;
            return this;
        }

        /**
         * Sets the {@code topMarginFraction} field.
         *
         * @param value the new value, or {@code null} to clear it
         * @return this builder
         */
        @JsonProperty("top_margin_fraction")
        public Builder withTopMarginFraction(final @Nullable Float value) {
            this.topMarginFraction = value;
            return this;
        }

        /**
         * Sets the {@code bottomMarginFraction} field.
         *
         * @param value the new value, or {@code null} to clear it
         * @return this builder
         */
        @JsonProperty("bottom_margin_fraction")
        public Builder withBottomMarginFraction(final @Nullable Float value) {
            this.bottomMarginFraction = value;
            return this;
        }

        /**
         * Sets the {@code allowSingleColumnTables} field.
         *
         * @param value the new value, or {@code null} to clear it
         * @return this builder
         */
        @JsonProperty("allow_single_column_tables")
        public Builder withAllowSingleColumnTables(final @Nullable Boolean value) {
            this.allowSingleColumnTables = value;
            return this;
        }

        /**
         * Sets the {@code ocrInlineImages} field.
         *
         * @param value the new value, or {@code null} to clear it
         * @return this builder
         */
        @JsonProperty("ocr_inline_images")
        public Builder withOcrInlineImages(final @Nullable Boolean value) {
            this.ocrInlineImages = value;
            return this;
        }

        /**
         * Builds the {@link PdfConfig} instance from the values set so far.
         *
         * @return a new record; its {@code passwords} list is a copy of the one held by this
         *     builder
         */
        public PdfConfig build() {
            return new PdfConfig(
                extractImages,
                extractTables,
                passwords,
                extractMetadata,
                hierarchy,
                extractAnnotations,
                topMarginFraction,
                bottomMarginFraction,
                allowSingleColumnTables,
                ocrInlineImages
            );
        }
    }
    // CPD-ON

    /**
     * Placeholder for a default configuration; currently always fails.
     *
     * @return never returns normally
     * @throws UnsupportedOperationException always; use {@link #builder()} instead
     */
    public static PdfConfig defaultInstance() {
        throw new UnsupportedOperationException("defaultInstance is not yet bridged via JNI; use the Builder instead.");
    }
}