// This file is auto-generated by alef — DO NOT EDIT. // alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75 // To regenerate: alef generate // To verify freshness: alef verify --exit-code // Issues & docs: https://github.com/kreuzberg-dev/alef package dev.kreuzberg; import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.annotation.JsonInclude; import com.fasterxml.jackson.databind.annotation.JsonDeserialize; import com.fasterxml.jackson.databind.annotation.JsonPOJOBuilder; import org.jspecify.annotations.Nullable; /** * Hierarchy extraction configuration for PDF text structure analysis. * * Enables extraction of document hierarchy levels (H1-H6) based on font size * clustering and semantic analysis. When enabled, hierarchical blocks are * included in page content. */ @JsonInclude(JsonInclude.Include.NON_ABSENT) @JsonDeserialize(builder = HierarchyConfig.Builder.class) public record HierarchyConfig( /** * Enable hierarchy extraction */ @Nullable @JsonProperty("enabled") Boolean enabled, /** * Number of font size clusters to use for hierarchy levels (1-7) * * Default: 6, which provides H1-H6 heading levels with body text. * Larger values create more fine-grained hierarchy levels. */ @Nullable @JsonProperty("k_clusters") Long kClusters, /** * Include bounding box information in hierarchy blocks */ @Nullable @JsonProperty("include_bbox") Boolean includeBbox, /** * OCR coverage threshold for smart OCR triggering (0.0-1.0) * * Determines when OCR should be triggered based on text block coverage. * OCR is triggered when text blocks cover less than this fraction of the page. * Default: 0.5 (trigger OCR if less than 50% of page has text) */ @Nullable @JsonProperty("ocr_coverage_threshold") Float ocrCoverageThreshold ) { public static Builder builder() { return new Builder(); } public HierarchyConfig{ if (kClusters == null) kClusters = 3L; } // CPD-OFF @JsonPOJOBuilder(withPrefix = "with", buildMethodName = "build") public static final class Builder { private Boolean enabled = null; @JsonProperty("k_clusters") private Long kClusters = null; @JsonProperty("include_bbox") private Boolean includeBbox = null; @JsonProperty("ocr_coverage_threshold") private Float ocrCoverageThreshold = null; /** Sets the enabled field. */ @JsonProperty("enabled") public Builder withEnabled(final @Nullable Boolean value) { this.enabled = value; return this; } /** Sets the kClusters field. */ @JsonProperty("k_clusters") public Builder withKClusters(final @Nullable Long value) { this.kClusters = value; return this; } /** Sets the includeBbox field. */ @JsonProperty("include_bbox") public Builder withIncludeBbox(final @Nullable Boolean value) { this.includeBbox = value; return this; } /** Sets the ocrCoverageThreshold field. */ @JsonProperty("ocr_coverage_threshold") public Builder withOcrCoverageThreshold(final @Nullable Float value) { this.ocrCoverageThreshold = value; return this; } /** Builds the HierarchyConfig instance. */ public HierarchyConfig build() { return new HierarchyConfig( enabled, kClusters, includeBbox, ocrCoverageThreshold ); } } // CPD-ON public static HierarchyConfig defaultInstance() { throw new UnsupportedOperationException("defaultInstance is not yet bridged via JNI; use the Builder instead."); } }