This commit is contained in:
109
packages/java/dev/kreuzberg/HierarchyConfig.java
generated
Normal file
109
packages/java/dev/kreuzberg/HierarchyConfig.java
generated
Normal file
@@ -0,0 +1,109 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
package dev.kreuzberg;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
import com.fasterxml.jackson.annotation.JsonInclude;
|
||||
import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
|
||||
import com.fasterxml.jackson.databind.annotation.JsonPOJOBuilder;
|
||||
import org.jspecify.annotations.Nullable;
|
||||
|
||||
/**
|
||||
* Hierarchy extraction configuration for PDF text structure analysis.
|
||||
*
|
||||
* Enables extraction of document hierarchy levels (H1-H6) based on font size
|
||||
* clustering and semantic analysis. When enabled, hierarchical blocks are
|
||||
* included in page content.
|
||||
*/
|
||||
@JsonInclude(JsonInclude.Include.NON_ABSENT)
|
||||
@JsonDeserialize(builder = HierarchyConfig.Builder.class)
|
||||
public record HierarchyConfig(
|
||||
/**
|
||||
* Enable hierarchy extraction
|
||||
*/
|
||||
@Nullable @JsonProperty("enabled") Boolean enabled,
|
||||
/**
|
||||
* Number of font size clusters to use for hierarchy levels (1-7)
|
||||
*
|
||||
* Default: 6, which provides H1-H6 heading levels with body text.
|
||||
* Larger values create more fine-grained hierarchy levels.
|
||||
*/
|
||||
@Nullable @JsonProperty("k_clusters") Long kClusters,
|
||||
/**
|
||||
* Include bounding box information in hierarchy blocks
|
||||
*/
|
||||
@Nullable @JsonProperty("include_bbox") Boolean includeBbox,
|
||||
/**
|
||||
* OCR coverage threshold for smart OCR triggering (0.0-1.0)
|
||||
*
|
||||
* Determines when OCR should be triggered based on text block coverage.
|
||||
* OCR is triggered when text blocks cover less than this fraction of the page.
|
||||
* Default: 0.5 (trigger OCR if less than 50% of page has text)
|
||||
*/
|
||||
@Nullable @JsonProperty("ocr_coverage_threshold") Float ocrCoverageThreshold
|
||||
) {
|
||||
public static Builder builder() {
|
||||
return new Builder();
|
||||
}
|
||||
public HierarchyConfig{
|
||||
if (kClusters == null) kClusters = 3L;
|
||||
}
|
||||
|
||||
// CPD-OFF
|
||||
@JsonPOJOBuilder(withPrefix = "with", buildMethodName = "build")
|
||||
public static final class Builder {
|
||||
|
||||
private Boolean enabled = null;
|
||||
@JsonProperty("k_clusters")
|
||||
private Long kClusters = null;
|
||||
@JsonProperty("include_bbox")
|
||||
private Boolean includeBbox = null;
|
||||
@JsonProperty("ocr_coverage_threshold")
|
||||
private Float ocrCoverageThreshold = null;
|
||||
|
||||
/** Sets the enabled field. */
|
||||
@JsonProperty("enabled")
|
||||
public Builder withEnabled(final @Nullable Boolean value) {
|
||||
this.enabled = value;
|
||||
return this;
|
||||
}
|
||||
|
||||
/** Sets the kClusters field. */
|
||||
@JsonProperty("k_clusters")
|
||||
public Builder withKClusters(final @Nullable Long value) {
|
||||
this.kClusters = value;
|
||||
return this;
|
||||
}
|
||||
|
||||
/** Sets the includeBbox field. */
|
||||
@JsonProperty("include_bbox")
|
||||
public Builder withIncludeBbox(final @Nullable Boolean value) {
|
||||
this.includeBbox = value;
|
||||
return this;
|
||||
}
|
||||
|
||||
/** Sets the ocrCoverageThreshold field. */
|
||||
@JsonProperty("ocr_coverage_threshold")
|
||||
public Builder withOcrCoverageThreshold(final @Nullable Float value) {
|
||||
this.ocrCoverageThreshold = value;
|
||||
return this;
|
||||
}
|
||||
|
||||
/** Builds the HierarchyConfig instance. */
|
||||
public HierarchyConfig build() {
|
||||
return new HierarchyConfig(
|
||||
enabled,
|
||||
kClusters,
|
||||
includeBbox,
|
||||
ocrCoverageThreshold
|
||||
);
|
||||
}
|
||||
}
|
||||
// CPD-ON
|
||||
public static HierarchyConfig defaultInstance() {
|
||||
throw new UnsupportedOperationException("defaultInstance is not yet bridged via JNI; use the Builder instead.");
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user