Nomad changes
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s

This commit is contained in:
Henrik Jess Nielsen
2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions

View File

@@ -0,0 +1,109 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
package dev.kreuzberg;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
import com.fasterxml.jackson.databind.annotation.JsonPOJOBuilder;
import org.jspecify.annotations.Nullable;
/**
 * Hierarchy extraction configuration for PDF text structure analysis.
 *
 * <p>Enables extraction of document hierarchy levels (H1-H6) based on font size
 * clustering and semantic analysis. When enabled, hierarchical blocks are
 * included in page content.
 *
 * <p>Every component is nullable. The only value defaulted on the Java side is
 * {@code kClusters}, which the canonical constructor replaces with
 * {@code 6} when it is {@code null}; all other components are stored as given.
 * JSON deserialization goes through {@link Builder}.
 */
@JsonInclude(JsonInclude.Include.NON_ABSENT)
@JsonDeserialize(builder = HierarchyConfig.Builder.class)
public record HierarchyConfig(
    /**
     * Enable hierarchy extraction
     */
    @Nullable @JsonProperty("enabled") Boolean enabled,
    /**
     * Number of font size clusters to use for hierarchy levels (1-7)
     *
     * Default: 6, which provides H1-H6 heading levels with body text.
     * Larger values create more fine-grained hierarchy levels.
     * A {@code null} value is replaced by the default in the constructor.
     */
    @Nullable @JsonProperty("k_clusters") Long kClusters,
    /**
     * Include bounding box information in hierarchy blocks
     */
    @Nullable @JsonProperty("include_bbox") Boolean includeBbox,
    /**
     * OCR coverage threshold for smart OCR triggering (0.0-1.0)
     *
     * Determines when OCR should be triggered based on text block coverage.
     * OCR is triggered when text blocks cover less than this fraction of the page.
     * Default: 0.5 (trigger OCR if less than 50% of page has text)
     */
    @Nullable @JsonProperty("ocr_coverage_threshold") Float ocrCoverageThreshold
) {
    /** Cluster count applied when {@code kClusters} is not supplied (H1-H6). */
    private static final long DEFAULT_K_CLUSTERS = 6L;

    /**
     * Creates an empty {@link Builder}; every field starts as {@code null}.
     *
     * @return a new builder instance
     */
    public static Builder builder() {
        return new Builder();
    }

    /**
     * Canonical constructor. Substitutes the documented default of 6 clusters
     * when {@code kClusters} is {@code null}; the remaining components are
     * assigned unchanged.
     */
    public HierarchyConfig {
        if (kClusters == null) {
            // Previously fell back to 3, contradicting the documented default of 6.
            kClusters = DEFAULT_K_CLUSTERS;
        }
    }

    // CPD-OFF
    /**
     * Mutable builder for {@link HierarchyConfig}. Also used by Jackson as the
     * deserialization entry point ({@code with*} setters, {@code build()}).
     */
    @JsonPOJOBuilder(withPrefix = "with", buildMethodName = "build")
    public static final class Builder {
        private Boolean enabled = null;
        @JsonProperty("k_clusters")
        private Long kClusters = null;
        @JsonProperty("include_bbox")
        private Boolean includeBbox = null;
        @JsonProperty("ocr_coverage_threshold")
        private Float ocrCoverageThreshold = null;

        /**
         * Sets the enabled field.
         *
         * @param value whether hierarchy extraction is enabled, or {@code null} to leave unset
         * @return this builder
         */
        @JsonProperty("enabled")
        public Builder withEnabled(final @Nullable Boolean value) {
            this.enabled = value;
            return this;
        }

        /**
         * Sets the kClusters field.
         *
         * @param value number of font size clusters, or {@code null} to use the default of 6
         * @return this builder
         */
        @JsonProperty("k_clusters")
        public Builder withKClusters(final @Nullable Long value) {
            this.kClusters = value;
            return this;
        }

        /**
         * Sets the includeBbox field.
         *
         * @param value whether bounding boxes are included, or {@code null} to leave unset
         * @return this builder
         */
        @JsonProperty("include_bbox")
        public Builder withIncludeBbox(final @Nullable Boolean value) {
            this.includeBbox = value;
            return this;
        }

        /**
         * Sets the ocrCoverageThreshold field.
         *
         * @param value OCR coverage threshold, or {@code null} to leave unset
         * @return this builder
         */
        @JsonProperty("ocr_coverage_threshold")
        public Builder withOcrCoverageThreshold(final @Nullable Float value) {
            this.ocrCoverageThreshold = value;
            return this;
        }

        /**
         * Builds the HierarchyConfig instance.
         *
         * @return a new configuration holding the values set on this builder
         */
        public HierarchyConfig build() {
            return new HierarchyConfig(
                enabled,
                kClusters,
                includeBbox,
                ocrCoverageThreshold
            );
        }
    }
    // CPD-ON

    /**
     * Placeholder for obtaining the default configuration.
     *
     * @return never returns normally
     * @throws UnsupportedOperationException always; use {@link #builder()} instead
     */
    public static HierarchyConfig defaultInstance() {
        throw new UnsupportedOperationException("defaultInstance is not yet bridged via JNI; use the Builder instead.");
    }
}