Nomad changes
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s

This commit is contained in:
Henrik Jess Nielsen
2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions

View File

@@ -0,0 +1,19 @@
```java title="Java"
import dev.kreuzberg.ChunkingConfig;
import dev.kreuzberg.EmbeddingConfig;
import dev.kreuzberg.EmbeddingModelType;
import dev.kreuzberg.ExtractionConfig;
// Select the embedding model by preset name.
EmbeddingModelType model = EmbeddingModelType.builder()
    .type("preset")
    .name("text-embedding-all-minilm-l6-v2")
    .build();

// Wrap the model in an embedding configuration.
EmbeddingConfig embedding = EmbeddingConfig.builder()
    .model(model)
    .build();

// Chunking settings with the embedding configuration attached.
ChunkingConfig chunking = ChunkingConfig.builder()
    .maxChars(1500)
    .maxOverlap(200)
    .embedding(embedding)
    .build();

ExtractionConfig config = ExtractionConfig.builder()
    .chunking(chunking)
    .build();
```

View File

@@ -0,0 +1,19 @@
```java title="Java"
import dev.kreuzberg.Kreuzberg;
import dev.kreuzberg.ExtractionResult;
import dev.kreuzberg.ExtractionConfig;
import dev.kreuzberg.ChunkingConfig;
// Chunking configured with the "balanced" embedding option.
ExtractionConfig config = ExtractionConfig.builder()
    .chunking(ChunkingConfig.builder()
        .maxChars(500)
        .maxOverlap(50)
        .embedding("balanced")
        .build())
    .build();

ExtractionResult result = Kreuzberg.extractFile("research_paper.pdf", config);

// Print at most the first 100 characters of the extracted content.
String content = result.getContent();
int previewLength = Math.min(100, content.length());
System.out.println("Content: " + content.substring(0, previewLength) + "...");
```

View File

@@ -0,0 +1,12 @@
```java title="Java"
import dev.kreuzberg.ExtractionConfig;
import dev.kreuzberg.ChunkingConfig;
// Chunking settings using the "balanced" embedding option.
ChunkingConfig chunking = ChunkingConfig.builder()
    .maxChars(1024)
    .maxOverlap(100)
    .embedding("balanced")
    .build();

ExtractionConfig config = ExtractionConfig.builder()
    .chunking(chunking)
    .build();
```

View File

@@ -0,0 +1,4 @@
```java title="Java"
// Note: Keyword extraction is not yet available in the Java bindings.
// This feature requires the 'keywords' feature flag and is planned for a future release.
```

View File

@@ -0,0 +1,20 @@
```java title="Java"
import dev.kreuzberg.Kreuzberg;
import dev.kreuzberg.ExtractionResult;
import dev.kreuzberg.ExtractionConfig;
// Extract with quality processing enabled.
ExtractionConfig config = ExtractionConfig.builder()
    .enableQualityProcessing(true)
    .build();
ExtractionResult result = Kreuzberg.extractFile("scanned_document.pdf", config);

// A missing (null) quality score is treated as 0.0.
double qualityScore = 0.0;
if (result.getQualityScore() != null) {
    qualityScore = result.getQualityScore();
}

// Scores below 0.5 are reported as low quality.
boolean lowQuality = qualityScore < 0.5;
if (lowQuality) {
    System.out.printf("Warning: Low quality extraction (%.2f)%n", qualityScore);
    System.out.println("Consider re-scanning or adjusting OCR settings");
} else {
    System.out.printf("Quality score: %.2f%n", qualityScore);
}
```

View File

@@ -0,0 +1,14 @@
```java title="Java"
import dev.kreuzberg.EmbeddingConfig;
import dev.kreuzberg.Kreuzberg;
import java.util.List;
import java.util.concurrent.CompletableFuture;
// Texts to embed; shared by the synchronous and asynchronous examples below.
List<String> texts = List.of("Hello world", "How are you?");

// Embed with default config (a null config selects the defaults)
float[][] embeddings = Kreuzberg.embed(texts, null);

// Embed with specific preset
EmbeddingConfig config = EmbeddingConfig.withPreset("fast");
float[][] fastEmbeddings = Kreuzberg.embed(List.of("Hello world"), config);

// Async variant
CompletableFuture<float[][]> future = Kreuzberg.embedAsync(texts, null);
```

View File

@@ -0,0 +1,11 @@
```java title="Java"
import dev.kreuzberg.ExtractionConfig;
import dev.kreuzberg.TokenReductionConfig;
// Token reduction in "moderate" mode with important-word preservation turned on.
TokenReductionConfig tokenReduction = TokenReductionConfig.builder()
    .mode("moderate")
    .preserveImportantWords(true)
    .build();

ExtractionConfig config = ExtractionConfig.builder()
    .tokenReduction(tokenReduction)
    .build();
```

View File

@@ -0,0 +1,22 @@
```java title="Java"
import dev.kreuzberg.Kreuzberg;
import dev.kreuzberg.ExtractionResult;
import dev.kreuzberg.ExtractionConfig;
import dev.kreuzberg.TokenReductionConfig;
// Token reduction in "moderate" mode with important-word preservation turned on.
ExtractionConfig config = ExtractionConfig.builder()
    .tokenReduction(TokenReductionConfig.builder()
        .mode("moderate")
        .preserveImportantWords(true)
        .build())
    .build();

ExtractionResult result = Kreuzberg.extractFile("verbose_document.pdf", config);

// Token statistics are read from the result metadata by key.
Object originalTokens = result.getMetadata().get("original_token_count");
Object reducedTokens = result.getMetadata().get("token_count");
Object reductionRatio = result.getMetadata().get("token_reduction_ratio");

System.out.println("Reduced from " + originalTokens + " to " + reducedTokens + " tokens");

// The ratio entry may be absent or non-numeric; check the type before casting so the
// example does not fail with a NullPointerException or ClassCastException.
if (reductionRatio instanceof Number) {
    System.out.println("Reduction: " + ((Number) reductionRatio).doubleValue() * 100 + "%");
} else {
    System.out.println("Reduction: unavailable");
}
```