This commit is contained in:
19
docs/snippets/java/utils/chunking.md
Normal file
19
docs/snippets/java/utils/chunking.md
Normal file
@@ -0,0 +1,19 @@
|
||||
```java title="Java"
|
||||
import dev.kreuzberg.ChunkingConfig;
|
||||
import dev.kreuzberg.EmbeddingConfig;
|
||||
import dev.kreuzberg.EmbeddingModelType;
|
||||
import dev.kreuzberg.ExtractionConfig;
|
||||
|
||||
ExtractionConfig config = ExtractionConfig.builder()
|
||||
.chunking(ChunkingConfig.builder()
|
||||
.maxChars(1500)
|
||||
.maxOverlap(200)
|
||||
.embedding(EmbeddingConfig.builder()
|
||||
.model(EmbeddingModelType.builder()
|
||||
.type("preset")
|
||||
.name("text-embedding-all-minilm-l6-v2")
|
||||
.build())
|
||||
.build())
|
||||
.build())
|
||||
.build();
|
||||
```
|
||||
19
docs/snippets/java/utils/chunking_rag.md
Normal file
19
docs/snippets/java/utils/chunking_rag.md
Normal file
@@ -0,0 +1,19 @@
|
||||
```java title="Java"
|
||||
import dev.kreuzberg.Kreuzberg;
|
||||
import dev.kreuzberg.ExtractionResult;
|
||||
import dev.kreuzberg.ExtractionConfig;
|
||||
import dev.kreuzberg.ChunkingConfig;
|
||||
|
||||
ExtractionConfig config = ExtractionConfig.builder()
|
||||
.chunking(ChunkingConfig.builder()
|
||||
.maxChars(500)
|
||||
.maxOverlap(50)
|
||||
.embedding("balanced")
|
||||
.build())
|
||||
.build();
|
||||
|
||||
ExtractionResult result = Kreuzberg.extractFile("research_paper.pdf", config);
|
||||
|
||||
System.out.println("Content: " + result.getContent()
|
||||
.substring(0, Math.min(100, result.getContent().length())) + "...");
|
||||
```
|
||||
12
docs/snippets/java/utils/embedding_with_chunking.md
Normal file
12
docs/snippets/java/utils/embedding_with_chunking.md
Normal file
@@ -0,0 +1,12 @@
|
||||
```java title="Java"
|
||||
import dev.kreuzberg.ExtractionConfig;
|
||||
import dev.kreuzberg.ChunkingConfig;
|
||||
|
||||
ExtractionConfig config = ExtractionConfig.builder()
|
||||
.chunking(ChunkingConfig.builder()
|
||||
.maxChars(1024)
|
||||
.maxOverlap(100)
|
||||
.embedding("balanced")
|
||||
.build())
|
||||
.build();
|
||||
```
|
||||
4
docs/snippets/java/utils/keyword_extraction_example.md
Normal file
4
docs/snippets/java/utils/keyword_extraction_example.md
Normal file
@@ -0,0 +1,4 @@
|
||||
```java title="Java"
|
||||
// Note: Keyword extraction is not yet available in Java bindings
|
||||
// This feature requires the 'keywords' feature flag and is planned for a future release
|
||||
```
|
||||
20
docs/snippets/java/utils/quality_processing_example.md
Normal file
20
docs/snippets/java/utils/quality_processing_example.md
Normal file
@@ -0,0 +1,20 @@
|
||||
```java title="Java"
|
||||
import dev.kreuzberg.Kreuzberg;
|
||||
import dev.kreuzberg.ExtractionResult;
|
||||
import dev.kreuzberg.ExtractionConfig;
|
||||
|
||||
ExtractionConfig config = ExtractionConfig.builder()
|
||||
.enableQualityProcessing(true)
|
||||
.build();
|
||||
|
||||
ExtractionResult result = Kreuzberg.extractFile("scanned_document.pdf", config);
|
||||
|
||||
double qualityScore = result.getQualityScore() != null ? result.getQualityScore() : 0.0;
|
||||
|
||||
if (qualityScore < 0.5) {
|
||||
System.out.printf("Warning: Low quality extraction (%.2f)%n", qualityScore);
|
||||
System.out.println("Consider re-scanning or adjusting OCR settings");
|
||||
} else {
|
||||
System.out.printf("Quality score: %.2f%n", qualityScore);
|
||||
}
|
||||
```
|
||||
14
docs/snippets/java/utils/standalone_embed.md
Normal file
14
docs/snippets/java/utils/standalone_embed.md
Normal file
@@ -0,0 +1,14 @@
|
||||
```java title="Java"
|
||||
import dev.kreuzberg.Kreuzberg;
|
||||
import dev.kreuzberg.EmbeddingConfig;
|
||||
|
||||
// Embed with default config
|
||||
float[][] embeddings = Kreuzberg.embed(List.of("Hello world", "How are you?"), null);
|
||||
|
||||
// Embed with specific preset
|
||||
EmbeddingConfig config = EmbeddingConfig.withPreset("fast");
|
||||
float[][] fastEmbeddings = Kreuzberg.embed(List.of("Hello world"), config);
|
||||
|
||||
// Async variant
|
||||
CompletableFuture<float[][]> future = Kreuzberg.embedAsync(List.of("Hello world", "How are you?"), null);
|
||||
```
|
||||
11
docs/snippets/java/utils/token_reduction.md
Normal file
11
docs/snippets/java/utils/token_reduction.md
Normal file
@@ -0,0 +1,11 @@
|
||||
```java title="Java"
|
||||
import dev.kreuzberg.ExtractionConfig;
|
||||
import dev.kreuzberg.TokenReductionConfig;
|
||||
|
||||
ExtractionConfig config = ExtractionConfig.builder()
|
||||
.tokenReduction(TokenReductionConfig.builder()
|
||||
.mode("moderate")
|
||||
.preserveImportantWords(true)
|
||||
.build())
|
||||
.build();
|
||||
```
|
||||
22
docs/snippets/java/utils/token_reduction_example.md
Normal file
22
docs/snippets/java/utils/token_reduction_example.md
Normal file
@@ -0,0 +1,22 @@
|
||||
```java title="Java"
|
||||
import dev.kreuzberg.Kreuzberg;
|
||||
import dev.kreuzberg.ExtractionResult;
|
||||
import dev.kreuzberg.ExtractionConfig;
|
||||
import dev.kreuzberg.TokenReductionConfig;
|
||||
|
||||
ExtractionConfig config = ExtractionConfig.builder()
|
||||
.tokenReduction(TokenReductionConfig.builder()
|
||||
.mode("moderate")
|
||||
.preserveImportantWords(true)
|
||||
.build())
|
||||
.build();
|
||||
|
||||
ExtractionResult result = Kreuzberg.extractFile("verbose_document.pdf", config);
|
||||
|
||||
Object originalTokens = result.getMetadata().get("original_token_count");
|
||||
Object reducedTokens = result.getMetadata().get("token_count");
|
||||
Object reductionRatio = result.getMetadata().get("token_reduction_ratio");
|
||||
|
||||
System.out.println("Reduced from " + originalTokens + " to " + reducedTokens + " tokens");
|
||||
System.out.println("Reduction: " + ((Number)reductionRatio).doubleValue() * 100 + "%");
|
||||
```
|
||||
Reference in New Issue
Block a user