This commit is contained in:
4
docs/snippets/java/plugins/clear_plugins.md
Normal file
4
docs/snippets/java/plugins/clear_plugins.md
Normal file
@@ -0,0 +1,4 @@
|
||||
```java title="Java"
|
||||
// Java does not provide bulk clearing functionality in v4.0.0
|
||||
// Unregister plugins individually using unregisterPostProcessor() and unregisterValidator()
|
||||
```
|
||||
79
docs/snippets/java/plugins/embedding_backend.md
Normal file
79
docs/snippets/java/plugins/embedding_backend.md
Normal file
@@ -0,0 +1,79 @@
|
||||
```java title="Java"
|
||||
import dev.kreuzberg.Kreuzberg;
|
||||
import dev.kreuzberg.EmbeddingBackendBridge;
|
||||
import dev.kreuzberg.EmbeddingConfig;
|
||||
import dev.kreuzberg.EmbeddingModelType;
|
||||
import dev.kreuzberg.IEmbeddingBackend;
|
||||
import dev.kreuzberg.KreuzbergRsException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
public class EmbeddingBackendExample {
|
||||
|
||||
/**
|
||||
* Wrap an already-loaded embedder so kreuzberg can call back into it during
|
||||
* chunking and standalone embed requests.
|
||||
*/
|
||||
static final class MyEmbedder implements IEmbeddingBackend {
|
||||
@Override
|
||||
public String name() {
|
||||
return "my-embedder";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String version() {
|
||||
return "1.0.0";
|
||||
}
|
||||
|
||||
@Override
|
||||
public void initialize() {
|
||||
// Optional warm-up; runs once at registration before dimensions() is cached.
|
||||
}
|
||||
|
||||
@Override
|
||||
public void shutdown() {
|
||||
// Optional cleanup.
|
||||
}
|
||||
|
||||
@Override
|
||||
public long dimensions() {
|
||||
// Captured once at registration; the dispatcher uses this for shape validation.
|
||||
return 768L;
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<List<Float>> embed(List<String> texts) {
|
||||
// Delegate to the already-loaded host model.
|
||||
List<List<Float>> out = new ArrayList<>(texts.size());
|
||||
for (int i = 0; i < texts.size(); i++) {
|
||||
List<Float> row = new ArrayList<>(768);
|
||||
for (int j = 0; j < 768; j++) {
|
||||
row.add(0.0f);
|
||||
}
|
||||
out.add(row);
|
||||
}
|
||||
return out;
|
||||
}
|
||||
}
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
// Register once at startup.
|
||||
EmbeddingBackendBridge.registerEmbeddingBackend(new MyEmbedder());
|
||||
try {
|
||||
EmbeddingConfig config = EmbeddingConfig.builder()
|
||||
.model(new EmbeddingModelType.Plugin("my-embedder"))
|
||||
// Optional: bound the wait on a hung backend (default 60s; null disables).
|
||||
.maxEmbedDurationSecs(30L)
|
||||
.build();
|
||||
|
||||
List<String> texts = List.of("Hello, world!", "Second text");
|
||||
List<List<Float>> vectors = Kreuzberg.embedTexts(texts, config);
|
||||
System.out.println("Generated " + vectors.size() + " vectors");
|
||||
} catch (KreuzbergRsException e) {
|
||||
e.printStackTrace();
|
||||
} finally {
|
||||
EmbeddingBackendBridge.unregisterEmbeddingBackend("my-embedder");
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
17
docs/snippets/java/plugins/extractor_registration.md
Normal file
17
docs/snippets/java/plugins/extractor_registration.md
Normal file
@@ -0,0 +1,17 @@
|
||||
```java title="Java"
|
||||
import dev.kreuzberg.Kreuzberg;
|
||||
import dev.kreuzberg.ExtractionResult;
|
||||
import dev.kreuzberg.KreuzbergException;
|
||||
import java.io.IOException;
|
||||
|
||||
public class CustomExtractorExample {
|
||||
public static void main(String[] args) {
|
||||
try {
|
||||
ExtractionResult result = Kreuzberg.extractFile("document.json");
|
||||
System.out.println("Extracted content length: " + result.getContent().length());
|
||||
} catch (IOException | KreuzbergException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
4
docs/snippets/java/plugins/list_plugins.md
Normal file
4
docs/snippets/java/plugins/list_plugins.md
Normal file
@@ -0,0 +1,4 @@
|
||||
```java title="Java"
|
||||
// Java does not provide plugin listing functionality in v4.0.0
|
||||
// Plugins are registered and managed through the FFI layer
|
||||
```
|
||||
34
docs/snippets/java/plugins/min_length_validator.md
Normal file
34
docs/snippets/java/plugins/min_length_validator.md
Normal file
@@ -0,0 +1,34 @@
|
||||
```java title="Java"
|
||||
import dev.kreuzberg.Kreuzberg;
|
||||
import dev.kreuzberg.ExtractionResult;
|
||||
import dev.kreuzberg.Validator;
|
||||
import dev.kreuzberg.ValidationException;
|
||||
import dev.kreuzberg.KreuzbergException;
|
||||
import java.io.IOException;
|
||||
|
||||
public class MinLengthValidatorExample {
|
||||
public static void main(String[] args) {
|
||||
int minLength = 100;
|
||||
|
||||
Validator minLengthValidator = result -> {
|
||||
if (result.getContent().length() < minLength) {
|
||||
throw new ValidationException(
|
||||
"Content too short: " + result.getContent().length() +
|
||||
" < " + minLength
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
try {
|
||||
Kreuzberg.registerValidator("min-length", minLengthValidator, 100);
|
||||
|
||||
ExtractionResult result = Kreuzberg.extractFile("document.pdf");
|
||||
System.out.println("Validation passed!");
|
||||
} catch (ValidationException e) {
|
||||
System.err.println("Validation failed: " + e.getMessage());
|
||||
} catch (IOException | KreuzbergException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
50
docs/snippets/java/plugins/pdf_metadata_extractor.md
Normal file
50
docs/snippets/java/plugins/pdf_metadata_extractor.md
Normal file
@@ -0,0 +1,50 @@
|
||||
```java title="Java"
|
||||
import dev.kreuzberg.Kreuzberg;
|
||||
import dev.kreuzberg.ExtractionResult;
|
||||
import dev.kreuzberg.PostProcessor;
|
||||
import dev.kreuzberg.KreuzbergException;
|
||||
import java.io.IOException;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
import java.util.logging.Logger;
|
||||
|
||||
public class PdfMetadataExtractorExample {
|
||||
private static final Logger logger = Logger.getLogger(
|
||||
PdfMetadataExtractorExample.class.getName()
|
||||
);
|
||||
|
||||
public static void main(String[] args) {
|
||||
AtomicInteger processedCount = new AtomicInteger(0);
|
||||
|
||||
PostProcessor pdfMetadata = result -> {
|
||||
if (!result.getMimeType().equals("application/pdf")) {
|
||||
return result;
|
||||
}
|
||||
|
||||
processedCount.incrementAndGet();
|
||||
|
||||
Map<String, Object> metadata = new HashMap<>(result.getMetadata());
|
||||
metadata.put("pdf_processed", true);
|
||||
metadata.put("processing_timestamp", System.currentTimeMillis());
|
||||
|
||||
logger.info("Processed PDF: " + processedCount.get());
|
||||
|
||||
return result;
|
||||
};
|
||||
|
||||
try {
|
||||
Kreuzberg.registerPostProcessor("pdf-metadata-extractor", pdfMetadata, 50);
|
||||
|
||||
logger.info("PDF metadata extractor initialized");
|
||||
|
||||
ExtractionResult result = Kreuzberg.extractFile("document.pdf");
|
||||
System.out.println("PDF processed: " + result.getMetadata().get("pdf_processed"));
|
||||
|
||||
logger.info("Processed " + processedCount.get() + " PDFs");
|
||||
} catch (IOException | KreuzbergException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
16
docs/snippets/java/plugins/pdf_only_processor.md
Normal file
16
docs/snippets/java/plugins/pdf_only_processor.md
Normal file
@@ -0,0 +1,16 @@
|
||||
```java title="Java"
|
||||
import dev.kreuzberg.PostProcessor;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
PostProcessor pdfOnly = result -> {
|
||||
if (!result.getMimeType().equals("application/pdf")) {
|
||||
return result;
|
||||
}
|
||||
|
||||
Map<String, Object> metadata = new HashMap<>(result.getMetadata());
|
||||
metadata.put("pdf_processed", true);
|
||||
|
||||
return result;
|
||||
};
|
||||
```
|
||||
17
docs/snippets/java/plugins/plugin_extractor.md
Normal file
17
docs/snippets/java/plugins/plugin_extractor.md
Normal file
@@ -0,0 +1,17 @@
|
||||
<!-- snippet:skip reason="The Java binding generates IDocumentExtractor + DocumentExtractorBridge but the InternalDocument Java class referenced by the interface is not generated by the alef Java backend. Custom DocumentExtractor implementations cannot construct return values until the alef-generated Panama type for InternalDocument lands." -->
|
||||
```java title="Java"
|
||||
import dev.kreuzberg.IDocumentExtractor;
|
||||
import dev.kreuzberg.DocumentExtractorBridge;
|
||||
|
||||
// Java's Panama FFM binding exposes the IDocumentExtractor interface and the
|
||||
// DocumentExtractorBridge.registerDocumentExtractor / unregisterDocumentExtractor
|
||||
// helpers, but the InternalDocument return type is referenced from the
|
||||
// interface signature without a corresponding generated Java class. Until the
|
||||
// alef Java backend emits dev.kreuzberg.InternalDocument, custom Java
|
||||
// DocumentExtractor implementations cannot return a value from extract_bytes /
|
||||
// extract_file.
|
||||
//
|
||||
// Implement the extractor in Rust as `Plugin + DocumentExtractor` and register
|
||||
// it via `register_document_extractor` in a Rust shim crate that links
|
||||
// kreuzberg before the JVM loads the native library.
|
||||
```
|
||||
22
docs/snippets/java/plugins/plugin_logging.md
Normal file
22
docs/snippets/java/plugins/plugin_logging.md
Normal file
@@ -0,0 +1,22 @@
|
||||
```java title="Java"
|
||||
import java.util.logging.Logger;
|
||||
import java.util.logging.Level;
|
||||
|
||||
class MyPlugin implements PostProcessor {
|
||||
private static final Logger logger = Logger.getLogger(MyPlugin.class.getName());
|
||||
|
||||
@Override
|
||||
public ExtractionResult process(ExtractionResult result) {
|
||||
logger.info("Processing " + result.mimeType() +
|
||||
" (" + result.content().length() + " bytes)");
|
||||
|
||||
// Processing...
|
||||
|
||||
if (result.content().isEmpty()) {
|
||||
logger.warning("Processing resulted in empty content");
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
}
|
||||
```
|
||||
37
docs/snippets/java/plugins/plugin_testing.md
Normal file
37
docs/snippets/java/plugins/plugin_testing.md
Normal file
@@ -0,0 +1,37 @@
|
||||
```java title="Java"
|
||||
import dev.kreuzberg.ExtractionResult;
|
||||
import dev.kreuzberg.PostProcessor;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
|
||||
class PostProcessorTest {
|
||||
@Test
|
||||
void testWordCountProcessor() {
|
||||
PostProcessor processor = result -> {
|
||||
long count = result.getContent().split("\\s+").length;
|
||||
|
||||
Map<String, Object> metadata = new HashMap<>(result.getMetadata());
|
||||
metadata.put("word_count", count);
|
||||
|
||||
return result;
|
||||
};
|
||||
|
||||
ExtractionResult input = new ExtractionResult(
|
||||
"Hello world test",
|
||||
"text/plain",
|
||||
new HashMap<>(),
|
||||
java.util.List.of(),
|
||||
java.util.List.of(),
|
||||
java.util.List.of(),
|
||||
java.util.List.of(),
|
||||
true
|
||||
);
|
||||
|
||||
ExtractionResult output = processor.process(input);
|
||||
|
||||
assertEquals(3, output.getMetadata().get("word_count"));
|
||||
}
|
||||
}
|
||||
```
|
||||
61
docs/snippets/java/plugins/plugin_validator.md
Normal file
61
docs/snippets/java/plugins/plugin_validator.md
Normal file
@@ -0,0 +1,61 @@
|
||||
```java title="Java"
|
||||
import dev.kreuzberg.IValidator;
|
||||
import dev.kreuzberg.ExtractionConfig;
|
||||
import dev.kreuzberg.ExtractionResult;
|
||||
import dev.kreuzberg.ValidatorBridge;
|
||||
|
||||
// Generic validator pattern: every IValidator has the same shape.
|
||||
// name() keys the registry, priority() orders execution (higher = earlier),
|
||||
// should_validate() is a fast skip-check, and validate() throws on failure.
|
||||
public class GenericValidator implements IValidator {
|
||||
private final String pluginName;
|
||||
private final int pluginPriority;
|
||||
|
||||
public GenericValidator(String pluginName, int pluginPriority) {
|
||||
this.pluginName = pluginName;
|
||||
this.pluginPriority = pluginPriority;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String name() {
|
||||
return pluginName;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String version() {
|
||||
return "1.0.0";
|
||||
}
|
||||
|
||||
@Override
|
||||
public void initialize() {
|
||||
// Optional: open resources, load config files, etc.
|
||||
}
|
||||
|
||||
@Override
|
||||
public void shutdown() {
|
||||
// Optional: release resources held in initialize().
|
||||
}
|
||||
|
||||
@Override
|
||||
public void validate(ExtractionResult result, ExtractionConfig config) throws Exception {
|
||||
if (result.content() == null || result.content().isBlank()) {
|
||||
throw new IllegalArgumentException("Extracted content is blank");
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean should_validate(ExtractionResult _result, ExtractionConfig _config) {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int priority() {
|
||||
return pluginPriority;
|
||||
}
|
||||
|
||||
public static void registerGenericValidator() {
|
||||
GenericValidator validator = new GenericValidator("non-empty-content", 200);
|
||||
ValidatorBridge.registerValidator(validator);
|
||||
}
|
||||
}
|
||||
```
|
||||
11
docs/snippets/java/plugins/quality_score_validator.md
Normal file
11
docs/snippets/java/plugins/quality_score_validator.md
Normal file
@@ -0,0 +1,11 @@
|
||||
```java title="Java"
|
||||
Validator qualityValidator = result -> {
|
||||
double score = result.getQualityScore() != null ? result.getQualityScore() : 0.0;
|
||||
|
||||
if (score < 0.5) {
|
||||
throw new ValidationException(
|
||||
String.format("Quality score too low: %.2f < 0.50", score)
|
||||
);
|
||||
}
|
||||
};
|
||||
```
|
||||
27
docs/snippets/java/plugins/stateful_plugin.md
Normal file
27
docs/snippets/java/plugins/stateful_plugin.md
Normal file
@@ -0,0 +1,27 @@
|
||||
```java title="Java"
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
class StatefulPlugin implements PostProcessor {
|
||||
// Use atomic types for simple counters
|
||||
private final AtomicInteger callCount = new AtomicInteger(0);
|
||||
|
||||
// Use concurrent collections for complex state
|
||||
private final ConcurrentHashMap<String, String> cache = new ConcurrentHashMap<>();
|
||||
|
||||
@Override
|
||||
public ExtractionResult process(ExtractionResult result) {
|
||||
// Increment counter atomically
|
||||
callCount.incrementAndGet();
|
||||
|
||||
// Update cache (thread-safe)
|
||||
cache.put("last_mime", result.mimeType());
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
public int getCallCount() {
|
||||
return callCount.get();
|
||||
}
|
||||
}
|
||||
```
|
||||
11
docs/snippets/java/plugins/unregister_plugins.md
Normal file
11
docs/snippets/java/plugins/unregister_plugins.md
Normal file
@@ -0,0 +1,11 @@
|
||||
```java title="Java"
|
||||
import dev.kreuzberg.Kreuzberg;
import dev.kreuzberg.KreuzbergException;

// Removes the two plugins registered under these names; a failure of either
// call is reported on stderr.
try {
    // Unregister specific plugins
    Kreuzberg.unregisterPostProcessor("word-count");
    Kreuzberg.unregisterValidator("min-length");
} catch (KreuzbergException e) {
    System.err.println("Failed to unregister: " + e.getMessage());
}
|
||||
```
|
||||
31
docs/snippets/java/plugins/word_count_processor.md
Normal file
31
docs/snippets/java/plugins/word_count_processor.md
Normal file
@@ -0,0 +1,31 @@
|
||||
```java title="Java"
|
||||
import dev.kreuzberg.Kreuzberg;
|
||||
import dev.kreuzberg.ExtractionResult;
|
||||
import dev.kreuzberg.PostProcessor;
|
||||
import dev.kreuzberg.KreuzbergException;
|
||||
import java.io.IOException;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
public class WordCountExample {
|
||||
public static void main(String[] args) {
|
||||
PostProcessor wordCount = result -> {
|
||||
long count = result.getContent().split("\\s+").length;
|
||||
|
||||
Map<String, Object> metadata = new HashMap<>(result.getMetadata());
|
||||
metadata.put("word_count", count);
|
||||
|
||||
return result;
|
||||
};
|
||||
|
||||
try {
|
||||
Kreuzberg.registerPostProcessor("word-count", wordCount, 50);
|
||||
|
||||
ExtractionResult result = Kreuzberg.extractFile("document.pdf");
|
||||
System.out.println("Word count: " + result.getMetadata().get("word_count"));
|
||||
} catch (IOException | KreuzbergException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
Reference in New Issue
Block a user