Nomad changes
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s

This commit is contained in:
Henrik Jess Nielsen
2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions

View File

@@ -0,0 +1,4 @@
```java title="Java"
// Java does not provide bulk clearing functionality in v4.0.0
// Unregister plugins individually using unregisterPostProcessor() and unregisterValidator()
```

View File

@@ -0,0 +1,79 @@
```java title="Java"
import dev.kreuzberg.Kreuzberg;
import dev.kreuzberg.EmbeddingBackendBridge;
import dev.kreuzberg.EmbeddingConfig;
import dev.kreuzberg.EmbeddingModelType;
import dev.kreuzberg.IEmbeddingBackend;
import dev.kreuzberg.KreuzbergRsException;
import java.util.ArrayList;
import java.util.List;
/**
 * Shows how to plug a host-side embedder into kreuzberg: register the backend,
 * select it by name in an {@code EmbeddingConfig}, embed a few texts, and
 * unregister it again.
 */
public class EmbeddingBackendExample {
    /** Registry name shared by the backend, the plugin model selector and the unregister call. */
    private static final String BACKEND_NAME = "my-embedder";

    /** Vector width; single source of truth for both dimensions() and embed(). */
    private static final int DIMENSIONS = 768;

    /**
     * Wrap an already-loaded embedder so kreuzberg can call back into it during
     * chunking and standalone embed requests.
     */
    static final class MyEmbedder implements IEmbeddingBackend {
        @Override
        public String name() {
            return BACKEND_NAME;
        }

        @Override
        public String version() {
            return "1.0.0";
        }

        @Override
        public void initialize() {
            // Optional warm-up; runs once at registration before dimensions() is cached.
        }

        @Override
        public void shutdown() {
            // Optional cleanup.
        }

        @Override
        public long dimensions() {
            // Captured once at registration; the dispatcher uses this for shape validation.
            return DIMENSIONS;
        }

        /**
         * Produces one placeholder vector per input text.
         *
         * @param texts the texts to embed
         * @return a list with {@code texts.size()} rows, each holding
         *         {@code DIMENSIONS} zero-valued floats
         */
        @Override
        public List<List<Float>> embed(List<String> texts) {
            // Delegate to the already-loaded host model; zeros stand in for real output here.
            List<List<Float>> vectors = new ArrayList<>(texts.size());
            for (int textIndex = 0; textIndex < texts.size(); textIndex++) {
                List<Float> row = new ArrayList<>(DIMENSIONS);
                for (int component = 0; component < DIMENSIONS; component++) {
                    row.add(0.0f);
                }
                vectors.add(row);
            }
            return vectors;
        }
    }

    public static void main(String[] args) throws Exception {
        // Register once at startup.
        EmbeddingBackendBridge.registerEmbeddingBackend(new MyEmbedder());
        try {
            EmbeddingConfig config = EmbeddingConfig.builder()
                .model(new EmbeddingModelType.Plugin(BACKEND_NAME))
                // Optional: bound the wait on a hung backend (default 60s; null disables).
                .maxEmbedDurationSecs(30L)
                .build();
            List<String> texts = List.of("Hello, world!", "Second text");
            List<List<Float>> vectors = Kreuzberg.embedTexts(texts, config);
            System.out.println("Generated " + vectors.size() + " vectors");
        } catch (KreuzbergRsException e) {
            e.printStackTrace();
        } finally {
            // Always remove the backend, even when embedding failed.
            EmbeddingBackendBridge.unregisterEmbeddingBackend(BACKEND_NAME);
        }
    }
}
```

View File

@@ -0,0 +1,17 @@
```java title="Java"
import dev.kreuzberg.Kreuzberg;
import dev.kreuzberg.ExtractionResult;
import dev.kreuzberg.KreuzbergException;
import java.io.IOException;
/** Extracts {@code document.json} and prints how long the extracted content is. */
public class CustomExtractorExample {
    public static void main(String[] args) {
        try {
            ExtractionResult extracted = Kreuzberg.extractFile("document.json");
            int contentLength = extracted.getContent().length();
            System.out.println("Extracted content length: " + contentLength);
        } catch (IOException | KreuzbergException failure) {
            // Both failure modes are reported the same way: dump the stack trace.
            failure.printStackTrace();
        }
    }
}
```

View File

@@ -0,0 +1,4 @@
```java title="Java"
// Java does not provide plugin listing functionality in v4.0.0
// Plugins are registered and managed through the FFI layer
```

View File

@@ -0,0 +1,34 @@
```java title="Java"
import dev.kreuzberg.Kreuzberg;
import dev.kreuzberg.ExtractionResult;
import dev.kreuzberg.Validator;
import dev.kreuzberg.ValidationException;
import dev.kreuzberg.KreuzbergException;
import java.io.IOException;
/**
 * Registers a validator that rejects extraction results whose content is
 * shorter than a fixed number of characters, then runs one extraction.
 */
public class MinLengthValidatorExample {
    public static void main(String[] args) {
        final int minLength = 100;

        // Throws when the content is shorter than minLength; otherwise does nothing.
        Validator minLengthValidator = extracted -> {
            int actualLength = extracted.getContent().length();
            if (actualLength < minLength) {
                String reason = "Content too short: " + actualLength + " < " + minLength;
                throw new ValidationException(reason);
            }
        };

        try {
            // Third argument (100) is the value passed as the validator's priority slot.
            Kreuzberg.registerValidator("min-length", minLengthValidator, 100);
            ExtractionResult validated = Kreuzberg.extractFile("document.pdf");
            System.out.println("Validation passed!");
        } catch (ValidationException rejected) {
            System.err.println("Validation failed: " + rejected.getMessage());
        } catch (IOException | KreuzbergException failure) {
            failure.printStackTrace();
        }
    }
}
```

View File

@@ -0,0 +1,50 @@
```java title="Java"
import dev.kreuzberg.Kreuzberg;
import dev.kreuzberg.ExtractionResult;
import dev.kreuzberg.PostProcessor;
import dev.kreuzberg.KreuzbergException;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.logging.Logger;
/**
 * Registers a post-processor that counts the PDF results it sees and prepares
 * extra metadata entries for them, then runs one extraction.
 */
public class PdfMetadataExtractorExample {
    private static final Logger logger =
        Logger.getLogger(PdfMetadataExtractorExample.class.getName());

    public static void main(String[] args) {
        AtomicInteger processedCount = new AtomicInteger(0);

        PostProcessor pdfMetadata = extracted -> {
            boolean isPdf = extracted.getMimeType().equals("application/pdf");
            if (!isPdf) {
                // Non-PDF results pass through untouched and are not counted.
                return extracted;
            }
            processedCount.incrementAndGet();

            // NOTE(review): this map is a detached copy and is never attached to the
            // returned result, so the entries below are presumably not visible to
            // callers. Confirm how ExtractionResult is meant to carry new metadata.
            Map<String, Object> enriched = new HashMap<>(extracted.getMetadata());
            enriched.put("pdf_processed", true);
            enriched.put("processing_timestamp", System.currentTimeMillis());

            logger.info("Processed PDF: " + processedCount.get());
            return extracted;
        };

        try {
            Kreuzberg.registerPostProcessor("pdf-metadata-extractor", pdfMetadata, 50);
            logger.info("PDF metadata extractor initialized");

            ExtractionResult outcome = Kreuzberg.extractFile("document.pdf");
            Object processedFlag = outcome.getMetadata().get("pdf_processed");
            System.out.println("PDF processed: " + processedFlag);
            logger.info("Processed " + processedCount.get() + " PDFs");
        } catch (IOException | KreuzbergException failure) {
            failure.printStackTrace();
        }
    }
}
```

View File

@@ -0,0 +1,16 @@
```java title="Java"
import dev.kreuzberg.PostProcessor;
import java.util.HashMap;
import java.util.Map;
// Post-processor that only does work for PDF results; everything else passes through.
PostProcessor pdfOnly = extracted -> {
    boolean isPdf = extracted.getMimeType().equals("application/pdf");
    if (isPdf) {
        // NOTE(review): the copy below is never attached to the returned result, so
        // "pdf_processed" is presumably not visible to callers. Confirm the intended API.
        Map<String, Object> enriched = new HashMap<>(extracted.getMetadata());
        enriched.put("pdf_processed", true);
    }
    return extracted;
};
```

View File

@@ -0,0 +1,17 @@
<!-- snippet:skip reason="The Java binding generates IDocumentExtractor + DocumentExtractorBridge but the InternalDocument Java class referenced by the interface is not generated by the alef Java backend. Custom DocumentExtractor implementations cannot construct return values until the alef-generated Panama type for InternalDocument lands." -->
```java title="Java"
import dev.kreuzberg.IDocumentExtractor;
import dev.kreuzberg.DocumentExtractorBridge;
// Java's Panama FFM binding exposes the IDocumentExtractor interface and the
// DocumentExtractorBridge.registerDocumentExtractor / unregisterDocumentExtractor
// helpers, but the InternalDocument return type is referenced from the
// interface signature without a corresponding generated Java class. Until the
// alef Java backend emits dev.kreuzberg.InternalDocument, custom Java
// DocumentExtractor implementations cannot return a value from extract_bytes /
// extract_file.
//
// Implement the extractor in Rust as `Plugin + DocumentExtractor` and register
// it via `register_document_extractor` in a Rust shim crate that links
// kreuzberg before the JVM loads the native library.
```

View File

@@ -0,0 +1,22 @@
```java title="Java"
import java.util.logging.Logger;
import java.util.logging.Level;
/** Post-processor that logs what it receives and warns when the content is empty. */
class MyPlugin implements PostProcessor {
    private static final Logger LOG = Logger.getLogger(MyPlugin.class.getName());

    /**
     * Logs the MIME type and content length of {@code result}, warns on empty
     * content, and returns the same instance unchanged.
     */
    @Override
    public ExtractionResult process(ExtractionResult result) {
        // The figure reported is String.length(), i.e. a character count.
        String summary = "Processing " + result.mimeType() +
            " (" + result.content().length() + " bytes)";
        LOG.info(summary);

        // Processing...

        boolean nothingExtracted = result.content().isEmpty();
        if (nothingExtracted) {
            LOG.warning("Processing resulted in empty content");
        }
        return result;
    }
}
```

View File

@@ -0,0 +1,37 @@
```java title="Java"
import dev.kreuzberg.ExtractionResult;
import dev.kreuzberg.PostProcessor;
import org.junit.jupiter.api.Test;
import java.util.HashMap;
import java.util.Map;
import static org.junit.jupiter.api.Assertions.*;
/** Unit test for a word-counting {@code PostProcessor} lambda. */
class PostProcessorTest {
    @Test
    void testWordCountProcessor() {
        PostProcessor processor = result -> {
            long count = result.getContent().split("\\s+").length;
            Map<String, Object> metadata = new HashMap<>(result.getMetadata());
            metadata.put("word_count", count);
            // NOTE(review): `metadata` is a detached copy and `result` is returned as-is,
            // so the assertion below presumably only holds once the copy is attached to
            // the returned result. Confirm against the ExtractionResult API.
            return result;
        };

        // Three whitespace-separated words, empty metadata and empty collections.
        ExtractionResult input = new ExtractionResult(
            "Hello world test",
            "text/plain",
            new HashMap<>(),
            java.util.List.of(),
            java.util.List.of(),
            java.util.List.of(),
            java.util.List.of(),
            true
        );

        ExtractionResult output = processor.process(input);

        // The processor stores a boxed Long. Comparing against the int literal 3 would
        // box to Integer, and Integer.equals(Long) is always false, so use a long literal.
        assertEquals(3L, output.getMetadata().get("word_count"));
    }
}
```

View File

@@ -0,0 +1,61 @@
```java title="Java"
import dev.kreuzberg.IValidator;
import dev.kreuzberg.ExtractionConfig;
import dev.kreuzberg.ExtractionResult;
import dev.kreuzberg.ValidatorBridge;
// Generic validator pattern: every IValidator has the same shape.
// name() keys the registry, priority() orders execution (higher = earlier),
// should_validate() is a fast skip-check, and validate() throws on failure.
/**
 * Reusable validator whose registry name and priority are supplied at
 * construction time. It rejects results whose content is null or blank.
 */
public class GenericValidator implements IValidator {
    private final String pluginName;
    private final int pluginPriority;

    /**
     * @param pluginName     value returned by {@link #name()}
     * @param pluginPriority value returned by {@link #priority()}
     */
    public GenericValidator(String pluginName, int pluginPriority) {
        this.pluginName = pluginName;
        this.pluginPriority = pluginPriority;
    }

    /** Registers a validator named "non-empty-content" with priority 200. */
    public static void registerGenericValidator() {
        ValidatorBridge.registerValidator(new GenericValidator("non-empty-content", 200));
    }

    @Override
    public String name() {
        return pluginName;
    }

    @Override
    public String version() {
        return "1.0.0";
    }

    @Override
    public int priority() {
        return pluginPriority;
    }

    @Override
    public void initialize() {
        // Optional: open resources, load config files, etc.
    }

    @Override
    public void shutdown() {
        // Optional: release resources held in initialize().
    }

    /** Always opts in; this validator applies to every result. */
    @Override
    public boolean should_validate(ExtractionResult _result, ExtractionConfig _config) {
        return true;
    }

    /**
     * Fails when the extracted content is missing or whitespace-only.
     *
     * @throws IllegalArgumentException if the content is null or blank
     */
    @Override
    public void validate(ExtractionResult result, ExtractionConfig config) throws Exception {
        String content = result.content();
        boolean missingOrBlank = content == null || content.isBlank();
        if (missingOrBlank) {
            throw new IllegalArgumentException("Extracted content is blank");
        }
    }
}
```

View File

@@ -0,0 +1,11 @@
```java title="Java"
// Rejects results whose quality score is below 0.5; a missing score counts as 0.0.
Validator qualityValidator = extracted -> {
    double score = 0.0;
    if (extracted.getQualityScore() != null) {
        score = extracted.getQualityScore();
    }
    if (score < 0.5) {
        String reason = String.format("Quality score too low: %.2f < 0.50", score);
        throw new ValidationException(reason);
    }
};
```

View File

@@ -0,0 +1,27 @@
```java title="Java"
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicInteger;
/**
 * Post-processor that keeps state between calls: an invocation counter and a
 * small string cache, both held in java.util.concurrent types.
 */
class StatefulPlugin implements PostProcessor {
    // Simple counter: an atomic integer is enough.
    private final AtomicInteger invocations = new AtomicInteger(0);

    // Richer state: a concurrent map.
    private final ConcurrentHashMap<String, String> state = new ConcurrentHashMap<>();

    /** @return how many times {@link #process} has incremented the counter */
    public int getCallCount() {
        return invocations.get();
    }

    /**
     * Counts the call, remembers the result's MIME type under "last_mime", and
     * returns the same result instance.
     */
    @Override
    public ExtractionResult process(ExtractionResult result) {
        invocations.incrementAndGet();
        state.put("last_mime", result.mimeType());
        return result;
    }
}
```

View File

@@ -0,0 +1,11 @@
```java title="Java"
import dev.kreuzberg.Kreuzberg;
try {
    // Remove each plugin by the name it was registered under.
    Kreuzberg.unregisterPostProcessor("word-count");
    Kreuzberg.unregisterValidator("min-length");
} catch (KreuzbergException failure) {
    String report = "Failed to unregister: " + failure.getMessage();
    System.err.println(report);
}
```

View File

@@ -0,0 +1,31 @@
```java title="Java"
import dev.kreuzberg.Kreuzberg;
import dev.kreuzberg.ExtractionResult;
import dev.kreuzberg.PostProcessor;
import dev.kreuzberg.KreuzbergException;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
/**
 * Registers a post-processor that counts the whitespace-separated words in
 * the extracted content, then runs one extraction and prints the count.
 */
public class WordCountExample {
    public static void main(String[] args) {
        PostProcessor wordCount = result -> {
            long count = countWords(result.getContent());
            // NOTE(review): `metadata` is a detached copy and `result` is returned as-is,
            // so "word_count" is presumably not visible on the returned result. Confirm
            // how ExtractionResult is meant to carry new metadata.
            Map<String, Object> metadata = new HashMap<>(result.getMetadata());
            metadata.put("word_count", count);
            return result;
        };
        try {
            Kreuzberg.registerPostProcessor("word-count", wordCount, 50);
            ExtractionResult result = Kreuzberg.extractFile("document.pdf");
            System.out.println("Word count: " + result.getMetadata().get("word_count"));
        } catch (IOException | KreuzbergException e) {
            e.printStackTrace();
        }
    }

    /**
     * Counts whitespace-separated words in {@code text}.
     *
     * <p>Surrounding whitespace is stripped first: {@code "".split("\\s+")} yields one
     * empty token and leading whitespace yields an extra empty first token, so a
     * bare split reports 1 for empty content and over-counts by one otherwise.
     *
     * @param text the content to count; must not be null
     * @return the number of words, or 0 when {@code text} is empty or only whitespace
     */
    private static long countWords(String text) {
        String trimmed = text.strip();
        return trimmed.isEmpty() ? 0L : trimmed.split("\\s+").length;
    }
}
```