Nomad changes
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s

This commit is contained in:
Henrik Jess Nielsen
2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions

View File

@@ -0,0 +1,22 @@
```java title="Java"
import dev.kreuzberg.Kreuzberg;
import dev.kreuzberg.ExtractionResult;
import dev.kreuzberg.BatchBytesItem;
import dev.kreuzberg.ExtractionConfig;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.List;
import java.util.Arrays;
// Load both documents fully into memory; each one becomes a batch entry.
byte[] pdfBytes = Files.readAllBytes(Paths.get("doc1.pdf"));
byte[] docxBytes = Files.readAllBytes(Paths.get("doc2.docx"));

// Every item pairs the raw bytes with an explicit MIME type. The third
// constructor argument is left null (presumably a per-item override — TODO confirm).
BatchBytesItem pdfItem = new BatchBytesItem(pdfBytes, "application/pdf", null);
BatchBytesItem docxItem = new BatchBytesItem(
    docxBytes,
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    null);
List<BatchBytesItem> batch = Arrays.asList(pdfItem, docxItem);

// Configuration taken straight from the builder with no options set.
ExtractionConfig batchConfig = ExtractionConfig.builder().build();

// One synchronous call handles the whole batch and returns a list of results.
List<ExtractionResult> extracted = Kreuzberg.batchExtractBytesSync(batch, batchConfig);
System.out.println("Processed " + extracted.size() + " documents");
```

View File

@@ -0,0 +1,22 @@
```java title="Java"
import dev.kreuzberg.Kreuzberg;
import dev.kreuzberg.ExtractionResult;
import dev.kreuzberg.BatchFileItem;
import dev.kreuzberg.ExtractionConfig;
import java.nio.file.Paths;
import java.util.List;
import java.util.Arrays;
// One batch entry per file path. The second constructor argument is left
// null (presumably a per-item override — TODO confirm).
BatchFileItem pdfItem = new BatchFileItem(Paths.get("doc1.pdf"), null);
BatchFileItem docxItem = new BatchFileItem(Paths.get("doc2.docx"), null);
BatchFileItem pptxItem = new BatchFileItem(Paths.get("doc3.pptx"), null);
List<BatchFileItem> batch = Arrays.asList(pdfItem, docxItem, pptxItem);

// Configuration taken straight from the builder with no options set.
ExtractionConfig batchConfig = ExtractionConfig.builder().build();

// Extract every file in a single synchronous call, then report the size of
// each result's extracted text.
List<ExtractionResult> extracted = Kreuzberg.batchExtractFilesSync(batch, batchConfig);
for (ExtractionResult document : extracted) {
    int contentLength = document.content().length();
    System.out.println("Content length: " + contentLength);
}
```

View File

@@ -0,0 +1,30 @@
<!-- snippet:skip -->
```java title="Java"
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.net.URI;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.annotation.JsonProperty;
import java.util.List;
// Request payload for POST /chunk. @JsonProperty maps the camelCase record
// components onto the snake_case field names sent in the JSON body.
record ChunkRequest(String text, @JsonProperty("chunker_type") String chunkerType, ChunkConfig config) {}
record ChunkConfig(@JsonProperty("max_characters") int maxCharacters, int overlap, boolean trim) {}
// Not referenced below; presumably models one chunk entry of the response — TODO confirm.
record ChunkItem(String content, @JsonProperty("byte_start") int byteStart, @JsonProperty("chunk_index") int chunkIndex) {}

HttpClient client = HttpClient.newHttpClient();
ObjectMapper mapper = new ObjectMapper();

// Serialize the request: the text to split, the chunker type, and its settings
// (max_characters=1000, overlap=50, trim=true).
ChunkRequest req = new ChunkRequest("Your long text here...", "text", new ChunkConfig(1000, 50, true));
String json = mapper.writeValueAsString(req);

var request = HttpRequest.newBuilder()
    .uri(URI.create("http://localhost:8000/chunk"))
    .header("Content-Type", "application/json")
    .POST(HttpRequest.BodyPublishers.ofString(json))
    .build();
HttpResponse<String> response = client.send(request, HttpResponse.BodyHandlers.ofString());

// Only read "chunk_count" from a successful response. On an error status the
// field lookup would return null and the snippet would fail with a
// NullPointerException instead of showing what the server reported.
if (response.statusCode() != 200) {
    System.err.println("Chunk request failed with HTTP " + response.statusCode() + ": " + response.body());
} else {
    var result = mapper.readTree(response.body());
    System.out.println("Created " + result.get("chunk_count").asInt() + " chunks");
}
```

View File

@@ -0,0 +1,22 @@
```java title="Java"
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.net.URI;
import java.nio.file.Files;
import java.nio.file.Paths;
HttpClient client = HttpClient.newHttpClient();

// Read the document up front. Files.readAllBytes opens, reads and closes the
// file in one call, so no file handle stays open while the HTTP request is in
// flight.
byte[] content = Files.readAllBytes(Paths.get("document.pdf"));

// POST the raw bytes to the extraction endpoint as an opaque binary body.
var request = HttpRequest.newBuilder()
    .uri(URI.create("http://localhost:8000/extract"))
    .header("Content-Type", "application/octet-stream")
    .POST(HttpRequest.BodyPublishers.ofByteArray(content))
    .build();

// Block until the server answers and print the response body as text.
HttpResponse<String> response = client.send(request, HttpResponse.BodyHandlers.ofString());
System.out.println(response.body());
```

View File

@@ -0,0 +1,28 @@
```java title="Java"
import dev.kreuzberg.Kreuzberg;
import dev.kreuzberg.ExtractionResult;
import dev.kreuzberg.*;
import java.nio.file.Paths;
import java.util.Optional;
// Build a configuration that sets OCR (tesseract backend, languages "eng" and
// "deu"), chunking (max chars 512, max overlap 50) and quality processing.
ExtractionConfig config = ExtractionConfig.builder()
    .withOcr(Optional.of(OcrConfig.builder()
        .withBackend("tesseract")
        .withLanguages(Optional.of(java.util.List.of("eng", "deu")))
        .build()))
    .withChunking(Optional.of(ChunkingConfig.builder()
        .withMaxChars(Optional.of(512L))
        .withMaxOverlap(Optional.of(50L))
        .build()))
    .withEnableQualityProcessing(true)
    .build();

ExtractionResult result = Kreuzberg.extractFileSync(Paths.get("document.pdf"), config);

// Print at most the first 100 characters. The end index is clamped to the
// content length because substring(0, 100) throws
// StringIndexOutOfBoundsException when the text is shorter than 100 characters.
String content = result.content();
int previewEnd = Math.min(100, content.length());
System.out.println("Content: " + content.substring(0, previewEnd) + "...");

// Tables and the quality score are null-checked before being printed.
if (result.tables() != null) {
    System.out.println("Tables: " + result.tables().size());
}
if (result.qualityScore() != null) {
    System.out.println("Quality: " + result.qualityScore());
}
```

View File

@@ -0,0 +1,16 @@
```java title="Java"
import dev.kreuzberg.Kreuzberg;
import dev.kreuzberg.ExtractionResult;
import dev.kreuzberg.ExtractionConfig;
import dev.kreuzberg.KreuzbergRsException;
import java.nio.file.Paths;
try {
ExtractionConfig config = ExtractionConfig.builder().build();
ExtractionResult result = Kreuzberg.extractFileSync(Paths.get("missing.pdf"), config);
System.out.println(result.content());
} catch (KreuzbergRsException e) {
System.err.println("Extraction failed: " + e.getMessage());
System.err.println("Error code: " + e.getCode());
}
```

View File

@@ -0,0 +1,28 @@
```java title="Java"
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.net.URI;
import java.nio.file.Files;
import java.nio.file.Paths;
import com.fasterxml.jackson.databind.ObjectMapper;
HttpClient client = HttpClient.newHttpClient();
byte[] fileBytes = Files.readAllBytes(Paths.get("document.pdf"));
var request = HttpRequest.newBuilder()
.uri(URI.create("http://localhost:8000/extract"))
.header("Content-Type", "application/octet-stream")
.POST(HttpRequest.BodyPublishers.ofByteArray(fileBytes))
.build();
HttpResponse<String> response = client.send(request, HttpResponse.BodyHandlers.ofString());
if (response.statusCode() != 200) {
ObjectMapper mapper = new ObjectMapper();
var error = mapper.readTree(response.body());
System.err.println("Error: " + error.get("error_type").asText() + " - " + error.get("message").asText());
} else {
System.out.println("Success: " + response.body());
}
```

View File

@@ -0,0 +1,14 @@
```java title="Java"
import dev.kreuzberg.Kreuzberg;
import dev.kreuzberg.ExtractionResult;
import dev.kreuzberg.ExtractionConfig;
import java.nio.file.Files;
import java.nio.file.Paths;
// Read the whole PDF into memory; the bytes are handed to the extractor
// together with an explicit MIME type.
byte[] pdfBytes = Files.readAllBytes(Paths.get("document.pdf"));

// Configuration taken straight from the builder with no options set.
ExtractionConfig defaults = ExtractionConfig.builder().build();

// NOTE(review): the return value is assigned directly to ExtractionResult, so
// this call is assumed to yield the result itself rather than a future —
// confirm against the API.
ExtractionResult extracted = Kreuzberg.extractBytes(pdfBytes, "application/pdf", defaults);

// Print the extracted text followed by the MIME type reported on the result.
System.out.println(extracted.content());
System.out.println(extracted.mimeType());
```

View File

@@ -0,0 +1,14 @@
```java title="Java"
import dev.kreuzberg.Kreuzberg;
import dev.kreuzberg.ExtractionResult;
import dev.kreuzberg.ExtractionConfig;
import java.nio.file.Files;
import java.nio.file.Paths;
// Read the whole PDF into memory; the bytes are handed to the extractor
// together with an explicit MIME type.
byte[] pdfBytes = Files.readAllBytes(Paths.get("document.pdf"));

// Configuration taken straight from the builder with no options set.
ExtractionConfig defaults = ExtractionConfig.builder().build();

// Synchronous extraction from the in-memory bytes.
ExtractionResult extracted = Kreuzberg.extractBytesSync(pdfBytes, "application/pdf", defaults);

// Print the extracted text followed by the MIME type reported on the result.
System.out.println(extracted.content());
System.out.println(extracted.mimeType());
```

View File

@@ -0,0 +1,12 @@
```java title="Java"
import dev.kreuzberg.Kreuzberg;
import dev.kreuzberg.ExtractionResult;
import dev.kreuzberg.ExtractionConfig;
import java.nio.file.Paths;
// Configuration taken straight from the builder with no options set.
ExtractionConfig defaults = ExtractionConfig.builder().build();

// NOTE(review): the return value is assigned directly to ExtractionResult, so
// this call is assumed to yield the result itself rather than a future —
// confirm against the API.
ExtractionResult extracted = Kreuzberg.extractFile(Paths.get("document.pdf"), defaults);

// Print the extracted text followed by the MIME type reported on the result.
System.out.println(extracted.content());
System.out.println(extracted.mimeType());
```

View File

@@ -0,0 +1,13 @@
```java title="Java"
import dev.kreuzberg.Kreuzberg;
import dev.kreuzberg.ExtractionResult;
import dev.kreuzberg.ExtractionConfig;
import java.nio.file.Paths;
// Configuration taken straight from the builder with no options set.
ExtractionConfig defaults = ExtractionConfig.builder().build();

// Synchronous extraction straight from a path on disk.
ExtractionResult extracted = Kreuzberg.extractFileSync(Paths.get("document.pdf"), defaults);
System.out.println(extracted.content());

// tables() is null-checked; a null value is reported as zero tables.
int tableCount = 0;
if (extracted.tables() != null) {
    tableCount = extracted.tables().size();
}
System.out.println("Tables: " + tableCount);
System.out.println("Metadata: " + extracted.metadata());
```