This commit is contained in:
22
docs/snippets/java/api/batch_extract_bytes_sync.md
Normal file
22
docs/snippets/java/api/batch_extract_bytes_sync.md
Normal file
@@ -0,0 +1,22 @@
|
||||
```java title="Java"
|
||||
import dev.kreuzberg.Kreuzberg;
|
||||
import dev.kreuzberg.ExtractionResult;
|
||||
import dev.kreuzberg.BatchBytesItem;
|
||||
import dev.kreuzberg.ExtractionConfig;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Paths;
|
||||
import java.util.List;
|
||||
import java.util.Arrays;
|
||||
|
||||
// Load both documents fully into memory before building the batch.
byte[] pdfBytes = Files.readAllBytes(Paths.get("doc1.pdf"));
byte[] docxBytes = Files.readAllBytes(Paths.get("doc2.docx"));

// Each item pairs raw bytes with a MIME type; the third constructor argument is passed as null.
BatchBytesItem pdfItem = new BatchBytesItem(pdfBytes, "application/pdf", null);
BatchBytesItem docxItem = new BatchBytesItem(
    docxBytes,
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    null);
List<BatchBytesItem> batch = Arrays.asList(pdfItem, docxItem);

// A config built without any overrides.
ExtractionConfig extractionConfig = ExtractionConfig.builder().build();

// One call handles the whole batch and returns a list of results.
List<ExtractionResult> extracted = Kreuzberg.batchExtractBytesSync(batch, extractionConfig);
System.out.println("Processed " + extracted.size() + " documents");
|
||||
```
|
||||
22
docs/snippets/java/api/batch_extract_files_sync.md
Normal file
22
docs/snippets/java/api/batch_extract_files_sync.md
Normal file
@@ -0,0 +1,22 @@
|
||||
```java title="Java"
|
||||
import dev.kreuzberg.Kreuzberg;
|
||||
import dev.kreuzberg.ExtractionResult;
|
||||
import dev.kreuzberg.BatchFileItem;
|
||||
import dev.kreuzberg.ExtractionConfig;
|
||||
import java.nio.file.Paths;
|
||||
import java.util.List;
|
||||
import java.util.Arrays;
|
||||
|
||||
// One batch item per input path; the second constructor argument is passed as null.
BatchFileItem pdfItem = new BatchFileItem(Paths.get("doc1.pdf"), null);
BatchFileItem docxItem = new BatchFileItem(Paths.get("doc2.docx"), null);
BatchFileItem pptxItem = new BatchFileItem(Paths.get("doc3.pptx"), null);
List<BatchFileItem> batch = Arrays.asList(pdfItem, docxItem, pptxItem);

// A config built without any overrides.
ExtractionConfig extractionConfig = ExtractionConfig.builder().build();
List<ExtractionResult> extracted = Kreuzberg.batchExtractFilesSync(batch, extractionConfig);

// Report the size of each extracted document's text.
for (ExtractionResult item : extracted) {
    int contentLength = item.content().length();
    System.out.println("Content length: " + contentLength);
}
|
||||
```
|
||||
30
docs/snippets/java/api/client_chunk_text.md
Normal file
30
docs/snippets/java/api/client_chunk_text.md
Normal file
@@ -0,0 +1,30 @@
|
||||
<!-- snippet:skip -->
|
||||
```java title="Java"
|
||||
import java.net.http.HttpClient;
|
||||
import java.net.http.HttpRequest;
|
||||
import java.net.http.HttpResponse;
|
||||
import java.net.URI;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
import java.util.List;
|
||||
|
||||
record ChunkRequest(String text, @JsonProperty("chunker_type") String chunkerType, ChunkConfig config) {}
|
||||
record ChunkConfig(@JsonProperty("max_characters") int maxCharacters, int overlap, boolean trim) {}
|
||||
record ChunkItem(String content, @JsonProperty("byte_start") int byteStart, @JsonProperty("chunk_index") int chunkIndex) {}
|
||||
|
||||
HttpClient client = HttpClient.newHttpClient();
|
||||
ObjectMapper mapper = new ObjectMapper();
|
||||
|
||||
ChunkRequest req = new ChunkRequest("Your long text here...", "text", new ChunkConfig(1000, 50, true));
|
||||
String json = mapper.writeValueAsString(req);
|
||||
|
||||
var request = HttpRequest.newBuilder()
|
||||
.uri(URI.create("http://localhost:8000/chunk"))
|
||||
.header("Content-Type", "application/json")
|
||||
.POST(HttpRequest.BodyPublishers.ofString(json))
|
||||
.build();
|
||||
|
||||
HttpResponse<String> response = client.send(request, HttpResponse.BodyHandlers.ofString());
|
||||
var result = mapper.readTree(response.body());
|
||||
System.out.println("Created " + result.get("chunk_count").asInt() + " chunks");
|
||||
```
|
||||
22
docs/snippets/java/api/client_extract_single_file.md
Normal file
22
docs/snippets/java/api/client_extract_single_file.md
Normal file
@@ -0,0 +1,22 @@
|
||||
```java title="Java"
|
||||
import java.net.http.HttpClient;
|
||||
import java.net.http.HttpRequest;
|
||||
import java.net.http.HttpResponse;
|
||||
import java.net.URI;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Paths;
|
||||
|
||||
HttpClient client = HttpClient.newHttpClient();

// Read the whole file up front so that no file handle stays open during the
// blocking HTTP round-trip below.
byte[] content = Files.readAllBytes(Paths.get("document.pdf"));

// POST the raw bytes to the locally running extraction server.
HttpRequest request = HttpRequest.newBuilder()
    .uri(URI.create("http://localhost:8000/extract"))
    .header("Content-Type", "application/octet-stream")
    .POST(HttpRequest.BodyPublishers.ofByteArray(content))
    .build();

// Send synchronously and print the response body as text.
HttpResponse<String> response = client.send(request, HttpResponse.BodyHandlers.ofString());
System.out.println(response.body());
|
||||
```
|
||||
28
docs/snippets/java/api/combining_all_features.md
Normal file
28
docs/snippets/java/api/combining_all_features.md
Normal file
@@ -0,0 +1,28 @@
|
||||
```java title="Java"
|
||||
import dev.kreuzberg.Kreuzberg;
|
||||
import dev.kreuzberg.ExtractionResult;
|
||||
import dev.kreuzberg.*;
|
||||
import java.nio.file.Paths;
|
||||
import java.util.Optional;
|
||||
|
||||
// Enable OCR, chunking and quality processing in a single configuration.
ExtractionConfig config = ExtractionConfig.builder()
    .withOcr(Optional.of(OcrConfig.builder()
        .withBackend("tesseract")
        .withLanguages(Optional.of(java.util.List.of("eng", "deu")))
        .build()))
    .withChunking(Optional.of(ChunkingConfig.builder()
        .withMaxChars(Optional.of(512L))
        .withMaxOverlap(Optional.of(50L))
        .build()))
    .withEnableQualityProcessing(true)
    .build();

ExtractionResult result = Kreuzberg.extractFileSync(Paths.get("document.pdf"), config);

// Preview at most the first 100 characters. An unclamped substring(0, 100)
// throws StringIndexOutOfBoundsException when the content is shorter than that.
String content = result.content();
String preview = content.substring(0, Math.min(100, content.length()));
System.out.println("Content: " + preview + "...");

// Tables and the quality score are only printed when they are non-null.
if (result.tables() != null) {
    System.out.println("Tables: " + result.tables().size());
}
if (result.qualityScore() != null) {
    System.out.println("Quality: " + result.qualityScore());
}
|
||||
```
|
||||
16
docs/snippets/java/api/error_handling.md
Normal file
16
docs/snippets/java/api/error_handling.md
Normal file
@@ -0,0 +1,16 @@
|
||||
```java title="Java"
|
||||
import dev.kreuzberg.Kreuzberg;
|
||||
import dev.kreuzberg.ExtractionResult;
|
||||
import dev.kreuzberg.ExtractionConfig;
|
||||
import dev.kreuzberg.KreuzbergRsException;
|
||||
import java.nio.file.Paths;
|
||||
|
||||
try {
|
||||
ExtractionConfig config = ExtractionConfig.builder().build();
|
||||
ExtractionResult result = Kreuzberg.extractFileSync(Paths.get("missing.pdf"), config);
|
||||
System.out.println(result.content());
|
||||
} catch (KreuzbergRsException e) {
|
||||
System.err.println("Extraction failed: " + e.getMessage());
|
||||
System.err.println("Error code: " + e.getCode());
|
||||
}
|
||||
```
|
||||
28
docs/snippets/java/api/error_handling_extract.md
Normal file
28
docs/snippets/java/api/error_handling_extract.md
Normal file
@@ -0,0 +1,28 @@
|
||||
```java title="Java"
|
||||
import java.net.http.HttpClient;
|
||||
import java.net.http.HttpRequest;
|
||||
import java.net.http.HttpResponse;
|
||||
import java.net.URI;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Paths;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
HttpClient client = HttpClient.newHttpClient();
|
||||
byte[] fileBytes = Files.readAllBytes(Paths.get("document.pdf"));
|
||||
|
||||
var request = HttpRequest.newBuilder()
|
||||
.uri(URI.create("http://localhost:8000/extract"))
|
||||
.header("Content-Type", "application/octet-stream")
|
||||
.POST(HttpRequest.BodyPublishers.ofByteArray(fileBytes))
|
||||
.build();
|
||||
|
||||
HttpResponse<String> response = client.send(request, HttpResponse.BodyHandlers.ofString());
|
||||
|
||||
if (response.statusCode() != 200) {
|
||||
ObjectMapper mapper = new ObjectMapper();
|
||||
var error = mapper.readTree(response.body());
|
||||
System.err.println("Error: " + error.get("error_type").asText() + " - " + error.get("message").asText());
|
||||
} else {
|
||||
System.out.println("Success: " + response.body());
|
||||
}
|
||||
```
|
||||
14
docs/snippets/java/api/extract_bytes_async.md
Normal file
14
docs/snippets/java/api/extract_bytes_async.md
Normal file
@@ -0,0 +1,14 @@
|
||||
```java title="Java"
|
||||
import dev.kreuzberg.Kreuzberg;
|
||||
import dev.kreuzberg.ExtractionResult;
|
||||
import dev.kreuzberg.ExtractionConfig;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Paths;
|
||||
|
||||
// Read the document into memory; extraction below works on the raw bytes.
byte[] pdfBytes = Files.readAllBytes(Paths.get("document.pdf"));

// A config built without any overrides.
ExtractionConfig extractionConfig = ExtractionConfig.builder().build();

// NOTE(review): this is the "async" snippet, yet the result is assigned directly
// to ExtractionResult - confirm that extractBytes does not return a future.
ExtractionResult extracted =
    Kreuzberg.extractBytes(pdfBytes, "application/pdf", extractionConfig);

// Print the extracted text followed by the result's MIME type.
System.out.println(extracted.content());
System.out.println(extracted.mimeType());
|
||||
```
|
||||
14
docs/snippets/java/api/extract_bytes_sync.md
Normal file
14
docs/snippets/java/api/extract_bytes_sync.md
Normal file
@@ -0,0 +1,14 @@
|
||||
```java title="Java"
|
||||
import dev.kreuzberg.Kreuzberg;
|
||||
import dev.kreuzberg.ExtractionResult;
|
||||
import dev.kreuzberg.ExtractionConfig;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Paths;
|
||||
|
||||
// Read the document into memory; extraction below works on the raw bytes.
byte[] pdfBytes = Files.readAllBytes(Paths.get("document.pdf"));

// A config built without any overrides.
ExtractionConfig extractionConfig = ExtractionConfig.builder().build();

// The MIME type is passed explicitly alongside the bytes.
ExtractionResult extracted =
    Kreuzberg.extractBytesSync(pdfBytes, "application/pdf", extractionConfig);

// Print the extracted text followed by the result's MIME type.
System.out.println(extracted.content());
System.out.println(extracted.mimeType());
|
||||
```
|
||||
12
docs/snippets/java/api/extract_file_async.md
Normal file
12
docs/snippets/java/api/extract_file_async.md
Normal file
@@ -0,0 +1,12 @@
|
||||
```java title="Java"
|
||||
import dev.kreuzberg.Kreuzberg;
|
||||
import dev.kreuzberg.ExtractionResult;
|
||||
import dev.kreuzberg.ExtractionConfig;
|
||||
import java.nio.file.Paths;
|
||||
|
||||
// A config built without any overrides.
ExtractionConfig extractionConfig = ExtractionConfig.builder().build();

// NOTE(review): this is the "async" snippet, yet the result is assigned directly
// to ExtractionResult - confirm that extractFile does not return a future.
ExtractionResult extracted =
    Kreuzberg.extractFile(Paths.get("document.pdf"), extractionConfig);

// Print the extracted text followed by the result's MIME type.
System.out.println(extracted.content());
System.out.println(extracted.mimeType());
|
||||
```
|
||||
13
docs/snippets/java/api/extract_file_sync.md
Normal file
13
docs/snippets/java/api/extract_file_sync.md
Normal file
@@ -0,0 +1,13 @@
|
||||
```java title="Java"
|
||||
import dev.kreuzberg.Kreuzberg;
|
||||
import dev.kreuzberg.ExtractionResult;
|
||||
import dev.kreuzberg.ExtractionConfig;
|
||||
import java.nio.file.Paths;
|
||||
|
||||
// A config built without any overrides.
ExtractionConfig extractionConfig = ExtractionConfig.builder().build();
ExtractionResult extracted =
    Kreuzberg.extractFileSync(Paths.get("document.pdf"), extractionConfig);

System.out.println(extracted.content());

// tables() may be null; report zero tables in that case.
int tableCount;
if (extracted.tables() != null) {
    tableCount = extracted.tables().size();
} else {
    tableCount = 0;
}
System.out.println("Tables: " + tableCount);

System.out.println("Metadata: " + extracted.metadata());
|
||||
```
|
||||
Reference in New Issue
Block a user