This commit is contained in:
67
docs/snippets/java/advanced/vector_database_integration.md
Normal file
67
docs/snippets/java/advanced/vector_database_integration.md
Normal file
@@ -0,0 +1,67 @@
|
||||
```java title="Java"
|
||||
import dev.kreuzberg.Kreuzberg;
|
||||
import dev.kreuzberg.ExtractionResult;
|
||||
import dev.kreuzberg.ExtractionConfig;
|
||||
import dev.kreuzberg.ChunkingConfig;
|
||||
import dev.kreuzberg.EmbeddingConfig;
|
||||
import dev.kreuzberg.EmbeddingModelType;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
public class VectorDatabaseIntegration {
|
||||
public static class VectorRecord {
|
||||
public String id;
|
||||
public float[] embedding;
|
||||
public String content;
|
||||
public Map<String, String> metadata;
|
||||
}
|
||||
|
||||
public static List<VectorRecord> extractAndVectorize(String documentPath, String documentId) throws Exception {
|
||||
ExtractionConfig config = ExtractionConfig.builder()
|
||||
.chunking(ChunkingConfig.builder()
|
||||
.maxChars(512)
|
||||
.maxOverlap(50)
|
||||
.embedding(EmbeddingConfig.builder()
|
||||
.model(EmbeddingModelType.preset("balanced"))
|
||||
.normalize(true)
|
||||
.batchSize(32)
|
||||
.build())
|
||||
.build())
|
||||
.build();
|
||||
|
||||
ExtractionResult result = Kreuzberg.extractFile(documentPath, config);
|
||||
List<Object> chunks = result.getChunks() != null ? result.getChunks() : List.of();
|
||||
|
||||
List<VectorRecord> vectorRecords = new java.util.ArrayList<>();
|
||||
for (int index = 0; index < chunks.size(); index++) {
|
||||
VectorRecord record = new VectorRecord();
|
||||
record.id = documentId + "_chunk_" + index;
|
||||
record.metadata = new HashMap<>();
|
||||
record.metadata.put("document_id", documentId);
|
||||
record.metadata.put("chunk_index", String.valueOf(index));
|
||||
|
||||
if (chunk instanceof java.util.Map) {
|
||||
Map<String, Object> chunkMap = (Map<String, Object>) chunks.get(index);
|
||||
record.content = (String) chunkMap.get("content");
|
||||
record.embedding = (float[]) chunkMap.get("embedding");
|
||||
record.metadata.put("content_length", String.valueOf(record.content.length()));
|
||||
}
|
||||
|
||||
vectorRecords.add(record);
|
||||
}
|
||||
|
||||
storeInVectorDatabase(vectorRecords);
|
||||
return vectorRecords;
|
||||
}
|
||||
|
||||
private static void storeInVectorDatabase(List<VectorRecord> records) {
|
||||
for (VectorRecord record : records) {
|
||||
if (record.embedding != null && record.embedding.length > 0) {
|
||||
System.out.println("Storing " + record.id + ": " + record.content.length()
|
||||
+ " chars, " + record.embedding.length + " dims");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
Reference in New Issue
Block a user