Files
fil/docs/snippets/java/advanced/vector_database_integration.md
Henrik Jess Nielsen b4c07d3693
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s
Nomad changes
2026-06-01 23:40:55 +02:00

2.5 KiB

import dev.kreuzberg.Kreuzberg;
import dev.kreuzberg.ExtractionResult;
import dev.kreuzberg.ExtractionConfig;
import dev.kreuzberg.ChunkingConfig;
import dev.kreuzberg.EmbeddingConfig;
import dev.kreuzberg.EmbeddingModelType;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Example showing how to extract a document with chunking and embedding
 * generation enabled, and convert each resulting chunk into a
 * {@link VectorRecord} that can be handed to a vector database.
 */
public class VectorDatabaseIntegration {
    /** A single chunk of a document together with its embedding and metadata. */
    public static class VectorRecord {
        /** Record identifier of the form {@code <documentId>_chunk_<index>}. */
        public String id;
        /** Embedding read from the chunk; {@code null} when the chunk supplied none. */
        public float[] embedding;
        /** Text read from the chunk; {@code null} when the chunk supplied none. */
        public String content;
        /**
         * String metadata: always {@code document_id} and {@code chunk_index},
         * plus {@code content_length} when content is present.
         */
        public Map<String, String> metadata;
    }

    /**
     * Extracts {@code documentPath} with chunking and embeddings configured,
     * builds one {@link VectorRecord} per returned chunk, passes the records to
     * {@code storeInVectorDatabase}, and returns them.
     *
     * @param documentPath path of the document passed to {@code Kreuzberg.extractFile}
     * @param documentId   identifier used as the prefix of every record id and
     *                     stored under the {@code document_id} metadata key
     * @return one record per chunk, in chunk order; empty when the extraction
     *         result has no chunks
     * @throws Exception if extraction fails
     */
    public static List<VectorRecord> extractAndVectorize(String documentPath, String documentId) throws Exception {
        ExtractionConfig config = ExtractionConfig.builder()
            .chunking(ChunkingConfig.builder()
                .maxChars(512)
                .maxOverlap(50)
                .embedding(EmbeddingConfig.builder()
                    .model(EmbeddingModelType.preset("balanced"))
                    .normalize(true)
                    .batchSize(32)
                    .build())
                .build())
            .build();

        ExtractionResult result = Kreuzberg.extractFile(documentPath, config);
        List<Object> chunks = result.getChunks() != null ? result.getChunks() : List.of();

        List<VectorRecord> vectorRecords = new java.util.ArrayList<>(chunks.size());
        for (int index = 0; index < chunks.size(); index++) {
            Object chunk = chunks.get(index);

            VectorRecord record = new VectorRecord();
            record.id = documentId + "_chunk_" + index;
            record.metadata = new HashMap<>();
            record.metadata.put("document_id", documentId);
            record.metadata.put("chunk_index", String.valueOf(index));

            // NOTE(review): assumes chunks are exposed as Maps keyed by "content"
            // and "embedding" -- confirm against the Kreuzberg Java API. A chunk of
            // any other type yields a record with id and metadata only.
            if (chunk instanceof Map) {
                // Wildcard cast avoids an unchecked conversion; values are cast individually.
                Map<?, ?> chunkMap = (Map<?, ?>) chunk;
                record.content = (String) chunkMap.get("content");
                record.embedding = (float[]) chunkMap.get("embedding");
                // A chunk without a "content" entry must not abort the whole batch.
                if (record.content != null) {
                    record.metadata.put("content_length", String.valueOf(record.content.length()));
                }
            }

            vectorRecords.add(record);
        }

        storeInVectorDatabase(vectorRecords);
        return vectorRecords;
    }

    /**
     * Placeholder sink: prints one line per record that has a non-empty
     * embedding. Records without an embedding are skipped.
     *
     * @param records records to store
     */
    private static void storeInVectorDatabase(List<VectorRecord> records) {
        for (VectorRecord record : records) {
            if (record.embedding != null && record.embedding.length > 0) {
                // Content may be absent even when an embedding exists; report 0 chars then.
                int contentLength = record.content != null ? record.content.length() : 0;
                System.out.println("Storing " + record.id + ": " + contentLength
                    + " chars, " + record.embedding.length + " dims");
            }
        }
    }
}