Files
fil/docs/snippets/go/advanced/vector_database_integration.md
Henrik Jess Nielsen b4c07d3693
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s
Nomad changes
2026-06-01 23:40:55 +02:00

1.5 KiB

package main

import (
	"fmt"

	"github.com/kreuzberg-dev/kreuzberg/packages/go/v5"
)

// VectorRecord is one chunk of an extracted document, shaped for insertion
// into a vector store.
type VectorRecord struct {
	// ID identifies the record. extractAndVectorize builds it as
	// "<documentID>_chunk_<index>".
	ID string
	// Embedding is the chunk's embedding vector. It may be empty, in which
	// case storeInVectorDatabase skips the record.
	Embedding []float32
	// Content is the text of the chunk.
	Content string
	// Metadata holds free-form string attributes about the chunk.
	// extractAndVectorize fills in "document_id", "chunk_index" and
	// "content_length".
	Metadata map[string]string
}

// extractAndVectorize extracts documentPath with chunking and embedding
// generation enabled and converts every returned chunk into a VectorRecord.
//
// Record IDs have the form "<documentID>_chunk_<index>", where index is the
// chunk's position in the extraction result. Each record's metadata carries
// the document ID, the chunk index, and the content length in bytes (len of
// the chunk's Content string), all formatted as strings.
//
// The records are passed to storeInVectorDatabase before being returned. A
// document that produces no chunks yields an empty slice. An extraction
// failure is returned wrapped with the document path; the underlying error
// remains reachable through errors.Is and errors.As.
func extractAndVectorize(documentPath string, documentID string) ([]VectorRecord, error) {
	// The config takes pointers for these settings, so each one needs an
	// addressable local.
	maxChars := 512
	maxOverlap := 50
	normalize := true
	batchSize := int32(32)

	config := &kreuzberg.ExtractionConfig{
		Chunking: &kreuzberg.ChunkingConfig{
			MaxChars:   &maxChars,
			MaxOverlap: &maxOverlap,
			Embedding: &kreuzberg.EmbeddingConfig{
				Model:     kreuzberg.EmbeddingModelType_Preset("balanced"),
				Normalize: &normalize,
				BatchSize: &batchSize,
			},
		},
	}

	result, err := kreuzberg.ExtractFileSync(documentPath, config)
	if err != nil {
		return nil, fmt.Errorf("extracting %q: %w", documentPath, err)
	}

	// Exactly one record is produced per chunk, so size the slice once
	// instead of letting append grow it.
	vectorRecords := make([]VectorRecord, 0, len(result.Chunks))
	for index, chunk := range result.Chunks {
		vectorRecords = append(vectorRecords, VectorRecord{
			ID:        fmt.Sprintf("%s_chunk_%d", documentID, index),
			Content:   chunk.Content,
			Embedding: chunk.Embedding,
			Metadata: map[string]string{
				"document_id":    documentID,
				"chunk_index":    fmt.Sprintf("%d", index),
				"content_length": fmt.Sprintf("%d", len(chunk.Content)),
			},
		})
	}

	storeInVectorDatabase(vectorRecords)
	return vectorRecords, nil
}

// storeInVectorDatabase is a placeholder for a real vector-store write: it
// does not contact any database and only prints one summary line to standard
// output per record. Records with an empty embedding are skipped silently.
func storeInVectorDatabase(records []VectorRecord) {
	for i := range records {
		rec := records[i]
		if len(rec.Embedding) == 0 {
			// Nothing to index without a vector.
			continue
		}
		contentLen, dims := len(rec.Content), len(rec.Embedding)
		fmt.Printf("Storing %s: %d chars, %d dims\n", rec.ID, contentLen, dims)
	}
}