Files
fil/docs/snippets/swift/advanced/vector_database_integration.md
Henrik Jess Nielsen b4c07d3693
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s
Nomad changes
2026-06-01 23:40:55 +02:00

1.4 KiB

import Foundation
import Kreuzberg
import RustBridge

struct VectorRecord {
    let id: String
    let content: String
    let embedding: [Float]
    let metadata: [String: String]
}

func extractAndVectorize(documentPath: String, documentId: String) throws -> [VectorRecord] {
    let configJson = """
    {
        "chunking": {
            "max_characters": 512,
            "overlap": 50,
            "embedding": {
                "model": {"preset": {"name": "balanced"}},
                "normalize": true,
                "batch_size": 32
            }
        }
    }
    """

    let config = try extractionConfigFromJson(configJson)
    let result = try extractFileSync(documentPath, nil, config)

    var records: [VectorRecord] = []
    if let chunks = result.chunks() {
        for (index, chunk) in chunks.enumerated() {
            guard let embedding = chunk.embedding() else { continue }
            let content = chunk.content().toString()
            let metadata: [String: String] = [
                "document_id": documentId,
                "chunk_index": String(index),
                "content_length": String(content.count),
            ]
            records.append(VectorRecord(
                id: "\(documentId)_chunk_\(index)",
                content: content,
                embedding: embedding.map { $0 },
                metadata: metadata
            ))
        }
    }
    return records
}