Files
fil/docs/snippets/php/advanced/vector_database_integration.md
Henrik Jess Nielsen b4c07d3693
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s
Nomad changes
2026-06-01 23:40:55 +02:00

1.8 KiB

<?php
declare(strict_types=1);

use Kreuzberg\Kreuzberg;
use Kreuzberg\ExtractionConfig;
use Kreuzberg\ChunkingConfig;
use Kreuzberg\EmbeddingConfig;

class VectorRecord {
    public function __construct(
        public string $id,
        public string $content,
        public array $embedding,
        public array $metadata
    ) {}
}

function extractAndVectorize(
    string $documentPath,
    string $documentId
): array {
    $config = new ExtractionConfig(
        chunking: new ChunkingConfig(
            maxCharacters: 512,
            overlap: 50,
            embedding: new EmbeddingConfig(
                normalize: true,
                batchSize: 32
            )
        )
    );

    $result = Kreuzberg::extractFileSync($documentPath, null, $config);

    $records = [];
    if ($result->getChunks()) {
        foreach ($result->getChunks() as $index => $chunk) {
            $embedding = $chunk->getEmbedding();
            if ($embedding) {
                $metadata = [
                    'document_id' => $documentId,
                    'chunk_index' => (string)$index,
                    'content_length' => (string)strlen($chunk->getContent()),
                ];

                $records[] = new VectorRecord(
                    id: "{$documentId}_chunk_{$index}",
                    content: $chunk->getContent(),
                    embedding: $embedding,
                    metadata: $metadata
                );
            }
        }
    }

    return $records;
}

// Usage
$records = extractAndVectorize('research_paper.pdf', 'doc_123');

foreach ($records as $record) {
    echo "Vector ID: " . $record->id . "\n";
    echo "Content length: " . strlen($record->content) . " characters\n";
    echo "Embedding dimension: " . count($record->embedding) . "\n";
    echo "---\n";
}
?>