Files
fil/docs/snippets/php/advanced/vector_database_integration.md

71 lines
1.8 KiB
Markdown
Raw Normal View History

2026-06-01 23:40:55 +02:00
```php title="PHP"
<?php
declare(strict_types=1);
use Kreuzberg\Kreuzberg;
use Kreuzberg\ExtractionConfig;
use Kreuzberg\ChunkingConfig;
use Kreuzberg\EmbeddingConfig;
class VectorRecord {
public function __construct(
public string $id,
public string $content,
public array $embedding,
public array $metadata
) {}
}
function extractAndVectorize(
string $documentPath,
string $documentId
): array {
$config = new ExtractionConfig(
chunking: new ChunkingConfig(
maxCharacters: 512,
overlap: 50,
embedding: new EmbeddingConfig(
normalize: true,
batchSize: 32
)
)
);
$result = Kreuzberg::extractFileSync($documentPath, null, $config);
$records = [];
if ($result->getChunks()) {
foreach ($result->getChunks() as $index => $chunk) {
$embedding = $chunk->getEmbedding();
if ($embedding) {
$metadata = [
'document_id' => $documentId,
'chunk_index' => (string)$index,
'content_length' => (string)strlen($chunk->getContent()),
];
$records[] = new VectorRecord(
id: "{$documentId}_chunk_{$index}",
content: $chunk->getContent(),
embedding: $embedding,
metadata: $metadata
);
}
}
}
return $records;
}
// Usage
$records = extractAndVectorize('research_paper.pdf', 'doc_123');
foreach ($records as $record) {
echo "Vector ID: " . $record->id . "\n";
echo "Content length: " . strlen($record->content) . " characters\n";
echo "Embedding dimension: " . count($record->embedding) . "\n";
echo "---\n";
}
?>
```