Files
fil/docs/snippets/php/llm/structured_extraction.md
Henrik Jess Nielsen b4c07d3693
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s
Nomad changes
2026-06-01 23:40:55 +02:00

1.3 KiB

<?php

declare(strict_types=1);

require_once __DIR__ . '/vendor/autoload.php';

use Kreuzberg\Kreuzberg;
use Kreuzberg\ExtractionConfig;
use Kreuzberg\LlmConfig;
use Kreuzberg\StructuredExtractionConfig;

// Fields the extraction should return, keyed by property name.
$schemaProperties = [
    'title' => ['type' => 'string'],
    'authors' => ['type' => 'array', 'items' => ['type' => 'string']],
    'date' => ['type' => 'string'],
];

// JSON Schema, serialised to a string: an object in which every declared
// property is required and `additionalProperties` is false.
$schema = json_encode(
    [
        'type' => 'object',
        'properties' => $schemaProperties,
        // Every declared property is mandatory, in declaration order.
        'required' => array_keys($schemaProperties),
        'additionalProperties' => false,
    ],
    flags: JSON_THROW_ON_ERROR,
);

// LLM settings for the extraction. Only the model is chosen explicitly; every
// other option is passed as null.
// NOTE(review): null presumably means "use the library default" — confirm
// against the LlmConfig documentation.
$llm = new LlmConfig(
    model: 'openai/gpt-4o-mini',
    apiKey: null,
    baseUrl: null,
    timeoutSecs: null,
    maxRetries: null,
    temperature: null,
    maxTokens: null,
);

// Build the structured-extraction config from a JSON payload. $schema is a
// JSON string, so it is decoded back into an array first; that way it is
// embedded as a nested object instead of a doubly-encoded string.
// Both JSON calls throw JsonException on failure, so a bad schema cannot
// silently become `"schema": null` in the payload.
$structured = StructuredExtractionConfig::from_json(json_encode([
    'schema' => json_decode($schema, true, flags: JSON_THROW_ON_ERROR),
    'schema_name' => 'paper_metadata',
    'strict' => true,
    'llm' => [
        'model' => $llm->model,
    ],
], JSON_THROW_ON_ERROR));

// Extraction config constructed with no arguments, with the structured
// extraction settings attached afterwards.
$config = new ExtractionConfig();
$config->structured_extraction = $structured;

// Run the extraction on the sample document.
$kreuzberg = new Kreuzberg($config);
$result = $kreuzberg->extractFile('paper.pdf');

// Print the structured output, followed by a newline, only when one is present.
$structuredOutput = $result->structured_output;
if (null !== $structuredOutput) {
    echo $structuredOutput . "\n";
}