Files
fil/docs/snippets/php/config/element_based_output.md
Henrik Jess Nielsen b4c07d3693
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s
Nomad changes
2026-06-01 23:40:55 +02:00

1.2 KiB

<?php
use Kreuzberg\ExtractionConfig;
use Kreuzberg\Kreuzberg;

// Configure element-based output
$config = new ExtractionConfig();
$config->setOutputFormat('element_based');

// Extract document
$result = Kreuzberg::extractFileSync('document.pdf', $config);

// Access elements: for each one print its type, a short text preview, and
// whatever page / coordinate metadata is present.
foreach ($result->getElements() as $element) {
    echo "Type: " . $element->getElementType() . "\n";

    // mb_substr() counts characters rather than bytes, so the 100-character
    // preview never ends in the middle of a multibyte character.
    echo "Text: " . mb_substr($element->getText(), 0, 100) . "\n";

    // Fetch the metadata object once and reuse it for both optional fields.
    $metadata = $element->getMetadata();

    // NOTE(review): assumes getPageNumber() returns null when no page is
    // known — confirm against the binding's API.
    $pageNumber = $metadata->getPageNumber();
    if ($pageNumber !== null) {
        echo "Page: " . $pageNumber . "\n";
    }

    // NOTE(review): assumes getCoordinates() returns null when no bounding
    // box is available — confirm against the binding's API.
    $coords = $metadata->getCoordinates();
    if ($coords !== null) {
        printf(
            "Coords: (%s, %s) - (%s, %s)\n",
            $coords->getLeft(),
            $coords->getTop(),
            $coords->getRight(),
            $coords->getBottom()
        );
    }

    echo "---\n";
}

// Filter by element type: collect only the elements whose type is 'title'.
// Original keys are kept, matching what array_filter() would produce.
$titles = [];
foreach ($result->getElements() as $index => $candidate) {
    if ($candidate->getElementType() === 'title') {
        $titles[$index] = $candidate;
    }
}

// Print each title prefixed with its 'level' entry from the additional
// metadata, falling back to 'unknown' when that entry is missing.
foreach ($titles as $heading) {
    $additional = $heading->getMetadata()->getAdditional();
    $level = $additional['level'] ?? 'unknown';
    echo '[' . $level . '] ' . $heading->getText() . "\n";
}
?>