Files
fil/docs/snippets/php/config/element_based_output.md

43 lines
1.2 KiB
Markdown
Raw Normal View History

2026-06-01 23:40:55 +02:00
```php title="Element-Based Output (PHP)"
<?php

use Kreuzberg\ExtractionConfig;
use Kreuzberg\Kreuzberg;

// Configure element-based output so the result exposes individual elements.
$config = new ExtractionConfig();
$config->setOutputFormat('element_based');

// Extract document
$result = Kreuzberg::extractFileSync('document.pdf', $config);

// Access elements: print each element's type, a short text preview, and
// whatever page/coordinate metadata is available for it.
foreach ($result->getElements() as $element) {
    // Fetch the metadata once; it is consulted several times below.
    $metadata = $element->getMetadata();

    echo "Type: " . $element->getElementType() . "\n";

    // mb_substr() cuts on character boundaries, so the 100-character preview
    // never ends in a half-written multi-byte sequence the way byte-wise
    // substr() can. NOTE(review): assumes the extracted text is UTF-8 — confirm.
    echo "Text: " . mb_substr($element->getText(), 0, 100, 'UTF-8') . "\n";

    // Only print the page line when a (truthy) page number is reported.
    $pageNumber = $metadata->getPageNumber();
    if ($pageNumber) {
        echo "Page: " . $pageNumber . "\n";
    }

    // Only print the bounding box when coordinates are reported.
    $coords = $metadata->getCoordinates();
    if ($coords) {
        printf(
            "Coords: (%s, %s) - (%s, %s)\n",
            $coords->getLeft(),
            $coords->getTop(),
            $coords->getRight(),
            $coords->getBottom()
        );
    }

    echo "---\n";
}

// Filter by element type: keep only the elements whose type is 'title'.
// array_filter() preserves the original keys, which is fine for iteration.
$titles = array_filter($result->getElements(), static function ($e) {
    return $e->getElementType() === 'title';
});

foreach ($titles as $title) {
    // Fall back to 'unknown' when the additional metadata has no 'level' entry.
    $level = $title->getMetadata()->getAdditional()['level'] ?? 'unknown';
    echo "[{$level}] {$title->getText()}\n";
}
```