This commit is contained in:
42
docs/snippets/php/config/element_based_output.md
Normal file
42
docs/snippets/php/config/element_based_output.md
Normal file
@@ -0,0 +1,42 @@
|
||||
```php title="Element-Based Output (PHP)"
|
||||
<?php
|
||||
use Kreuzberg\ExtractionConfig;
|
||||
use Kreuzberg\Kreuzberg;
|
||||
|
||||
// Configure element-based output
|
||||
$config = new ExtractionConfig();
|
||||
$config->setOutputFormat('element_based');
|
||||
|
||||
// Extract document
|
||||
$result = Kreuzberg::extractFileSync('document.pdf', $config);
|
||||
|
||||
// Access elements
|
||||
foreach ($result->getElements() as $element) {
|
||||
echo "Type: " . $element->getElementType() . "\n";
|
||||
echo "Text: " . substr($element->getText(), 0, 100) . "\n";
|
||||
|
||||
if ($element->getMetadata()->getPageNumber()) {
|
||||
echo "Page: " . $element->getMetadata()->getPageNumber() . "\n";
|
||||
}
|
||||
|
||||
if ($element->getMetadata()->getCoordinates()) {
|
||||
$coords = $element->getMetadata()->getCoordinates();
|
||||
echo sprintf("Coords: (%s, %s) - (%s, %s)\n",
|
||||
$coords->getLeft(), $coords->getTop(),
|
||||
$coords->getRight(), $coords->getBottom());
|
||||
}
|
||||
|
||||
echo "---\n";
|
||||
}
|
||||
|
||||
// Filter by element type
|
||||
$titles = array_filter($result->getElements(), function($e) {
|
||||
return $e->getElementType() === 'title';
|
||||
});
|
||||
|
||||
foreach ($titles as $title) {
|
||||
$level = $title->getMetadata()->getAdditional()['level'] ?? 'unknown';
|
||||
echo "[{$level}] {$title->getText()}\n";
|
||||
}
|
||||
?>
|
||||
```
|
||||
Reference in New Issue
Block a user