Files
fil/docs/snippets/php/ocr/image_extraction.md

40 lines
1.1 KiB
Markdown
Raw Permalink Normal View History

2026-06-01 23:40:55 +02:00
```php title="PHP"
<?php

declare(strict_types=1);

require_once __DIR__ . '/vendor/autoload.php';

use Kreuzberg\Kreuzberg;
use Kreuzberg\Config\ExtractionConfig;
use Kreuzberg\Config\ImageExtractionConfig;

// Example: extract the images of a document together with its text and
// print a short summary of each image.

// Image options are built on their own so the extraction config stays short.
$imageOptions = new ImageExtractionConfig(
    extractImages: true,
    // Original note on this flag: "Save images to disk". Nothing in this
    // script writes files itself.
    // NOTE(review): confirm the effect of `false` against ImageExtractionConfig.
    embedAsBase64: false,
    maxImagesPerPage: 10,
);

$extractor = new Kreuzberg(new ExtractionConfig(images: $imageOptions));
$document = $extractor->extractFile('document_with_images.pdf');

// Text first, followed by one blank line.
echo "Extracted Content:\n";
echo "{$document->content}\n\n";

// Read the image list once; `?? null` tolerates a missing property the same
// way empty() does, so no notice is raised when there are no images.
$images = $document->images ?? null;

if (!empty($images)) {
    printf("Extracted %d images\n", count($images));

    foreach ($images as $index => $image) {
        // The label shown to the reader is the iteration key plus one.
        $ordinal = $index + 1;

        echo "Image {$ordinal}:\n";
        echo " Type: {$image->mimeType}\n";
        // strlen() is byte-wise, which is what a "bytes" figure needs.
        printf(" Size: %d bytes\n", strlen($image->data));

        // Dimensions are printed only when both values are set.
        if (isset($image->width, $image->height)) {
            echo " Dimensions: {$image->width}x{$image->height}\n";
        }

        echo "\n";
    }
}
?>
```