This commit is contained in:
39
docs/snippets/php/ocr/image_extraction.md
Normal file
39
docs/snippets/php/ocr/image_extraction.md
Normal file
@@ -0,0 +1,39 @@
```php title="PHP"
<?php

declare(strict_types=1);

require_once __DIR__ . '/vendor/autoload.php';

use Kreuzberg\Kreuzberg;
use Kreuzberg\Config\ExtractionConfig;
use Kreuzberg\Config\ImageExtractionConfig;

// Extract images from documents alongside text
$config = new ExtractionConfig(
    images: new ImageExtractionConfig(
        extractImages: true,
        embedAsBase64: false, // Save images to disk
        maxImagesPerPage: 10,
    ),
);

$kreuzberg = new Kreuzberg($config);
$result = $kreuzberg->extractFile('document_with_images.pdf');

echo "Extracted Content:\n";
echo $result->content . "\n\n";

// Only print the image report when the result actually carries images.
if (!empty($result->images)) {
    echo "Extracted " . count($result->images) . " images\n";

    foreach ($result->images as $index => $image) {
        // Number the listing from 1 (assumes zero-based integer keys).
        echo "Image " . ($index + 1) . ":\n";
        echo " Type: " . $image->mimeType . "\n";
        // strlen() is byte-wise, which is what a "bytes" figure needs.
        echo " Size: " . strlen($image->data) . " bytes\n";

        // Dimensions are printed only when both values are set and non-null.
        if (isset($image->width, $image->height)) {
            echo " Dimensions: " . $image->width . "x" . $image->height . "\n";
        }

        echo "\n";
    }
}
```
Reference in New Issue
Block a user