32 lines
942 B
Markdown
32 lines
942 B
Markdown
|
|
```php title="PHP"
|
||
|
|
<?php
|
||
|
|
declare(strict_types=1);
|
||
|
|
|
||
|
|
use Kreuzberg\Kreuzberg;
|
||
|
|
use Kreuzberg\ExtractionConfig;
|
||
|
|
use Kreuzberg\KreuzbergException;
|
||
|
|
|
||
|
|
function extract_text(string $bytes, string $mime_type): string {
|
||
|
|
$config = new ExtractionConfig();
|
||
|
|
$result = Kreuzberg::extractBytesSync($bytes, $mime_type, $config);
|
||
|
|
return $result->getContent();
|
||
|
|
}
|
||
|
|
|
||
|
|
$bytes = file_get_contents('document.pdf') ?: '';
|
||
|
|
try {
|
||
|
|
$text = extract_text($bytes, 'application/pdf');
|
||
|
|
echo "Extracted " . strlen($text) . " chars\n";
|
||
|
|
} catch (KreuzbergException $e) {
|
||
|
|
// All Kreuzberg errors are KreuzbergException
|
||
|
|
// Check the message for error type details
|
||
|
|
$message = $e->getMessage();
|
||
|
|
if (strpos($message, 'not supported') !== false) {
|
||
|
|
echo "Format not supported\n";
|
||
|
|
} elseif (strpos($message, 'OCR') !== false) {
|
||
|
|
echo "OCR failed: " . $message . "\n";
|
||
|
|
} else {
|
||
|
|
echo "Error: " . $message . "\n";
|
||
|
|
}
|
||
|
|
}
|
||
|
|
```
|