This commit is contained in:
39
docs/snippets/php/quickstart/basic_extraction_oop.php
Normal file
39
docs/snippets/php/quickstart/basic_extraction_oop.php
Normal file
@@ -0,0 +1,39 @@
|
||||
```php title="basic_extraction_oop.php"
|
||||
<?php

declare(strict_types=1);

/**
 * Basic Document Extraction (OOP API)
 *
 * Minimal walkthrough of the object-oriented API: create a Kreuzberg
 * instance, extract 'document.pdf', then print the extracted content,
 * a few metadata fields and any tables attached to the result.
 */

require_once __DIR__ . '/vendor/autoload.php';

use Kreuzberg\Kreuzberg;

$extractor = new Kreuzberg();
$extraction = $extractor->extractFile('document.pdf');

// Extracted text under an underlined heading.
echo "Extracted Content:\n", "==================\n", $extraction->content, "\n\n";

// Title, authors and page count fall back to 'N/A' when unset or null.
echo "Metadata:\n", "=========\n";

$title = $extraction->metadata->title ?? 'N/A';
echo "Title: {$title}\n";

$authors = 'N/A';
if (isset($extraction->metadata->authors)) {
    $authors = implode(', ', $extraction->metadata->authors);
}
echo "Authors: {$authors}\n";

$pages = $extraction->metadata->pageCount ?? 'N/A';
echo "Pages: {$pages}\n";

echo "Format: {$extraction->mimeType}\n\n";

// Tables are only reported when the result carries at least one.
$tableCount = count($extraction->tables);
if ($tableCount > 0) {
    echo "Tables Found: {$tableCount}\n";

    foreach ($extraction->tables as $index => $table) {
        // The displayed table number is the iteration key plus one.
        $tableNumber = $index + 1;
        echo "\nTable {$tableNumber} (Page {$table->pageNumber}):\n";
        echo $table->markdown, "\n";
    }
}
|
||||
```
|
||||
35
docs/snippets/php/quickstart/basic_extraction_procedural.php
Normal file
35
docs/snippets/php/quickstart/basic_extraction_procedural.php
Normal file
@@ -0,0 +1,35 @@
|
||||
```php title="basic_extraction_procedural.php"
|
||||
<?php

declare(strict_types=1);

/**
 * Basic Document Extraction (Procedural API)
 *
 * This example shows the procedural API for document extraction,
 * which is more concise for simple use cases. It prints the extracted
 * text, a few metadata fields and simple character/word statistics.
 */

require_once __DIR__ . '/vendor/autoload.php';

use function Kreuzberg\extract_file;

$result = extract_file('document.pdf');

echo "Extracted Text:\n";
echo str_repeat('=', 50) . "\n";
echo $result->content . "\n\n";

// Metadata fields fall back to a placeholder when unset or null.
echo "Document Information:\n";
echo str_repeat('=', 50) . "\n";
printf("Title: %s\n", $result->metadata->title ?? 'Unknown');
printf(
    "Authors: %s\n",
    isset($result->metadata->authors) ? implode(', ', $result->metadata->authors) : 'Unknown',
);
printf("Pages: %d\n", $result->metadata->pageCount ?? 0);
printf("Format: %s\n", $result->mimeType);

// Both statistics are Unicode-aware. str_word_count() only recognises
// single-byte letters of the current locale, so it splits or drops words in
// non-ASCII text. A word here is a letter followed by letters, combining
// marks, apostrophes or hyphens, which gives the same count for ASCII text.
$char_count = mb_strlen($result->content, 'UTF-8');
$matched_words = preg_match_all('/\p{L}[\p{L}\p{M}\'\x{2019}-]*/u', $result->content);
// preg_match_all() returns false on failure (e.g. invalid UTF-8); report 0 then.
$word_count = $matched_words === false ? 0 : $matched_words;

printf("\nStatistics:\n");
printf("Characters: %d\n", $char_count);
printf("Words: %d\n", $word_count);
|
||||
```
|
||||
44
docs/snippets/php/quickstart/extract_from_bytes.php
Normal file
44
docs/snippets/php/quickstart/extract_from_bytes.php
Normal file
@@ -0,0 +1,44 @@
|
||||
```php title="extract_from_bytes.php"
|
||||
<?php

declare(strict_types=1);

/**
 * Extracting from Bytes
 *
 * Extract content from file data in memory instead of from disk.
 * Useful for processing uploaded files or data from remote sources.
 *
 * The same bytes are extracted through the procedural function and through
 * the OOP method, followed by a simulated file-upload scenario.
 */

require_once __DIR__ . '/vendor/autoload.php';

use Kreuzberg\Kreuzberg;

use function Kreuzberg\extract_bytes;

$fileData = file_get_contents('document.pdf');
if ($fileData === false) {
    // file_get_contents() signals failure with false; stop here instead of
    // handing a non-string to the extraction calls below.
    throw new RuntimeException('Unable to read document.pdf');
}
$mimeType = 'application/pdf';

$result = extract_bytes($fileData, $mimeType);
echo "Extracted using procedural API:\n";
// mb_substr() cuts on character boundaries; substr() works on bytes and
// could split a multi-byte character in the preview.
echo mb_substr($result->content, 0, 200) . "...\n\n";

$kreuzberg = new Kreuzberg();
$result = $kreuzberg->extractBytes($fileData, $mimeType);
echo "Extracted using OOP API:\n";
echo mb_substr($result->content, 0, 200) . "...\n\n";

// Stand-in for one entry of $_FILES.
// NOTE(review): for a real upload the 'type' value is supplied by the client,
// so verify it (for example by detecting the type from the bytes) before
// trusting it.
$uploadedFile = [
    'tmp_name' => '/tmp/uploaded_document.pdf',
    'type' => 'application/pdf',
    'size' => 1024000,
];

if (file_exists($uploadedFile['tmp_name'])) {
    $data = file_get_contents($uploadedFile['tmp_name']);
    if ($data === false) {
        throw new RuntimeException("Unable to read {$uploadedFile['tmp_name']}");
    }
    $result = extract_bytes($data, $uploadedFile['type']);

    echo "Uploaded file processed:\n";
    // Raw payload size is a byte count, so strlen() is the right tool here.
    echo "Size: " . strlen($data) . " bytes\n";
    // Extracted text is reported in characters, so count with mb_strlen().
    echo "Content length: " . mb_strlen($result->content) . " characters\n";
}
|
||||
```
|
||||
53
docs/snippets/php/quickstart/mime_type_detection.php
Normal file
53
docs/snippets/php/quickstart/mime_type_detection.php
Normal file
@@ -0,0 +1,53 @@
|
||||
```php title="mime_type_detection.php"
|
||||
<?php

declare(strict_types=1);

/**
 * MIME Type Detection
 *
 * Kreuzberg can automatically detect MIME types from file content or paths.
 * This is useful when the file extension is missing or unreliable.
 *
 * The example detects a type from a path and from raw bytes, extracts a file
 * that has no extension, and checks a file against an allow-list before
 * extracting it.
 */

require_once __DIR__ . '/vendor/autoload.php';

use function Kreuzberg\detect_mime_type;
use function Kreuzberg\detect_mime_type_from_path;
use function Kreuzberg\extract_file;

$path = 'document.pdf';
$mimeType = detect_mime_type_from_path($path);
echo "Detected MIME type from path: $mimeType\n";

$data = file_get_contents($path);
if ($data === false) {
    // file_get_contents() signals failure with false; stop here instead of
    // handing a non-string to detect_mime_type().
    throw new RuntimeException("Unable to read {$path}");
}
$mimeType = detect_mime_type($data);
echo "Detected MIME type from content: $mimeType\n\n";

$unknownFile = 'file_without_extension';
if (file_exists($unknownFile)) {
    $detectedType = detect_mime_type_from_path($unknownFile);
    echo "Unknown file detected as: $detectedType\n";

    // Pass the detected type along with the path for extraction.
    $result = extract_file($unknownFile, $detectedType);
    // Extracted text is reported in characters, so count with mb_strlen()
    // rather than the byte-oriented strlen().
    echo "Successfully extracted " . mb_strlen($result->content) . " characters\n";
}

$allowedTypes = [
    'application/pdf',
    'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
    'text/plain',
];

$fileToCheck = 'user_upload.dat';
if (file_exists($fileToCheck)) {
    $type = detect_mime_type_from_path($fileToCheck);

    if (in_array($type, $allowedTypes, true)) {
        echo "File type $type is allowed, processing...\n";
        // Extract with the type that was just validated, so the allow-list
        // check and the extraction agree on what the file is.
        $result = extract_file($fileToCheck, $type);
    } else {
        echo "File type $type is not allowed\n";
    }
}
|
||||
```
|
||||
Reference in New Issue
Block a user