Nomad changes
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s

This commit is contained in:
Henrik Jess Nielsen
2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions

View File

@@ -0,0 +1,39 @@
```php title="basic_extraction_oop.php"
<?php

declare(strict_types=1);

/**
 * Basic Document Extraction (OOP API)
 *
 * Smallest possible walkthrough of the object-oriented API: create a
 * Kreuzberg instance, extract one file, then print its content, a few
 * metadata fields and every table attached to the result.
 */

use Kreuzberg\Kreuzberg;

require_once __DIR__ . '/vendor/autoload.php';

$extractor = new Kreuzberg();
$document = $extractor->extractFile('document.pdf');

// --- Content -------------------------------------------------------------
echo "Extracted Content:\n", "==================\n";
echo $document->content, "\n\n";

// --- Metadata ------------------------------------------------------------
// Each metadata field falls back to 'N/A' when it is not set.
echo "Metadata:\n", "=========\n";

$title = $document->metadata->title ?? 'N/A';
echo "Title: " . $title . "\n";

if (isset($document->metadata->authors)) {
    $authorList = implode(', ', $document->metadata->authors);
} else {
    $authorList = 'N/A';
}
echo "Authors: " . $authorList . "\n";

$pages = $document->metadata->pageCount ?? 'N/A';
echo "Pages: " . $pages . "\n";

$format = $document->mimeType;
echo "Format: " . $format . "\n\n";

// --- Tables --------------------------------------------------------------
// The table section is printed only when at least one table is present.
$tableTotal = count($document->tables);
if ($tableTotal > 0) {
    echo "Tables Found: " . $tableTotal . "\n";

    foreach ($document->tables as $position => $table) {
        // Keys are shown 1-based for human readers.
        $heading = "\nTable " . ($position + 1) . " (Page {$table->pageNumber}):\n";
        echo $heading;
        echo $table->markdown . "\n";
    }
}
```

View File

@@ -0,0 +1,35 @@
```php title="basic_extraction_procedural.php"
<?php

declare(strict_types=1);

/**
 * Basic Document Extraction (Procedural API)
 *
 * This example shows the procedural API for document extraction,
 * which is more concise for simple use cases. After printing the
 * extracted text and metadata it reports character and word counts.
 */

require_once __DIR__ . '/vendor/autoload.php';

use function Kreuzberg\extract_file;

$result = extract_file('document.pdf');
$content = $result->content;

// Reused separator line for both section headings.
$rule = str_repeat('=', 50) . "\n";

echo "Extracted Text:\n";
echo $rule;
echo $content . "\n\n";

echo "Document Information:\n";
echo $rule;
printf("Title: %s\n", $result->metadata->title ?? 'Unknown');
printf(
    "Authors: %s\n",
    isset($result->metadata->authors) ? implode(', ', $result->metadata->authors) : 'Unknown',
);
printf("Pages: %d\n", $result->metadata->pageCount ?? 0);
printf("Format: %s\n", $result->mimeType);

// Characters are counted with mb_strlen() so multibyte text is not
// reported as a byte count.
$charCount = mb_strlen($content);

// str_word_count() works on bytes and the current locale, so it can split
// words at accented letters and skip non-Latin scripts entirely. Count runs
// of Unicode letters instead; apostrophes and hyphens are allowed inside a
// word (but not as its first character), mirroring str_word_count()'s rule.
$wordCount = preg_match_all('/\p{L}[\p{L}\p{M}\'-]*/u', $content);
if ($wordCount === false) {
    // preg_match_all() returns false on failure (for example when the
    // subject is not valid UTF-8); fall back to the byte-based count.
    $wordCount = str_word_count($content);
}

printf("\nStatistics:\n");
printf("Characters: %d\n", $charCount);
printf("Words: %d\n", $wordCount);
```

View File

@@ -0,0 +1,44 @@
```php title="extract_from_bytes.php"
<?php

declare(strict_types=1);

/**
 * Extracting from Bytes
 *
 * Extract content from file data in memory instead of from disk.
 * Useful for processing uploaded files or data from remote sources.
 *
 * The same bytes are extracted once with the procedural API and once with
 * the OOP API, followed by a simulated upload that is processed only when
 * the temporary file exists.
 */

require_once __DIR__ . '/vendor/autoload.php';

use Kreuzberg\Kreuzberg;

use function Kreuzberg\extract_bytes;

$sourcePath = 'document.pdf';
$fileData = file_get_contents($sourcePath);
if ($fileData === false) {
    // file_get_contents() returns false on failure; stop here with a clear
    // message instead of handing a boolean to the extractor.
    throw new RuntimeException("Unable to read {$sourcePath}");
}

$mimeType = 'application/pdf';

$result = extract_bytes($fileData, $mimeType);
echo "Extracted using procedural API:\n";
// mb_substr() keeps the 200-character preview from cutting a multibyte
// character in half, which byte-based substr() can do.
echo mb_substr($result->content, 0, 200) . "...\n\n";

$kreuzberg = new Kreuzberg();
$result = $kreuzberg->extractBytes($fileData, $mimeType);
echo "Extracted using OOP API:\n";
echo mb_substr($result->content, 0, 200) . "...\n\n";

// Simulated upload entry (same keys as a $_FILES element).
$uploadedFile = [
    'tmp_name' => '/tmp/uploaded_document.pdf',
    'type' => 'application/pdf',
    'size' => 1024000,
];

if (file_exists($uploadedFile['tmp_name'])) {
    $data = file_get_contents($uploadedFile['tmp_name']);
    if ($data === false) {
        // The file exists but could not be read (e.g. permissions).
        throw new RuntimeException("Unable to read {$uploadedFile['tmp_name']}");
    }

    $result = extract_bytes($data, $uploadedFile['type']);
    echo "Uploaded file processed:\n";
    // Raw payload size is a byte count, so strlen() is the right call here.
    echo "Size: " . strlen($data) . " bytes\n";
    // The extracted text is reported in characters, hence mb_strlen().
    echo "Content length: " . mb_strlen($result->content) . " characters\n";
}
```

View File

@@ -0,0 +1,53 @@
```php title="mime_type_detection.php"
<?php

declare(strict_types=1);

/**
 * MIME Type Detection
 *
 * Kreuzberg can automatically detect MIME types from file content or paths.
 * This is useful when the file extension is missing or unreliable.
 *
 * The script detects the type of a known file by path and by content,
 * extracts a file that has no extension using the detected type, and
 * finally gates extraction of an upload on an allow-list of MIME types.
 */

require_once __DIR__ . '/vendor/autoload.php';

use function Kreuzberg\detect_mime_type;
use function Kreuzberg\detect_mime_type_from_path;
use function Kreuzberg\extract_file;

$path = 'document.pdf';
$mimeType = detect_mime_type_from_path($path);
echo "Detected MIME type from path: $mimeType\n";

$data = file_get_contents($path);
if ($data === false) {
    // file_get_contents() returns false on failure; stop with a clear
    // message rather than passing a boolean to detect_mime_type().
    throw new RuntimeException("Unable to read {$path}");
}
$mimeType = detect_mime_type($data);
echo "Detected MIME type from content: $mimeType\n\n";

// A file without an extension: detect its type first, then pass that type
// to the extractor explicitly.
$unknownFile = 'file_without_extension';
if (file_exists($unknownFile)) {
    $detectedType = detect_mime_type_from_path($unknownFile);
    echo "Unknown file detected as: $detectedType\n";
    $result = extract_file($unknownFile, $detectedType);
    // mb_strlen() because the message reports characters, not bytes.
    echo "Successfully extracted " . mb_strlen($result->content) . " characters\n";
}

// Only these MIME types are accepted for processing.
$allowedTypes = [
    'application/pdf',
    'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
    'text/plain',
];

$fileToCheck = 'user_upload.dat';
if (file_exists($fileToCheck)) {
    // NOTE(review): for untrusted uploads, confirm that path-based detection
    // inspects the file content and not just the name; otherwise prefer
    // detect_mime_type() on the bytes.
    $type = detect_mime_type_from_path($fileToCheck);
    if (in_array($type, $allowedTypes, true)) {
        echo "File type $type is allowed, processing...\n";
        // $result holds the extraction for any further processing.
        $result = extract_file($fileToCheck);
    } else {
        echo "File type $type is not allowed\n";
    }
}
```