```php title="batch_processing.php"
// NOTE(review): the opening of this snippet appears to have been lost before
// this point (presumably the PHP open tag, the imports, the definition of
// $files and the head of the loop that binds $index and $result). The fragment
// below is kept verbatim; restore the missing head from the upstream example.
$result) {
        $filename = basename($files[$index]);
        echo "$filename:\n";
        // strlen() counts bytes, so multi-byte text reports more than its character count.
        echo " Content: " . strlen($result->content) . " chars\n";
        echo " Tables: " . count($result->tables) . "\n";
        echo " MIME: " . $result->mimeType . "\n\n";
    }
}

// Shared configuration: extract tables, skip images.
$config = new ExtractionConfig(
    extractTables: true,
    extractImages: false,
);
$kreuzberg = new Kreuzberg($config);

// --- Batch-extract every PDF in the working directory and report timing ---

// glob() returns false on error; normalise that to an empty list.
$pdfFiles = glob('*.pdf') ?: [];

if ($pdfFiles !== []) {
    echo "Processing " . count($pdfFiles) . " PDF files...\n";

    $start = microtime(true);
    $results = $kreuzberg->batchExtractFiles($pdfFiles, $config);
    $elapsed = microtime(true) - $start;

    echo "Completed in " . number_format($elapsed, 2) . " seconds\n";
    // Dividing by a zero elapsed time throws DivisionByZeroError on PHP 8.
    if ($elapsed > 0.0) {
        echo "Throughput: " . number_format(count($pdfFiles) / $elapsed, 2) . " files/second\n\n";
    } else {
        echo "Throughput: n/a (elapsed time too small to measure)\n\n";
    }

    $totalChars = 0;
    $totalTables = 0;
    foreach ($results as $result) {
        $totalChars += strlen($result->content);
        $totalTables += count($result->tables);
    }

    echo "Total content: " . number_format($totalChars) . " characters\n";
    echo "Total tables: $totalTables\n";
}

// --- Batch-extract documents that are already in memory ---

$uploadSources = [
    'file1.pdf' => 'application/pdf',
    'file2.docx' => 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
];

$uploadedFiles = [];
foreach ($uploadSources as $path => $mime) {
    // file_get_contents() yields false on failure; never forward that as document bytes.
    $data = is_readable($path) ? file_get_contents($path) : false;
    if ($data === false) {
        echo "Skipping unreadable file: $path\n";
        continue;
    }

    $uploadedFiles[] = ['data' => $data, 'mime' => $mime];
}

$dataList = array_column($uploadedFiles, 'data');
$mimeTypes = array_column($uploadedFiles, 'mime');

if ($dataList !== []) {
    $results = batch_extract_bytes($dataList, $mimeTypes);
    echo "\nProcessed " . count($results) . " files from memory\n";
}

/**
 * Recursively collect supported documents below a directory and extract them
 * in batches of ten, printing a progress line per batch.
 *
 * @param string    $dir       Root directory to scan.
 * @param Kreuzberg $kreuzberg Extractor whose batchExtractFiles() handles each batch.
 *
 * @return array Results of all batches merged in processing order; empty when
 *               no file with a supported extension is found.
 */
function processDirectory(string $dir, Kreuzberg $kreuzberg): array
{
    $supportedExtensions = ['pdf', 'docx', 'xlsx', 'pptx', 'txt'];

    $iterator = new RecursiveIteratorIterator(
        new RecursiveDirectoryIterator($dir)
    );

    $files = [];
    foreach ($iterator as $file) {
        if (!$file->isFile()) {
            continue;
        }

        // Extension match is case-insensitive.
        if (in_array(strtolower($file->getExtension()), $supportedExtensions, true)) {
            $files[] = $file->getPathname();
        }
    }

    if ($files === []) {
        return [];
    }

    $batches = array_chunk($files, 10);
    $batchCount = count($batches);

    $resultsPerBatch = [];
    foreach ($batches as $batchIndex => $batch) {
        echo "Processing batch " . ($batchIndex + 1) . "/" . $batchCount . "...\n";
        $resultsPerBatch[] = $kreuzberg->batchExtractFiles($batch);
    }

    // Merge once at the end instead of re-copying the accumulated list per batch.
    return array_merge(...$resultsPerBatch);
}

// --- Process a whole directory tree ---

$directory = './documents';
if (is_dir($directory)) {
    echo "\nProcessing directory: $directory\n";
    $results = processDirectory($directory, $kreuzberg);
    echo "Processed " . count($results) . " files\n";
}

// --- Error handling: a failing batch is reported via KreuzbergException ---

// NOTE(review): presumably 'nonexistent.pdf' is what triggers the exception — confirm.
$mixedFiles = ['valid.pdf', 'nonexistent.pdf', 'another.docx'];
try {
    $results = batch_extract_files($mixedFiles);
} catch (\Kreuzberg\Exceptions\KreuzbergException $e) {
    echo "Batch processing error: " . $e->getMessage() . "\n";
}

// --- Chunked processing with a progress indicator ---

// One glob() per extension instead of GLOB_BRACE, which is not available on
// every platform; each call is normalised because glob() may return false.
$allFiles = [];
foreach (['pdf', 'docx', 'xlsx'] as $extension) {
    $allFiles = array_merge($allFiles, glob("documents/*.$extension") ?: []);
}

$batchSize = 5;
$batches = array_chunk($allFiles, $batchSize);
$batchCount = count($batches);
$totalProcessed = 0;

echo "\nProcessing " . count($allFiles) . " files in " . $batchCount . " batches...\n";

foreach ($batches as $index => $batch) {
    $results = $kreuzberg->batchExtractFiles($batch);
    $totalProcessed += count($results);

    // Report after the batch has finished so the percentage reflects completed work.
    $completed = $index + 1;
    $progress = ($completed / $batchCount) * 100;
    printf("\rProgress: %.1f%% [%d/%d batches]", $progress, $completed, $batchCount);
}

echo "\n\nCompleted! Processed $totalProcessed files.\n";
```