```php title="metadata_extraction.php"
<?php

// Example script: prints the metadata of one document, then builds a metadata
// collection for every PDF/DOCX/XLSX file under documents/, searches it,
// exports it as an HTML catalog and a CSV file, and prints summary statistics
// and a simple quality report.

// NOTE(review): the opening of this example (the `<?php` tag, any require/use
// lines that bring extract_file() into scope, and the call that produced
// `$result`) did not survive in the source text; only a trailing `metadata;`
// fragment remained. The two statements below are a reconstruction. Confirm
// the sample path and the bootstrap lines against the original example.
$result = extract_file('document.pdf');
$metadata = $result->metadata;

echo "Document Metadata:\n";
echo str_repeat('=', 60) . "\n";
echo "Title: " . ($metadata->title ?? 'N/A') . "\n";
echo "Authors: " . (isset($metadata->authors) ? implode(', ', $metadata->authors) : 'N/A') . "\n";
echo "Subject: " . ($metadata->subject ?? 'N/A') . "\n";
echo "Created By: " . ($metadata->createdBy ?? 'N/A') . "\n";
echo "Producer: " . ($metadata->producer ?? 'N/A') . "\n";
echo "Created: " . ($metadata->createdAt ?? 'N/A') . "\n";
echo "Modified: " . ($metadata->modifiedAt ?? 'N/A') . "\n";
echo "Page Count: " . ($metadata->pageCount ?? 'N/A') . "\n";
echo "Keywords: " . implode(', ', $metadata->keywords ?? []) . "\n";
echo "Language: " . ($metadata->language ?? 'N/A') . "\n\n";

// glob() returns false on error; treat that the same as "no matching files"
// so the loop below never iterates over a boolean.
$files = glob('documents/*.{pdf,docx,xlsx}', GLOB_BRACE) ?: [];
$metadataCollection = [];

foreach ($files as $file) {
    $result = extract_file($file);
    $size = filesize($file);

    $metadataCollection[] = [
        'file' => basename($file),
        'title' => $result->metadata->title ?? 'Untitled',
        'author' => isset($result->metadata->authors)
            ? implode(', ', $result->metadata->authors)
            : 'Unknown',
        'created' => $result->metadata->createdAt ?? 'Unknown',
        'pages' => $result->metadata->pageCount ?? 0,
        // filesize() returns false on failure; store 0 so later arithmetic
        // and array_sum() only ever see integers.
        'size' => $size === false ? 0 : $size,
    ];
}

echo "Metadata Collection:\n";
echo str_repeat('=', 60) . "\n";

foreach ($metadataCollection as $meta) {
    echo "{$meta['file']}:\n";
    echo "  Title: {$meta['title']}\n";
    echo "  Author: {$meta['author']}\n";
    echo "  Created: {$meta['created']}\n";
    echo "  Pages: {$meta['pages']}\n";
    echo "  Size: " . number_format($meta['size'] / 1024, 2) . " KB\n\n";
}

/**
 * Returns the entries whose 'author' field contains $author (case-insensitive).
 *
 * Keys of the input array are preserved, as with array_filter().
 *
 * @param array  $collection Entries shaped like the rows of $metadataCollection.
 * @param string $author     Substring to look for in each entry's 'author'.
 *
 * @return array Matching entries.
 */
function searchByAuthor(array $collection, string $author): array
{
    return array_filter($collection, function ($meta) use ($author) {
        return stripos($meta['author'], $author) !== false;
    });
}

/**
 * Returns the entries whose creation date lies within [$start, $end].
 *
 * Entries whose 'created' value is the placeholder 'Unknown' are excluded.
 * The comparison is a plain string comparison on the first 10 characters of
 * 'created'. It assumes 'created' starts with a YYYY-MM-DD date and that the
 * bounds use the same format (TODO confirm against the extractor's output).
 *
 * @param array  $collection Entries shaped like the rows of $metadataCollection.
 * @param string $start      Inclusive lower bound, e.g. '2024-01-01'.
 * @param string $end        Inclusive upper bound, e.g. '2024-12-31'.
 *
 * @return array Matching entries (input keys preserved).
 */
function searchByDateRange(array $collection, string $start, string $end): array
{
    return array_filter($collection, function ($meta) use ($start, $end) {
        $created = $meta['created'];
        if ($created === 'Unknown') {
            return false;
        }

        $dateOnly = substr($created, 0, 10);

        return $dateOnly >= $start && $dateOnly <= $end;
    });
}

$johnDocs = searchByAuthor($metadataCollection, 'John');
echo "Documents by John: " . count($johnDocs) . "\n";

$recentDocs = searchByDateRange($metadataCollection, '2024-01-01', '2024-12-31');
echo "Documents from 2024: " . count($recentDocs) . "\n\n";

/**
 * Renders the collection as a standalone HTML page containing one table.
 *
 * Every cell value is HTML-escaped before being written.
 *
 * @param array $collection Entries shaped like the rows of $metadataCollection.
 *
 * @return string The complete HTML document.
 */
function generateCatalog(array $collection): string
{
    // Escape for HTML text context; the cast lets integer page counts through.
    $escape = static function ($value): string {
        return htmlspecialchars((string) $value, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
    };

    $html = "<!DOCTYPE html>\n";
    $html .= "<html>\n<head>\n<meta charset=\"UTF-8\">\n<title>Document Catalog</title>\n</head>\n";
    $html .= "<body>\n<h1>Document Catalog</h1>\n";
    $html .= "<table>\n";
    $html .= "<tr><th>File</th><th>Title</th><th>Author</th><th>Created</th><th>Pages</th></tr>\n";

    foreach ($collection as $meta) {
        $html .= "<tr>";
        $html .= "<td>" . $escape($meta['file']) . "</td>";
        $html .= "<td>" . $escape($meta['title']) . "</td>";
        $html .= "<td>" . $escape($meta['author']) . "</td>";
        $html .= "<td>" . $escape($meta['created']) . "</td>";
        $html .= "<td>" . $escape($meta['pages']) . "</td>";
        $html .= "</tr>\n";
    }

    $html .= "</table>\n</body>\n</html>\n";

    return $html;
}

$catalog = generateCatalog($metadataCollection);

// file_put_contents() returns false on failure; only claim success when the
// write actually happened.
if (file_put_contents('catalog.html', $catalog) === false) {
    echo "Failed to save catalog to: catalog.html\n";
} else {
    echo "Catalog saved to: catalog.html\n";
}

/**
 * Writes the collection to $filename as CSV with a header row.
 *
 * @param array  $collection Entries shaped like the rows of $metadataCollection.
 * @param string $filename   Destination path; an existing file is truncated.
 *
 * @throws \RuntimeException When the destination cannot be opened for writing.
 */
function exportMetadataToCSV(array $collection, string $filename): void
{
    $fp = fopen($filename, 'w');
    if ($fp === false) {
        throw new \RuntimeException("Unable to open {$filename} for writing");
    }

    try {
        fputcsv($fp, ['File', 'Title', 'Author', 'Created', 'Pages', 'Size (KB)']);

        foreach ($collection as $meta) {
            fputcsv($fp, [
                $meta['file'],
                $meta['title'],
                $meta['author'],
                $meta['created'],
                $meta['pages'],
                // No thousands separator: the column must stay machine-readable.
                number_format($meta['size'] / 1024, 2, '.', ''),
            ]);
        }
    } finally {
        // Close the handle even if a row fails to serialize.
        fclose($fp);
    }
}

exportMetadataToCSV($metadataCollection, 'metadata.csv');
echo "Metadata exported to: metadata.csv\n";

$documentCount = count($metadataCollection);
$totalPages = array_sum(array_column($metadataCollection, 'pages'));
$totalSize = array_sum(array_column($metadataCollection, 'size'));
$authors = array_unique(array_column($metadataCollection, 'author'));

// Dividing by the document count throws DivisionByZeroError when no files
// matched, so report an average of 0 for an empty collection.
$averagePages = $documentCount > 0 ? $totalPages / $documentCount : 0;

echo "\nCollection Statistics:\n";
echo str_repeat('=', 60) . "\n";
echo "Total documents: " . $documentCount . "\n";
echo "Total pages: " . number_format($totalPages) . "\n";
echo "Total size: " . number_format($totalSize / 1024 / 1024, 2) . " MB\n";
echo "Unique authors: " . count($authors) . "\n";
echo "Average pages per document: " . number_format($averagePages, 1) . "\n";

// Group entries by their (already comma-joined) author string.
$byAuthor = [];
foreach ($metadataCollection as $meta) {
    $byAuthor[$meta['author']][] = $meta;
}

echo "\nDocuments by Author:\n";
echo str_repeat('=', 60) . "\n";
foreach ($byAuthor as $author => $docs) {
    echo "$author: " . count($docs) . " documents\n";
}

/**
 * Lists the quality problems of a single collection entry.
 *
 * A field counts as missing when it is empty or still holds the placeholder
 * assigned while the collection was built ('Untitled', 'Unknown', or a page
 * count of exactly 0).
 *
 * @param array $meta One row of $metadataCollection.
 *
 * @return array Human-readable issue descriptions; empty when none were found.
 */
function validateMetadata(array $meta): array
{
    $issues = [];

    if (empty($meta['title']) || $meta['title'] === 'Untitled') {
        $issues[] = 'Missing title';
    }

    if (empty($meta['author']) || $meta['author'] === 'Unknown') {
        $issues[] = 'Missing author';
    }

    if (empty($meta['created']) || $meta['created'] === 'Unknown') {
        $issues[] = 'Missing creation date';
    }

    if ($meta['pages'] === 0) {
        $issues[] = 'Invalid page count';
    }

    return $issues;
}

echo "\nMetadata Quality Check:\n";
echo str_repeat('=', 60) . "\n";

$incomplete = 0;
foreach ($metadataCollection as $meta) {
    $issues = validateMetadata($meta);
    if (!empty($issues)) {
        $incomplete++;
        echo "{$meta['file']}: " . implode(', ', $issues) . "\n";
    }
}

echo "\nIncomplete metadata: $incomplete/" . $documentCount . " documents\n";
```