```php title="tables.php"
// Runs extraction on 'document.pdf'.
// NOTE(review): the return value is discarded here, yet every statement below
// reads $result, which nothing in the visible snippet assigns. The opening of
// this example (the `<?php` tag, any imports, and presumably a
// `$result = <client>->` prefix in front of this call) appears to have been
// lost — TODO restore it from the original example before running.
extractFile('document.pdf');
// Print a summary of every extracted table: its size, optional page number,
// Markdown rendering, and a short preview of the raw cell rows.
echo "Table Extraction Results:\n";
echo str_repeat('=', 60) . "\n";
echo "Tables found: " . count($result->tables) . "\n\n";

$thinRule = str_repeat('-', 40) . "\n";
$previewLimit = 3;

foreach ($result->tables as $tableIndex => $table) {
    echo "Table " . ($tableIndex + 1) . ":\n";
    echo $thinRule;

    // The column count is read from the first row only; an empty grid reports 0.
    $rowCount = count($table->cells);
    $colCount = 0;
    if (!empty($table->cells)) {
        $colCount = count($table->cells[0]);
    }
    echo " Dimensions: $rowCount rows × $colCount columns\n";

    if (isset($table->pageNumber)) {
        echo " Page: {$table->pageNumber}\n";
    }
    echo "\n";

    echo " Markdown representation:\n";
    echo $thinRule;
    echo $table->markdown . "\n\n";

    echo " Raw data preview:\n";
    echo $thinRule;
    foreach (array_slice($table->cells, 0, $previewLimit) as $rowIndex => $row) {
        echo " Row " . ($rowIndex + 1) . ": [" . implode(' | ', $row) . "]\n";
    }

    // Mention how many rows were left out of the preview, if any.
    $hiddenRows = $rowCount - $previewLimit;
    if ($hiddenRows > 0) {
        echo " ... and " . $hiddenRows . " more rows\n";
    }
    echo "\n";
}
// Write each table's cell grid to ./exported_tables/table_<n>.csv, one CSV
// record per table row.
echo "Exporting Tables to CSV:\n";
echo str_repeat('=', 60) . "\n";

$outputDir = './exported_tables';
// Create the output directory (including missing parents) when it is absent.
if (is_dir($outputDir) === false) {
    mkdir($outputDir, 0755, true);
}

foreach ($result->tables as $index => $table) {
    $filename = sprintf('table_%d.csv', $index + 1);
    $fp = fopen($outputDir . '/' . $filename, 'w');

    // Report the failure and move on to the next table if the file cannot be opened.
    if ($fp === false) {
        echo "Error: Failed to create $filename\n";
        continue;
    }

    foreach ($table->cells as $row) {
        fputcsv($fp, $row);
    }
    fclose($fp);
    echo "Saved: $filename\n";
}
echo "\n";
// Export each table as a pretty-printed JSON document containing its 1-based
// index, page number (null when absent), dimensions, raw cells and Markdown.
// $outputDir is the directory prepared by the CSV export step above.
echo "Exporting Tables to JSON:\n";
echo str_repeat('=', 60) . "\n";

foreach ($result->tables as $index => $table) {
    $filename = sprintf('table_%d.json', $index + 1);
    $filepath = $outputDir . '/' . $filename;

    $tableData = [
        'index' => $index + 1,
        'page' => $table->pageNumber ?? null,
        'dimensions' => [
            'rows' => count($table->cells),
            // Column count is taken from the first row; 0 for an empty grid.
            'columns' => !empty($table->cells) ? count($table->cells[0]) : 0,
        ],
        'data' => $table->cells,
        'markdown' => $table->markdown,
    ];

    // json_encode() returns false on failure (e.g. malformed UTF-8 in a cell);
    // without this check an empty file would be written and reported as saved.
    $json = json_encode($tableData, JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE);
    if ($json === false) {
        echo "Error: Failed to encode $filename (" . json_last_error_msg() . ")\n";
        continue;
    }

    // Only report success when the write actually happened, matching the CSV step.
    if (file_put_contents($filepath, $json) === false) {
        echo "Error: Failed to create $filename\n";
        continue;
    }

    echo "Saved: $filename\n";
}
echo "\n";
/**
 * Render a table's cell grid as an HTML <table> fragment.
 *
 * The row stored under key 0 is emitted with <th> cells; every other row uses
 * <td>. Cell text is HTML-escaped, so markup inside a cell is shown literally.
 *
 * @param ExtractedTable $table Table whose `cells` property is iterated row by row.
 *
 * @return string The <table> element, without a trailing newline.
 */
function tableToHtml(ExtractedTable $table): string
{
    $html = "<table>\n";

    foreach ($table->cells as $rowIndex => $row) {
        $tag = $rowIndex === 0 ? 'th' : 'td';

        $html .= "  <tr>\n";
        foreach ($row as $cell) {
            // Cast first so non-string cells (null, numbers) do not raise a TypeError;
            // ENT_SUBSTITUTE keeps the cell instead of blanking it on invalid UTF-8.
            $escapedCell = htmlspecialchars((string) $cell, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
            $html .= "    <$tag>$escapedCell</$tag>\n";
        }
        $html .= "  </tr>\n";
    }

    $html .= "</table>";

    return $html;
}
// Export each table as a standalone HTML page: a minimal document skeleton
// with a "Table <n>" title and heading wrapped around the tableToHtml() output.
// $outputDir is the directory prepared by the CSV export step above.
echo "Exporting Tables to HTML:\n";
echo str_repeat('=', 60) . "\n";

foreach ($result->tables as $index => $table) {
    $filename = sprintf('table_%d.html', $index + 1);
    $filepath = $outputDir . '/' . $filename;
    $title = 'Table ' . ($index + 1);

    // The page markup was missing its tags, which left only stray whitespace
    // and the bare title text in the output; emit a complete document instead.
    $html = "<!DOCTYPE html>\n";
    $html .= "<html>\n<head>\n";
    $html .= "  <meta charset=\"UTF-8\">\n";
    $html .= "  <title>$title</title>\n";
    $html .= "</head>\n<body>\n";
    $html .= "  <h1>$title</h1>\n";
    $html .= tableToHtml($table) . "\n";
    $html .= "</body>\n</html>\n";

    // Only report success when the write actually happened, matching the CSV step.
    if (file_put_contents($filepath, $html) === false) {
        echo "Error: Failed to create $filename\n";
        continue;
    }

    echo "Saved: $filename\n";
}
echo "\n";
// For each table, count total, empty and numeric cells, print their shares,
// and label the table by how much of it is numeric (>50% numeric, >20% mixed,
// otherwise text).
echo "Table Analysis:\n";
echo str_repeat('=', 60) . "\n";

foreach ($result->tables as $index => $table) {
    echo "Table " . ($index + 1) . " Analysis:\n";

    $totalCells = 0;
    $emptyCells = 0;
    $numericCells = 0;

    foreach ($table->cells as $row) {
        foreach ($row as $cell) {
            $totalCells++;

            // Compare against '' explicitly: empty() also treats the string "0"
            // as empty, which miscounted zero-valued cells as blank.
            if (trim((string) $cell) === '') {
                $emptyCells++;
            }
            if (is_numeric($cell)) {
                $numericCells++;
            }
        }
    }

    // Guard the divisions so a table without cells reports 0.0% instead of failing.
    $denominator = max($totalCells, 1);
    $emptyRatio = $emptyCells / $denominator;
    $numericRatio = $numericCells / $denominator;

    echo " Total cells: $totalCells\n";
    echo " Empty cells: $emptyCells (" . number_format($emptyRatio * 100, 1) . "%)\n";
    echo " Numeric cells: $numericCells (" . number_format($numericRatio * 100, 1) . "%)\n";

    $tableType = match (true) {
        $numericRatio > 0.5 => 'Data/Numeric Table',
        $numericRatio > 0.2 => 'Mixed Content Table',
        default => 'Text Table',
    };
    echo " Table type: $tableType\n\n";
}
/**
 * Convert a table into a list of records keyed by the first row's values.
 *
 * The first row is used as the header; each remaining row becomes one record
 * mapping header value => cell at the same position. Cells missing from a
 * short row are filled with ''. If two header cells hold the same value, the
 * later column overwrites the earlier one in each record.
 *
 * The table passed in is left untouched.
 *
 * @param ExtractedTable $table Table whose `cells` property holds the rows.
 *
 * @return array<int, array<array-key, mixed>> One record per data row; [] when the table has no rows.
 */
function tableToAssociativeArray(ExtractedTable $table): array
{
    // Work on a local copy: calling array_shift() directly on $table->cells
    // removed the header row from the caller's table object as a side effect.
    $rows = $table->cells;

    if (empty($rows)) {
        return [];
    }

    $headers = array_shift($rows);

    $data = [];
    foreach ($rows as $row) {
        $rowData = [];
        foreach ($headers as $index => $header) {
            $rowData[$header] = $row[$index] ?? '';
        }
        $data[] = $rowData;
    }

    return $data;
}
// Show the first extracted table as header-keyed records: print up to three
// of them as JSON and note how many more there are.
if (!empty($result->tables)) {
    $associativeData = tableToAssociativeArray($result->tables[0]);
    $recordTotal = count($associativeData);
    $sample = array_slice($associativeData, 0, 3);

    echo "First Table as Associative Array:\n";
    echo str_repeat('=', 60) . "\n";
    echo json_encode($sample, JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE) . "\n";

    if ($recordTotal > 3) {
        echo "... and " . ($recordTotal - 3) . " more records\n";
    }
}
```