Files
fil/docs/snippets/php/plugins/pdf_metadata_extractor.md
Henrik Jess Nielsen b4c07d3693
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s
Nomad changes
2026-06-01 23:40:55 +02:00

1.5 KiB

<?php declare(strict_types=1);

use Kreuzberg\Kreuzberg;

/**
 * Example post-processor that tags PDF extraction results with two metadata
 * entries: the processor name and the time the entry was written.
 *
 * Results whose `mime_type` is anything other than `application/pdf` are left
 * untouched.
 */
class PdfMetadataExtractor implements PostProcessor {
    /** Identifier returned by name() and written into the result metadata. */
    private const PROCESSOR_NAME = 'pdf-metadata-extractor';

    /** The only MIME type this processor acts on. */
    private const PDF_MIME_TYPE = 'application/pdf';

    /**
     * @return string The identifier of this processor.
     */
    public function name(): string {
        return self::PROCESSOR_NAME;
    }

    /**
     * @return string The version string of this processor.
     */
    public function version(): string {
        return '1.0.0';
    }

    /**
     * Initialization hook; intentionally a no-op in this example.
     */
    public function initialize(): void {
        // Load PDF parsing libraries if needed
    }

    /**
     * Shutdown hook; intentionally a no-op in this example.
     */
    public function shutdown(): void {
        // Cleanup resources
    }

    /**
     * Adds `pdf_processor` and `extracted_at` entries to the result metadata.
     *
     * Nothing happens when the result is not a PDF, or when `metadata` is
     * already set to something that is not an array.
     *
     * @param object $result Extraction result; `mime_type` is read and
     *                       `metadata` is read and written.
     * @param object $config Not used by this processor.
     */
    public function process(object &$result, object $config): void {
        $isPdf = $result->mime_type === self::PDF_MIME_TYPE;
        if (!$isPdf) {
            return;
        }

        // Start from an empty array when metadata is missing or null.
        if (!isset($result->metadata)) {
            $result->metadata = [];
        }

        // Non-array metadata is deliberately left alone.
        if (!is_array($result->metadata)) {
            return;
        }

        $annotations = [
            'pdf_processor' => self::PROCESSOR_NAME,
            'extracted_at' => date('Y-m-d H:i:s'),
        ];

        // array_merge lets these entries overwrite existing string keys of the same name.
        $result->metadata = array_merge($result->metadata, $annotations);
    }

    /**
     * @return string The stage label for this processor ("Middle").
     */
    public function processingStage(): string {
        return 'Middle';
    }

    /**
     * Tells whether the given result is a PDF.
     *
     * @param object $result Extraction result; only `mime_type` is read.
     * @param object $config Not used by this processor.
     * @return bool True only when `mime_type` is exactly `application/pdf`.
     */
    public function shouldProcess(object $result, object $config): bool {
        return $result->mime_type === self::PDF_MIME_TYPE;
    }

    /**
     * @param object $result Not inspected; the estimate is a fixed value.
     * @return int Always 10.
     */
    public function estimatedDurationMs(object $result): int {
        return 10;
    }

    /**
     * @return int The fixed priority value of this processor (60).
     */
    public function priority(): int {
        return 60;
    }
}

// Create the extractor and pass it to Kreuzberg::registerPostProcessor().
$processor = new PdfMetadataExtractor;
Kreuzberg::registerPostProcessor($processor);

// Report on standard output that the registration call has returned.
print "PDF metadata extractor registered\n";