```php title="disk_cache.php"
<?php

declare(strict_types=1);

// NOTE(review): the opening of this snippet (everything before the constructor
// body) was missing from the source. The class header, property declarations and
// constructor signature below are reconstructed from how the class is used further
// down -- confirm the $ttl default, and restore the autoloader `require` plus the
// `use` imports for Kreuzberg, ExtractionConfig and ExtractionResult.

/**
 * Disk-backed cache for extraction results.
 *
 * Each entry is a single `<key>.cache` file holding a serialized ExtractionResult.
 * The key is derived from the file path, the file's content hash, the extraction
 * config and (optionally) the MIME type override, so changing any of them yields
 * a different entry.
 */
class DiskCache
{
    private string $cacheDir;

    private int $ttl;

    /**
     * @param string|null $cacheDir Directory for cache files; defaults to
     *                              `kreuzberg_cache` under the system temp directory.
     * @param int         $ttl      Entries older than this many seconds are treated as expired.
     */
    public function __construct(?string $cacheDir = null, int $ttl = 3600)
    {
        $this->cacheDir = $cacheDir ?? sys_get_temp_dir() . '/kreuzberg_cache';
        $this->ttl = $ttl;

        if (!is_dir($this->cacheDir)) {
            mkdir($this->cacheDir, 0755, true);
        }
    }

    /**
     * Builds the cache key for a file/config/MIME-type combination.
     *
     * @throws JsonException If the config array cannot be JSON-encoded.
     */
    private function getCacheKey(string $filePath, ExtractionConfig $config, ?string $mimeType = null): string
    {
        $fileHash = md5_file($filePath);

        // JSON_THROW_ON_ERROR: an unencodable config must not silently hash as an
        // empty string and collide with every other unencodable config.
        $configHash = md5(json_encode($config->toArray(), JSON_THROW_ON_ERROR));

        // A null MIME type contributes nothing, so keys stay identical for callers
        // that never pass one.
        return md5($filePath . $fileHash . $configHash . ($mimeType ?? ''));
    }

    /**
     * Maps a cache key to the path of its entry file.
     */
    private function getCachePath(string $key): string
    {
        return $this->cacheDir . '/' . $key . '.cache';
    }

    /**
     * Lists the paths of all entry files currently in the cache directory.
     *
     * @return array<int, string>
     */
    private function getCacheFiles(): array
    {
        // glob() returns false on error; treat that the same as "no entries" so
        // callers can safely iterate and count the result.
        return glob($this->cacheDir . '/*.cache') ?: [];
    }

    /**
     * Returns the cached result, or null when there is no usable entry.
     *
     * An expired entry is deleted as a side effect of the lookup.
     *
     * @param string|null $mimeType MIME type override used for the extraction, if any.
     *
     * @throws JsonException If the config array cannot be JSON-encoded.
     */
    public function get(string $filePath, ExtractionConfig $config, ?string $mimeType = null): ?ExtractionResult
    {
        $cachePath = $this->getCachePath($this->getCacheKey($filePath, $config, $mimeType));

        if (!file_exists($cachePath)) {
            return null;
        }

        $modifiedAt = filemtime($cachePath);
        if ($modifiedAt === false) {
            // The entry disappeared between the existence check and the stat.
            return null;
        }

        if (time() - $modifiedAt > $this->ttl) {
            unlink($cachePath);

            return null;
        }

        $data = file_get_contents($cachePath);
        if ($data === false) {
            return null;
        }

        // NOTE(security): unserialize() instantiates whatever classes the payload
        // names. The default cache directory lives under the system temp directory;
        // if other local users can write there, point $cacheDir at a private
        // location instead.
        $cached = unserialize($data);

        return $cached instanceof ExtractionResult ? $cached : null;
    }

    /**
     * Stores a result under the key for the given file/config/MIME-type combination.
     *
     * @param string|null $mimeType MIME type override used for the extraction, if any.
     *
     * @throws JsonException If the config array cannot be JSON-encoded.
     */
    public function set(
        string $filePath,
        ExtractionConfig $config,
        ExtractionResult $result,
        ?string $mimeType = null,
    ): void {
        $cachePath = $this->getCachePath($this->getCacheKey($filePath, $config, $mimeType));

        // LOCK_EX keeps two concurrent writers from interleaving their output.
        file_put_contents($cachePath, serialize($result), LOCK_EX);
    }

    /**
     * Deletes every entry file in the cache directory.
     */
    public function clear(): void
    {
        foreach ($this->getCacheFiles() as $file) {
            unlink($file);
        }
    }

    /**
     * Reports the number of entries, their combined size and the cache directory.
     *
     * @return array{total_entries: int, cache_size_bytes: int, cache_dir: string}
     */
    public function getStats(): array
    {
        $files = $this->getCacheFiles();

        $totalSize = 0;
        foreach ($files as $file) {
            // filesize() yields false if the entry was removed mid-scan; count it as 0 bytes.
            $totalSize += filesize($file) ?: 0;
        }

        return [
            'total_entries' => count($files),
            'cache_size_bytes' => $totalSize,
            'cache_dir' => $this->cacheDir,
        ];
    }
}

/**
 * Runs one cache-aware extraction and prints its status, elapsed time and content length.
 */
function demoCachedExtraction(Kreuzberg $kreuzberg, DiskCache $cache, string $file, ExtractionConfig $config): void
{
    $start = microtime(true);

    $result = $cache->get($file, $config);
    if ($result === null) {
        $result = $kreuzberg->extractFile($file, config: $config);
        $cache->set($file, $config, $result);
        echo " Status: Extracted and cached\n";
    } else {
        echo " Status: Retrieved from cache\n";
    }

    $elapsed = microtime(true) - $start;
    echo " Time: " . number_format($elapsed, 4) . "s\n";
    echo " Content length: " . strlen($result->content) . " chars\n\n";
}

$cache = new DiskCache();
$kreuzberg = new Kreuzberg();
$config = new ExtractionConfig();
$file = 'document.pdf';

echo "First extraction (will be cached)...\n";
demoCachedExtraction($kreuzberg, $cache, $file, $config);

echo "Second extraction (from cache)...\n";
demoCachedExtraction($kreuzberg, $cache, $file, $config);

$stats = $cache->getStats();
echo "Cache Statistics:\n";
echo str_repeat('=', 60) . "\n";
echo "Total entries: {$stats['total_entries']}\n";
echo "Cache size: " . number_format($stats['cache_size_bytes'] / 1024 / 1024, 2) . " MB\n";
echo "Cache directory: {$stats['cache_dir']}\n\n";

/**
 * Wrapper around Kreuzberg::extractFile() that consults a DiskCache first.
 */
class CachedKreuzberg
{
    public function __construct(
        private Kreuzberg $kreuzberg,
        private DiskCache $cache,
    ) {
    }

    /**
     * Returns the cached result for the file if present; otherwise extracts it,
     * stores the result and returns it.
     *
     * @param string|null           $mimeType MIME type override passed through to the extractor
     *                                        and included in the cache key.
     * @param ExtractionConfig|null $config   Extraction config; a default-constructed one is used when null.
     */
    public function extractFile(
        string $filePath,
        ?string $mimeType = null,
        ?ExtractionConfig $config = null,
    ): ExtractionResult {
        $config ??= new ExtractionConfig();

        // The MIME type is part of the lookup so the same file extracted under two
        // different overrides does not share one cached result.
        $result = $this->cache->get($filePath, $config, $mimeType);
        if ($result === null) {
            $result = $this->kreuzberg->extractFile($filePath, $mimeType, $config);
            $this->cache->set($filePath, $config, $result, $mimeType);
        }

        return $result;
    }

    /**
     * Removes every entry from the underlying cache.
     */
    public function clearCache(): void
    {
        $this->cache->clear();
    }

    /**
     * Returns the underlying cache's statistics.
     *
     * @return array{total_entries: int, cache_size_bytes: int, cache_dir: string}
     */
    public function getCacheStats(): array
    {
        return $this->cache->getStats();
    }
}

$cachedKreuzberg = new CachedKreuzberg(
    new Kreuzberg(),
    new DiskCache(),
);

echo "Using CachedKreuzberg wrapper:\n";
echo str_repeat('=', 60) . "\n";

$files = ['doc1.pdf', 'doc2.pdf', 'doc3.pdf'];
foreach ($files as $file) {
    if (!file_exists($file)) {
        continue;
    }

    $start = microtime(true);
    $result = $cachedKreuzberg->extractFile($file);
    $elapsed = microtime(true) - $start;

    echo "$file: " . number_format($elapsed, 4) . "s\n";
}

echo "\nCache stats:\n";
$stats = $cachedKreuzberg->getCacheStats();
print_r($stats);

/**
 * Deletes cache entries whose modification time is more than $maxAge seconds old.
 *
 * @param int $maxAge Maximum entry age in seconds (defaults to seven days).
 *
 * @return int Number of entries actually removed.
 */
function cleanupCache(DiskCache $cache, int $maxAge = 7 * 86400): int
{
    $cacheDir = $cache->getStats()['cache_dir'];

    // glob() returns false on error; fall back to an empty list.
    $files = glob($cacheDir . '/*.cache') ?: [];
    $now = time();

    $deleted = 0;
    foreach ($files as $file) {
        $modifiedAt = filemtime($file);

        // Skip entries that vanished mid-scan, and count only successful deletions.
        if ($modifiedAt !== false && $now - $modifiedAt > $maxAge && unlink($file)) {
            $deleted++;
        }
    }

    return $deleted;
}

$deleted = cleanupCache($cache, 7 * 86400);
echo "\nCleaned up $deleted old cache entries\n";
```