fail("failed to read fixture: docx/fake.docx");
        }
        $result = Kreuzberg::extractBytesSync($contentBytes, "application/vnd.openxmlformats-officedocument.wordprocessingml.document", \Kreuzberg\ExtractionConfig::from_json('{}'));
        $this->assertGreaterThanOrEqual(20, strlen($result->getContent()));
    }

    /**
     * Standalone HWPX extraction using extract_bytes_sync.
     *
     * Reads the hwpx/simple.hwpx fixture into memory, extracts it with the
     * "application/haansofthwpx" MIME type and a config built from an empty
     * JSON object, then checks that the extracted content is at least 20 bytes
     * long and contains the phrase "Hello from HWPX".
     *
     * NOTE(review): the fixture path is relative, so this presumably relies on
     * the working directory being the fixtures root — confirm against the
     * suite's setup code.
     */
    public function test_format_hwpx_standalone(): void
    {
        $contentBytes = file_get_contents("hwpx/simple.hwpx");
        if ($contentBytes === false) {
            $this->fail("failed to read fixture: hwpx/simple.hwpx");
        }

        $result = Kreuzberg::extractBytesSync(
            $contentBytes,
            "application/haansofthwpx",
            \Kreuzberg\ExtractionConfig::from_json('{}'),
        );
        $content = $result->getContent();

        // strlen() counts bytes, so the threshold is a byte length, not a character count.
        $this->assertGreaterThanOrEqual(20, strlen($content));
        $this->assertStringContainsString("Hello from HWPX", $content);
    }

    /**
     * Standalone PDF text extraction using extract_bytes_sync.
     *
     * Reads the pdf/fake_memo.pdf fixture into memory, extracts it with the
     * "application/pdf" MIME type and a config built from an empty JSON
     * object, then checks that the extracted content is at least 50 bytes long
     * and contains at least one of the marker words "Mallori" or "May".
     */
    public function test_format_pdf_text(): void
    {
        $contentBytes = file_get_contents("pdf/fake_memo.pdf");
        if ($contentBytes === false) {
            $this->fail("failed to read fixture: pdf/fake_memo.pdf");
        }

        $result = Kreuzberg::extractBytesSync(
            $contentBytes,
            "application/pdf",
            \Kreuzberg\ExtractionConfig::from_json('{}'),
        );
        $content = $result->getContent();

        $this->assertGreaterThanOrEqual(50, strlen($content));

        // Either marker word is sufficient; both comparisons are case-sensitive.
        $found = str_contains($content, "Mallori") || str_contains($content, "May");
        $this->assertTrue($found, 'expected to contain at least one of the specified values');
    }

    /**
     * PPTX presentation extraction using extract_file_sync.
     *
     * Smoke test: extracts pptx/simple.pptx by path and makes no assertions
     * about the result, so it only fails if the extraction call itself throws.
     */
    public function test_format_pptx(): void
    {
        $this->expectNotToPerformAssertions();

        // The return value is intentionally discarded; nothing is asserted on it.
        Kreuzberg::extractFileSync(
            "pptx/simple.pptx",
            "application/vnd.openxmlformats-officedocument.presentationml.presentation",
            \Kreuzberg\ExtractionConfig::from_json('{}'),
        );
    }

    /**
     * XLSX spreadsheet extraction using extract_file_sync.
     *
     * Smoke test: extracts xlsx/stanley_cups.xlsx by path and makes no
     * assertions about the result, so it only fails if the extraction call
     * itself throws.
     */
    public function test_format_xlsx(): void
    {
        $this->expectNotToPerformAssertions();

        // The return value is intentionally discarded; nothing is asserted on it.
        Kreuzberg::extractFileSync(
            "xlsx/stanley_cups.xlsx",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            \Kreuzberg\ExtractionConfig::from_json('{}'),
        );
    }
}