This commit is contained in:
84
e2e/php/tests/FormatSpecificTest.php
generated
Normal file
84
e2e/php/tests/FormatSpecificTest.php
generated
Normal file
@@ -0,0 +1,84 @@
|
||||
<?php
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace Kreuzberg\E2e;
|
||||
|
||||
use PHPUnit\Framework\TestCase;
|
||||
use Kreuzberg\Kreuzberg;
|
||||
use Kreuzberg\ExtractionConfig;
|
||||
|
||||
/**
 * E2e tests for category: format_specific.
 *
 * Every test hands one fixture document to Kreuzberg together with a
 * configuration built from the empty JSON object '{}'. Fixture paths are
 * relative, so PHP resolves them against the working directory of the
 * PHPUnit process.
 */
final class FormatSpecificTest extends TestCase
{
    /** Standalone DOCX extraction using extract_bytes_sync */
    public function test_format_docx_standalone(): void
    {
        $result = Kreuzberg::extractBytesSync(
            $this->readFixture('docx/fake.docx'),
            'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
            ExtractionConfig::from_json('{}'),
        );

        // strlen() counts bytes, so this is a minimum byte length, not a character count.
        self::assertGreaterThanOrEqual(20, strlen($result->getContent()));
    }

    /** Standalone HWPX extraction using extract_bytes_sync */
    public function test_format_hwpx_standalone(): void
    {
        $result = Kreuzberg::extractBytesSync(
            $this->readFixture('hwpx/simple.hwpx'),
            'application/haansofthwpx',
            ExtractionConfig::from_json('{}'),
        );

        $content = $result->getContent();

        self::assertGreaterThanOrEqual(20, strlen($content));
        self::assertStringContainsString('Hello from HWPX', $content);
    }

    /** Standalone PDF text extraction using extract_bytes_sync */
    public function test_format_pdf_text(): void
    {
        $result = Kreuzberg::extractBytesSync(
            $this->readFixture('pdf/fake_memo.pdf'),
            'application/pdf',
            ExtractionConfig::from_json('{}'),
        );

        $content = $result->getContent();

        self::assertGreaterThanOrEqual(50, strlen($content));

        // Either marker is sufficient; the test only fails when both are absent.
        $found = str_contains($content, 'Mallori') || str_contains($content, 'May');
        self::assertTrue($found, 'expected to contain at least one of the specified values');
    }

    /** PPTX presentation extraction using extract_file_sync */
    public function test_format_pptx(): void
    {
        $this->expectNotToPerformAssertions();

        // Smoke test: the return value is deliberately discarded, nothing is asserted on it.
        Kreuzberg::extractFileSync(
            'pptx/simple.pptx',
            'application/vnd.openxmlformats-officedocument.presentationml.presentation',
            ExtractionConfig::from_json('{}'),
        );
    }

    /** XLSX spreadsheet extraction using extract_file_sync */
    public function test_format_xlsx(): void
    {
        $this->expectNotToPerformAssertions();

        // Smoke test: the return value is deliberately discarded, nothing is asserted on it.
        Kreuzberg::extractFileSync(
            'xlsx/stanley_cups.xlsx',
            'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
            ExtractionConfig::from_json('{}'),
        );
    }

    /**
     * Read a fixture file into memory, failing the current test if it cannot be read.
     *
     * @param string $path Fixture path, passed to file_get_contents() unchanged.
     *
     * @return string Raw bytes of the fixture.
     */
    private function readFixture(string $path): string
    {
        $contentBytes = file_get_contents($path);

        // file_get_contents() signals failure with false; an empty file yields '' and is accepted.
        if ($contentBytes === false) {
            $this->fail('failed to read fixture: ' . $path);
        }

        return $contentBytes;
    }
}
|
||||
Reference in New Issue
Block a user