Files
fil/e2e/php/tests/SmokeTest.php

157 lines
6.6 KiB
PHP
Raw Permalink Normal View History

2026-06-01 23:40:55 +02:00
<?php
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
declare(strict_types=1);
namespace Kreuzberg\E2e;
use PHPUnit\Framework\TestCase;
use Kreuzberg\Kreuzberg;
use Kreuzberg\ExtractionConfig;
/** E2e tests for category: smoke. */
final class SmokeTest extends TestCase
{
/**
 * OCR: PNG image extraction from raw bytes.
 *
 * Reads the fixture into memory, extracts it via extractBytes() with an empty
 * JSON config, then checks the reported MIME type, that some content came back,
 * and that at least one of the expected words is present.
 *
 * Fixture note: in WASM this exercises the Uint8Array bridge parameter and
 * Promise await in the generated OcrBackend bridge.
 */
public function test_ocr_image_png(): void
{
    $contentBytes = file_get_contents("images/test_hello_world.png");
    if ($contentBytes === false) {
        $this->fail("failed to read fixture: images/test_hello_world.png");
    }

    $config = \Kreuzberg\ExtractionConfig::from_json('{}');
    $result = Kreuzberg::extractBytes($contentBytes, "image/png", $config);

    $this->assertEquals("image/png", trim($result->mimeType));
    $this->assertGreaterThanOrEqual(1, strlen($result->getContent()));

    // Any single needle is enough; both capitalisations of each word are accepted.
    $needles = ["Hello", "World", "hello", "world"];
    $hits = array_filter(
        $needles,
        fn (string $needle): bool => str_contains($result->getContent(), $needle),
    );
    $this->assertTrue($hits !== [], 'expected to contain at least one of the specified values');
}
/**
 * Smoke test: DOCX with formatted text.
 *
 * Extracts docx/fake.docx with an empty JSON config and checks the reported
 * MIME type, a minimum content length of 20 bytes, and that at least one of
 * the expected words appears in the content.
 */
public function test_smoke_docx_basic(): void
{
    $mimeType = "application/vnd.openxmlformats-officedocument.wordprocessingml.document";

    $config = \Kreuzberg\ExtractionConfig::from_json('{}');
    $result = Kreuzberg::extractFile("docx/fake.docx", $mimeType, $config);

    $this->assertEquals($mimeType, trim($result->mimeType));
    $this->assertGreaterThanOrEqual(20, strlen($result->getContent()));

    // Any single needle is enough for the test to pass.
    $needles = ["Lorem", "ipsum", "document", "text"];
    $hits = array_filter(
        $needles,
        fn (string $needle): bool => str_contains($result->getContent(), $needle),
    );
    $this->assertTrue($hits !== [], 'expected to contain at least one of the specified values');
}
/**
 * Smoke test: HTML table extraction.
 *
 * Extracts html/simple_table.html with an empty JSON config and checks the
 * reported MIME type, a minimum content length of 10 bytes, and that at least
 * one of the expected table strings appears in the content.
 */
public function test_smoke_html_basic(): void
{
    $mimeType = "text/html";

    $config = \Kreuzberg\ExtractionConfig::from_json('{}');
    $result = Kreuzberg::extractFile("html/simple_table.html", $mimeType, $config);

    $this->assertEquals($mimeType, trim($result->mimeType));
    $this->assertGreaterThanOrEqual(10, strlen($result->getContent()));

    // Any single needle is enough for the test to pass.
    $needles = ["Sample Data Table", "Laptop", "Electronics", "Product"];
    $hits = array_filter(
        $needles,
        fn (string $needle): bool => str_contains($result->getContent(), $needle),
    );
    $this->assertTrue($hits !== [], 'expected to contain at least one of the specified values');
}
/**
 * Smoke test: PNG image (without OCR, metadata only).
 *
 * Builds a config with `disableOcr` set to true, extracts images/sample.png
 * while passing null as the MIME type argument, and checks only that the
 * result reports "image/png".
 */
public function test_smoke_image_png(): void
{
    // JSON_THROW_ON_ERROR: without it json_encode() signals failure by returning
    // false, which would then be handed to from_json() in place of a JSON string.
    // With the flag, an encoding problem surfaces immediately as a JsonException.
    $configJson = json_encode(["disableOcr" => true], JSON_THROW_ON_ERROR);
    $config = \Kreuzberg\ExtractionConfig::from_json($configJson);

    $result = Kreuzberg::extractFile("images/sample.png", null, $config);

    $this->assertEquals("image/png", trim($result->mimeType));
}
/**
 * Smoke test: JSON file extraction.
 *
 * Extracts json/simple.json with an empty JSON config and checks the reported
 * MIME type and a minimum content length of 5 bytes.
 */
public function test_smoke_json_basic(): void
{
    $mimeType = "application/json";

    $config = \Kreuzberg\ExtractionConfig::from_json('{}');
    $result = Kreuzberg::extractFile("json/simple.json", $mimeType, $config);

    $this->assertEquals($mimeType, trim($result->mimeType));
    $this->assertGreaterThanOrEqual(5, strlen($result->getContent()));
}
/**
 * Smoke test: PDF with simple text extraction.
 *
 * Extracts pdf/fake_memo.pdf with an empty JSON config and checks the reported
 * MIME type, a minimum content length of 50 bytes, and that at least one of
 * the expected memo phrases appears in the content.
 */
public function test_smoke_pdf_basic(): void
{
    $mimeType = "application/pdf";

    $config = \Kreuzberg\ExtractionConfig::from_json('{}');
    $result = Kreuzberg::extractFile("pdf/fake_memo.pdf", $mimeType, $config);

    $this->assertEquals($mimeType, trim($result->mimeType));
    $this->assertGreaterThanOrEqual(50, strlen($result->getContent()));

    // Either phrase on its own is enough for the test to pass.
    $needles = ["May 5, 2023", "To Whom it May Concern"];
    $hits = array_filter(
        $needles,
        fn (string $needle): bool => str_contains($result->getContent(), $needle),
    );
    $this->assertTrue($hits !== [], 'expected to contain at least one of the specified values');
}
/**
 * Smoke test: Plain text file.
 *
 * Extracts text/report.txt with an empty JSON config and checks the reported
 * MIME type and a minimum content length of 5 bytes.
 */
public function test_smoke_txt_basic(): void
{
    $mimeType = "text/plain";

    $config = \Kreuzberg\ExtractionConfig::from_json('{}');
    $result = Kreuzberg::extractFile("text/report.txt", $mimeType, $config);

    $this->assertEquals($mimeType, trim($result->mimeType));
    $this->assertGreaterThanOrEqual(5, strlen($result->getContent()));
}
/**
 * Smoke test: XLSX with basic spreadsheet data including tables.
 *
 * Extracts xlsx/stanley_cups.xlsx with an empty JSON config and checks the
 * reported MIME type, a minimum content length of 100 bytes, and that every
 * expected header, team name, and abbreviation appears in the content.
 * The table and Excel-metadata checks are skipped (see the notes at the end).
 */
public function test_smoke_xlsx_basic(): void
{
    $mimeType = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet";

    $config = \Kreuzberg\ExtractionConfig::from_json('{}');
    $result = Kreuzberg::extractFile("xlsx/stanley_cups.xlsx", $mimeType, $config);

    $this->assertEquals($mimeType, trim($result->mimeType));
    $this->assertGreaterThanOrEqual(100, strlen($result->getContent()));

    // Unlike the "at least one" checks elsewhere, every fragment here is required.
    $requiredFragments = [
        "Team",
        "Location",
        "Stanley Cups",
        "Blues",
        "Flyers",
        "Maple Leafs",
        "STL",
        "PHI",
        "TOR",
    ];
    foreach ($requiredFragments as $fragment) {
        $this->assertStringContainsString($fragment, $result->getContent());
    }

    // skipped: field 'tables' not available on result type
    // skipped: field 'metadata.format.excel.sheet_count' not available on result type
    // skipped: field 'metadata.format.excel.sheet_names' not available on result type
}
}