Nomad changes
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s

This commit is contained in:
Henrik Jess Nielsen
2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions

26
e2e/php/bootstrap.php generated Normal file
View File

@@ -0,0 +1,26 @@
<?php
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
declare(strict_types=1);
// Mandatory: the e2e project's own Composer autoloader, which brings in
// PHPUnit and the test helpers.
require_once __DIR__ . '/vendor/autoload.php';

// Optional: the PHP binding package ships a Composer autoloader of its own.
// It is kept apart from the e2e autoloader because the php-ext package type
// rules out a direct Composer path dependency, so it is pulled in only when
// the file is actually present.
$bindingAutoloader = __DIR__ . '/../../packages/php/vendor/autoload.php';
if (file_exists($bindingAutoloader) === true) {
    require_once $bindingAutoloader;
}

// Fixture paths such as "pdf/fake_memo.pdf" are relative. When the
// test-documents directory exists, make it the working directory so those
// paths resolve while phpunit is launched from e2e/php/.
$fixtureRoot = __DIR__ . '/../../test_documents';
is_dir($fixtureRoot) && chdir($fixtureRoot);

22
e2e/php/composer.json generated Normal file
View File

@@ -0,0 +1,22 @@
{
"name": "kreuzberg-dev/e2e-php",
"description": "E2e tests for PHP bindings",
"type": "project",
"require-dev": {
"phpunit/phpunit": "^13.1",
"guzzlehttp/guzzle": "^7.0"
},
"autoload": {
"psr-4": {
"Kreuzberg\\": "../../packages/php/src/"
}
},
"autoload-dev": {
"psr-4": {
"Kreuzberg\\E2e\\": "tests/"
}
},
"scripts": {
"test": "php run_tests.php"
}
}

2470
e2e/php/composer.lock generated Normal file

File diff suppressed because it is too large Load Diff

4
e2e/php/php.ini generated Normal file
View File

@@ -0,0 +1,4 @@
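; Temporary PHP INI for e2e tests — loads the kreuzberg PHP extension from the system extension directory.
; NOTE(review): the extension_dir below is a Homebrew-style path and the extension is a .dylib, so this
; file looks specific to one macOS machine — confirm before relying on it elsewhere.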
[PHP]
extension_dir=/opt/homebrew/lib/php/pecl/20240924
extension=libkreuzberg_php.dylib

13
e2e/php/phpunit.xml generated Normal file
View File

@@ -0,0 +1,13 @@
<?xml version="1.0" encoding="UTF-8"?>
<phpunit xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:noNamespaceSchemaLocation="https://schema.phpunit.de/13.1/phpunit.xsd"
bootstrap="bootstrap.php"
colors="true"
failOnRisky="true"
failOnWarning="true">
<testsuites>
<testsuite name="e2e">
<directory>tests</directory>
</testsuite>
</testsuites>
</phpunit>

47
e2e/php/run_tests.php generated Executable file
View File

@@ -0,0 +1,47 @@
#!/usr/bin/env php
<?php
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
declare(strict_types=1);
// Shared-library suffix of the locally built extension: `.dylib` on macOS,
// `.so` everywhere else.
$extSuffix = match (PHP_OS_FAMILY) {
    'Darwin' => '.dylib',
    default => '.so',
};
$extPath = __DIR__ . '/../../target/release/libkreuzberg_php' . $extSuffix;
$phpunitPath = __DIR__ . '/vendor/bin/phpunit';

// Verify PHPUnit is installed before choosing a code path. Both the re-exec
// branch and the in-process fallback need it, and checking here gives the
// same actionable message in either case instead of a raw interpreter error
// from the child process.
if (!file_exists($phpunitPath)) {
    echo "PHPUnit not found at $phpunitPath. Run 'composer install' first.\n";
    exit(1);
}

// If the locally-built extension exists and we have not already restarted with it,
// re-exec PHP with the freshly-built extension loaded explicitly via `-d extension=`.
// The system php.ini is kept (no `-n`) so PHPUnit's required extensions — dom, json,
// libxml, mbstring, tokenizer, xml, xmlwriter — remain available. `-n` drops every
// shared module, which breaks PHPUnit on distributions that ship those as shared
// extensions (e.g. Debian/Ubuntu); they only survive `-n` where compiled statically.
if (file_exists($extPath) && !getenv('ALEF_PHP_LOCAL_EXT_LOADED')) {
    // Exported to the child's environment; guards against restarting again.
    putenv('ALEF_PHP_LOCAL_EXT_LOADED=1');

    // Child command line: same PHP binary, the extension override, PHPUnit,
    // then every argument that was passed to this script.
    $cmd = array_merge(
        [PHP_BINARY, '-d', 'extension=' . $extPath],
        [$phpunitPath],
        array_slice($GLOBALS['argv'], 1)
    );

    // Each argument is shell-escaped individually; the child's exit status
    // becomes this script's exit status.
    passthru(implode(' ', array_map('escapeshellarg', $cmd)), $exitCode);
    exit($exitCode);
}

// Reached only when no restart happened: either there is no locally built
// extension, or the guard variable is already set. Run PHPUnit in this
// process with whatever extensions PHP has loaded.
require $phpunitPath;

54
e2e/php/tests/AsyncTest.php generated Normal file
View File

@@ -0,0 +1,54 @@
<?php
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
declare(strict_types=1);
namespace Kreuzberg\E2e;
use PHPUnit\Framework\TestCase;
use Kreuzberg\Kreuzberg;
use Kreuzberg\ExtractionConfig;
/**
 * E2e tests for category: async.
 *
 * Fixture paths are relative and are resolved against the current working
 * directory.
 */
final class AsyncTest extends TestCase
{
    /** Async extract_bytes call on PDF document */
    public function test_async_extract_bytes(): void
    {
        $contentBytes = $this->readFixture("pdf/fake_memo.pdf");

        $result = Kreuzberg::extractBytes($contentBytes, "application/pdf", ExtractionConfig::from_json('{}'));

        // Both sides are strings, so compare strictly.
        $this->assertSame("application/pdf", trim($result->mimeType));
        $this->assertGreaterThanOrEqual(50, strlen($result->getContent()));
    }

    /** extract_bytes empty MIME async */
    public function test_async_extract_bytes_empty_mime(): void
    {
        $contentBytes = $this->readFixture("text/plain.txt");
        $config = ExtractionConfig::from_json('{}');

        // Armed right before the call under test so that an exception thrown
        // while preparing the input cannot satisfy the expectation.
        $this->expectException(\Exception::class);
        Kreuzberg::extractBytes($contentBytes, "", $config);
    }

    /** extract_bytes unsupported MIME async */
    public function test_async_extract_bytes_invalid_mime(): void
    {
        $contentBytes = $this->readFixture("text/plain.txt");
        $config = ExtractionConfig::from_json('{}');

        // Armed right before the call under test (see the empty-MIME test).
        $this->expectException(\Exception::class);
        Kreuzberg::extractBytes($contentBytes, "application/x-nonexistent", $config);
    }

    /**
     * Reads a fixture file and fails the current test when it cannot be read.
     *
     * @param string $path Fixture path, relative to the working directory.
     * @return string Raw file contents.
     */
    private function readFixture(string $path): string
    {
        $bytes = file_get_contents($path);
        if ($bytes === false) {
            $this->fail("failed to read fixture: " . $path);
        }

        return $bytes;
    }
}

124
e2e/php/tests/BatchTest.php generated Normal file
View File

@@ -0,0 +1,124 @@
<?php
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
declare(strict_types=1);
namespace Kreuzberg\E2e;
use PHPUnit\Framework\TestCase;
use Kreuzberg\Kreuzberg;
use Kreuzberg\BatchBytesItem;
use Kreuzberg\BatchFileItem;
use Kreuzberg\ExtractionConfig;
/**
 * E2e tests for category: batch.
 *
 * Most cases only verify that the batch call completes without throwing;
 * they declare up front that no assertions will be performed.
 */
final class BatchTest extends TestCase
{
    /** batch_extract_bytes_sync invalid MIME */
    public function test_batch_bytes_invalid_mime(): void
    {
        $this->expectNotToPerformAssertions();

        // Payload decodes to "Hello".
        $items = [
            new BatchBytesItem(content: "\x48\x65\x6c\x6c\x6f", mimeType: "application/x-nonexistent"),
        ];
        $config = ExtractionConfig::from_json('{}');

        Kreuzberg::batchExtractBytesSync($items, $config);
    }

    /** batch_extract_bytes: happy path with mixed inputs */
    public function test_batch_extract_bytes_happy(): void
    {
        // Payloads decode to "Hello, world!" and "<html><body>Test</body></html>".
        $items = [
            new BatchBytesItem(
                content: "\x48\x65\x6c\x6c\x6f\x2c\x20\x77\x6f\x72\x6c\x64\x21",
                mimeType: "text/plain",
            ),
            new BatchBytesItem(
                content: "\x3c\x68\x74\x6d\x6c\x3e\x3c\x62\x6f\x64\x79\x3e\x54\x65\x73\x74\x3c\x2f\x62\x6f\x64\x79\x3e\x3c\x2f\x68\x74\x6d\x6c\x3e",
                mimeType: "text/html",
            ),
        ];
        $config = ExtractionConfig::from_json('{}');

        $extracted = Kreuzberg::batchExtractBytes($items, $config);

        self::assertGreaterThanOrEqual(1, count($extracted));
    }

    /** batch_extract_bytes: handles unsupported MIME gracefully */
    public function test_batch_extract_bytes_mixed_format(): void
    {
        $this->expectNotToPerformAssertions();

        // Payload decodes to "PDF placeholder".
        $items = [
            new BatchBytesItem(
                content: "\x50\x44\x46\x20\x70\x6c\x61\x63\x65\x68\x6f\x6c\x64\x65\x72",
                mimeType: "application/x-unknown",
            ),
        ];
        $config = ExtractionConfig::from_json('{}');

        Kreuzberg::batchExtractBytes($items, $config);
    }

    /** batch_extract_bytes_sync: empty batch */
    public function test_batch_extract_bytes_sync_empty_list(): void
    {
        $config = ExtractionConfig::from_json('{}');

        $extracted = Kreuzberg::batchExtractBytesSync([], $config);

        self::assertCount(0, $extracted);
    }

    /** batch_extract_bytes_sync: unsupported MIME */
    public function test_batch_extract_bytes_sync_invalid_mime(): void
    {
        $this->expectNotToPerformAssertions();

        // Payload decodes to "data".
        $items = [
            new BatchBytesItem(content: "\x64\x61\x74\x61", mimeType: "application/x-unknown"),
        ];
        $config = ExtractionConfig::from_json('{}');

        Kreuzberg::batchExtractBytesSync($items, $config);
    }

    /** Extract text from multiple files asynchronously */
    public function test_batch_file_async_basic(): void
    {
        $this->expectNotToPerformAssertions();

        $items = [
            new BatchFileItem(path: "pdf/fake_memo.pdf"),
            new BatchFileItem(path: "text/fake_text.txt"),
        ];
        $config = ExtractionConfig::from_json('{}');

        Kreuzberg::batchExtractFiles($items, $config);
    }

    /** batch_extract_file async nonexistent */
    public function test_batch_file_async_not_found(): void
    {
        $this->expectNotToPerformAssertions();

        $items = [new BatchFileItem(path: "/nonexistent/a.pdf")];
        $config = ExtractionConfig::from_json('{}');

        Kreuzberg::batchExtractFiles($items, $config);
    }

    /** batch_extract_file_sync nonexistent */
    public function test_batch_file_not_found(): void
    {
        $this->expectNotToPerformAssertions();

        $items = [
            new BatchFileItem(path: "/nonexistent/a.pdf"),
            new BatchFileItem(path: "/nonexistent/b.txt"),
        ];
        $config = ExtractionConfig::from_json('{}');

        Kreuzberg::batchExtractFilesSync($items, $config);
    }

    /** batch_extract_file_sync mixed */
    public function test_batch_file_partial(): void
    {
        $this->expectNotToPerformAssertions();

        // One readable fixture next to one path that does not exist.
        $items = [
            new BatchFileItem(path: "text/plain.txt"),
            new BatchFileItem(path: "/nonexistent/missing.pdf"),
        ];
        $config = ExtractionConfig::from_json('{}');

        Kreuzberg::batchExtractFilesSync($items, $config);
    }

    /** Extract text from multiple files synchronously */
    public function test_batch_file_sync_basic(): void
    {
        $this->expectNotToPerformAssertions();

        $items = [
            new BatchFileItem(path: "pdf/fake_memo.pdf"),
            new BatchFileItem(path: "text/fake_text.txt"),
        ];
        $config = ExtractionConfig::from_json('{}');

        Kreuzberg::batchExtractFilesSync($items, $config);
    }
}

34
e2e/php/tests/CodeTest.php generated Normal file
View File

@@ -0,0 +1,34 @@
<?php
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
declare(strict_types=1);
namespace Kreuzberg\E2e;
use PHPUnit\Framework\TestCase;
use Kreuzberg\Kreuzberg;
use Kreuzberg\ExtractionConfig;
/** E2e tests for category: code. */
final class CodeTest extends TestCase
{
    /**
     * Test language detection from shebang line.
     *
     * NOTE(review): the fixture description mentions "bytes input", but this
     * test goes through extractFileSync with a file path — confirm which one
     * is intended.
     */
    public function test_code_shebang_detection(): void
    {
        $result = Kreuzberg::extractFileSync("code/script.sh", "text/x-source-code", ExtractionConfig::from_json('{}'));

        // Both sides are strings, so compare strictly.
        $this->assertSame("text/x-source-code", trim($result->mimeType));

        $content = $result->getContent();
        $this->assertGreaterThanOrEqual(10, strlen($content));
        $this->assertStringContainsString("build", $content);
        $this->assertStringContainsString("clean", $content);
    }
}

260
e2e/php/tests/ContractTest.php generated Normal file
View File

@@ -0,0 +1,260 @@
<?php
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
declare(strict_types=1);
namespace Kreuzberg\E2e;
use PHPUnit\Framework\TestCase;
use Kreuzberg\Kreuzberg;
use Kreuzberg\ExtractionConfig;
/**
 * E2e tests for category: contract.
 *
 * NOTE(review): several test_api_* cases are described as batch or bytes
 * extraction tests, yet every one of them calls Kreuzberg::extractFile on the
 * same PDF fixture. The calls are kept as they are; confirm whether the
 * generator is expected to emit the batch/bytes entry points here.
 */
final class ContractTest extends TestCase
{
    /** Tests async batch bytes extraction API (batch_extract_bytes) */
    public function test_api_batch_bytes_async(): void
    {
        $result = Kreuzberg::extractFile("pdf/fake_memo.pdf", null, ExtractionConfig::from_json('{}'));
        $this->assertPdfWithContent($result);
        $this->assertContainsAnyOf($result->getContent(), ["May 5, 2023", "Mallori"]);
    }

    /** Tests async batch bytes extraction with per-file configs (batch_extract_bytes with file_configs parameter) */
    public function test_api_batch_bytes_with_configs_async(): void
    {
        $config = self::configFrom(["outputFormat" => "markdown"]);
        $result = Kreuzberg::extractFile("pdf/fake_memo.pdf", null, $config);
        $this->assertPdfWithContent($result);
        // skipped: field 'metadata.output_format' not available on result type
    }

    /** Tests async batch file extraction API (batch_extract_file) */
    public function test_api_batch_file_async(): void
    {
        $result = Kreuzberg::extractFile("pdf/fake_memo.pdf", null, ExtractionConfig::from_json('{}'));
        $this->assertPdfWithContent($result);
        $this->assertContainsAnyOf($result->getContent(), ["May 5, 2023", "Mallori"]);
    }

    /** Tests async batch file extraction with per-file configs (batch_extract_files with file_configs parameter) */
    public function test_api_batch_file_with_configs_async(): void
    {
        $config = self::configFrom(["outputFormat" => "markdown"]);
        $result = Kreuzberg::extractFile("pdf/fake_memo.pdf", null, $config);
        $this->assertPdfWithContent($result);
        // skipped: field 'metadata.output_format' not available on result type
    }

    /** Tests async bytes extraction API (extract_bytes) */
    public function test_api_extract_bytes_async(): void
    {
        $result = Kreuzberg::extractFile("pdf/fake_memo.pdf", null, ExtractionConfig::from_json('{}'));
        $this->assertPdfWithContent($result);
        $this->assertContainsAnyOf($result->getContent(), ["May 5, 2023", "Mallori"]);
    }

    /** Tests async file extraction API (extract_file) */
    public function test_api_extract_file_async(): void
    {
        $result = Kreuzberg::extractFile("pdf/fake_memo.pdf", null, ExtractionConfig::from_json('{}'));
        $this->assertPdfWithContent($result);
        $this->assertContainsAnyOf($result->getContent(), ["May 5, 2023", "Mallori"]);
    }

    /** Tests markdown chunker prepends heading hierarchy to chunk content */
    public function test_config_chunking_prepend_heading_context(): void
    {
        $config = self::configFrom([
            "chunking" => [
                "chunkerType" => "markdown",
                "maxChars" => 300,
                "maxOverlap" => 50,
                "prependHeadingContext" => true,
            ],
        ]);
        $result = Kreuzberg::extractFileSync("markdown/extraction_test.md", null, $config);
        $this->assertGreaterThanOrEqual(10, strlen($result->getContent()));
        // skipped: field 'chunks' not available on result type
        // (disabled) $this->assertTrue(array_reduce($result->chunks ?? [], fn($carry, $c) => $carry && !empty($c->content), true));
        // skipped: field 'chunks_have_heading_context' not available on result type
        // skipped: field 'first_chunk_starts_with_heading' not available on result type
    }

    /** Tests document structure with DOCX heading-driven nesting */
    public function test_config_document_structure_with_headings(): void
    {
        $config = self::configFrom(["includeDocumentStructure" => true]);
        $result = Kreuzberg::extractFileSync("docx/fake.docx", null, $config);
        $this->assertSame(
            "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
            trim($result->mimeType),
        );
        // skipped: field 'document' not available on result type
        // skipped: field 'document.nodes' not available on result type
    }

    /** Tests element-based result format with element type assertions on DOCX */
    public function test_config_element_types(): void
    {
        $config = self::configFrom(["resultFormat" => "element_based"]);
        $result = Kreuzberg::extractFileSync("docx/unit_test_headers.docx", null, $config);
        $this->assertContainsAnyOf(
            $result->mimeType,
            ["application/vnd.openxmlformats-officedocument.wordprocessingml.document"],
        );
        // skipped: field 'elements' not available on result type
    }

    /** Tests that extraction_timeout_secs config field is accepted and does not affect fast extractions */
    public function test_config_extraction_timeout(): void
    {
        $config = self::configFrom(["extractionTimeoutSecs" => 300]);
        $result = Kreuzberg::extractFileSync("pdf/fake_memo.pdf", null, $config);
        $this->assertPdfWithContent($result);
    }

    /** Tests keyword extraction via YAKE algorithm */
    public function test_config_keywords(): void
    {
        $config = self::configFrom(["keywords" => ["algorithm" => "yake", "maxKeywords" => 10]]);
        $result = Kreuzberg::extractFileSync("pdf/fake_memo.pdf", null, $config);
        $this->assertPdfWithContent($result);
        // skipped: field 'keywords' not available on PHP ExtractionResult
        // skipped: field 'keywords' not available on PHP ExtractionResult
    }

    /** Tests page extraction and page marker configuration */
    public function test_config_pages(): void
    {
        $config = self::configFrom(["pages" => ["extractPages" => true, "insertPageMarkers" => true]]);
        $result = Kreuzberg::extractFileSync("pdf/fake_memo.pdf", null, $config);
        $this->assertPdfWithContent($result);
        $this->assertContainsAnyOf($result->getContent(), ["PAGE"]);
    }

    /** Tests quality scoring produces a score value in [0.0, 1.0] */
    public function test_config_quality_enabled(): void
    {
        $config = self::configFrom(["enableQualityProcessing" => true]);
        $result = Kreuzberg::extractFileSync("pdf/fake_memo.pdf", null, $config);
        $this->assertPdfWithContent($result);
        // skipped: field 'quality_score' not available on result type
        // skipped: field 'quality_score' not available on result type
        // skipped: field 'quality_score' not available on result type
    }

    /** Tests archive extraction with custom security limits */
    public function test_config_security_limits(): void
    {
        $config = self::configFrom([
            "securityLimits" => [
                "maxArchiveSize" => 104857600,
                "maxCompressionRatio" => 50,
                "maxFilesInArchive" => 100,
            ],
        ]);
        $result = Kreuzberg::extractFileSync("archives/documents.zip", null, $config);
        $this->assertContainsAnyOf($result->mimeType, ["application/zip", "application/x-zip-compressed"]);
        $this->assertGreaterThanOrEqual(10, strlen($result->getContent()));
    }

    /** Tests tree-sitter configuration round-trip */
    public function test_config_tree_sitter(): void
    {
        $config = self::configFrom([
            "treeSitter" => [
                "groups" => ["web"],
                "languages" => ["python", "rust"],
                "process" => [
                    "comments" => false,
                    "diagnostics" => false,
                    "docstrings" => false,
                    "exports" => true,
                    "imports" => true,
                    "structure" => true,
                    "symbols" => false,
                ],
            ],
        ]);
        $result = Kreuzberg::extractFileSync("code/hello.py", null, $config);
        $this->assertSame("text/x-source-code", trim($result->mimeType));
        $this->assertGreaterThanOrEqual(5, strlen($result->getContent()));
    }

    /** Tests markdown output format via bytes extraction API */
    public function test_output_format_bytes_markdown(): void
    {
        $contentBytes = file_get_contents("pdf/fake_memo.pdf");
        if ($contentBytes === false) {
            $this->fail("failed to read fixture: pdf/fake_memo.pdf");
        }
        $config = self::configFrom(["outputFormat" => "markdown"]);
        $result = Kreuzberg::extractBytesSync($contentBytes, "application/pdf", $config);
        $this->assertPdfWithContent($result);
        // skipped: field 'metadata.output_format' not available on result type
    }

    /** Tests Markdown output format */
    public function test_output_format_markdown(): void
    {
        $config = self::configFrom(["outputFormat" => "markdown"]);
        $result = Kreuzberg::extractFileSync("pdf/fake_memo.pdf", null, $config);
        $this->assertPdfWithContent($result);
        // skipped: field 'metadata.output_format' not available on result type
    }

    /**
     * Builds an extraction config from an options array by JSON-encoding it.
     *
     * JSON_THROW_ON_ERROR makes an encoding failure surface as a JsonException
     * instead of handing `false` to from_json.
     *
     * @param array<string, mixed> $options Config options keyed by field name.
     * @return mixed Whatever ExtractionConfig::from_json returns.
     */
    private static function configFrom(array $options)
    {
        return ExtractionConfig::from_json(json_encode($options, JSON_THROW_ON_ERROR));
    }

    /**
     * Asserts the two checks shared by every PDF-memo case: the trimmed MIME
     * type is exactly "application/pdf" and the content is at least 10 bytes.
     *
     * @param object $result Extraction result exposing `mimeType` and `getContent()`.
     */
    private function assertPdfWithContent(object $result): void
    {
        $this->assertSame("application/pdf", trim($result->mimeType));
        $this->assertGreaterThanOrEqual(10, strlen($result->getContent()));
    }

    /**
     * Asserts that the haystack contains at least one of the given needles.
     *
     * @param string $haystack Text to search.
     * @param list<string> $needles Candidate substrings; one match is enough.
     */
    private function assertContainsAnyOf(string $haystack, array $needles): void
    {
        $found = false;
        foreach ($needles as $needle) {
            if (str_contains($haystack, $needle)) {
                $found = true;
                break;
            }
        }

        $this->assertTrue($found, 'expected to contain at least one of the specified values');
    }
}

63
e2e/php/tests/DetectionTest.php generated Normal file
View File

@@ -0,0 +1,63 @@
<?php
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
declare(strict_types=1);
namespace Kreuzberg\E2e;
use PHPUnit\Framework\TestCase;
use Kreuzberg\Kreuzberg;
use Kreuzberg\ExtractionConfig;
/**
 * E2e tests for category: detection.
 *
 * NOTE(review): the three detect_mime_bytes cases only check that detection
 * completes without throwing; the detected MIME type itself is never
 * asserted — confirm whether that is intended.
 */
final class DetectionTest extends TestCase
{
    /** Detect HTML MIME from bytes */
    public function test_detect_mime_bytes_html(): void
    {
        $payload = $this->loadFixture("html/html.html");
        $this->expectNotToPerformAssertions();

        Kreuzberg::detectMimeTypeFromBytes($payload);
    }

    /** Detect PDF MIME type from bytes */
    public function test_detect_mime_bytes_pdf(): void
    {
        $payload = $this->loadFixture("pdf/fake_memo.pdf");
        $this->expectNotToPerformAssertions();

        Kreuzberg::detectMimeTypeFromBytes($payload);
    }

    /** Detect PNG MIME type from bytes */
    public function test_detect_mime_bytes_png(): void
    {
        $payload = $this->loadFixture("images/test_hello_world.png");
        $this->expectNotToPerformAssertions();

        Kreuzberg::detectMimeTypeFromBytes($payload);
    }

    /** get_extensions unknown MIME */
    public function test_get_extensions_unknown_mime(): void
    {
        $this->expectException(\Exception::class);

        Kreuzberg::getExtensionsForMime("application/x-totally-unknown");
    }

    /**
     * Returns the raw bytes of a fixture, failing the test if it is unreadable.
     *
     * @param string $path Fixture path, relative to the working directory.
     */
    private function loadFixture(string $path): string
    {
        $raw = file_get_contents($path);
        if ($raw === false) {
            $this->fail("failed to read fixture: " . $path);
        }

        return $raw;
    }
}

View File

@@ -0,0 +1,40 @@
<?php
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
declare(strict_types=1);
namespace Kreuzberg\E2e;
use PHPUnit\Framework\TestCase;
use Kreuzberg\Kreuzberg;
use Kreuzberg\ExtractionConfig;
/**
 * E2e tests for category: document_extractor_management.
 *
 * Both cases only check that the registry call completes without throwing.
 */
final class DocumentExtractorManagementTest extends TestCase
{
    /**
     * Clear all document extractors.
     *
     * NOTE(review): the fixture description says the list is verified to be
     * empty afterwards, but no such check is made here.
     */
    public function test_document_extractors_clear(): void
    {
        $this->expectNotToPerformAssertions();

        Kreuzberg::clearDocumentExtractors();
    }

    /**
     * List all registered document extractors.
     *
     * NOTE(review): the argument below looks like fixture setup metadata
     * rather than a real parameter of the listing call — confirm against the
     * binding's signature.
     */
    public function test_extractors_list(): void
    {
        $this->expectNotToPerformAssertions();

        $fixtureSetup = [
            "setup" => [
                "lazy_init_required" => [
                    "init_action" => "extract_file_sync",
                    "init_data" => [
                        "create_temp_file" => true,
                        "temp_file_content" => "%PDF-1.4\n%EOF\n",
                        "temp_file_name" => "test.pdf",
                    ],
                    "languages" => ["go"],
                ],
            ],
        ];

        Kreuzberg::listDocumentExtractors($fixtureSetup);
    }
}

51
e2e/php/tests/EmbedAsyncPendingTest.php generated Normal file
View File

@@ -0,0 +1,51 @@
<?php
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
declare(strict_types=1);
namespace Kreuzberg\E2e;
use PHPUnit\Framework\TestCase;
use Kreuzberg\Kreuzberg;
use Kreuzberg\ExtractionConfig;
/** E2e tests for category: embed_async_pending. */
final class EmbedAsyncPendingTest extends TestCase
{
    /** embed_texts_async: empty text list */
    public function test_embed_texts_async_empty_input(): void
    {
        $config = \Kreuzberg\EmbeddingConfig::from_json('{}');

        $vectors = Kreuzberg::embedTextsAsync([], $config);

        self::assertCount(0, $vectors);
    }

    /** embed_texts_async: basic async embedding */
    public function test_embed_texts_async_happy(): void
    {
        $texts = ["First", "Second"];
        $config = \Kreuzberg\EmbeddingConfig::from_json('{}');

        $vectors = Kreuzberg::embedTextsAsync($texts, $config);

        self::assertGreaterThanOrEqual(2, count($vectors));
    }

    /** embed_texts_async: preset override */
    public function test_embed_texts_async_preset_switch(): void
    {
        $options = ["model" => ["name" => "balanced", "type" => "preset"]];
        $config = \Kreuzberg\EmbeddingConfig::from_json(json_encode($options));

        // Only checks that embedding with the preset does not throw.
        $this->expectNotToPerformAssertions();
        Kreuzberg::embedTextsAsync(["Text"], $config);
    }
}

31
e2e/php/tests/EmbedExtraTest.php generated Normal file
View File

@@ -0,0 +1,31 @@
<?php
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
declare(strict_types=1);
namespace Kreuzberg\E2e;
use PHPUnit\Framework\TestCase;
use Kreuzberg\Kreuzberg;
use Kreuzberg\EmbeddingConfig;
/** E2e tests for category: embed_extra. */
final class EmbedExtraTest extends TestCase
{
    /** Batch embed texts */
    public function test_embed_texts_batch(): void
    {
        $options = ["model" => ["name" => "balanced", "type" => "preset"]];
        $config = EmbeddingConfig::from_json(json_encode($options));

        // Only checks that embedding two texts does not throw.
        $this->expectNotToPerformAssertions();
        Kreuzberg::embedTexts(["Hello", "World"], $config);
    }
}

View File

@@ -0,0 +1,40 @@
<?php
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
declare(strict_types=1);
namespace Kreuzberg\E2e;
use PHPUnit\Framework\TestCase;
use Kreuzberg\Kreuzberg;
use Kreuzberg\ExtractionConfig;
/**
 * E2e tests for category: embedding_backend_management.
 *
 * Both cases only check that the registry call completes without throwing.
 */
final class EmbeddingBackendManagementTest extends TestCase
{
    /**
     * Clear all embedding backends.
     *
     * NOTE(review): the fixture description says the list is verified to be
     * empty afterwards, but no such check is made here.
     */
    public function test_embedding_backends_clear(): void
    {
        $this->expectNotToPerformAssertions();

        Kreuzberg::clearEmbeddingBackends();
    }

    /** List all registered embedding backends */
    public function test_embedding_backends_list(): void
    {
        $this->expectNotToPerformAssertions();

        Kreuzberg::listEmbeddingBackends();
    }
}

74
e2e/php/tests/EmbeddingsTest.php generated Normal file
View File

@@ -0,0 +1,74 @@
<?php
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
declare(strict_types=1);
namespace Kreuzberg\E2e;
use PHPUnit\Framework\TestCase;
use Kreuzberg\Kreuzberg;
use Kreuzberg\EmbeddingConfig;
use Kreuzberg\ExtractionConfig;
/** E2e tests for category: embeddings. */
final class EmbeddingsTest extends TestCase
{
    /** embed_texts: multilingual preset */
    public function test_embed_texts_different_preset(): void
    {
        $options = ["model" => ["name" => "multilingual", "type" => "preset"]];
        $config = EmbeddingConfig::from_json(json_encode($options));

        $vectors = Kreuzberg::embedTexts(["Hello world", "Test"], $config);

        self::assertGreaterThanOrEqual(2, count($vectors));
    }

    /** get_embedding_preset: known preset */
    public function test_get_embedding_preset_known(): void
    {
        // Only checks that the lookup does not throw.
        $this->expectNotToPerformAssertions();

        Kreuzberg::getEmbeddingPreset("balanced");
    }

    /** get_embedding_preset: nominal case */
    public function test_get_embedding_preset_nominal(): void
    {
        // Only checks that the lookup does not throw.
        $this->expectNotToPerformAssertions();

        Kreuzberg::getEmbeddingPreset("balanced");
    }

    /** get_embedding_preset: unknown preset yields an empty result */
    public function test_get_embedding_preset_unknown(): void
    {
        $preset = Kreuzberg::getEmbeddingPreset("nonexistent-xyz");

        self::assertEmpty($preset);
    }

    /** list_embedding_presets: returns at least one */
    public function test_list_embedding_presets_sanity(): void
    {
        $presets = Kreuzberg::listEmbeddingPresets();

        self::assertNotEmpty($presets);
    }
}

73
e2e/php/tests/ErrorTest.php generated Normal file
View File

@@ -0,0 +1,73 @@
<?php
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
declare(strict_types=1);
namespace Kreuzberg\E2e;
use PHPUnit\Framework\TestCase;
use Kreuzberg\Kreuzberg;
use Kreuzberg\ExtractionConfig;
/**
 * E2e tests for category: error.
 *
 * In the failure cases the exception expectation is armed only after the
 * fixture has been read, and as close to the call under test as possible, so
 * that unrelated setup problems cannot make a test pass.
 */
final class ErrorTest extends TestCase
{
    /** Graceful handling of empty bytes (should not error) */
    public function test_error_empty_bytes(): void
    {
        $contentBytes = $this->readFixture("text/empty.txt");
        $config = ExtractionConfig::from_json('{}');

        // Only checks that extraction of an empty payload does not throw.
        $this->expectNotToPerformAssertions();
        Kreuzberg::extractBytesSync($contentBytes, "text/plain", $config);
    }

    /** Error when extracting with empty MIME type */
    public function test_error_empty_mime(): void
    {
        $contentBytes = $this->readFixture("text/plain.txt");
        $config = ExtractionConfig::from_json('{}');

        $this->expectException(\Exception::class);
        Kreuzberg::extractBytesSync($contentBytes, "", $config);
    }

    /** extract_bytes force+disable OCR */
    public function test_error_extract_bytes_conflicting_ocr(): void
    {
        $contentBytes = $this->readFixture("text/fake_text.txt");

        // Armed before the config is built: the conflicting options may be
        // rejected either while parsing the config or during extraction.
        $this->expectException(\Exception::class);
        $config = ExtractionConfig::from_json(json_encode(["disableOcr" => true, "forceOcr" => true]));
        Kreuzberg::extractBytesSync($contentBytes, "text/plain", $config);
    }

    /** Error when extracting with invalid MIME type format */
    public function test_error_invalid_mime_format(): void
    {
        $contentBytes = $this->readFixture("text/plain.txt");
        $config = ExtractionConfig::from_json('{}');

        $this->expectException(\Exception::class);
        Kreuzberg::extractBytesSync($contentBytes, "not-a-mime", $config);
    }

    /** Error when extracting with unsupported MIME type */
    public function test_error_unsupported_mime(): void
    {
        $contentBytes = $this->readFixture("text/plain.txt");
        $config = ExtractionConfig::from_json('{}');

        $this->expectException(\Exception::class);
        Kreuzberg::extractBytesSync($contentBytes, "application/x-nonexistent", $config);
    }

    /**
     * Reads a fixture file and fails the current test when it cannot be read.
     *
     * @param string $path Fixture path, relative to the working directory.
     * @return string Raw file contents.
     */
    private function readFixture(string $path): string
    {
        $bytes = file_get_contents($path);
        if ($bytes === false) {
            $this->fail("failed to read fixture: " . $path);
        }

        return $bytes;
    }
}

84
e2e/php/tests/FormatSpecificTest.php generated Normal file
View File

@@ -0,0 +1,84 @@
<?php
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
declare(strict_types=1);
namespace Kreuzberg\E2e;
use PHPUnit\Framework\TestCase;
use Kreuzberg\Kreuzberg;
use Kreuzberg\ExtractionConfig;
/**
 * E2e tests for category: format_specific.
 *
 * Every case runs an extraction with an empty ('{}') config; the byte-based
 * cases read the fixture first and fail the test if it cannot be read.
 */
final class FormatSpecificTest extends TestCase
{
    /** Standalone DOCX extraction using extract_bytes_sync. */
    public function test_format_docx_standalone(): void
    {
        $contentBytes = file_get_contents('docx/fake.docx');
        if ($contentBytes === false) {
            self::fail('failed to read fixture: docx/fake.docx');
        }

        $result = Kreuzberg::extractBytesSync(
            $contentBytes,
            'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
            ExtractionConfig::from_json('{}'),
        );

        // Only a minimum content length (in bytes) is required.
        self::assertGreaterThanOrEqual(20, strlen($result->getContent()));
    }

    /** Standalone HWPX extraction using extract_bytes_sync. */
    public function test_format_hwpx_standalone(): void
    {
        $contentBytes = file_get_contents('hwpx/simple.hwpx');
        if ($contentBytes === false) {
            self::fail('failed to read fixture: hwpx/simple.hwpx');
        }

        $result = Kreuzberg::extractBytesSync(
            $contentBytes,
            'application/haansofthwpx',
            ExtractionConfig::from_json('{}'),
        );

        $content = $result->getContent();
        self::assertGreaterThanOrEqual(20, strlen($content));
        self::assertStringContainsString('Hello from HWPX', $content);
    }

    /** Standalone PDF text extraction using extract_bytes_sync. */
    public function test_format_pdf_text(): void
    {
        $contentBytes = file_get_contents('pdf/fake_memo.pdf');
        if ($contentBytes === false) {
            self::fail('failed to read fixture: pdf/fake_memo.pdf');
        }

        $result = Kreuzberg::extractBytesSync(
            $contentBytes,
            'application/pdf',
            ExtractionConfig::from_json('{}'),
        );

        $content = $result->getContent();
        self::assertGreaterThanOrEqual(50, strlen($content));

        // At least one of the expected snippets must appear in the extracted text.
        $hasExpectedSnippet = str_contains($content, 'Mallori') || str_contains($content, 'May');
        self::assertTrue($hasExpectedSnippet, 'expected to contain at least one of the specified values');
    }

    /** PPTX presentation extraction using extract_file_sync; passes if the call does not throw. */
    public function test_format_pptx(): void
    {
        $this->expectNotToPerformAssertions();
        Kreuzberg::extractFileSync(
            'pptx/simple.pptx',
            'application/vnd.openxmlformats-officedocument.presentationml.presentation',
            ExtractionConfig::from_json('{}'),
        );
    }

    /** XLSX spreadsheet extraction using extract_file_sync; passes if the call does not throw. */
    public function test_format_xlsx(): void
    {
        $this->expectNotToPerformAssertions();
        Kreuzberg::extractFileSync(
            'xlsx/stanley_cups.xlsx',
            'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
            ExtractionConfig::from_json('{}'),
        );
    }
}

62
e2e/php/tests/MimeUtilitiesTest.php generated Normal file
View File

@@ -0,0 +1,62 @@
<?php
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
declare(strict_types=1);
namespace Kreuzberg\E2e;
use PHPUnit\Framework\TestCase;
use Kreuzberg\Kreuzberg;
use Kreuzberg\ExtractionConfig;
/** E2e tests for category: mime_utilities. */
final class MimeUtilitiesTest extends TestCase
{
    /** Detect MIME type from file bytes: the detected type must mention "pdf". */
    public function test_mime_detect_bytes(): void
    {
        $contentBytes = file_get_contents('pdf/fake_memo.pdf');
        if ($contentBytes === false) {
            self::fail('failed to read fixture: pdf/fake_memo.pdf');
        }

        $detectedMime = Kreuzberg::detectMimeTypeFromBytes($contentBytes);

        self::assertStringContainsString('pdf', $detectedMime);
    }

    /** Detect MIME type from PNG image bytes: the detected type must mention "png". */
    public function test_mime_detect_image(): void
    {
        $contentBytes = file_get_contents('images/test_hello_world.png');
        if ($contentBytes === false) {
            self::fail('failed to read fixture: images/test_hello_world.png');
        }

        $detectedMime = Kreuzberg::detectMimeTypeFromBytes($contentBytes);

        self::assertStringContainsString('png', $detectedMime);
    }

    /** Get file extensions for a MIME type: some entry must contain "pdf" (case-insensitive). */
    public function test_mime_get_extensions(): void
    {
        $extensions = Kreuzberg::getExtensionsForMime('application/pdf');

        $matched = false;
        foreach ($extensions as $extension) {
            // Objects are compared through their JSON form, everything else through a string cast.
            $text = is_object($extension) ? json_encode($extension) : (string) $extension;
            // stripos() is evaluated for every entry, matched or not.
            $matched = (stripos($text, 'pdf') !== false) || $matched;
        }

        self::assertTrue($matched, 'expected array to contain string');
    }
}

View File

@@ -0,0 +1,50 @@
<?php
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
declare(strict_types=1);
namespace Kreuzberg\E2e;
use PHPUnit\Framework\TestCase;
use Kreuzberg\Kreuzberg;
use Kreuzberg\ExtractionConfig;
/**
 * E2e tests for category: ocr_backend_management.
 *
 * All cases here are "does not throw" checks: they declare that no assertions
 * are performed and discard the return value of the call.
 */
final class OcrBackendManagementTest extends TestCase
{
    /** Clear all OCR backends. The resulting backend list is not inspected. */
    public function test_ocr_backends_clear(): void
    {
        $this->expectNotToPerformAssertions();
        Kreuzberg::clearOcrBackends();
    }

    /** List all registered OCR backends. */
    public function test_ocr_backends_list(): void
    {
        $this->expectNotToPerformAssertions();
        Kreuzberg::listOcrBackends();
    }

    /** Unregister a backend name that this test never registered; the call must not throw. */
    public function test_ocr_backends_unregister(): void
    {
        $this->expectNotToPerformAssertions();
        Kreuzberg::unregisterOcrBackend('nonexistent-backend-xyz');
    }
}

42
e2e/php/tests/PdfTest.php generated Normal file
View File

@@ -0,0 +1,42 @@
<?php
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
declare(strict_types=1);
namespace Kreuzberg\E2e;
use PHPUnit\Framework\TestCase;
use Kreuzberg\Kreuzberg;
use Kreuzberg\ExtractionConfig;
/** E2e tests for category: pdf. */
final class PdfTest extends TestCase
{
    /** render_pdf_page_to_png: first page (index 0) must yield at least 100 bytes. */
    public function test_render_pdf_page_first(): void
    {
        $pdfBytes = file_get_contents('pdf/fake_memo.pdf');
        if ($pdfBytes === false) {
            self::fail('failed to read fixture: pdf/fake_memo.pdf');
        }

        $png = Kreuzberg::renderPdfPageToPng($pdfBytes, 0);

        self::assertGreaterThanOrEqual(100, strlen($png));
    }

    /** render_pdf_page_to_png: page out of range must raise an exception. */
    public function test_render_pdf_page_out_of_range(): void
    {
        $pdfBytes = file_get_contents('pdf/fake_memo.pdf');
        if ($pdfBytes === false) {
            self::fail('failed to read fixture: pdf/fake_memo.pdf');
        }

        // Armed only after the fixture is loaded, so that nothing thrown during
        // setup can be counted as the expected exception.
        $this->expectException(\Exception::class);
        Kreuzberg::renderPdfPageToPng($pdfBytes, 999);
    }
}

214
e2e/php/tests/PluginApiTest.php generated Normal file
View File

@@ -0,0 +1,214 @@
<?php
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
declare(strict_types=1);
namespace Kreuzberg\E2e;
use PHPUnit\Framework\TestCase;
use Kreuzberg\Kreuzberg;
use Kreuzberg\ExtractionConfig;
/**
 * E2e tests for category: plugin_api.
 *
 * The register_* cases build an anonymous stub whose methods return fixed
 * placeholder values, register it, and then unregister it under the name the
 * stub reports. None of the cases perform assertions; they pass when the calls
 * complete without throwing.
 */
final class PluginApiTest extends TestCase
{
    /** register_document_extractor: trait bridge. */
    public function test_register_document_extractor_trait_bridge(): void
    {
        $extractor = new class implements \Kreuzberg\DocumentExtractor {
            public function name(): string { return 'test-extractor'; }

            public function extract_bytes($content, $mime_type, $config): mixed { return '{}'; }

            public function extract_file($path, $mime_type, $config): mixed { return '{}'; }

            public function supported_mime_types(): mixed { return []; }

            public function priority(): mixed { return 1; }

            public function can_handle($_path, $_mime_type): mixed { return false; }

            public function version(): mixed { return ''; }

            public function initialize(): mixed { return null; }

            public function shutdown(): mixed { return null; }

            public function description(): mixed { return ''; }

            public function author(): mixed { return ''; }
        };

        $this->expectNotToPerformAssertions();
        Kreuzberg::registerDocumentExtractor($extractor);
        // Remove the stub again, using the same name its name() method returns.
        Kreuzberg::unregisterDocumentExtractor('test-extractor');
    }

    /** register_embedding_backend: trait bridge. */
    public function test_register_embedding_backend_trait_bridge(): void
    {
        $embeddingBackend = new class implements \Kreuzberg\EmbeddingBackend {
            public function name(): string { return 'test-embedding-backend'; }

            public function dimensions(): mixed { return 1; }

            public function embed($texts): mixed { return []; }

            public function version(): mixed { return ''; }

            public function initialize(): mixed { return null; }

            public function shutdown(): mixed { return null; }

            public function description(): mixed { return ''; }

            public function author(): mixed { return ''; }
        };

        $this->expectNotToPerformAssertions();
        Kreuzberg::registerEmbeddingBackend($embeddingBackend);
        // Remove the stub again, using the same name its name() method returns.
        Kreuzberg::unregisterEmbeddingBackend('test-embedding-backend');
    }

    /** register_ocr_backend: trait bridge. */
    public function test_register_ocr_backend_trait_bridge(): void
    {
        $ocrBackend = new class implements \Kreuzberg\OcrBackend {
            public function name(): string { return 'test-backend'; }

            public function process_image($image_bytes, $config): mixed { return '{}'; }

            public function process_image_file($path, $config): mixed { return '{}'; }

            public function supports_language($lang): mixed { return false; }

            public function backend_type(): mixed { return '{}'; }

            public function supported_languages(): mixed { return []; }

            public function supports_table_detection(): mixed { return false; }

            public function supports_document_processing(): mixed { return false; }

            public function process_document($_path, $_config): mixed { return '{}'; }

            public function version(): mixed { return ''; }

            public function initialize(): mixed { return null; }

            public function shutdown(): mixed { return null; }

            public function description(): mixed { return ''; }

            public function author(): mixed { return ''; }
        };

        $this->expectNotToPerformAssertions();
        Kreuzberg::registerOcrBackend($ocrBackend);
        // Remove the stub again, using the same name its name() method returns.
        Kreuzberg::unregisterOcrBackend('test-backend');
    }

    /** register_post_processor: trait bridge. */
    public function test_register_post_processor_trait_bridge(): void
    {
        $postProcessor = new class implements \Kreuzberg\PostProcessor {
            public function name(): string { return 'test-processor'; }

            public function process($result, $config): mixed { return null; }

            public function processing_stage(): mixed { return '{}'; }

            public function should_process($_result, $_config): mixed { return false; }

            public function estimated_duration_ms($_result): mixed { return 1; }

            public function priority(): mixed { return 1; }

            public function version(): mixed { return ''; }

            public function initialize(): mixed { return null; }

            public function shutdown(): mixed { return null; }

            public function description(): mixed { return ''; }

            public function author(): mixed { return ''; }
        };

        $this->expectNotToPerformAssertions();
        Kreuzberg::registerPostProcessor($postProcessor);
        // Remove the stub again, using the same name its name() method returns.
        Kreuzberg::unregisterPostProcessor('test-processor');
    }

    /** register_renderer: trait bridge. */
    public function test_register_renderer_trait_bridge(): void
    {
        $renderer = new class implements \Kreuzberg\Renderer {
            public function name(): string { return 'test-renderer'; }

            public function render($doc): mixed { return ''; }

            public function version(): mixed { return ''; }

            public function initialize(): mixed { return null; }

            public function shutdown(): mixed { return null; }

            public function description(): mixed { return ''; }

            public function author(): mixed { return ''; }
        };

        $this->expectNotToPerformAssertions();
        Kreuzberg::registerRenderer($renderer);
        // Remove the stub again, using the same name its name() method returns.
        Kreuzberg::unregisterRenderer('test-renderer');
    }

    /** register_validator: trait bridge. */
    public function test_register_validator_trait_bridge(): void
    {
        $validator = new class implements \Kreuzberg\Validator {
            public function name(): string { return 'test-validator'; }

            public function validate($result, $config): mixed { return null; }

            public function should_validate($_result, $_config): mixed { return false; }

            public function priority(): mixed { return 1; }

            public function version(): mixed { return ''; }

            public function initialize(): mixed { return null; }

            public function shutdown(): mixed { return null; }

            public function description(): mixed { return ''; }

            public function author(): mixed { return ''; }
        };

        $this->expectNotToPerformAssertions();
        Kreuzberg::registerValidator($validator);
        // Remove the stub again, using the same name its name() method returns.
        Kreuzberg::unregisterValidator('test-validator');
    }

    /** unregister_document_extractor: this method only unregisters by name; it registers nothing itself. */
    public function test_unregister_document_extractor_after_register(): void
    {
        $this->expectNotToPerformAssertions();
        Kreuzberg::unregisterDocumentExtractor('test-extractor');
    }

    /** unregister_embedding_backend: this method only unregisters by name; it registers nothing itself. */
    public function test_unregister_embedding_backend_after_register(): void
    {
        $this->expectNotToPerformAssertions();
        Kreuzberg::unregisterEmbeddingBackend('test-embedding-backend');
    }

    /** unregister_post_processor: this method only unregisters by name; it registers nothing itself. */
    public function test_unregister_post_processor_after_register(): void
    {
        $this->expectNotToPerformAssertions();
        Kreuzberg::unregisterPostProcessor('test-processor');
    }

    /** unregister_renderer: this method only unregisters by name; it registers nothing itself. */
    public function test_unregister_renderer_after_register(): void
    {
        $this->expectNotToPerformAssertions();
        Kreuzberg::unregisterRenderer('test-renderer');
    }

    /** unregister_validator: this method only unregisters by name; it registers nothing itself. */
    public function test_unregister_validator_after_register(): void
    {
        $this->expectNotToPerformAssertions();
        Kreuzberg::unregisterValidator('test-validator');
    }
}

View File

@@ -0,0 +1,40 @@
<?php
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
declare(strict_types=1);
namespace Kreuzberg\E2e;
use PHPUnit\Framework\TestCase;
use Kreuzberg\Kreuzberg;
use Kreuzberg\ExtractionConfig;
/**
 * E2e tests for category: post_processor_management.
 *
 * Both cases are "does not throw" checks: no assertions are performed and the
 * return value of the call is discarded.
 */
final class PostProcessorManagementTest extends TestCase
{
    /** Clear all post-processors. The resulting list is not inspected. */
    public function test_post_processors_clear(): void
    {
        $this->expectNotToPerformAssertions();
        Kreuzberg::clearPostProcessors();
    }

    /** List all registered post-processors. */
    public function test_post_processors_list(): void
    {
        $this->expectNotToPerformAssertions();
        Kreuzberg::listPostProcessors();
    }
}

50
e2e/php/tests/RegistryOperationsTest.php generated Normal file
View File

@@ -0,0 +1,50 @@
<?php
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
declare(strict_types=1);
namespace Kreuzberg\E2e;
use PHPUnit\Framework\TestCase;
use Kreuzberg\Kreuzberg;
use Kreuzberg\ExtractionConfig;
/**
 * E2e tests for category: registry_operations.
 *
 * Each case looks up the extensions for one MIME type and passes when the
 * lookup returns without throwing; the returned value is not checked.
 */
final class RegistryOperationsTest extends TestCase
{
    /** Get file extensions for DOCX MIME type. */
    public function test_extensions_docx(): void
    {
        $this->expectNotToPerformAssertions();
        Kreuzberg::getExtensionsForMime(
            'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
        );
    }

    /** Get file extensions for HTML MIME type. */
    public function test_extensions_html(): void
    {
        $this->expectNotToPerformAssertions();
        Kreuzberg::getExtensionsForMime('text/html');
    }

    /** Get file extensions for PDF MIME type. */
    public function test_extensions_pdf(): void
    {
        $this->expectNotToPerformAssertions();
        Kreuzberg::getExtensionsForMime('application/pdf');
    }
}

80
e2e/php/tests/RegistryTest.php generated Normal file
View File

@@ -0,0 +1,80 @@
<?php
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
declare(strict_types=1);
namespace Kreuzberg\E2e;
use PHPUnit\Framework\TestCase;
use Kreuzberg\Kreuzberg;
use Kreuzberg\ExtractionConfig;
/**
 * E2e tests for category: registry.
 *
 * Each case calls one list* function and passes when it returns without
 * throwing; the returned list is not checked.
 */
final class RegistryTest extends TestCase
{
    /** List document extractors. */
    public function test_list_document_extractors(): void
    {
        $this->expectNotToPerformAssertions();
        Kreuzberg::listDocumentExtractors();
    }

    /** List embedding backends. */
    public function test_list_embedding_backends(): void
    {
        $this->expectNotToPerformAssertions();
        Kreuzberg::listEmbeddingBackends();
    }

    /** List OCR backends. */
    public function test_list_ocr_backends(): void
    {
        $this->expectNotToPerformAssertions();
        Kreuzberg::listOcrBackends();
    }

    /** List post-processors. */
    public function test_list_post_processors(): void
    {
        $this->expectNotToPerformAssertions();
        Kreuzberg::listPostProcessors();
    }

    /** List renderers. */
    public function test_list_renderers(): void
    {
        $this->expectNotToPerformAssertions();
        Kreuzberg::listRenderers();
    }

    /** List validators. */
    public function test_list_validators(): void
    {
        $this->expectNotToPerformAssertions();
        Kreuzberg::listValidators();
    }
}

40
e2e/php/tests/RendererManagementTest.php generated Normal file
View File

@@ -0,0 +1,40 @@
<?php
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
declare(strict_types=1);
namespace Kreuzberg\E2e;
use PHPUnit\Framework\TestCase;
use Kreuzberg\Kreuzberg;
use Kreuzberg\ExtractionConfig;
/**
 * E2e tests for category: renderer_management.
 *
 * Both cases are "does not throw" checks: no assertions are performed and the
 * return value of the call is discarded.
 */
final class RendererManagementTest extends TestCase
{
    /** Clear all renderers. The resulting list is not inspected. */
    public function test_renderers_clear(): void
    {
        $this->expectNotToPerformAssertions();
        Kreuzberg::clearRenderers();
    }

    /** List all registered renderers. */
    public function test_renderers_list(): void
    {
        $this->expectNotToPerformAssertions();
        Kreuzberg::listRenderers();
    }
}

156
e2e/php/tests/SmokeTest.php generated Normal file
View File

@@ -0,0 +1,156 @@
<?php
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
declare(strict_types=1);
namespace Kreuzberg\E2e;
use PHPUnit\Framework\TestCase;
use Kreuzberg\Kreuzberg;
use Kreuzberg\ExtractionConfig;
/**
 * E2e tests for category: smoke.
 *
 * Each case extracts one fixture and checks the reported MIME type (after
 * trimming whitespace); most also require a minimum content length in bytes
 * and, where listed, the presence of expected text snippets.
 */
final class SmokeTest extends TestCase
{
    /**
     * OCR: PNG image extraction with OCR enabled.
     *
     * Fixture note: in WASM this case exercises the Uint8Array bridge parameter
     * and Promise await in the generated OcrBackend bridge.
     */
    public function test_ocr_image_png(): void
    {
        $contentBytes = file_get_contents('images/test_hello_world.png');
        if ($contentBytes === false) {
            self::fail('failed to read fixture: images/test_hello_world.png');
        }

        $config = ExtractionConfig::from_json('{}');
        $result = Kreuzberg::extractBytes($contentBytes, 'image/png', $config);

        self::assertEquals('image/png', trim($result->mimeType));

        $content = $result->getContent();
        self::assertGreaterThanOrEqual(1, strlen($content));

        // Either capitalisation of either word is accepted.
        $found = str_contains($content, 'Hello')
            || str_contains($content, 'World')
            || str_contains($content, 'hello')
            || str_contains($content, 'world');
        self::assertTrue($found, 'expected to contain at least one of the specified values');
    }

    /** Smoke test: DOCX with formatted text. */
    public function test_smoke_docx_basic(): void
    {
        $docxMime = 'application/vnd.openxmlformats-officedocument.wordprocessingml.document';
        $config = ExtractionConfig::from_json('{}');
        $result = Kreuzberg::extractFile('docx/fake.docx', $docxMime, $config);

        self::assertEquals($docxMime, trim($result->mimeType));

        $content = $result->getContent();
        self::assertGreaterThanOrEqual(20, strlen($content));

        $found = str_contains($content, 'Lorem')
            || str_contains($content, 'ipsum')
            || str_contains($content, 'document')
            || str_contains($content, 'text');
        self::assertTrue($found, 'expected to contain at least one of the specified values');
    }

    /** Smoke test: HTML table extraction. */
    public function test_smoke_html_basic(): void
    {
        $config = ExtractionConfig::from_json('{}');
        $result = Kreuzberg::extractFile('html/simple_table.html', 'text/html', $config);

        self::assertEquals('text/html', trim($result->mimeType));

        $content = $result->getContent();
        self::assertGreaterThanOrEqual(10, strlen($content));

        $found = str_contains($content, 'Sample Data Table')
            || str_contains($content, 'Laptop')
            || str_contains($content, 'Electronics')
            || str_contains($content, 'Product');
        self::assertTrue($found, 'expected to contain at least one of the specified values');
    }

    /** Smoke test: PNG image (without OCR, metadata only). */
    public function test_smoke_image_png(): void
    {
        $config = ExtractionConfig::from_json(json_encode(['disableOcr' => true]));
        // No MIME type is supplied (null); only the reported type is asserted.
        $result = Kreuzberg::extractFile('images/sample.png', null, $config);

        self::assertEquals('image/png', trim($result->mimeType));
    }

    /** Smoke test: JSON file extraction. */
    public function test_smoke_json_basic(): void
    {
        $config = ExtractionConfig::from_json('{}');
        $result = Kreuzberg::extractFile('json/simple.json', 'application/json', $config);

        self::assertEquals('application/json', trim($result->mimeType));
        self::assertGreaterThanOrEqual(5, strlen($result->getContent()));
    }

    /** Smoke test: PDF with simple text extraction. */
    public function test_smoke_pdf_basic(): void
    {
        $config = ExtractionConfig::from_json('{}');
        $result = Kreuzberg::extractFile('pdf/fake_memo.pdf', 'application/pdf', $config);

        self::assertEquals('application/pdf', trim($result->mimeType));

        $content = $result->getContent();
        self::assertGreaterThanOrEqual(50, strlen($content));

        $found = str_contains($content, 'May 5, 2023')
            || str_contains($content, 'To Whom it May Concern');
        self::assertTrue($found, 'expected to contain at least one of the specified values');
    }

    /** Smoke test: Plain text file. */
    public function test_smoke_txt_basic(): void
    {
        $config = ExtractionConfig::from_json('{}');
        $result = Kreuzberg::extractFile('text/report.txt', 'text/plain', $config);

        self::assertEquals('text/plain', trim($result->mimeType));
        self::assertGreaterThanOrEqual(5, strlen($result->getContent()));
    }

    /** Smoke test: XLSX with basic spreadsheet data including tables. */
    public function test_smoke_xlsx_basic(): void
    {
        $xlsxMime = 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet';
        $config = ExtractionConfig::from_json('{}');
        $result = Kreuzberg::extractFile('xlsx/stanley_cups.xlsx', $xlsxMime, $config);

        self::assertEquals($xlsxMime, trim($result->mimeType));
        self::assertGreaterThanOrEqual(100, strlen($result->getContent()));

        // Every listed snippet must be present; checked in this order.
        $expectedSnippets = [
            'Team',
            'Location',
            'Stanley Cups',
            'Blues',
            'Flyers',
            'Maple Leafs',
            'STL',
            'PHI',
            'TOR',
        ];
        foreach ($expectedSnippets as $snippet) {
            self::assertStringContainsString($snippet, $result->getContent());
        }

        // skipped: field 'tables' not available on result type
        // skipped: field 'metadata.format.excel.sheet_count' not available on result type
        // skipped: field 'metadata.format.excel.sheet_names' not available on result type
    }
}

View File

@@ -0,0 +1,40 @@
<?php
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
declare(strict_types=1);
namespace Kreuzberg\E2e;
use PHPUnit\Framework\TestCase;
use Kreuzberg\Kreuzberg;
use Kreuzberg\ExtractionConfig;
/**
 * E2e tests for category: validator_management.
 *
 * Both cases are "does not throw" checks: no assertions are performed and the
 * return value of the call is discarded.
 */
final class ValidatorManagementTest extends TestCase
{
    /** Clear all validators. The resulting list is not inspected. */
    public function test_validators_clear(): void
    {
        $this->expectNotToPerformAssertions();
        Kreuzberg::clearValidators();
    }

    /** List all registered validators. */
    public function test_validators_list(): void
    {
        $this->expectNotToPerformAssertions();
        Kreuzberg::listValidators();
    }
}