This commit is contained in:
54
e2e/php/tests/AsyncTest.php
generated
Normal file
54
e2e/php/tests/AsyncTest.php
generated
Normal file
@@ -0,0 +1,54 @@
|
||||
<?php
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace Kreuzberg\E2e;
|
||||
|
||||
use PHPUnit\Framework\TestCase;
|
||||
use Kreuzberg\Kreuzberg;
|
||||
use Kreuzberg\ExtractionConfig;
|
||||
|
||||
/**
 * End-to-end tests for the "async" fixture category.
 *
 * Fixture paths are relative, so these tests depend on the working directory
 * (presumably set to the fixture root by the suite bootstrap — TODO confirm).
 */
final class AsyncTest extends TestCase
{
    /**
     * Async extract_bytes call on PDF document.
     *
     * Reads the PDF fixture, passes its bytes to Kreuzberg::extractBytes() and checks the
     * reported MIME type and that at least 50 bytes of content were returned.
     */
    public function test_async_extract_bytes(): void
    {
        $contentBytes = file_get_contents("pdf/fake_memo.pdf");
        if ($contentBytes === false) {
            self::fail("failed to read fixture: pdf/fake_memo.pdf");
        }
        $config = ExtractionConfig::from_json('{}');

        $result = Kreuzberg::extractBytes($contentBytes, "application/pdf", $config);

        self::assertSame("application/pdf", trim($result->mimeType));
        self::assertGreaterThanOrEqual(50, strlen($result->getContent()));
    }

    /**
     * extract_bytes empty MIME async.
     *
     * Expects Kreuzberg::extractBytes() to throw when given an empty MIME type.
     */
    public function test_async_extract_bytes_empty_mime(): void
    {
        $contentBytes = file_get_contents("text/plain.txt");
        if ($contentBytes === false) {
            self::fail("failed to read fixture: text/plain.txt");
        }
        $config = ExtractionConfig::from_json('{}');

        // Register the expectation only now: an exception thrown while loading the
        // fixture or building the config must error the test, not satisfy it.
        $this->expectException(\Exception::class);
        Kreuzberg::extractBytes($contentBytes, "", $config);
    }

    /**
     * extract_bytes unsupported MIME async.
     *
     * Expects Kreuzberg::extractBytes() to throw for the MIME type
     * "application/x-nonexistent".
     */
    public function test_async_extract_bytes_invalid_mime(): void
    {
        $contentBytes = file_get_contents("text/plain.txt");
        if ($contentBytes === false) {
            self::fail("failed to read fixture: text/plain.txt");
        }
        $config = ExtractionConfig::from_json('{}');

        // Register the expectation only now: an exception thrown while loading the
        // fixture or building the config must error the test, not satisfy it.
        $this->expectException(\Exception::class);
        Kreuzberg::extractBytes($contentBytes, "application/x-nonexistent", $config);
    }
}
|
||||
124
e2e/php/tests/BatchTest.php
generated
Normal file
124
e2e/php/tests/BatchTest.php
generated
Normal file
@@ -0,0 +1,124 @@
|
||||
<?php
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace Kreuzberg\E2e;
|
||||
|
||||
use PHPUnit\Framework\TestCase;
|
||||
use Kreuzberg\Kreuzberg;
|
||||
use Kreuzberg\BatchBytesItem;
|
||||
use Kreuzberg\BatchFileItem;
|
||||
use Kreuzberg\ExtractionConfig;
|
||||
|
||||
/**
 * End-to-end tests for the "batch" fixture category.
 *
 * Most cases are smoke tests: they declare that no assertions are performed, so they
 * only require the batch call to return without throwing.
 */
final class BatchTest extends TestCase
{
    /** batch_extract_bytes_sync invalid MIME (smoke test, return value not inspected). */
    public function test_batch_bytes_invalid_mime(): void
    {
        $this->expectNotToPerformAssertions();

        $items = [
            // Payload bytes spell "Hello".
            new BatchBytesItem(content: "\x48\x65\x6c\x6c\x6f", mimeType: "application/x-nonexistent"),
        ];
        Kreuzberg::batchExtractBytesSync($items, ExtractionConfig::from_json('{}'));
    }

    /**
     * batch_extract_bytes: happy path with mixed inputs.
     *
     * Submits one plain-text and one HTML item and requires at least one result entry.
     */
    public function test_batch_extract_bytes_happy(): void
    {
        $items = [
            // Payload bytes spell "Hello, world!".
            new BatchBytesItem(
                content: "\x48\x65\x6c\x6c\x6f\x2c\x20\x77\x6f\x72\x6c\x64\x21",
                mimeType: "text/plain",
            ),
            // Payload bytes spell "<html><body>Test</body></html>".
            new BatchBytesItem(
                content: "\x3c\x68\x74\x6d\x6c\x3e\x3c\x62\x6f\x64\x79\x3e\x54\x65\x73\x74\x3c\x2f\x62\x6f\x64\x79\x3e\x3c\x2f\x68\x74\x6d\x6c\x3e",
                mimeType: "text/html",
            ),
        ];

        $result = Kreuzberg::batchExtractBytes($items, ExtractionConfig::from_json('{}'));

        self::assertGreaterThanOrEqual(1, count($result));
    }

    /** batch_extract_bytes: handles unsupported MIME gracefully (smoke test). */
    public function test_batch_extract_bytes_mixed_format(): void
    {
        $this->expectNotToPerformAssertions();

        $items = [
            // Payload bytes spell "PDF placeholder".
            new BatchBytesItem(
                content: "\x50\x44\x46\x20\x70\x6c\x61\x63\x65\x68\x6f\x6c\x64\x65\x72",
                mimeType: "application/x-unknown",
            ),
        ];
        Kreuzberg::batchExtractBytes($items, ExtractionConfig::from_json('{}'));
    }

    /** batch_extract_bytes_sync: empty batch yields an empty result. */
    public function test_batch_extract_bytes_sync_empty_list(): void
    {
        $result = Kreuzberg::batchExtractBytesSync([], ExtractionConfig::from_json('{}'));

        self::assertCount(0, $result);
    }

    /** batch_extract_bytes_sync: unsupported MIME (smoke test, return value not inspected). */
    public function test_batch_extract_bytes_sync_invalid_mime(): void
    {
        $this->expectNotToPerformAssertions();

        $items = [
            // Payload bytes spell "data".
            new BatchBytesItem(content: "\x64\x61\x74\x61", mimeType: "application/x-unknown"),
        ];
        Kreuzberg::batchExtractBytesSync($items, ExtractionConfig::from_json('{}'));
    }

    /** Extract text from multiple files asynchronously (smoke test). */
    public function test_batch_file_async_basic(): void
    {
        $this->expectNotToPerformAssertions();

        $items = [
            new BatchFileItem(path: "pdf/fake_memo.pdf"),
            new BatchFileItem(path: "text/fake_text.txt"),
        ];
        Kreuzberg::batchExtractFiles($items, ExtractionConfig::from_json('{}'));
    }

    /** batch_extract_file async nonexistent: a missing path must not make the call throw. */
    public function test_batch_file_async_not_found(): void
    {
        $this->expectNotToPerformAssertions();

        $items = [new BatchFileItem(path: "/nonexistent/a.pdf")];
        Kreuzberg::batchExtractFiles($items, ExtractionConfig::from_json('{}'));
    }

    /** batch_extract_file_sync nonexistent: missing paths must not make the call throw. */
    public function test_batch_file_not_found(): void
    {
        $this->expectNotToPerformAssertions();

        $items = [
            new BatchFileItem(path: "/nonexistent/a.pdf"),
            new BatchFileItem(path: "/nonexistent/b.txt"),
        ];
        Kreuzberg::batchExtractFilesSync($items, ExtractionConfig::from_json('{}'));
    }

    /** batch_extract_file_sync mixed: one fixture path plus one missing path (smoke test). */
    public function test_batch_file_partial(): void
    {
        $this->expectNotToPerformAssertions();

        $items = [
            new BatchFileItem(path: "text/plain.txt"),
            new BatchFileItem(path: "/nonexistent/missing.pdf"),
        ];
        Kreuzberg::batchExtractFilesSync($items, ExtractionConfig::from_json('{}'));
    }

    /** Extract text from multiple files synchronously (smoke test). */
    public function test_batch_file_sync_basic(): void
    {
        $this->expectNotToPerformAssertions();

        $items = [
            new BatchFileItem(path: "pdf/fake_memo.pdf"),
            new BatchFileItem(path: "text/fake_text.txt"),
        ];
        Kreuzberg::batchExtractFilesSync($items, ExtractionConfig::from_json('{}'));
    }
}
|
||||
34
e2e/php/tests/CodeTest.php
generated
Normal file
34
e2e/php/tests/CodeTest.php
generated
Normal file
@@ -0,0 +1,34 @@
|
||||
<?php
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace Kreuzberg\E2e;
|
||||
|
||||
use PHPUnit\Framework\TestCase;
|
||||
use Kreuzberg\Kreuzberg;
|
||||
use Kreuzberg\ExtractionConfig;
|
||||
|
||||
/**
 * End-to-end tests for the "code" fixture category.
 */
final class CodeTest extends TestCase
{
    /**
     * Test language detection from shebang line.
     *
     * Extracts the shell-script fixture with an explicit source-code MIME type, then
     * checks the reported MIME type, a minimum content length of 10 bytes, and that the
     * content contains both "build" and "clean".
     *
     * NOTE(review): the fixture description says "via bytes input", but the generated
     * call is extractFileSync() with a path — confirm which was intended.
     */
    public function test_code_shebang_detection(): void
    {
        $emptyConfig = ExtractionConfig::from_json('{}');
        $extraction = Kreuzberg::extractFileSync("code/script.sh", "text/x-source-code", $emptyConfig);

        self::assertEquals("text/x-source-code", trim($extraction->mimeType));
        self::assertGreaterThanOrEqual(10, strlen($extraction->getContent()));

        foreach (["build", "clean"] as $expectedWord) {
            self::assertStringContainsString($expectedWord, $extraction->getContent());
        }
    }
}
|
||||
260
e2e/php/tests/ContractTest.php
generated
Normal file
260
e2e/php/tests/ContractTest.php
generated
Normal file
@@ -0,0 +1,260 @@
|
||||
<?php
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace Kreuzberg\E2e;
|
||||
|
||||
use PHPUnit\Framework\TestCase;
|
||||
use Kreuzberg\Kreuzberg;
|
||||
use Kreuzberg\ExtractionConfig;
|
||||
|
||||
/**
 * End-to-end tests for the "contract" fixture category.
 *
 * Comments starting with "skipped:" were emitted by the generator for fixture
 * assertions it could not map onto the PHP result type; they are kept for traceability.
 */
final class ContractTest extends TestCase
{
    /**
     * Tests async batch bytes extraction API (batch_extract_bytes).
     *
     * NOTE(review): despite the description, the generated code calls
     * Kreuzberg::extractFile() on a single file — confirm this mapping is intended.
     */
    public function test_api_batch_bytes_async(): void
    {
        $this->assertFakeMemoExtractedFromFile();
    }

    /**
     * Tests async batch bytes extraction with per-file configs
     * (batch_extract_bytes with file_configs parameter).
     *
     * NOTE(review): despite the description, the generated code calls
     * Kreuzberg::extractFile() on a single file — confirm this mapping is intended.
     */
    public function test_api_batch_bytes_with_configs_async(): void
    {
        $this->assertFakeMemoExtractedFromFileAsMarkdown();
    }

    /**
     * Tests async batch file extraction API (batch_extract_file).
     *
     * NOTE(review): despite the description, the generated code calls
     * Kreuzberg::extractFile() on a single file — confirm this mapping is intended.
     */
    public function test_api_batch_file_async(): void
    {
        $this->assertFakeMemoExtractedFromFile();
    }

    /**
     * Tests async batch file extraction with per-file configs
     * (batch_extract_files with file_configs parameter).
     *
     * NOTE(review): despite the description, the generated code calls
     * Kreuzberg::extractFile() on a single file — confirm this mapping is intended.
     */
    public function test_api_batch_file_with_configs_async(): void
    {
        $this->assertFakeMemoExtractedFromFileAsMarkdown();
    }

    /**
     * Tests async bytes extraction API (extract_bytes).
     *
     * NOTE(review): despite the description, the generated code calls
     * Kreuzberg::extractFile() with a path rather than passing bytes — confirm.
     */
    public function test_api_extract_bytes_async(): void
    {
        $this->assertFakeMemoExtractedFromFile();
    }

    /** Tests async file extraction API (extract_file). */
    public function test_api_extract_file_async(): void
    {
        $this->assertFakeMemoExtractedFromFile();
    }

    /** Tests markdown chunker prepends heading hierarchy to chunk content. */
    public function test_config_chunking_prepend_heading_context(): void
    {
        $config = ExtractionConfig::from_json(json_encode([
            "chunking" => [
                "chunkerType" => "markdown",
                "maxChars" => 300,
                "maxOverlap" => 50,
                "prependHeadingContext" => true,
            ],
        ]));

        $result = Kreuzberg::extractFileSync("markdown/extraction_test.md", null, $config);

        self::assertGreaterThanOrEqual(10, strlen($result->getContent()));
        // skipped: field 'chunks' not available on result type
        // NOTE(review): the generator emitted the assertion below on the same line as the
        // "skipped" comment above, so it has never executed. It stays disabled here;
        // decide whether it should run or be dropped.
        // $this->assertTrue(array_reduce($result->chunks ?? [], fn($carry, $c) => $carry && !empty($c->content), true));
        // skipped: field 'chunks_have_heading_context' not available on result type
        // skipped: field 'first_chunk_starts_with_heading' not available on result type
    }

    /** Tests document structure with DOCX heading-driven nesting. */
    public function test_config_document_structure_with_headings(): void
    {
        $config = ExtractionConfig::from_json(json_encode(["includeDocumentStructure" => true]));

        $result = Kreuzberg::extractFileSync("docx/fake.docx", null, $config);

        self::assertSame(
            "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
            trim($result->mimeType),
        );
        // skipped: field 'document' not available on result type
        // skipped: field 'document.nodes' not available on result type
    }

    /** Tests element-based result format with element type assertions on DOCX. */
    public function test_config_element_types(): void
    {
        $config = ExtractionConfig::from_json(json_encode(["resultFormat" => "element_based"]));

        $result = Kreuzberg::extractFileSync("docx/unit_test_headers.docx", null, $config);

        self::assertStringContainsString(
            "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
            $result->mimeType,
            'expected to contain at least one of the specified values',
        );
        // skipped: field 'elements' not available on result type
    }

    /**
     * Tests that extraction_timeout_secs config field is accepted and does not affect
     * fast extractions.
     */
    public function test_config_extraction_timeout(): void
    {
        $config = ExtractionConfig::from_json(json_encode(["extractionTimeoutSecs" => 300]));

        $result = Kreuzberg::extractFileSync("pdf/fake_memo.pdf", null, $config);

        self::assertSame("application/pdf", trim($result->mimeType));
        self::assertGreaterThanOrEqual(10, strlen($result->getContent()));
    }

    /** Tests keyword extraction via YAKE algorithm. */
    public function test_config_keywords(): void
    {
        $config = ExtractionConfig::from_json(json_encode([
            "keywords" => ["algorithm" => "yake", "maxKeywords" => 10],
        ]));

        $result = Kreuzberg::extractFileSync("pdf/fake_memo.pdf", null, $config);

        self::assertSame("application/pdf", trim($result->mimeType));
        self::assertGreaterThanOrEqual(10, strlen($result->getContent()));
        // skipped: field 'keywords' not available on PHP ExtractionResult
        // skipped: field 'keywords' not available on PHP ExtractionResult
    }

    /** Tests page extraction and page marker configuration. */
    public function test_config_pages(): void
    {
        $config = ExtractionConfig::from_json(json_encode([
            "pages" => ["extractPages" => true, "insertPageMarkers" => true],
        ]));

        $result = Kreuzberg::extractFileSync("pdf/fake_memo.pdf", null, $config);

        self::assertSame("application/pdf", trim($result->mimeType));
        self::assertGreaterThanOrEqual(10, strlen($result->getContent()));
        self::assertStringContainsString(
            "PAGE",
            $result->getContent(),
            'expected to contain at least one of the specified values',
        );
    }

    /** Tests quality scoring produces a score value in [0.0, 1.0]. */
    public function test_config_quality_enabled(): void
    {
        $config = ExtractionConfig::from_json(json_encode(["enableQualityProcessing" => true]));

        $result = Kreuzberg::extractFileSync("pdf/fake_memo.pdf", null, $config);

        self::assertSame("application/pdf", trim($result->mimeType));
        self::assertGreaterThanOrEqual(10, strlen($result->getContent()));
        // skipped: field 'quality_score' not available on result type
        // skipped: field 'quality_score' not available on result type
        // skipped: field 'quality_score' not available on result type
    }

    /** Tests archive extraction with custom security limits. */
    public function test_config_security_limits(): void
    {
        $config = ExtractionConfig::from_json(json_encode([
            "securityLimits" => [
                "maxArchiveSize" => 104857600,
                "maxCompressionRatio" => 50,
                "maxFilesInArchive" => 100,
            ],
        ]));

        $result = Kreuzberg::extractFileSync("archives/documents.zip", null, $config);

        // Either ZIP MIME spelling is accepted.
        self::assertTrue(
            str_contains($result->mimeType, "application/zip")
                || str_contains($result->mimeType, "application/x-zip-compressed"),
            'expected to contain at least one of the specified values',
        );
        self::assertGreaterThanOrEqual(10, strlen($result->getContent()));
    }

    /** Tests tree-sitter configuration round-trip. */
    public function test_config_tree_sitter(): void
    {
        $config = ExtractionConfig::from_json(json_encode([
            "treeSitter" => [
                "groups" => ["web"],
                "languages" => ["python", "rust"],
                "process" => [
                    "comments" => false,
                    "diagnostics" => false,
                    "docstrings" => false,
                    "exports" => true,
                    "imports" => true,
                    "structure" => true,
                    "symbols" => false,
                ],
            ],
        ]));

        $result = Kreuzberg::extractFileSync("code/hello.py", null, $config);

        self::assertSame("text/x-source-code", trim($result->mimeType));
        self::assertGreaterThanOrEqual(5, strlen($result->getContent()));
    }

    /** Tests markdown output format via bytes extraction API. */
    public function test_output_format_bytes_markdown(): void
    {
        $contentBytes = file_get_contents("pdf/fake_memo.pdf");
        if ($contentBytes === false) {
            self::fail("failed to read fixture: pdf/fake_memo.pdf");
        }
        $config = ExtractionConfig::from_json(json_encode(["outputFormat" => "markdown"]));

        $result = Kreuzberg::extractBytesSync($contentBytes, "application/pdf", $config);

        self::assertSame("application/pdf", trim($result->mimeType));
        self::assertGreaterThanOrEqual(10, strlen($result->getContent()));
        // skipped: field 'metadata.output_format' not available on result type
    }

    /** Tests Markdown output format. */
    public function test_output_format_markdown(): void
    {
        $config = ExtractionConfig::from_json(json_encode(["outputFormat" => "markdown"]));

        $result = Kreuzberg::extractFileSync("pdf/fake_memo.pdf", null, $config);

        self::assertSame("application/pdf", trim($result->mimeType));
        self::assertGreaterThanOrEqual(10, strlen($result->getContent()));
        // skipped: field 'metadata.output_format' not available on result type
    }

    /**
     * Shared body of the four "API" tests that the generator emitted with identical code.
     *
     * Extracts the PDF memo fixture through Kreuzberg::extractFile() with an empty
     * config, then checks the MIME type, a minimum content length of 10 bytes, and that
     * the content contains "May 5, 2023" or "Mallori".
     */
    private function assertFakeMemoExtractedFromFile(): void
    {
        $result = Kreuzberg::extractFile("pdf/fake_memo.pdf", null, ExtractionConfig::from_json('{}'));

        self::assertSame("application/pdf", trim($result->mimeType));
        self::assertGreaterThanOrEqual(10, strlen($result->getContent()));
        self::assertTrue(
            str_contains($result->getContent(), "May 5, 2023")
                || str_contains($result->getContent(), "Mallori"),
            'expected to contain at least one of the specified values',
        );
    }

    /**
     * Shared body of the two "with configs" tests that the generator emitted with
     * identical code.
     *
     * Extracts the PDF memo fixture through Kreuzberg::extractFile() with
     * outputFormat set to "markdown", then checks the MIME type and a minimum content
     * length of 10 bytes.
     */
    private function assertFakeMemoExtractedFromFileAsMarkdown(): void
    {
        $config = ExtractionConfig::from_json(json_encode(["outputFormat" => "markdown"]));

        $result = Kreuzberg::extractFile("pdf/fake_memo.pdf", null, $config);

        self::assertSame("application/pdf", trim($result->mimeType));
        self::assertGreaterThanOrEqual(10, strlen($result->getContent()));
        // skipped: field 'metadata.output_format' not available on result type
    }
}
|
||||
63
e2e/php/tests/DetectionTest.php
generated
Normal file
63
e2e/php/tests/DetectionTest.php
generated
Normal file
@@ -0,0 +1,63 @@
|
||||
<?php
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace Kreuzberg\E2e;
|
||||
|
||||
use PHPUnit\Framework\TestCase;
|
||||
use Kreuzberg\Kreuzberg;
|
||||
use Kreuzberg\ExtractionConfig;
|
||||
|
||||
/**
 * End-to-end tests for the "detection" fixture category.
 *
 * The three detect_* cases are smoke tests: the value returned by
 * detectMimeTypeFromBytes() is not inspected, only that the call does not throw.
 */
final class DetectionTest extends TestCase
{
    /** Detect HTML MIME from bytes (smoke test, detected type not checked). */
    public function test_detect_mime_bytes_html(): void
    {
        $contentBytes = file_get_contents("html/html.html");
        if ($contentBytes === false) {
            self::fail("failed to read fixture: html/html.html");
        }

        $this->expectNotToPerformAssertions();
        Kreuzberg::detectMimeTypeFromBytes($contentBytes);
    }

    /** Detect PDF MIME type from bytes (smoke test, detected type not checked). */
    public function test_detect_mime_bytes_pdf(): void
    {
        $contentBytes = file_get_contents("pdf/fake_memo.pdf");
        if ($contentBytes === false) {
            self::fail("failed to read fixture: pdf/fake_memo.pdf");
        }

        $this->expectNotToPerformAssertions();
        Kreuzberg::detectMimeTypeFromBytes($contentBytes);
    }

    /** Detect PNG MIME type from bytes (smoke test, detected type not checked). */
    public function test_detect_mime_bytes_png(): void
    {
        $contentBytes = file_get_contents("images/test_hello_world.png");
        if ($contentBytes === false) {
            self::fail("failed to read fixture: images/test_hello_world.png");
        }

        $this->expectNotToPerformAssertions();
        Kreuzberg::detectMimeTypeFromBytes($contentBytes);
    }

    /** get_extensions unknown MIME: the lookup is expected to throw. */
    public function test_get_extensions_unknown_mime(): void
    {
        $this->expectException(\Exception::class);

        Kreuzberg::getExtensionsForMime("application/x-totally-unknown");
    }
}
|
||||
40
e2e/php/tests/DocumentExtractorManagementTest.php
generated
Normal file
40
e2e/php/tests/DocumentExtractorManagementTest.php
generated
Normal file
@@ -0,0 +1,40 @@
|
||||
<?php
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace Kreuzberg\E2e;
|
||||
|
||||
use PHPUnit\Framework\TestCase;
|
||||
use Kreuzberg\Kreuzberg;
|
||||
use Kreuzberg\ExtractionConfig;
|
||||
|
||||
/**
 * End-to-end tests for the "document_extractor_management" fixture category.
 *
 * Both cases are smoke tests that only require the registry call not to throw.
 */
final class DocumentExtractorManagementTest extends TestCase
{
    /**
     * Clear all document extractors and verify list is empty.
     *
     * NOTE(review): only the clear call is exercised; the "list is empty" part of the
     * description is not asserted.
     */
    public function test_document_extractors_clear(): void
    {
        $this->expectNotToPerformAssertions();

        Kreuzberg::clearDocumentExtractors();
    }

    /**
     * List all registered document extractors.
     *
     * NOTE(review): the array passed below looks like fixture "setup" metadata (lazy-init
     * instructions) rather than an API argument; clearDocumentExtractors() in this class
     * takes none. Confirm that listDocumentExtractors() accepts it.
     */
    public function test_extractors_list(): void
    {
        $this->expectNotToPerformAssertions();

        $setup = [
            "setup" => [
                "lazy_init_required" => [
                    "init_action" => "extract_file_sync",
                    "init_data" => [
                        "create_temp_file" => true,
                        "temp_file_content" => "%PDF-1.4\n%EOF\n",
                        "temp_file_name" => "test.pdf",
                    ],
                    "languages" => ["go"],
                ],
            ],
        ];
        Kreuzberg::listDocumentExtractors($setup);
    }
}
|
||||
51
e2e/php/tests/EmbedAsyncPendingTest.php
generated
Normal file
51
e2e/php/tests/EmbedAsyncPendingTest.php
generated
Normal file
@@ -0,0 +1,51 @@
|
||||
<?php
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace Kreuzberg\E2e;
|
||||
|
||||
use PHPUnit\Framework\TestCase;
|
||||
use Kreuzberg\Kreuzberg;
|
||||
use Kreuzberg\ExtractionConfig;
|
||||
|
||||
/**
 * End-to-end tests for the "embed_async_pending" fixture category.
 */
final class EmbedAsyncPendingTest extends TestCase
{
    /** embed_texts_async: empty text list yields an empty result. */
    public function test_embed_texts_async_empty_input(): void
    {
        $config = \Kreuzberg\EmbeddingConfig::from_json('{}');

        $result = Kreuzberg::embedTextsAsync([], $config);

        self::assertCount(0, $result);
    }

    /** embed_texts_async: basic async embedding returns at least one entry per input text. */
    public function test_embed_texts_async_happy(): void
    {
        $texts = ["First", "Second"];

        $result = Kreuzberg::embedTextsAsync($texts, \Kreuzberg\EmbeddingConfig::from_json('{}'));

        self::assertGreaterThanOrEqual(2, count($result));
    }

    /** embed_texts_async: preset override (smoke test, return value not inspected). */
    public function test_embed_texts_async_preset_switch(): void
    {
        $config = \Kreuzberg\EmbeddingConfig::from_json(json_encode([
            "model" => ["name" => "balanced", "type" => "preset"],
        ]));

        $this->expectNotToPerformAssertions();
        Kreuzberg::embedTextsAsync(["Text"], $config);
    }
}
|
||||
31
e2e/php/tests/EmbedExtraTest.php
generated
Normal file
31
e2e/php/tests/EmbedExtraTest.php
generated
Normal file
@@ -0,0 +1,31 @@
|
||||
<?php
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace Kreuzberg\E2e;
|
||||
|
||||
use PHPUnit\Framework\TestCase;
|
||||
use Kreuzberg\Kreuzberg;
|
||||
use Kreuzberg\EmbeddingConfig;
|
||||
|
||||
/**
 * End-to-end tests for the "embed_extra" fixture category.
 */
final class EmbedExtraTest extends TestCase
{
    /**
     * Batch embed texts.
     *
     * Smoke test: embeds two texts with the "balanced" preset and only requires the call
     * not to throw; the returned embeddings are not inspected.
     */
    public function test_embed_texts_batch(): void
    {
        $config = EmbeddingConfig::from_json(json_encode([
            "model" => ["name" => "balanced", "type" => "preset"],
        ]));

        $this->expectNotToPerformAssertions();
        Kreuzberg::embedTexts(["Hello", "World"], $config);
    }
}
|
||||
40
e2e/php/tests/EmbeddingBackendManagementTest.php
generated
Normal file
40
e2e/php/tests/EmbeddingBackendManagementTest.php
generated
Normal file
@@ -0,0 +1,40 @@
|
||||
<?php
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace Kreuzberg\E2e;
|
||||
|
||||
use PHPUnit\Framework\TestCase;
|
||||
use Kreuzberg\Kreuzberg;
|
||||
use Kreuzberg\ExtractionConfig;
|
||||
|
||||
/**
 * End-to-end tests for the "embedding_backend_management" fixture category.
 *
 * Both cases are smoke tests that only require the registry call not to throw.
 */
final class EmbeddingBackendManagementTest extends TestCase
{
    /**
     * Clear all embedding backends and verify list is empty.
     *
     * NOTE(review): only the clear call is exercised; the "list is empty" part of the
     * description is not asserted.
     */
    public function test_embedding_backends_clear(): void
    {
        $this->expectNotToPerformAssertions();

        Kreuzberg::clearEmbeddingBackends();
    }

    /** List all registered embedding backends (return value not inspected). */
    public function test_embedding_backends_list(): void
    {
        $this->expectNotToPerformAssertions();

        Kreuzberg::listEmbeddingBackends();
    }
}
|
||||
74
e2e/php/tests/EmbeddingsTest.php
generated
Normal file
74
e2e/php/tests/EmbeddingsTest.php
generated
Normal file
@@ -0,0 +1,74 @@
|
||||
<?php
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace Kreuzberg\E2e;
|
||||
|
||||
use PHPUnit\Framework\TestCase;
|
||||
use Kreuzberg\Kreuzberg;
|
||||
use Kreuzberg\EmbeddingConfig;
|
||||
use Kreuzberg\ExtractionConfig;
|
||||
|
||||
/**
 * End-to-end tests for the "embeddings" fixture category.
 */
final class EmbeddingsTest extends TestCase
{
    /** embed_texts: multilingual preset returns at least one entry per input text. */
    public function test_embed_texts_different_preset(): void
    {
        $config = EmbeddingConfig::from_json(json_encode([
            "model" => ["name" => "multilingual", "type" => "preset"],
        ]));

        $result = Kreuzberg::embedTexts(["Hello world", "Test"], $config);

        self::assertGreaterThanOrEqual(2, count($result));
    }

    /** get_embedding_preset: known preset (smoke test, return value not inspected). */
    public function test_get_embedding_preset_known(): void
    {
        $this->expectNotToPerformAssertions();

        Kreuzberg::getEmbeddingPreset("balanced");
    }

    /** get_embedding_preset: nominal case (smoke test, return value not inspected). */
    public function test_get_embedding_preset_nominal(): void
    {
        $this->expectNotToPerformAssertions();

        Kreuzberg::getEmbeddingPreset("balanced");
    }

    /**
     * get_embedding_preset: unknown preset fails.
     *
     * NOTE(review): "fails" is checked here as an empty return value (assertEmpty), not
     * as a thrown exception — confirm that matches the intended contract.
     */
    public function test_get_embedding_preset_unknown(): void
    {
        $result = Kreuzberg::getEmbeddingPreset("nonexistent-xyz");

        self::assertEmpty($result);
    }

    /** list_embedding_presets: returns at least one. */
    public function test_list_embedding_presets_sanity(): void
    {
        $result = Kreuzberg::listEmbeddingPresets();

        self::assertNotEmpty($result);
    }
}
|
||||
73
e2e/php/tests/ErrorTest.php
generated
Normal file
73
e2e/php/tests/ErrorTest.php
generated
Normal file
@@ -0,0 +1,73 @@
|
||||
<?php
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace Kreuzberg\E2e;
|
||||
|
||||
use PHPUnit\Framework\TestCase;
|
||||
use Kreuzberg\Kreuzberg;
|
||||
use Kreuzberg\ExtractionConfig;
|
||||
|
||||
/**
 * End-to-end tests for the "error" category.
 *
 * Exception expectations are armed only after fixture loading has succeeded
 * (and, where the config is a plain '{}', after the config has been built),
 * so an unrelated failure during setup cannot be mistaken for the error the
 * test is looking for.
 */
final class ErrorTest extends TestCase
{
    /** Graceful handling of empty bytes: extracting the empty text fixture must not throw. */
    public function test_error_empty_bytes(): void
    {
        $contentBytes = self::readFixture("text/empty.txt");
        $config = ExtractionConfig::from_json('{}');

        $this->expectNotToPerformAssertions();
        Kreuzberg::extractBytesSync($contentBytes, "text/plain", $config);
    }

    /** Extraction with an empty MIME type must raise an exception. */
    public function test_error_empty_mime(): void
    {
        $contentBytes = self::readFixture("text/plain.txt");
        $config = ExtractionConfig::from_json('{}');

        $this->expectException(\Exception::class);
        Kreuzberg::extractBytesSync($contentBytes, "", $config);
    }

    /** extract_bytes with both disableOcr and forceOcr set must raise an exception. */
    public function test_error_extract_bytes_conflicting_ocr(): void
    {
        $contentBytes = self::readFixture("text/fake_text.txt");

        // Armed before the config is built: it is not evident from this test
        // whether the conflicting flags are rejected by from_json() or only
        // later by extractBytesSync(), so both calls stay inside the expectation.
        $this->expectException(\Exception::class);
        $config = ExtractionConfig::from_json(json_encode(["disableOcr" => true, "forceOcr" => true]));
        Kreuzberg::extractBytesSync($contentBytes, "text/plain", $config);
    }

    /** Extraction with a malformed MIME type string must raise an exception. */
    public function test_error_invalid_mime_format(): void
    {
        $contentBytes = self::readFixture("text/plain.txt");
        $config = ExtractionConfig::from_json('{}');

        $this->expectException(\Exception::class);
        Kreuzberg::extractBytesSync($contentBytes, "not-a-mime", $config);
    }

    /** Extraction with an unsupported MIME type must raise an exception. */
    public function test_error_unsupported_mime(): void
    {
        $contentBytes = self::readFixture("text/plain.txt");
        $config = ExtractionConfig::from_json('{}');

        $this->expectException(\Exception::class);
        Kreuzberg::extractBytesSync($contentBytes, "application/x-nonexistent", $config);
    }

    /**
     * Reads a fixture file and fails the current test if it cannot be read.
     *
     * @param string $relativePath Path handed to file_get_contents() unchanged.
     *
     * @return string Raw file contents (may be the empty string).
     */
    private static function readFixture(string $relativePath): string
    {
        $bytes = file_get_contents($relativePath);
        if ($bytes === false) {
            self::fail("failed to read fixture: " . $relativePath);
        }

        return $bytes;
    }
}
|
||||
84
e2e/php/tests/FormatSpecificTest.php
generated
Normal file
84
e2e/php/tests/FormatSpecificTest.php
generated
Normal file
@@ -0,0 +1,84 @@
|
||||
<?php
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace Kreuzberg\E2e;
|
||||
|
||||
use PHPUnit\Framework\TestCase;
|
||||
use Kreuzberg\Kreuzberg;
|
||||
use Kreuzberg\ExtractionConfig;
|
||||
|
||||
/** End-to-end tests for the "format_specific" category, one test per document format. */
final class FormatSpecificTest extends TestCase
{
    /** DOCX via extract_bytes_sync: extracted content must be at least 20 bytes long. */
    public function test_format_docx_standalone(): void
    {
        $docxBytes = self::loadFixture("docx/fake.docx");

        $extraction = Kreuzberg::extractBytesSync(
            $docxBytes,
            "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
            ExtractionConfig::from_json('{}'),
        );

        self::assertGreaterThanOrEqual(20, strlen($extraction->getContent()));
    }

    /** HWPX via extract_bytes_sync: content must be at least 20 bytes and contain the expected greeting. */
    public function test_format_hwpx_standalone(): void
    {
        $hwpxBytes = self::loadFixture("hwpx/simple.hwpx");

        $extraction = Kreuzberg::extractBytesSync(
            $hwpxBytes,
            "application/haansofthwpx",
            ExtractionConfig::from_json('{}'),
        );

        self::assertGreaterThanOrEqual(20, strlen($extraction->getContent()));
        self::assertStringContainsString("Hello from HWPX", $extraction->getContent());
    }

    /** PDF via extract_bytes_sync: content must be at least 50 bytes and contain one of the expected words. */
    public function test_format_pdf_text(): void
    {
        $pdfBytes = self::loadFixture("pdf/fake_memo.pdf");

        $extraction = Kreuzberg::extractBytesSync(
            $pdfBytes,
            "application/pdf",
            ExtractionConfig::from_json('{}'),
        );

        self::assertGreaterThanOrEqual(50, strlen($extraction->getContent()));

        // Every candidate is checked (no early exit), matching one lookup per candidate.
        $matched = false;
        foreach (["Mallori", "May"] as $needle) {
            if (str_contains($extraction->getContent(), $needle)) {
                $matched = true;
            }
        }
        self::assertTrue($matched, 'expected to contain at least one of the specified values');
    }

    /** PPTX via extract_file_sync: the call only has to complete without throwing. */
    public function test_format_pptx(): void
    {
        $this->expectNotToPerformAssertions();

        Kreuzberg::extractFileSync(
            "pptx/simple.pptx",
            "application/vnd.openxmlformats-officedocument.presentationml.presentation",
            ExtractionConfig::from_json('{}'),
        );
    }

    /** XLSX via extract_file_sync: the call only has to complete without throwing. */
    public function test_format_xlsx(): void
    {
        $this->expectNotToPerformAssertions();

        Kreuzberg::extractFileSync(
            "xlsx/stanley_cups.xlsx",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            ExtractionConfig::from_json('{}'),
        );
    }

    /**
     * Reads a fixture file and fails the current test if it cannot be read.
     *
     * @param string $relativePath Path handed to file_get_contents() unchanged.
     */
    private static function loadFixture(string $relativePath): string
    {
        $bytes = file_get_contents($relativePath);
        if ($bytes === false) {
            self::fail("failed to read fixture: " . $relativePath);
        }

        return $bytes;
    }
}
|
||||
62
e2e/php/tests/MimeUtilitiesTest.php
generated
Normal file
62
e2e/php/tests/MimeUtilitiesTest.php
generated
Normal file
@@ -0,0 +1,62 @@
|
||||
<?php
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace Kreuzberg\E2e;
|
||||
|
||||
use PHPUnit\Framework\TestCase;
|
||||
use Kreuzberg\Kreuzberg;
|
||||
use Kreuzberg\ExtractionConfig;
|
||||
|
||||
/** End-to-end tests for the "mime_utilities" category: MIME detection and extension lookup. */
final class MimeUtilitiesTest extends TestCase
{
    /** MIME detection on the bytes of a PDF fixture must report a type containing "pdf". */
    public function test_mime_detect_bytes(): void
    {
        $pdfBytes = self::loadFixture("pdf/fake_memo.pdf");

        $detected = Kreuzberg::detectMimeTypeFromBytes($pdfBytes);

        self::assertStringContainsString("pdf", $detected);
    }

    /** MIME detection on the bytes of a PNG fixture must report a type containing "png". */
    public function test_mime_detect_image(): void
    {
        $pngBytes = self::loadFixture("images/test_hello_world.png");

        $detected = Kreuzberg::detectMimeTypeFromBytes($pngBytes);

        self::assertStringContainsString("png", $detected);
    }

    /** Extension lookup for application/pdf must contain an entry mentioning "pdf" (case-insensitive). */
    public function test_mime_get_extensions(): void
    {
        $extensions = Kreuzberg::getExtensionsForMime("application/pdf");

        $sawPdf = false;
        foreach ($extensions as $entry) {
            // Objects are compared via their JSON form, everything else via string cast.
            $entryText = is_object($entry) ? json_encode($entry) : (string)$entry;
            $sawPdf = $sawPdf || stripos($entryText, "pdf") !== false;
        }

        self::assertTrue($sawPdf, 'expected array to contain string');
    }

    /**
     * Reads a fixture file and fails the current test if it cannot be read.
     *
     * @param string $relativePath Path handed to file_get_contents() unchanged.
     */
    private static function loadFixture(string $relativePath): string
    {
        $bytes = file_get_contents($relativePath);
        if ($bytes === false) {
            self::fail("failed to read fixture: " . $relativePath);
        }

        return $bytes;
    }
}
|
||||
50
e2e/php/tests/OcrBackendManagementTest.php
generated
Normal file
50
e2e/php/tests/OcrBackendManagementTest.php
generated
Normal file
@@ -0,0 +1,50 @@
|
||||
<?php
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace Kreuzberg\E2e;
|
||||
|
||||
use PHPUnit\Framework\TestCase;
|
||||
use Kreuzberg\Kreuzberg;
|
||||
use Kreuzberg\ExtractionConfig;
|
||||
|
||||
/**
 * End-to-end tests for the "ocr_backend_management" category.
 *
 * Each test only checks that the call completes without throwing; no return
 * value is inspected.
 */
final class OcrBackendManagementTest extends TestCase
{
    /** Clearing all OCR backends must not throw (the resulting list is not inspected here). */
    public function test_ocr_backends_clear(): void
    {
        $this->expectNotToPerformAssertions();

        Kreuzberg::clearOcrBackends();
    }

    /** Listing the registered OCR backends must not throw. */
    public function test_ocr_backends_list(): void
    {
        $this->expectNotToPerformAssertions();

        Kreuzberg::listOcrBackends();
    }

    /** Unregistering a backend name that was never registered must not throw. */
    public function test_ocr_backends_unregister(): void
    {
        $this->expectNotToPerformAssertions();

        Kreuzberg::unregisterOcrBackend("nonexistent-backend-xyz");
    }
}
|
||||
42
e2e/php/tests/PdfTest.php
generated
Normal file
42
e2e/php/tests/PdfTest.php
generated
Normal file
@@ -0,0 +1,42 @@
|
||||
<?php
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace Kreuzberg\E2e;
|
||||
|
||||
use PHPUnit\Framework\TestCase;
|
||||
use Kreuzberg\Kreuzberg;
|
||||
use Kreuzberg\ExtractionConfig;
|
||||
|
||||
/**
 * End-to-end tests for the "pdf" category (page rendering to PNG).
 *
 * The exception expectation is armed only after the fixture has been read,
 * so a setup problem cannot satisfy it by accident.
 */
final class PdfTest extends TestCase
{
    /** render_pdf_page_to_png on page index 0 must return at least 100 bytes. */
    public function test_render_pdf_page_first(): void
    {
        $pdfBytes = self::readFixture("pdf/fake_memo.pdf");

        $png = Kreuzberg::renderPdfPageToPng($pdfBytes, 0);

        self::assertGreaterThanOrEqual(100, strlen($png));
    }

    /** render_pdf_page_to_png with page index 999 must raise an exception. */
    public function test_render_pdf_page_out_of_range(): void
    {
        $pdfBytes = self::readFixture("pdf/fake_memo.pdf");

        $this->expectException(\Exception::class);
        Kreuzberg::renderPdfPageToPng($pdfBytes, 999);
    }

    /**
     * Reads a fixture file and fails the current test if it cannot be read.
     *
     * @param string $relativePath Path handed to file_get_contents() unchanged.
     *
     * @return string Raw file contents.
     */
    private static function readFixture(string $relativePath): string
    {
        $bytes = file_get_contents($relativePath);
        if ($bytes === false) {
            self::fail("failed to read fixture: " . $relativePath);
        }

        return $bytes;
    }
}
|
||||
214
e2e/php/tests/PluginApiTest.php
generated
Normal file
214
e2e/php/tests/PluginApiTest.php
generated
Normal file
@@ -0,0 +1,214 @@
|
||||
<?php
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace Kreuzberg\E2e;
|
||||
|
||||
use PHPUnit\Framework\TestCase;
|
||||
use Kreuzberg\Kreuzberg;
|
||||
use Kreuzberg\ExtractionConfig;
|
||||
|
||||
/**
 * End-to-end tests for the "plugin_api" category.
 *
 * The register_* tests pass an anonymous class implementing the relevant
 * plugin interface, then unregister it by name. No test inspects a return
 * value; each one only has to complete without throwing.
 */
final class PluginApiTest extends TestCase
{
    /** register_document_extractor accepts a PHP-side stub; the stub is unregistered afterwards. */
    public function test_register_document_extractor_trait_bridge(): void
    {
        $this->expectNotToPerformAssertions();

        $extractorStub = new class implements \Kreuzberg\DocumentExtractor {
            public function name(): string { return 'test-extractor'; }
            public function extract_bytes($content, $mime_type, $config): mixed { return '{}'; }
            public function extract_file($path, $mime_type, $config): mixed { return '{}'; }
            public function supported_mime_types(): mixed { return []; }
            public function priority(): mixed { return 1; }
            public function can_handle($_path, $_mime_type): mixed { return false; }
            public function version(): mixed { return ''; }
            public function initialize(): mixed { return null; }
            public function shutdown(): mixed { return null; }
            public function description(): mixed { return ''; }
            public function author(): mixed { return ''; }
        };

        Kreuzberg::registerDocumentExtractor($extractorStub);

        // Remove the stub again under the name it reports.
        Kreuzberg::unregisterDocumentExtractor("test-extractor");
    }

    /** register_embedding_backend accepts a PHP-side stub; the stub is unregistered afterwards. */
    public function test_register_embedding_backend_trait_bridge(): void
    {
        $this->expectNotToPerformAssertions();

        $backendStub = new class implements \Kreuzberg\EmbeddingBackend {
            public function name(): string { return 'test-embedding-backend'; }
            public function dimensions(): mixed { return 1; }
            public function embed($texts): mixed { return []; }
            public function version(): mixed { return ''; }
            public function initialize(): mixed { return null; }
            public function shutdown(): mixed { return null; }
            public function description(): mixed { return ''; }
            public function author(): mixed { return ''; }
        };

        Kreuzberg::registerEmbeddingBackend($backendStub);

        // Remove the stub again under the name it reports.
        Kreuzberg::unregisterEmbeddingBackend("test-embedding-backend");
    }

    /** register_ocr_backend accepts a PHP-side stub; the stub is unregistered afterwards. */
    public function test_register_ocr_backend_trait_bridge(): void
    {
        $this->expectNotToPerformAssertions();

        $ocrStub = new class implements \Kreuzberg\OcrBackend {
            public function name(): string { return 'test-backend'; }
            public function process_image($image_bytes, $config): mixed { return '{}'; }
            public function process_image_file($path, $config): mixed { return '{}'; }
            public function supports_language($lang): mixed { return false; }
            public function backend_type(): mixed { return '{}'; }
            public function supported_languages(): mixed { return []; }
            public function supports_table_detection(): mixed { return false; }
            public function supports_document_processing(): mixed { return false; }
            public function process_document($_path, $_config): mixed { return '{}'; }
            public function version(): mixed { return ''; }
            public function initialize(): mixed { return null; }
            public function shutdown(): mixed { return null; }
            public function description(): mixed { return ''; }
            public function author(): mixed { return ''; }
        };

        Kreuzberg::registerOcrBackend($ocrStub);

        // Remove the stub again under the name it reports.
        Kreuzberg::unregisterOcrBackend("test-backend");
    }

    /** register_post_processor accepts a PHP-side stub; the stub is unregistered afterwards. */
    public function test_register_post_processor_trait_bridge(): void
    {
        $this->expectNotToPerformAssertions();

        $processorStub = new class implements \Kreuzberg\PostProcessor {
            public function name(): string { return 'test-processor'; }
            public function process($result, $config): mixed { return null; }
            public function processing_stage(): mixed { return '{}'; }
            public function should_process($_result, $_config): mixed { return false; }
            public function estimated_duration_ms($_result): mixed { return 1; }
            public function priority(): mixed { return 1; }
            public function version(): mixed { return ''; }
            public function initialize(): mixed { return null; }
            public function shutdown(): mixed { return null; }
            public function description(): mixed { return ''; }
            public function author(): mixed { return ''; }
        };

        Kreuzberg::registerPostProcessor($processorStub);

        // Remove the stub again under the name it reports.
        Kreuzberg::unregisterPostProcessor("test-processor");
    }

    /** register_renderer accepts a PHP-side stub; the stub is unregistered afterwards. */
    public function test_register_renderer_trait_bridge(): void
    {
        $this->expectNotToPerformAssertions();

        $rendererStub = new class implements \Kreuzberg\Renderer {
            public function name(): string { return 'test-renderer'; }
            public function render($doc): mixed { return ''; }
            public function version(): mixed { return ''; }
            public function initialize(): mixed { return null; }
            public function shutdown(): mixed { return null; }
            public function description(): mixed { return ''; }
            public function author(): mixed { return ''; }
        };

        Kreuzberg::registerRenderer($rendererStub);

        // Remove the stub again under the name it reports.
        Kreuzberg::unregisterRenderer("test-renderer");
    }

    /** register_validator accepts a PHP-side stub; the stub is unregistered afterwards. */
    public function test_register_validator_trait_bridge(): void
    {
        $this->expectNotToPerformAssertions();

        $validatorStub = new class implements \Kreuzberg\Validator {
            public function name(): string { return 'test-validator'; }
            public function validate($result, $config): mixed { return null; }
            public function should_validate($_result, $_config): mixed { return false; }
            public function priority(): mixed { return 1; }
            public function version(): mixed { return ''; }
            public function initialize(): mixed { return null; }
            public function shutdown(): mixed { return null; }
            public function description(): mixed { return ''; }
            public function author(): mixed { return ''; }
        };

        Kreuzberg::registerValidator($validatorStub);

        // Remove the stub again under the name it reports.
        Kreuzberg::unregisterValidator("test-validator");
    }

    /** unregister_document_extractor for the stub name must not throw. */
    public function test_unregister_document_extractor_after_register(): void
    {
        $this->expectNotToPerformAssertions();

        Kreuzberg::unregisterDocumentExtractor("test-extractor");
    }

    /** unregister_embedding_backend for the stub name must not throw. */
    public function test_unregister_embedding_backend_after_register(): void
    {
        $this->expectNotToPerformAssertions();

        Kreuzberg::unregisterEmbeddingBackend("test-embedding-backend");
    }

    /** unregister_post_processor for the stub name must not throw. */
    public function test_unregister_post_processor_after_register(): void
    {
        $this->expectNotToPerformAssertions();

        Kreuzberg::unregisterPostProcessor("test-processor");
    }

    /** unregister_renderer for the stub name must not throw. */
    public function test_unregister_renderer_after_register(): void
    {
        $this->expectNotToPerformAssertions();

        Kreuzberg::unregisterRenderer("test-renderer");
    }

    /** unregister_validator for the stub name must not throw. */
    public function test_unregister_validator_after_register(): void
    {
        $this->expectNotToPerformAssertions();

        Kreuzberg::unregisterValidator("test-validator");
    }
}
|
||||
40
e2e/php/tests/PostProcessorManagementTest.php
generated
Normal file
40
e2e/php/tests/PostProcessorManagementTest.php
generated
Normal file
@@ -0,0 +1,40 @@
|
||||
<?php
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace Kreuzberg\E2e;
|
||||
|
||||
use PHPUnit\Framework\TestCase;
|
||||
use Kreuzberg\Kreuzberg;
|
||||
use Kreuzberg\ExtractionConfig;
|
||||
|
||||
/**
 * End-to-end tests for the "post_processor_management" category.
 *
 * Both tests only check that the call completes without throwing.
 */
final class PostProcessorManagementTest extends TestCase
{
    /** Clearing all post-processors must not throw (the resulting list is not inspected here). */
    public function test_post_processors_clear(): void
    {
        $this->expectNotToPerformAssertions();

        Kreuzberg::clearPostProcessors();
    }

    /** Listing the registered post-processors must not throw. */
    public function test_post_processors_list(): void
    {
        $this->expectNotToPerformAssertions();

        Kreuzberg::listPostProcessors();
    }
}
|
||||
50
e2e/php/tests/RegistryOperationsTest.php
generated
Normal file
50
e2e/php/tests/RegistryOperationsTest.php
generated
Normal file
@@ -0,0 +1,50 @@
|
||||
<?php
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace Kreuzberg\E2e;
|
||||
|
||||
use PHPUnit\Framework\TestCase;
|
||||
use Kreuzberg\Kreuzberg;
|
||||
use Kreuzberg\ExtractionConfig;
|
||||
|
||||
/**
 * End-to-end tests for the "registry_operations" category.
 *
 * Each test looks up the extensions for one MIME type and only requires that
 * the lookup completes without throwing.
 */
final class RegistryOperationsTest extends TestCase
{
    /** Extension lookup for the DOCX MIME type must not throw. */
    public function test_extensions_docx(): void
    {
        $this->expectNotToPerformAssertions();

        Kreuzberg::getExtensionsForMime("application/vnd.openxmlformats-officedocument.wordprocessingml.document");
    }

    /** Extension lookup for the HTML MIME type must not throw. */
    public function test_extensions_html(): void
    {
        $this->expectNotToPerformAssertions();

        Kreuzberg::getExtensionsForMime("text/html");
    }

    /** Extension lookup for the PDF MIME type must not throw. */
    public function test_extensions_pdf(): void
    {
        $this->expectNotToPerformAssertions();

        Kreuzberg::getExtensionsForMime("application/pdf");
    }
}
|
||||
80
e2e/php/tests/RegistryTest.php
generated
Normal file
80
e2e/php/tests/RegistryTest.php
generated
Normal file
@@ -0,0 +1,80 @@
|
||||
<?php
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace Kreuzberg\E2e;
|
||||
|
||||
use PHPUnit\Framework\TestCase;
|
||||
use Kreuzberg\Kreuzberg;
|
||||
use Kreuzberg\ExtractionConfig;
|
||||
|
||||
/**
 * End-to-end tests for the "registry" category.
 *
 * Each test calls one list* entry point and only requires that it completes
 * without throwing; the returned lists are not inspected.
 */
final class RegistryTest extends TestCase
{
    /** Listing document extractors must not throw. */
    public function test_list_document_extractors(): void
    {
        $this->expectNotToPerformAssertions();

        Kreuzberg::listDocumentExtractors();
    }

    /** Listing embedding backends must not throw. */
    public function test_list_embedding_backends(): void
    {
        $this->expectNotToPerformAssertions();

        Kreuzberg::listEmbeddingBackends();
    }

    /** Listing OCR backends must not throw. */
    public function test_list_ocr_backends(): void
    {
        $this->expectNotToPerformAssertions();

        Kreuzberg::listOcrBackends();
    }

    /** Listing post-processors must not throw. */
    public function test_list_post_processors(): void
    {
        $this->expectNotToPerformAssertions();

        Kreuzberg::listPostProcessors();
    }

    /** Listing renderers must not throw. */
    public function test_list_renderers(): void
    {
        $this->expectNotToPerformAssertions();

        Kreuzberg::listRenderers();
    }

    /** Listing validators must not throw. */
    public function test_list_validators(): void
    {
        $this->expectNotToPerformAssertions();

        Kreuzberg::listValidators();
    }
}
|
||||
40
e2e/php/tests/RendererManagementTest.php
generated
Normal file
40
e2e/php/tests/RendererManagementTest.php
generated
Normal file
@@ -0,0 +1,40 @@
|
||||
<?php
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace Kreuzberg\E2e;
|
||||
|
||||
use PHPUnit\Framework\TestCase;
|
||||
use Kreuzberg\Kreuzberg;
|
||||
use Kreuzberg\ExtractionConfig;
|
||||
|
||||
/**
 * End-to-end tests for the "renderer_management" category.
 *
 * Both tests only check that the call completes without throwing.
 */
final class RendererManagementTest extends TestCase
{
    /** Clearing all renderers must not throw (the resulting list is not inspected here). */
    public function test_renderers_clear(): void
    {
        $this->expectNotToPerformAssertions();

        Kreuzberg::clearRenderers();
    }

    /** Listing the registered renderers must not throw. */
    public function test_renderers_list(): void
    {
        $this->expectNotToPerformAssertions();

        Kreuzberg::listRenderers();
    }
}
|
||||
156
e2e/php/tests/SmokeTest.php
generated
Normal file
156
e2e/php/tests/SmokeTest.php
generated
Normal file
@@ -0,0 +1,156 @@
|
||||
<?php
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace Kreuzberg\E2e;
|
||||
|
||||
use PHPUnit\Framework\TestCase;
|
||||
use Kreuzberg\Kreuzberg;
|
||||
use Kreuzberg\ExtractionConfig;
|
||||
|
||||
/** E2e tests for category: smoke. */
|
||||
final class SmokeTest extends TestCase
|
||||
{
|
||||
|
||||
/**
 * OCR: PNG image extraction with OCR enabled. In WASM this exercises the Uint8Array bridge
 * parameter and Promise await in the generated OcrBackend bridge.
 *
 * Checks the reported MIME type, that some content came back, and that the
 * content contains "Hello"/"World" in either capitalisation.
 */
public function test_ocr_image_png(): void
{
    $imageBytes = file_get_contents("images/test_hello_world.png");
    if ($imageBytes === false) {
        self::fail("failed to read fixture: images/test_hello_world.png");
    }

    $extraction = Kreuzberg::extractBytes($imageBytes, "image/png", ExtractionConfig::from_json('{}'));

    self::assertEquals("image/png", trim($extraction->mimeType));
    self::assertGreaterThanOrEqual(1, strlen($extraction->getContent()));

    // Every candidate is checked (no early exit), one content lookup per candidate.
    $matched = false;
    foreach (["Hello", "World", "hello", "world"] as $needle) {
        if (str_contains($extraction->getContent(), $needle)) {
            $matched = true;
        }
    }
    self::assertTrue($matched, 'expected to contain at least one of the specified values');
}
|
||||
|
||||
|
||||
/**
 * Smoke test: DOCX with formatted text.
 *
 * Checks the reported MIME type, a minimum content length of 20 bytes, and
 * that at least one of the expected words appears in the content.
 */
public function test_smoke_docx_basic(): void
{
    $extraction = Kreuzberg::extractFile(
        "docx/fake.docx",
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        ExtractionConfig::from_json('{}'),
    );

    self::assertEquals(
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        trim($extraction->mimeType),
    );
    self::assertGreaterThanOrEqual(20, strlen($extraction->getContent()));

    // Every candidate is checked (no early exit), one content lookup per candidate.
    $matched = false;
    foreach (["Lorem", "ipsum", "document", "text"] as $needle) {
        if (str_contains($extraction->getContent(), $needle)) {
            $matched = true;
        }
    }
    self::assertTrue($matched, 'expected to contain at least one of the specified values');
}
|
||||
|
||||
|
||||
/**
 * Smoke test: HTML table extraction.
 *
 * Checks the reported MIME type, a minimum content length of 10 bytes, and
 * that at least one of the expected table strings appears in the content.
 */
public function test_smoke_html_basic(): void
{
    $extraction = Kreuzberg::extractFile(
        "html/simple_table.html",
        "text/html",
        ExtractionConfig::from_json('{}'),
    );

    self::assertEquals("text/html", trim($extraction->mimeType));
    self::assertGreaterThanOrEqual(10, strlen($extraction->getContent()));

    // Every candidate is checked (no early exit), one content lookup per candidate.
    $matched = false;
    foreach (["Sample Data Table", "Laptop", "Electronics", "Product"] as $needle) {
        if (str_contains($extraction->getContent(), $needle)) {
            $matched = true;
        }
    }
    self::assertTrue($matched, 'expected to contain at least one of the specified values');
}
|
||||
|
||||
|
||||
/**
 * Smoke test: PNG image (without OCR, metadata only).
 *
 * No MIME type is passed to the extractor (null); the test then checks that
 * the result reports "image/png".
 */
public function test_smoke_image_png(): void
{
    $ocrDisabled = ExtractionConfig::from_json(json_encode(["disableOcr" => true]));

    $extraction = Kreuzberg::extractFile("images/sample.png", null, $ocrDisabled);

    self::assertEquals("image/png", trim($extraction->mimeType));
}
|
||||
|
||||
|
||||
/**
 * Smoke test: JSON file extraction.
 *
 * Checks the reported MIME type and a minimum content length of 5 bytes.
 */
public function test_smoke_json_basic(): void
{
    $extraction = Kreuzberg::extractFile(
        "json/simple.json",
        "application/json",
        ExtractionConfig::from_json('{}'),
    );

    self::assertEquals("application/json", trim($extraction->mimeType));
    self::assertGreaterThanOrEqual(5, strlen($extraction->getContent()));
}
|
||||
|
||||
|
||||
/**
 * Smoke test: PDF with simple text extraction.
 *
 * Checks the reported MIME type, a minimum content length of 50 bytes, and
 * that at least one of the expected phrases appears in the content.
 */
public function test_smoke_pdf_basic(): void
{
    $extraction = Kreuzberg::extractFile(
        "pdf/fake_memo.pdf",
        "application/pdf",
        ExtractionConfig::from_json('{}'),
    );

    self::assertEquals("application/pdf", trim($extraction->mimeType));
    self::assertGreaterThanOrEqual(50, strlen($extraction->getContent()));

    // Every candidate is checked (no early exit), one content lookup per candidate.
    $matched = false;
    foreach (["May 5, 2023", "To Whom it May Concern"] as $needle) {
        if (str_contains($extraction->getContent(), $needle)) {
            $matched = true;
        }
    }
    self::assertTrue($matched, 'expected to contain at least one of the specified values');
}
|
||||
|
||||
|
||||
/**
 * Smoke test: Plain text file.
 *
 * Extracts text/report.txt as text/plain with an empty ('{}') configuration
 * and checks the reported MIME type plus a minimum content length of 5 bytes.
 */
public function test_smoke_txt_basic(): void
{
    $extraction = Kreuzberg::extractFile(
        "text/report.txt",
        "text/plain",
        \Kreuzberg\ExtractionConfig::from_json('{}'),
    );

    $this->assertEquals("text/plain", trim($extraction->mimeType));
    $this->assertGreaterThanOrEqual(5, strlen($extraction->getContent()));
}
|
||||
|
||||
|
||||
/**
 * Smoke test: XLSX with basic spreadsheet data including tables.
 *
 * Extracts xlsx/stanley_cups.xlsx with an empty ('{}') configuration, then
 * checks the reported MIME type, a minimum content length of 100 bytes, and
 * that every expected header, team name and abbreviation appears in the
 * extracted content.
 */
public function test_smoke_xlsx_basic(): void
{
    $spreadsheetMime = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet";

    $extraction = Kreuzberg::extractFile(
        "xlsx/stanley_cups.xlsx",
        $spreadsheetMime,
        \Kreuzberg\ExtractionConfig::from_json('{}'),
    );

    $this->assertEquals($spreadsheetMime, trim($extraction->mimeType));

    $text = $extraction->getContent();
    $this->assertGreaterThanOrEqual(100, strlen($text));

    // Every entry must be present; the first missing one fails the test.
    $expectedFragments = [
        "Team",
        "Location",
        "Stanley Cups",
        "Blues",
        "Flyers",
        "Maple Leafs",
        "STL",
        "PHI",
        "TOR",
    ];
    foreach ($expectedFragments as $fragment) {
        $this->assertStringContainsString($fragment, $text);
    }

    // skipped: field 'tables' not available on result type
    // skipped: field 'metadata.format.excel.sheet_count' not available on result type
    // skipped: field 'metadata.format.excel.sheet_names' not available on result type
}
|
||||
|
||||
}
|
||||
40
e2e/php/tests/ValidatorManagementTest.php
generated
Normal file
40
e2e/php/tests/ValidatorManagementTest.php
generated
Normal file
@@ -0,0 +1,40 @@
|
||||
<?php
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace Kreuzberg\E2e;
|
||||
|
||||
use PHPUnit\Framework\TestCase;
|
||||
use Kreuzberg\Kreuzberg;
|
||||
use Kreuzberg\ExtractionConfig;
|
||||
|
||||
/** E2e tests for category: validator_management. */
final class ValidatorManagementTest extends TestCase
{
    /**
     * Clear all validators.
     *
     * Smoke check only: the test declares that it performs no assertions, so
     * it passes as long as the call completes without throwing. The validator
     * list is not inspected afterwards.
     */
    public function test_validators_clear(): void
    {
        $this->expectNotToPerformAssertions();

        // The return value is not used; only a clean call is being checked.
        Kreuzberg::clearValidators();
    }

    /**
     * List all registered validators.
     *
     * Smoke check only: the test declares that it performs no assertions, so
     * the returned list is not examined.
     */
    public function test_validators_list(): void
    {
        $this->expectNotToPerformAssertions();

        // The return value is not used; only a clean call is being checked.
        Kreuzberg::listValidators();
    }
}
|
||||
Reference in New Issue
Block a user