This commit is contained in:
30
fixtures/async/async_extract_bytes.json
Normal file
30
fixtures/async/async_extract_bytes.json
Normal file
@@ -0,0 +1,30 @@
|
||||
{
|
||||
"id": "async_extract_bytes",
|
||||
"category": "async",
|
||||
"description": "Async extract_bytes call on PDF document",
|
||||
"tags": ["async", "api", "extract_bytes"],
|
||||
"call": "extract_bytes",
|
||||
"skip": {
|
||||
"languages": ["wasm"],
|
||||
"reason": "WASM uses synchronous-only API; async extraction is not available on the wasm target"
|
||||
},
|
||||
"input": {
|
||||
"data": "pdf/fake_memo.pdf",
|
||||
"mime_type": "application/pdf"
|
||||
},
|
||||
"assertions": [
|
||||
{
|
||||
"type": "not_error"
|
||||
},
|
||||
{
|
||||
"type": "equals",
|
||||
"field": "mime_type",
|
||||
"value": "application/pdf"
|
||||
},
|
||||
{
|
||||
"type": "min_length",
|
||||
"field": "content",
|
||||
"value": 50
|
||||
}
|
||||
]
|
||||
}
|
||||
9
fixtures/async/async_extract_bytes_empty_mime.json
Normal file
9
fixtures/async/async_extract_bytes_empty_mime.json
Normal file
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"id": "async_extract_bytes_empty_mime",
|
||||
"category": "async",
|
||||
"description": "Async extract_bytes call with an empty MIME type returns an error",
|
||||
"tags": ["async", "error"],
|
||||
"call": "extract_bytes",
|
||||
"input": { "data": "text/plain.txt", "mime_type": "", "config": {} },
|
||||
"assertions": [{ "type": "error" }]
|
||||
}
|
||||
9
fixtures/async/async_extract_bytes_invalid_mime.json
Normal file
9
fixtures/async/async_extract_bytes_invalid_mime.json
Normal file
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"id": "async_extract_bytes_invalid_mime",
|
||||
"category": "async",
|
||||
"description": "Async extract_bytes call with an unsupported MIME type returns an error",
|
||||
"tags": ["async", "error"],
|
||||
"call": "extract_bytes",
|
||||
"input": { "data": "text/plain.txt", "mime_type": "application/x-nonexistent", "config": {} },
|
||||
"assertions": [{ "type": "error" }]
|
||||
}
|
||||
11
fixtures/batch/batch_bytes_invalid_mime.json
Normal file
11
fixtures/batch/batch_bytes_invalid_mime.json
Normal file
@@ -0,0 +1,11 @@
|
||||
{
|
||||
"id": "batch_bytes_invalid_mime",
|
||||
"category": "batch",
|
||||
"description": "batch_extract_bytes_sync: an item with an unsupported MIME type does not fail the batch call",
|
||||
"tags": ["batch", "error"],
|
||||
"call": "batch_extract_bytes_sync",
|
||||
"input": {
|
||||
"items": [{ "content": [72, 101, 108, 108, 111], "mime_type": "application/x-nonexistent" }]
|
||||
},
|
||||
"assertions": [{ "type": "not_error" }]
|
||||
}
|
||||
30
fixtures/batch/batch_extract_bytes_happy.json
Normal file
30
fixtures/batch/batch_extract_bytes_happy.json
Normal file
@@ -0,0 +1,30 @@
|
||||
{
|
||||
"id": "batch_extract_bytes_happy",
|
||||
"category": "batch",
|
||||
"description": "batch_extract_bytes: happy path with mixed inputs",
|
||||
"call": "batch_extract_bytes",
|
||||
"input": {
|
||||
"items": [
|
||||
{
|
||||
"content": [72, 101, 108, 108, 111, 44, 32, 119, 111, 114, 108, 100, 33],
|
||||
"mime_type": "text/plain"
|
||||
},
|
||||
{
|
||||
"content": [
|
||||
60, 104, 116, 109, 108, 62, 60, 98, 111, 100, 121, 62, 84, 101, 115, 116, 60, 47, 98, 111,
|
||||
100, 121, 62, 60, 47, 104, 116, 109, 108, 62
|
||||
],
|
||||
"mime_type": "text/html"
|
||||
}
|
||||
]
|
||||
},
|
||||
"assertions": [
|
||||
{
|
||||
"type": "not_error"
|
||||
},
|
||||
{
|
||||
"type": "count_min",
|
||||
"value": 1
|
||||
}
|
||||
]
|
||||
}
|
||||
19
fixtures/batch/batch_extract_bytes_mixed_format.json
Normal file
19
fixtures/batch/batch_extract_bytes_mixed_format.json
Normal file
@@ -0,0 +1,19 @@
|
||||
{
|
||||
"id": "batch_extract_bytes_mixed_format",
|
||||
"category": "batch",
|
||||
"description": "batch_extract_bytes: handles unsupported MIME gracefully",
|
||||
"call": "batch_extract_bytes",
|
||||
"input": {
|
||||
"items": [
|
||||
{
|
||||
"content": [80, 68, 70, 32, 112, 108, 97, 99, 101, 104, 111, 108, 100, 101, 114],
|
||||
"mime_type": "application/x-unknown"
|
||||
}
|
||||
]
|
||||
},
|
||||
"assertions": [
|
||||
{
|
||||
"type": "not_error"
|
||||
}
|
||||
]
|
||||
}
|
||||
51
fixtures/batch/batch_extract_bytes_size_cap.json
Normal file
51
fixtures/batch/batch_extract_bytes_size_cap.json
Normal file
@@ -0,0 +1,51 @@
|
||||
{
|
||||
"id": "batch_extract_bytes_size_cap",
|
||||
"category": "batch",
|
||||
"description": "batch_extract_bytes: archive size cap triggers error",
|
||||
"call": "batch_extract_bytes",
|
||||
"skip": {
|
||||
"languages": [
|
||||
"rust",
|
||||
"node",
|
||||
"python",
|
||||
"php",
|
||||
"wasm",
|
||||
"go",
|
||||
"r",
|
||||
"ruby",
|
||||
"csharp",
|
||||
"elixir",
|
||||
"kotlin",
|
||||
"kotlin_android",
|
||||
"swift",
|
||||
"zig",
|
||||
"java",
|
||||
"dart"
|
||||
],
|
||||
"reason": "SecurityLimits.max_content_size is only enforced by archive/Excel extractors; test requires actual archive format to trigger error, which is not easily testable via byte fixtures"
|
||||
},
|
||||
"input": {
|
||||
"items": [
|
||||
{
|
||||
"content": [
|
||||
97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97,
|
||||
97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97,
|
||||
97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97,
|
||||
97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97,
|
||||
97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97
|
||||
],
|
||||
"mime_type": "text/plain"
|
||||
}
|
||||
],
|
||||
"config": {
|
||||
"security_limits": {
|
||||
"max_content_size": 1
|
||||
}
|
||||
}
|
||||
},
|
||||
"assertions": [
|
||||
{
|
||||
"type": "error"
|
||||
}
|
||||
]
|
||||
}
|
||||
18
fixtures/batch/batch_extract_bytes_sync_empty_list.json
Normal file
18
fixtures/batch/batch_extract_bytes_sync_empty_list.json
Normal file
@@ -0,0 +1,18 @@
|
||||
{
|
||||
"id": "batch_extract_bytes_sync_empty_list",
|
||||
"category": "batch",
|
||||
"description": "batch_extract_bytes_sync: empty batch",
|
||||
"call": "batch_extract_bytes_sync",
|
||||
"input": {
|
||||
"items": []
|
||||
},
|
||||
"assertions": [
|
||||
{
|
||||
"type": "not_error"
|
||||
},
|
||||
{
|
||||
"type": "count_equals",
|
||||
"value": 0
|
||||
}
|
||||
]
|
||||
}
|
||||
19
fixtures/batch/batch_extract_bytes_sync_invalid_mime.json
Normal file
19
fixtures/batch/batch_extract_bytes_sync_invalid_mime.json
Normal file
@@ -0,0 +1,19 @@
|
||||
{
|
||||
"id": "batch_extract_bytes_sync_invalid_mime",
|
||||
"category": "batch",
|
||||
"description": "batch_extract_bytes_sync: unsupported MIME",
|
||||
"call": "batch_extract_bytes_sync",
|
||||
"input": {
|
||||
"items": [
|
||||
{
|
||||
"content": [100, 97, 116, 97],
|
||||
"mime_type": "application/x-unknown"
|
||||
}
|
||||
]
|
||||
},
|
||||
"assertions": [
|
||||
{
|
||||
"type": "not_error"
|
||||
}
|
||||
]
|
||||
}
|
||||
33
fixtures/batch/batch_file_async_basic.json
Normal file
33
fixtures/batch/batch_file_async_basic.json
Normal file
@@ -0,0 +1,33 @@
|
||||
{
|
||||
"id": "batch_file_async_basic",
|
||||
"category": "batch",
|
||||
"description": "Extract text from multiple files asynchronously",
|
||||
"tags": [
|
||||
"batch",
|
||||
"async",
|
||||
"concurrent",
|
||||
"multiple_files"
|
||||
],
|
||||
"call": "batch_extract_files",
|
||||
"input": {
|
||||
"paths": [
|
||||
{
|
||||
"path": "pdf/fake_memo.pdf"
|
||||
},
|
||||
{
|
||||
"path": "text/fake_text.txt"
|
||||
}
|
||||
]
|
||||
},
|
||||
"assertions": [
|
||||
{
|
||||
"type": "not_error"
|
||||
}
|
||||
],
|
||||
"skip": {
|
||||
"languages": [
|
||||
"wasm"
|
||||
],
|
||||
"reason": "WASM cannot access filesystem; use extractBytes with file content instead"
|
||||
}
|
||||
}
|
||||
28
fixtures/batch/batch_file_async_not_found.json
Normal file
28
fixtures/batch/batch_file_async_not_found.json
Normal file
@@ -0,0 +1,28 @@
|
||||
{
|
||||
"id": "batch_file_async_not_found",
|
||||
"category": "batch",
|
||||
"description": "batch_extract_files: a nonexistent path does not fail the async batch call",
|
||||
"tags": [
|
||||
"batch",
|
||||
"async"
|
||||
],
|
||||
"call": "batch_extract_files",
|
||||
"input": {
|
||||
"paths": [
|
||||
{
|
||||
"path": "/nonexistent/a.pdf"
|
||||
}
|
||||
]
|
||||
},
|
||||
"assertions": [
|
||||
{
|
||||
"type": "not_error"
|
||||
}
|
||||
],
|
||||
"skip": {
|
||||
"languages": [
|
||||
"wasm"
|
||||
],
|
||||
"reason": "WASM cannot access filesystem; use extractBytes with file content instead"
|
||||
}
|
||||
}
|
||||
31
fixtures/batch/batch_file_not_found.json
Normal file
31
fixtures/batch/batch_file_not_found.json
Normal file
@@ -0,0 +1,31 @@
|
||||
{
|
||||
"id": "batch_file_not_found",
|
||||
"category": "batch",
|
||||
"description": "batch_extract_files_sync: nonexistent paths do not fail the batch call",
|
||||
"tags": [
|
||||
"batch",
|
||||
"error"
|
||||
],
|
||||
"call": "batch_extract_files_sync",
|
||||
"input": {
|
||||
"paths": [
|
||||
{
|
||||
"path": "/nonexistent/a.pdf"
|
||||
},
|
||||
{
|
||||
"path": "/nonexistent/b.txt"
|
||||
}
|
||||
]
|
||||
},
|
||||
"assertions": [
|
||||
{
|
||||
"type": "not_error"
|
||||
}
|
||||
],
|
||||
"skip": {
|
||||
"languages": [
|
||||
"wasm"
|
||||
],
|
||||
"reason": "WASM cannot access filesystem; use extractBytes with file content instead"
|
||||
}
|
||||
}
|
||||
30
fixtures/batch/batch_file_partial.json
Normal file
30
fixtures/batch/batch_file_partial.json
Normal file
@@ -0,0 +1,30 @@
|
||||
{
|
||||
"id": "batch_file_partial",
|
||||
"category": "batch",
|
||||
"description": "batch_extract_files_sync: a mix of existing and nonexistent paths does not fail the batch call",
|
||||
"tags": [
|
||||
"batch"
|
||||
],
|
||||
"call": "batch_extract_files_sync",
|
||||
"input": {
|
||||
"paths": [
|
||||
{
|
||||
"path": "text/plain.txt"
|
||||
},
|
||||
{
|
||||
"path": "/nonexistent/missing.pdf"
|
||||
}
|
||||
]
|
||||
},
|
||||
"assertions": [
|
||||
{
|
||||
"type": "not_error"
|
||||
}
|
||||
],
|
||||
"skip": {
|
||||
"languages": [
|
||||
"wasm"
|
||||
],
|
||||
"reason": "WASM cannot access filesystem; use extractBytes with file content instead"
|
||||
}
|
||||
}
|
||||
32
fixtures/batch/batch_file_sync_basic.json
Normal file
32
fixtures/batch/batch_file_sync_basic.json
Normal file
@@ -0,0 +1,32 @@
|
||||
{
|
||||
"id": "batch_file_sync_basic",
|
||||
"category": "batch",
|
||||
"description": "Extract text from multiple files synchronously",
|
||||
"tags": [
|
||||
"batch",
|
||||
"sync",
|
||||
"multiple_files"
|
||||
],
|
||||
"call": "batch_extract_files_sync",
|
||||
"input": {
|
||||
"paths": [
|
||||
{
|
||||
"path": "pdf/fake_memo.pdf"
|
||||
},
|
||||
{
|
||||
"path": "text/fake_text.txt"
|
||||
}
|
||||
]
|
||||
},
|
||||
"assertions": [
|
||||
{
|
||||
"type": "not_error"
|
||||
}
|
||||
],
|
||||
"skip": {
|
||||
"languages": [
|
||||
"wasm"
|
||||
],
|
||||
"reason": "WASM cannot access filesystem; use extractBytes with file content instead"
|
||||
}
|
||||
}
|
||||
41
fixtures/code/code_shebang_detection.json
Normal file
41
fixtures/code/code_shebang_detection.json
Normal file
@@ -0,0 +1,41 @@
|
||||
{
|
||||
"id": "code_shebang_detection",
|
||||
"category": "code",
|
||||
"description": "Test language detection from shebang line via file path input",
|
||||
"tags": [
|
||||
"code",
|
||||
"shebang",
|
||||
"tree-sitter"
|
||||
],
|
||||
"call": "extract_file_sync",
|
||||
"input": {
|
||||
"path": "code/script.sh",
|
||||
"mime_type": "text/x-source-code"
|
||||
},
|
||||
"assertions": [
|
||||
{
|
||||
"type": "equals",
|
||||
"field": "mime_type",
|
||||
"value": "text/x-source-code"
|
||||
},
|
||||
{
|
||||
"type": "min_length",
|
||||
"field": "content",
|
||||
"value": 10
|
||||
},
|
||||
{
|
||||
"type": "contains_all",
|
||||
"field": "content",
|
||||
"values": [
|
||||
"build",
|
||||
"clean"
|
||||
]
|
||||
}
|
||||
],
|
||||
"skip": {
|
||||
"languages": [
|
||||
"wasm"
|
||||
],
|
||||
"reason": "WASM cannot access filesystem; use extractBytes with file content instead"
|
||||
}
|
||||
}
|
||||
30
fixtures/contract/api_batch_bytes_async.json
Normal file
30
fixtures/contract/api_batch_bytes_async.json
Normal file
@@ -0,0 +1,30 @@
|
||||
{
|
||||
"id": "api_batch_bytes_async",
|
||||
"description": "Tests async batch bytes extraction API (batch_extract_bytes)",
|
||||
"tags": ["contract", "api", "batch"],
|
||||
"call": "extract_file",
|
||||
"skip": {
|
||||
"languages": ["wasm"],
|
||||
"reason": "WASM uses synchronous-only API; async extraction is not available on the wasm target"
|
||||
},
|
||||
"input": {
|
||||
"path": "pdf/fake_memo.pdf"
|
||||
},
|
||||
"assertions": [
|
||||
{
|
||||
"type": "equals",
|
||||
"field": "mime_type",
|
||||
"value": "application/pdf"
|
||||
},
|
||||
{
|
||||
"type": "min_length",
|
||||
"field": "content",
|
||||
"value": 10
|
||||
},
|
||||
{
|
||||
"type": "contains_any",
|
||||
"field": "content",
|
||||
"values": ["May 5, 2023", "Mallori"]
|
||||
}
|
||||
]
|
||||
}
|
||||
33
fixtures/contract/api_batch_bytes_with_configs_async.json
Normal file
33
fixtures/contract/api_batch_bytes_with_configs_async.json
Normal file
@@ -0,0 +1,33 @@
|
||||
{
|
||||
"id": "api_batch_bytes_with_configs_async",
|
||||
"description": "Tests async batch bytes extraction with per-file configs (batch_extract_bytes with file_configs parameter)",
|
||||
"tags": ["contract", "api", "batch", "file_config"],
|
||||
"call": "extract_file",
|
||||
"input": {
|
||||
"path": "pdf/fake_memo.pdf",
|
||||
"config": {
|
||||
"output_format": "markdown"
|
||||
}
|
||||
},
|
||||
"assertions": [
|
||||
{
|
||||
"type": "equals",
|
||||
"field": "mime_type",
|
||||
"value": "application/pdf"
|
||||
},
|
||||
{
|
||||
"type": "min_length",
|
||||
"field": "content",
|
||||
"value": 10
|
||||
},
|
||||
{
|
||||
"type": "equals",
|
||||
"field": "metadata.output_format",
|
||||
"value": "markdown"
|
||||
}
|
||||
],
|
||||
"skip": {
|
||||
"languages": ["wasm"],
|
||||
"reason": "WASM uses synchronous-only API; async extraction is not available on the wasm target"
|
||||
}
|
||||
}
|
||||
30
fixtures/contract/api_batch_file_async.json
Normal file
30
fixtures/contract/api_batch_file_async.json
Normal file
@@ -0,0 +1,30 @@
|
||||
{
|
||||
"id": "api_batch_file_async",
|
||||
"description": "Tests async batch file extraction API (batch_extract_file)",
|
||||
"tags": ["contract", "api", "batch"],
|
||||
"call": "extract_file",
|
||||
"input": {
|
||||
"path": "pdf/fake_memo.pdf"
|
||||
},
|
||||
"assertions": [
|
||||
{
|
||||
"type": "equals",
|
||||
"field": "mime_type",
|
||||
"value": "application/pdf"
|
||||
},
|
||||
{
|
||||
"type": "min_length",
|
||||
"field": "content",
|
||||
"value": 10
|
||||
},
|
||||
{
|
||||
"type": "contains_any",
|
||||
"field": "content",
|
||||
"values": ["May 5, 2023", "Mallori"]
|
||||
}
|
||||
],
|
||||
"skip": {
|
||||
"languages": ["wasm"],
|
||||
"reason": "WASM uses synchronous-only API; async extraction is not available on the wasm target"
|
||||
}
|
||||
}
|
||||
33
fixtures/contract/api_batch_file_with_configs_async.json
Normal file
33
fixtures/contract/api_batch_file_with_configs_async.json
Normal file
@@ -0,0 +1,33 @@
|
||||
{
|
||||
"id": "api_batch_file_with_configs_async",
|
||||
"description": "Tests async batch file extraction with per-file configs (batch_extract_files with file_configs parameter)",
|
||||
"tags": ["contract", "api", "batch", "file_config"],
|
||||
"call": "extract_file",
|
||||
"input": {
|
||||
"path": "pdf/fake_memo.pdf",
|
||||
"config": {
|
||||
"output_format": "markdown"
|
||||
}
|
||||
},
|
||||
"assertions": [
|
||||
{
|
||||
"type": "equals",
|
||||
"field": "mime_type",
|
||||
"value": "application/pdf"
|
||||
},
|
||||
{
|
||||
"type": "min_length",
|
||||
"field": "content",
|
||||
"value": 10
|
||||
},
|
||||
{
|
||||
"type": "equals",
|
||||
"field": "metadata.output_format",
|
||||
"value": "markdown"
|
||||
}
|
||||
],
|
||||
"skip": {
|
||||
"languages": ["wasm"],
|
||||
"reason": "WASM uses synchronous-only API; async extraction is not available on the wasm target"
|
||||
}
|
||||
}
|
||||
30
fixtures/contract/api_extract_bytes_async.json
Normal file
30
fixtures/contract/api_extract_bytes_async.json
Normal file
@@ -0,0 +1,30 @@
|
||||
{
|
||||
"id": "api_extract_bytes_async",
|
||||
"description": "Tests async bytes extraction API (extract_bytes)",
|
||||
"tags": ["contract", "api"],
|
||||
"call": "extract_file",
|
||||
"input": {
|
||||
"path": "pdf/fake_memo.pdf"
|
||||
},
|
||||
"assertions": [
|
||||
{
|
||||
"type": "equals",
|
||||
"field": "mime_type",
|
||||
"value": "application/pdf"
|
||||
},
|
||||
{
|
||||
"type": "min_length",
|
||||
"field": "content",
|
||||
"value": 10
|
||||
},
|
||||
{
|
||||
"type": "contains_any",
|
||||
"field": "content",
|
||||
"values": ["May 5, 2023", "Mallori"]
|
||||
}
|
||||
],
|
||||
"skip": {
|
||||
"languages": ["wasm"],
|
||||
"reason": "WASM uses synchronous-only API; async extraction is not available on the wasm target"
|
||||
}
|
||||
}
|
||||
29
fixtures/contract/api_extract_file_async.json
Normal file
29
fixtures/contract/api_extract_file_async.json
Normal file
@@ -0,0 +1,29 @@
|
||||
{
|
||||
"id": "api_extract_file_async",
|
||||
"description": "Tests async file extraction API (extract_file)",
|
||||
"tags": ["contract", "api"],
|
||||
"input": {
|
||||
"path": "pdf/fake_memo.pdf"
|
||||
},
|
||||
"assertions": [
|
||||
{
|
||||
"type": "equals",
|
||||
"field": "mime_type",
|
||||
"value": "application/pdf"
|
||||
},
|
||||
{
|
||||
"type": "min_length",
|
||||
"field": "content",
|
||||
"value": 10
|
||||
},
|
||||
{
|
||||
"type": "contains_any",
|
||||
"field": "content",
|
||||
"values": ["May 5, 2023", "Mallori"]
|
||||
}
|
||||
],
|
||||
"skip": {
|
||||
"languages": ["wasm"],
|
||||
"reason": "WASM uses synchronous-only API; async extraction is not available on the wasm target"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,52 @@
|
||||
{
|
||||
"id": "config_chunking_prepend_heading_context",
|
||||
"description": "Tests markdown chunker prepends heading hierarchy to chunk content",
|
||||
"tags": [
|
||||
"contract",
|
||||
"config",
|
||||
"chunking",
|
||||
"heading-context"
|
||||
],
|
||||
"call": "extract_file_sync",
|
||||
"input": {
|
||||
"path": "markdown/extraction_test.md",
|
||||
"config": {
|
||||
"chunking": {
|
||||
"chunker_type": "markdown",
|
||||
"max_chars": 300,
|
||||
"max_overlap": 50,
|
||||
"prepend_heading_context": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"assertions": [
|
||||
{
|
||||
"type": "min_length",
|
||||
"field": "content",
|
||||
"value": 10
|
||||
},
|
||||
{
|
||||
"type": "count_min",
|
||||
"field": "chunks",
|
||||
"value": 2
|
||||
},
|
||||
{
|
||||
"type": "is_true",
|
||||
"field": "chunks_have_content"
|
||||
},
|
||||
{
|
||||
"type": "is_true",
|
||||
"field": "chunks_have_heading_context"
|
||||
},
|
||||
{
|
||||
"type": "is_true",
|
||||
"field": "first_chunk_starts_with_heading"
|
||||
}
|
||||
],
|
||||
"skip": {
|
||||
"languages": [
|
||||
"wasm"
|
||||
],
|
||||
"reason": "WASM cannot access filesystem; use extractBytes with file content instead"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,37 @@
|
||||
{
|
||||
"id": "config_document_structure_with_headings",
|
||||
"description": "Tests document structure with DOCX heading-driven nesting",
|
||||
"tags": [
|
||||
"contract",
|
||||
"document_structure"
|
||||
],
|
||||
"call": "extract_file_sync",
|
||||
"input": {
|
||||
"path": "docx/fake.docx",
|
||||
"config": {
|
||||
"include_document_structure": true
|
||||
}
|
||||
},
|
||||
"assertions": [
|
||||
{
|
||||
"type": "equals",
|
||||
"field": "mime_type",
|
||||
"value": "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
|
||||
},
|
||||
{
|
||||
"type": "not_empty",
|
||||
"field": "document"
|
||||
},
|
||||
{
|
||||
"type": "count_min",
|
||||
"field": "document.nodes",
|
||||
"value": 1
|
||||
}
|
||||
],
|
||||
"skip": {
|
||||
"languages": [
|
||||
"wasm"
|
||||
],
|
||||
"reason": "WASM cannot access filesystem; use extractBytes with file content instead"
|
||||
}
|
||||
}
|
||||
36
fixtures/contract/config_element_types.json
Normal file
36
fixtures/contract/config_element_types.json
Normal file
@@ -0,0 +1,36 @@
|
||||
{
|
||||
"id": "config_element_types",
|
||||
"description": "Tests element-based result format with element type assertions on DOCX",
|
||||
"tags": [
|
||||
"contract",
|
||||
"config",
|
||||
"result_format"
|
||||
],
|
||||
"call": "extract_file_sync",
|
||||
"input": {
|
||||
"path": "docx/unit_test_headers.docx",
|
||||
"config": {
|
||||
"result_format": "element_based"
|
||||
}
|
||||
},
|
||||
"assertions": [
|
||||
{
|
||||
"type": "contains_any",
|
||||
"field": "mime_type",
|
||||
"values": [
|
||||
"application/vnd.openxmlformats-officedocument.wordprocessingml.document"
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "count_min",
|
||||
"field": "elements",
|
||||
"value": 1
|
||||
}
|
||||
],
|
||||
"skip": {
|
||||
"languages": [
|
||||
"wasm"
|
||||
],
|
||||
"reason": "WASM cannot access filesystem; use extractBytes with file content instead"
|
||||
}
|
||||
}
|
||||
32
fixtures/contract/config_embedding_plugin.json
Normal file
32
fixtures/contract/config_embedding_plugin.json
Normal file
@@ -0,0 +1,32 @@
|
||||
{
|
||||
"id": "config_embedding_plugin",
|
||||
"description": "Tests EmbeddingModelType::Plugin variant deserialization in ChunkingConfig — config accepts the plugin variant shape; actual dispatch requires a host-language backend registered via register_embedding_backend at runtime",
|
||||
"tags": ["contract", "config", "embeddings", "plugin"],
|
||||
"call": "extract_file_sync",
|
||||
"input": {
|
||||
"path": "pdf/fake_memo.pdf",
|
||||
"config": {
|
||||
"chunking": {
|
||||
"max_chars": 500,
|
||||
"max_overlap": 50,
|
||||
"embedding": {
|
||||
"model": {
|
||||
"type": "plugin",
|
||||
"name": "test-plugin-backend"
|
||||
},
|
||||
"normalize": true,
|
||||
"max_embed_duration_secs": 30
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"assertions": [
|
||||
{
|
||||
"type": "not_error"
|
||||
}
|
||||
],
|
||||
"skip": {
|
||||
"languages": ["python", "rust", "node", "go", "ruby", "elixir", "wasm", "java", "csharp", "php", "r", "dart", "kotlin_android", "swift", "zig"],
|
||||
"reason": "EmbeddingModelType::Plugin requires a host-language backend registered via register_embedding_backend before dispatch; the e2e harness cannot register one. This fixture validates config round-trip (the {\"type\":\"plugin\",\"name\":...} shape is accepted by every binding's EmbeddingConfig)."
|
||||
}
|
||||
}
|
||||
34
fixtures/contract/config_extraction_timeout.json
Normal file
34
fixtures/contract/config_extraction_timeout.json
Normal file
@@ -0,0 +1,34 @@
|
||||
{
|
||||
"id": "config_extraction_timeout",
|
||||
"description": "Tests that extraction_timeout_secs config field is accepted and does not affect fast extractions",
|
||||
"tags": [
|
||||
"contract",
|
||||
"config",
|
||||
"timeout"
|
||||
],
|
||||
"call": "extract_file_sync",
|
||||
"input": {
|
||||
"path": "pdf/fake_memo.pdf",
|
||||
"config": {
|
||||
"extraction_timeout_secs": 300
|
||||
}
|
||||
},
|
||||
"assertions": [
|
||||
{
|
||||
"type": "equals",
|
||||
"field": "mime_type",
|
||||
"value": "application/pdf"
|
||||
},
|
||||
{
|
||||
"type": "min_length",
|
||||
"field": "content",
|
||||
"value": 10
|
||||
}
|
||||
],
|
||||
"skip": {
|
||||
"languages": [
|
||||
"wasm"
|
||||
],
|
||||
"reason": "WASM cannot access filesystem; use extractBytes with file content instead"
|
||||
}
|
||||
}
|
||||
46
fixtures/contract/config_keywords.json
Normal file
46
fixtures/contract/config_keywords.json
Normal file
@@ -0,0 +1,46 @@
|
||||
{
|
||||
"id": "config_keywords",
|
||||
"description": "Tests keyword extraction via YAKE algorithm",
|
||||
"tags": [
|
||||
"contract",
|
||||
"config",
|
||||
"keywords"
|
||||
],
|
||||
"call": "extract_file_sync",
|
||||
"input": {
|
||||
"path": "pdf/fake_memo.pdf",
|
||||
"config": {
|
||||
"keywords": {
|
||||
"algorithm": "yake",
|
||||
"max_keywords": 10
|
||||
}
|
||||
}
|
||||
},
|
||||
"assertions": [
|
||||
{
|
||||
"type": "equals",
|
||||
"field": "mime_type",
|
||||
"value": "application/pdf"
|
||||
},
|
||||
{
|
||||
"type": "min_length",
|
||||
"field": "content",
|
||||
"value": 10
|
||||
},
|
||||
{
|
||||
"type": "not_empty",
|
||||
"field": "keywords"
|
||||
},
|
||||
{
|
||||
"type": "count_min",
|
||||
"field": "keywords",
|
||||
"value": 1
|
||||
}
|
||||
],
|
||||
"skip": {
|
||||
"languages": [
|
||||
"wasm"
|
||||
],
|
||||
"reason": "WASM cannot access filesystem; use extractBytes with file content instead"
|
||||
}
|
||||
}
|
||||
52
fixtures/contract/config_llm_structured_extraction.json
Normal file
52
fixtures/contract/config_llm_structured_extraction.json
Normal file
@@ -0,0 +1,52 @@
|
||||
{
|
||||
"id": "config_llm_structured_extraction",
|
||||
"description": "Tests structured extraction via liter-llm with JSON schema",
|
||||
"tags": ["contract", "config", "liter-llm", "structured-extraction"],
|
||||
"call": "extract_file_sync",
|
||||
"input": {
|
||||
"path": "pdf/fake_memo.pdf",
|
||||
"config": {
|
||||
"structured_extraction": {
|
||||
"schema": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"title": {
|
||||
"type": "string"
|
||||
},
|
||||
"date": {
|
||||
"type": "string"
|
||||
},
|
||||
"summary": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"required": ["title"]
|
||||
},
|
||||
"schema_name": "memo_data",
|
||||
"llm": {
|
||||
"model": "openai/gpt-4o"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"assertions": [
|
||||
{
|
||||
"type": "equals",
|
||||
"field": "mime_type",
|
||||
"value": "application/pdf"
|
||||
},
|
||||
{
|
||||
"type": "min_length",
|
||||
"field": "content",
|
||||
"value": 10
|
||||
},
|
||||
{
|
||||
"type": "not_empty",
|
||||
"field": "structured_output"
|
||||
}
|
||||
],
|
||||
"skip": {
|
||||
"languages": ["python", "rust", "node", "go", "ruby", "elixir", "wasm", "java", "csharp", "php", "r", "dart", "kotlin_android", "swift", "zig"],
|
||||
"reason": "Requires liter-llm feature and KREUZBERG_LLM_API_KEY env var; runtime-only skip"
|
||||
}
|
||||
}
|
||||
43
fixtures/contract/config_pages.json
Normal file
43
fixtures/contract/config_pages.json
Normal file
@@ -0,0 +1,43 @@
|
||||
{
|
||||
"id": "config_pages",
|
||||
"description": "Tests page extraction and page marker configuration",
|
||||
"tags": [
|
||||
"contract",
|
||||
"config"
|
||||
],
|
||||
"call": "extract_file_sync",
|
||||
"input": {
|
||||
"path": "pdf/fake_memo.pdf",
|
||||
"config": {
|
||||
"pages": {
|
||||
"extract_pages": true,
|
||||
"insert_page_markers": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"assertions": [
|
||||
{
|
||||
"type": "equals",
|
||||
"field": "mime_type",
|
||||
"value": "application/pdf"
|
||||
},
|
||||
{
|
||||
"type": "min_length",
|
||||
"field": "content",
|
||||
"value": 10
|
||||
},
|
||||
{
|
||||
"type": "contains_any",
|
||||
"field": "content",
|
||||
"values": [
|
||||
"PAGE"
|
||||
]
|
||||
}
|
||||
],
|
||||
"skip": {
|
||||
"languages": [
|
||||
"wasm"
|
||||
],
|
||||
"reason": "WASM cannot access filesystem; use extractBytes with file content instead"
|
||||
}
|
||||
}
|
||||
48
fixtures/contract/config_quality_enabled.json
Normal file
48
fixtures/contract/config_quality_enabled.json
Normal file
@@ -0,0 +1,48 @@
|
||||
{
|
||||
"id": "config_quality_enabled",
|
||||
"description": "Tests quality scoring produces a score value in [0.0, 1.0]",
|
||||
"tags": [
|
||||
"contract",
|
||||
"config",
|
||||
"quality"
|
||||
],
|
||||
"call": "extract_file_sync",
|
||||
"input": {
|
||||
"path": "pdf/fake_memo.pdf",
|
||||
"config": {
|
||||
"enable_quality_processing": true
|
||||
}
|
||||
},
|
||||
"assertions": [
|
||||
{
|
||||
"type": "equals",
|
||||
"field": "mime_type",
|
||||
"value": "application/pdf"
|
||||
},
|
||||
{
|
||||
"type": "min_length",
|
||||
"field": "content",
|
||||
"value": 10
|
||||
},
|
||||
{
|
||||
"type": "not_empty",
|
||||
"field": "quality_score"
|
||||
},
|
||||
{
|
||||
"type": "greater_than_or_equal",
|
||||
"field": "quality_score",
|
||||
"value": 0.0
|
||||
},
|
||||
{
|
||||
"type": "less_than_or_equal",
|
||||
"field": "quality_score",
|
||||
"value": 1.0
|
||||
}
|
||||
],
|
||||
"skip": {
|
||||
"languages": [
|
||||
"wasm"
|
||||
],
|
||||
"reason": "WASM cannot access filesystem; use extractBytes with file content instead"
|
||||
}
|
||||
}
|
||||
41
fixtures/contract/config_security_limits.json
Normal file
41
fixtures/contract/config_security_limits.json
Normal file
@@ -0,0 +1,41 @@
|
||||
{
|
||||
"id": "config_security_limits",
|
||||
"description": "Tests archive extraction with custom security limits",
|
||||
"tags": [
|
||||
"contract",
|
||||
"config",
|
||||
"security"
|
||||
],
|
||||
"call": "extract_file_sync",
|
||||
"input": {
|
||||
"path": "archives/documents.zip",
|
||||
"config": {
|
||||
"security_limits": {
|
||||
"max_archive_size": 104857600,
|
||||
"max_compression_ratio": 50,
|
||||
"max_files_in_archive": 100
|
||||
}
|
||||
}
|
||||
},
|
||||
"assertions": [
|
||||
{
|
||||
"type": "contains_any",
|
||||
"field": "mime_type",
|
||||
"values": [
|
||||
"application/zip",
|
||||
"application/x-zip-compressed"
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "min_length",
|
||||
"field": "content",
|
||||
"value": 10
|
||||
}
|
||||
],
|
||||
"skip": {
|
||||
"languages": [
|
||||
"wasm"
|
||||
],
|
||||
"reason": "WASM cannot access filesystem; use extractBytes with file content instead"
|
||||
}
|
||||
}
|
||||
51
fixtures/contract/config_tree_sitter.json
Normal file
51
fixtures/contract/config_tree_sitter.json
Normal file
@@ -0,0 +1,51 @@
|
||||
{
|
||||
"id": "config_tree_sitter",
|
||||
"description": "Tests tree-sitter configuration round-trip",
|
||||
"tags": [
|
||||
"contract",
|
||||
"config",
|
||||
"tree-sitter"
|
||||
],
|
||||
"call": "extract_file_sync",
|
||||
"input": {
|
||||
"path": "code/hello.py",
|
||||
"config": {
|
||||
"tree_sitter": {
|
||||
"languages": [
|
||||
"python",
|
||||
"rust"
|
||||
],
|
||||
"groups": [
|
||||
"web"
|
||||
],
|
||||
"process": {
|
||||
"structure": true,
|
||||
"imports": true,
|
||||
"exports": true,
|
||||
"comments": false,
|
||||
"docstrings": false,
|
||||
"symbols": false,
|
||||
"diagnostics": false
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"assertions": [
|
||||
{
|
||||
"type": "equals",
|
||||
"field": "mime_type",
|
||||
"value": "text/x-source-code"
|
||||
},
|
||||
{
|
||||
"type": "min_length",
|
||||
"field": "content",
|
||||
"value": 5
|
||||
}
|
||||
],
|
||||
"skip": {
|
||||
"languages": [
|
||||
"wasm"
|
||||
],
|
||||
"reason": "WASM cannot access filesystem; use extractBytes with file content instead"
|
||||
}
|
||||
}
|
||||
30
fixtures/contract/output_format_bytes_markdown.json
Normal file
30
fixtures/contract/output_format_bytes_markdown.json
Normal file
@@ -0,0 +1,30 @@
|
||||
{
|
||||
"id": "output_format_bytes_markdown",
|
||||
"description": "Tests markdown output format via bytes extraction API",
|
||||
"tags": ["contract", "output_format", "bytes"],
|
||||
"call": "extract_bytes_sync",
|
||||
"input": {
|
||||
"data": "pdf/fake_memo.pdf",
|
||||
"mime_type": "application/pdf",
|
||||
"config": {
|
||||
"output_format": "markdown"
|
||||
}
|
||||
},
|
||||
"assertions": [
|
||||
{
|
||||
"type": "equals",
|
||||
"field": "mime_type",
|
||||
"value": "application/pdf"
|
||||
},
|
||||
{
|
||||
"type": "min_length",
|
||||
"field": "content",
|
||||
"value": 10
|
||||
},
|
||||
{
|
||||
"type": "equals",
|
||||
"field": "metadata.output_format",
|
||||
"value": "markdown"
|
||||
}
|
||||
]
|
||||
}
|
||||
38
fixtures/contract/output_format_markdown.json
Normal file
38
fixtures/contract/output_format_markdown.json
Normal file
@@ -0,0 +1,38 @@
|
||||
{
|
||||
"id": "output_format_markdown",
|
||||
"description": "Tests Markdown output format",
|
||||
"tags": [
|
||||
"contract",
|
||||
"output_format"
|
||||
],
|
||||
"call": "extract_file_sync",
|
||||
"input": {
|
||||
"path": "pdf/fake_memo.pdf",
|
||||
"config": {
|
||||
"output_format": "markdown"
|
||||
}
|
||||
},
|
||||
"assertions": [
|
||||
{
|
||||
"type": "equals",
|
||||
"field": "mime_type",
|
||||
"value": "application/pdf"
|
||||
},
|
||||
{
|
||||
"type": "min_length",
|
||||
"field": "content",
|
||||
"value": 10
|
||||
},
|
||||
{
|
||||
"type": "equals",
|
||||
"field": "metadata.output_format",
|
||||
"value": "markdown"
|
||||
}
|
||||
],
|
||||
"skip": {
|
||||
"languages": [
|
||||
"wasm"
|
||||
],
|
||||
"reason": "WASM cannot access filesystem; use extractBytes with file content instead"
|
||||
}
|
||||
}
|
||||
11
fixtures/detection/detect_mime_bytes_html.json
Normal file
11
fixtures/detection/detect_mime_bytes_html.json
Normal file
@@ -0,0 +1,11 @@
|
||||
{
|
||||
"id": "detect_mime_bytes_html",
|
||||
"category": "detection",
|
||||
"description": "Detect HTML MIME type from bytes",
|
||||
"tags": ["mime_detection", "bytes"],
|
||||
"call": "detect_mime_type_from_bytes",
|
||||
"input": {
|
||||
"data": "html/html.html"
|
||||
},
|
||||
"assertions": [{ "type": "not_error" }]
|
||||
}
|
||||
15
fixtures/detection/detect_mime_bytes_pdf.json
Normal file
15
fixtures/detection/detect_mime_bytes_pdf.json
Normal file
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"id": "detect_mime_bytes_pdf",
|
||||
"category": "detection",
|
||||
"description": "Detect PDF MIME type from bytes",
|
||||
"tags": ["mime_detection", "bytes", "pdf"],
|
||||
"call": "detect_mime_type_from_bytes",
|
||||
"input": {
|
||||
"data": "pdf/fake_memo.pdf"
|
||||
},
|
||||
"assertions": [
|
||||
{
|
||||
"type": "not_error"
|
||||
}
|
||||
]
|
||||
}
|
||||
15
fixtures/detection/detect_mime_bytes_png.json
Normal file
15
fixtures/detection/detect_mime_bytes_png.json
Normal file
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"id": "detect_mime_bytes_png",
|
||||
"category": "detection",
|
||||
"description": "Detect PNG MIME type from bytes",
|
||||
"tags": ["mime_detection", "bytes", "png"],
|
||||
"call": "detect_mime_type_from_bytes",
|
||||
"input": {
|
||||
"data": "images/test_hello_world.png"
|
||||
},
|
||||
"assertions": [
|
||||
{
|
||||
"type": "not_error"
|
||||
}
|
||||
]
|
||||
}
|
||||
11
fixtures/detection/get_extensions_unknown_mime.json
Normal file
11
fixtures/detection/get_extensions_unknown_mime.json
Normal file
@@ -0,0 +1,11 @@
|
||||
{
|
||||
"id": "get_extensions_unknown_mime",
|
||||
"category": "detection",
|
||||
"description": "get_extensions unknown MIME",
|
||||
"tags": ["mime_detection", "error"],
|
||||
"call": "get_extensions_for_mime",
|
||||
"input": {
|
||||
"mime_type": "application/x-totally-unknown"
|
||||
},
|
||||
"assertions": [{ "type": "error" }]
|
||||
}
|
||||
@@ -0,0 +1,19 @@
|
||||
{
|
||||
"id": "embed_texts_async_empty_input",
|
||||
"category": "embed_async_pending",
|
||||
"description": "embed_texts_async: empty text list",
|
||||
"call": "embed_texts_async",
|
||||
"input": {
|
||||
"texts": []
|
||||
},
|
||||
"assertions": [
|
||||
{
|
||||
"type": "not_error"
|
||||
},
|
||||
{
|
||||
"type": "count_equals",
|
||||
"field": "embeddings",
|
||||
"value": 0
|
||||
}
|
||||
]
|
||||
}
|
||||
22
fixtures/embed_async_pending/embed_texts_async_happy.json
Normal file
22
fixtures/embed_async_pending/embed_texts_async_happy.json
Normal file
@@ -0,0 +1,22 @@
|
||||
{
|
||||
"id": "embed_texts_async_happy",
|
||||
"category": "embed_async_pending",
|
||||
"description": "embed_texts_async: basic async embedding",
|
||||
"call": "embed_texts_async",
|
||||
"input": {
|
||||
"texts": [
|
||||
"First",
|
||||
"Second"
|
||||
]
|
||||
},
|
||||
"assertions": [
|
||||
{
|
||||
"type": "not_error"
|
||||
},
|
||||
{
|
||||
"type": "count_min",
|
||||
"field": "embeddings",
|
||||
"value": 2
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,22 @@
|
||||
{
|
||||
"id": "embed_texts_async_preset_switch",
|
||||
"category": "embed_async_pending",
|
||||
"description": "embed_texts_async: preset override",
|
||||
"call": "embed_texts_async",
|
||||
"input": {
|
||||
"texts": [
|
||||
"Text"
|
||||
],
|
||||
"config": {
|
||||
"model": {
|
||||
"type": "preset",
|
||||
"name": "balanced"
|
||||
}
|
||||
}
|
||||
},
|
||||
"assertions": [
|
||||
{
|
||||
"type": "not_error"
|
||||
}
|
||||
]
|
||||
}
|
||||
21
fixtures/embed_extra/embed_texts_batch.json
Normal file
21
fixtures/embed_extra/embed_texts_batch.json
Normal file
@@ -0,0 +1,21 @@
|
||||
{
|
||||
"id": "embed_texts_batch",
|
||||
"category": "embed_extra",
|
||||
"description": "Batch embed texts",
|
||||
"tags": ["embedding", "batch"],
|
||||
"call": "embed_texts",
|
||||
"input": {
|
||||
"texts": ["Hello", "World"],
|
||||
"config": {
|
||||
"model": {
|
||||
"type": "preset",
|
||||
"name": "balanced"
|
||||
}
|
||||
}
|
||||
},
|
||||
"assertions": [{ "type": "not_error" }],
|
||||
"skip": {
|
||||
"languages": ["wasm"],
|
||||
"reason": "embeddings feature depends on ONNX Runtime which is not available on the WASM target"
|
||||
}
|
||||
}
|
||||
28
fixtures/embeddings/embed_texts_different_preset.json
Normal file
28
fixtures/embeddings/embed_texts_different_preset.json
Normal file
@@ -0,0 +1,28 @@
|
||||
{
|
||||
"id": "embed_texts_different_preset",
|
||||
"category": "embeddings",
|
||||
"description": "embed_texts: multilingual preset",
|
||||
"call": "embed_texts",
|
||||
"input": {
|
||||
"texts": ["Hello world", "Test"],
|
||||
"config": {
|
||||
"model": {
|
||||
"type": "preset",
|
||||
"name": "multilingual"
|
||||
}
|
||||
}
|
||||
},
|
||||
"assertions": [
|
||||
{
|
||||
"type": "not_error"
|
||||
},
|
||||
{
|
||||
"type": "count_min",
|
||||
"field": "embeddings",
|
||||
"value": 2
|
||||
}
|
||||
],
|
||||
"skip": {
|
||||
"languages": ["wasm"]
|
||||
}
|
||||
}
|
||||
17
fixtures/embeddings/get_embedding_preset_known.json
Normal file
17
fixtures/embeddings/get_embedding_preset_known.json
Normal file
@@ -0,0 +1,17 @@
|
||||
{
|
||||
"id": "get_embedding_preset_known",
|
||||
"category": "embeddings",
|
||||
"description": "get_embedding_preset: known preset",
|
||||
"call": "get_embedding_preset",
|
||||
"input": {
|
||||
"preset_name": "balanced"
|
||||
},
|
||||
"assertions": [
|
||||
{
|
||||
"type": "not_error"
|
||||
}
|
||||
],
|
||||
"skip": {
|
||||
"languages": ["wasm"]
|
||||
}
|
||||
}
|
||||
17
fixtures/embeddings/get_embedding_preset_nominal.json
Normal file
17
fixtures/embeddings/get_embedding_preset_nominal.json
Normal file
@@ -0,0 +1,17 @@
|
||||
{
|
||||
"id": "get_embedding_preset_nominal",
|
||||
"category": "embeddings",
|
||||
"description": "get_embedding_preset: nominal case",
|
||||
"call": "get_embedding_preset",
|
||||
"input": {
|
||||
"preset_name": "balanced"
|
||||
},
|
||||
"assertions": [
|
||||
{
|
||||
"type": "not_error"
|
||||
}
|
||||
],
|
||||
"skip": {
|
||||
"languages": ["wasm"]
|
||||
}
|
||||
}
|
||||
17
fixtures/embeddings/get_embedding_preset_unknown.json
Normal file
17
fixtures/embeddings/get_embedding_preset_unknown.json
Normal file
@@ -0,0 +1,17 @@
|
||||
{
|
||||
"id": "get_embedding_preset_unknown",
|
||||
"category": "embeddings",
|
||||
"description": "get_embedding_preset: unknown preset returns empty result",
|
||||
"call": "get_embedding_preset",
|
||||
"input": {
|
||||
"preset_name": "nonexistent-xyz"
|
||||
},
|
||||
"assertions": [
|
||||
{
|
||||
"type": "is_empty"
|
||||
}
|
||||
],
|
||||
"skip": {
|
||||
"languages": ["wasm"]
|
||||
}
|
||||
}
|
||||
15
fixtures/embeddings/list_embedding_presets_sanity.json
Normal file
15
fixtures/embeddings/list_embedding_presets_sanity.json
Normal file
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"id": "list_embedding_presets_sanity",
|
||||
"category": "embeddings",
|
||||
"description": "list_embedding_presets: returns at least one",
|
||||
"call": "list_embedding_presets",
|
||||
"input": {},
|
||||
"assertions": [
|
||||
{
|
||||
"type": "not_empty"
|
||||
}
|
||||
],
|
||||
"skip": {
|
||||
"languages": ["wasm"]
|
||||
}
|
||||
}
|
||||
17
fixtures/error/error_empty_bytes.json
Normal file
17
fixtures/error/error_empty_bytes.json
Normal file
@@ -0,0 +1,17 @@
|
||||
{
|
||||
"id": "error_empty_bytes",
|
||||
"category": "error",
|
||||
"description": "Graceful handling of empty bytes (should not error)",
|
||||
"tags": ["error", "input", "edge-case"],
|
||||
"call": "extract_bytes_sync",
|
||||
"input": {
|
||||
"data": "text/empty.txt",
|
||||
"mime_type": "text/plain",
|
||||
"config": {}
|
||||
},
|
||||
"assertions": [
|
||||
{
|
||||
"type": "not_error"
|
||||
}
|
||||
]
|
||||
}
|
||||
17
fixtures/error/error_empty_mime.json
Normal file
17
fixtures/error/error_empty_mime.json
Normal file
@@ -0,0 +1,17 @@
|
||||
{
|
||||
"id": "error_empty_mime",
|
||||
"category": "error",
|
||||
"description": "Error when extracting with empty MIME type",
|
||||
"tags": ["error", "input", "mime"],
|
||||
"call": "extract_bytes_sync",
|
||||
"input": {
|
||||
"data": "text/plain.txt",
|
||||
"mime_type": "",
|
||||
"config": {}
|
||||
},
|
||||
"assertions": [
|
||||
{
|
||||
"type": "error"
|
||||
}
|
||||
]
|
||||
}
|
||||
13
fixtures/error/error_extract_bytes_conflicting_ocr.json
Normal file
13
fixtures/error/error_extract_bytes_conflicting_ocr.json
Normal file
@@ -0,0 +1,13 @@
|
||||
{
|
||||
"id": "error_extract_bytes_conflicting_ocr",
|
||||
"category": "error",
|
||||
"description": "extract_bytes force+disable OCR",
|
||||
"tags": ["error", "validation"],
|
||||
"call": "extract_bytes_sync",
|
||||
"input": {
|
||||
"data": "text/fake_text.txt",
|
||||
"mime_type": "text/plain",
|
||||
"config": { "force_ocr": true, "disable_ocr": true }
|
||||
},
|
||||
"assertions": [{ "type": "error" }]
|
||||
}
|
||||
17
fixtures/error/error_invalid_mime_format.json
Normal file
17
fixtures/error/error_invalid_mime_format.json
Normal file
@@ -0,0 +1,17 @@
|
||||
{
|
||||
"id": "error_invalid_mime_format",
|
||||
"category": "error",
|
||||
"description": "Error when extracting with invalid MIME type format",
|
||||
"tags": ["error", "input", "mime"],
|
||||
"call": "extract_bytes_sync",
|
||||
"input": {
|
||||
"data": "text/plain.txt",
|
||||
"mime_type": "not-a-mime",
|
||||
"config": {}
|
||||
},
|
||||
"assertions": [
|
||||
{
|
||||
"type": "error"
|
||||
}
|
||||
]
|
||||
}
|
||||
17
fixtures/error/error_unsupported_mime.json
Normal file
17
fixtures/error/error_unsupported_mime.json
Normal file
@@ -0,0 +1,17 @@
|
||||
{
|
||||
"id": "error_unsupported_mime",
|
||||
"category": "error",
|
||||
"description": "Error when extracting with unsupported MIME type",
|
||||
"tags": ["error", "input", "mime"],
|
||||
"call": "extract_bytes_sync",
|
||||
"input": {
|
||||
"data": "text/plain.txt",
|
||||
"mime_type": "application/x-nonexistent",
|
||||
"config": {}
|
||||
},
|
||||
"assertions": [
|
||||
{
|
||||
"type": "error"
|
||||
}
|
||||
]
|
||||
}
|
||||
21
fixtures/format_specific/format_docx_standalone.json
Normal file
21
fixtures/format_specific/format_docx_standalone.json
Normal file
@@ -0,0 +1,21 @@
|
||||
{
|
||||
"id": "format_docx_standalone",
|
||||
"category": "format_specific",
|
||||
"description": "Standalone DOCX extraction using extract_bytes_sync",
|
||||
"tags": ["format_specific", "docx", "text_extraction"],
|
||||
"call": "extract_bytes_sync",
|
||||
"input": {
|
||||
"data": "docx/fake.docx",
|
||||
"mime_type": "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
|
||||
},
|
||||
"assertions": [
|
||||
{
|
||||
"type": "not_error"
|
||||
},
|
||||
{
|
||||
"type": "min_length",
|
||||
"field": "content",
|
||||
"value": 20
|
||||
}
|
||||
]
|
||||
}
|
||||
26
fixtures/format_specific/format_hwpx_standalone.json
Normal file
26
fixtures/format_specific/format_hwpx_standalone.json
Normal file
@@ -0,0 +1,26 @@
|
||||
{
|
||||
"id": "format_hwpx_standalone",
|
||||
"category": "format_specific",
|
||||
"description": "Standalone HWPX extraction using extract_bytes_sync",
|
||||
"tags": ["format_specific", "hwpx", "text_extraction"],
|
||||
"call": "extract_bytes_sync",
|
||||
"input": {
|
||||
"data": "hwpx/simple.hwpx",
|
||||
"mime_type": "application/haansofthwpx"
|
||||
},
|
||||
"assertions": [
|
||||
{
|
||||
"type": "not_error"
|
||||
},
|
||||
{
|
||||
"type": "min_length",
|
||||
"field": "content",
|
||||
"value": 20
|
||||
},
|
||||
{
|
||||
"type": "contains",
|
||||
"field": "content",
|
||||
"value": "Hello from HWPX"
|
||||
}
|
||||
]
|
||||
}
|
||||
26
fixtures/format_specific/format_pdf_text.json
Normal file
26
fixtures/format_specific/format_pdf_text.json
Normal file
@@ -0,0 +1,26 @@
|
||||
{
|
||||
"id": "format_pdf_text",
|
||||
"category": "format_specific",
|
||||
"description": "Standalone PDF text extraction using extract_bytes_sync",
|
||||
"tags": ["format_specific", "pdf", "text_extraction"],
|
||||
"call": "extract_bytes_sync",
|
||||
"input": {
|
||||
"data": "pdf/fake_memo.pdf",
|
||||
"mime_type": "application/pdf"
|
||||
},
|
||||
"assertions": [
|
||||
{
|
||||
"type": "not_error"
|
||||
},
|
||||
{
|
||||
"type": "min_length",
|
||||
"field": "content",
|
||||
"value": 50
|
||||
},
|
||||
{
|
||||
"type": "contains_any",
|
||||
"field": "content",
|
||||
"values": ["Mallori", "May"]
|
||||
}
|
||||
]
|
||||
}
|
||||
26
fixtures/format_specific/format_pptx.json
Normal file
26
fixtures/format_specific/format_pptx.json
Normal file
@@ -0,0 +1,26 @@
|
||||
{
|
||||
"id": "format_pptx",
|
||||
"category": "format_specific",
|
||||
"description": "PPTX presentation extraction using extract_file_sync",
|
||||
"tags": [
|
||||
"format_specific",
|
||||
"pptx",
|
||||
"text_extraction"
|
||||
],
|
||||
"call": "extract_file_sync",
|
||||
"input": {
|
||||
"path": "pptx/simple.pptx",
|
||||
"mime_type": "application/vnd.openxmlformats-officedocument.presentationml.presentation"
|
||||
},
|
||||
"assertions": [
|
||||
{
|
||||
"type": "not_error"
|
||||
}
|
||||
],
|
||||
"skip": {
|
||||
"languages": [
|
||||
"wasm"
|
||||
],
|
||||
"reason": "WASM cannot access filesystem; use extractBytes with file content instead"
|
||||
}
|
||||
}
|
||||
26
fixtures/format_specific/format_xlsx.json
Normal file
26
fixtures/format_specific/format_xlsx.json
Normal file
@@ -0,0 +1,26 @@
|
||||
{
|
||||
"id": "format_xlsx",
|
||||
"category": "format_specific",
|
||||
"description": "XLSX spreadsheet extraction using extract_file_sync",
|
||||
"tags": [
|
||||
"format_specific",
|
||||
"xlsx",
|
||||
"text_extraction"
|
||||
],
|
||||
"call": "extract_file_sync",
|
||||
"input": {
|
||||
"path": "xlsx/stanley_cups.xlsx",
|
||||
"mime_type": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
|
||||
},
|
||||
"assertions": [
|
||||
{
|
||||
"type": "not_error"
|
||||
}
|
||||
],
|
||||
"skip": {
|
||||
"languages": [
|
||||
"wasm"
|
||||
],
|
||||
"reason": "WASM cannot access filesystem; use extractBytes with file content instead"
|
||||
}
|
||||
}
|
||||
BIN
fixtures/images/test_hello_world.png
Normal file
BIN
fixtures/images/test_hello_world.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 911 B |
21
fixtures/pdf/render_pdf_page_first.json
Normal file
21
fixtures/pdf/render_pdf_page_first.json
Normal file
@@ -0,0 +1,21 @@
|
||||
{
|
||||
"id": "render_pdf_page_first",
|
||||
"category": "pdf",
|
||||
"description": "render_pdf_page_to_png: first page",
|
||||
"call": "render_pdf_page_to_png",
|
||||
"input": {
|
||||
"pdf_bytes": "pdf/fake_memo.pdf",
|
||||
"page_index": 0,
|
||||
"dpi": null,
|
||||
"password": null
|
||||
},
|
||||
"assertions": [
|
||||
{
|
||||
"type": "not_error"
|
||||
},
|
||||
{
|
||||
"type": "min_length",
|
||||
"value": 100
|
||||
}
|
||||
]
|
||||
}
|
||||
36
fixtures/pdf/render_pdf_page_missing_file.json
Normal file
36
fixtures/pdf/render_pdf_page_missing_file.json
Normal file
@@ -0,0 +1,36 @@
|
||||
{
|
||||
"id": "render_pdf_page_missing_file",
|
||||
"category": "pdf",
|
||||
"description": "render_pdf_page_to_png: missing file",
|
||||
"call": "render_pdf_page_to_png",
|
||||
"skip": {
|
||||
"languages": [
|
||||
"python",
|
||||
"node",
|
||||
"ruby",
|
||||
"php",
|
||||
"ffi",
|
||||
"go",
|
||||
"java",
|
||||
"csharp",
|
||||
"elixir",
|
||||
"wasm",
|
||||
"r",
|
||||
"dart",
|
||||
"kotlin_android",
|
||||
"swift",
|
||||
"zig",
|
||||
"rust"
|
||||
],
|
||||
"reason": "render_pdf_page_to_png takes pre-loaded pdf_bytes; the harness materializes file contents at generation time, so a runtime missing-file error path is not expressible via this fixture shape"
|
||||
},
|
||||
"input": {
|
||||
"pdf_path": "nonexistent/file.pdf",
|
||||
"page_index": 0
|
||||
},
|
||||
"assertions": [
|
||||
{
|
||||
"type": "error"
|
||||
}
|
||||
]
|
||||
}
|
||||
17
fixtures/pdf/render_pdf_page_out_of_range.json
Normal file
17
fixtures/pdf/render_pdf_page_out_of_range.json
Normal file
@@ -0,0 +1,17 @@
|
||||
{
|
||||
"id": "render_pdf_page_out_of_range",
|
||||
"category": "pdf",
|
||||
"description": "render_pdf_page_to_png: page out of range",
|
||||
"call": "render_pdf_page_to_png",
|
||||
"input": {
|
||||
"pdf_bytes": "pdf/fake_memo.pdf",
|
||||
"page_index": 999,
|
||||
"dpi": null,
|
||||
"password": null
|
||||
},
|
||||
"assertions": [
|
||||
{
|
||||
"type": "error"
|
||||
}
|
||||
]
|
||||
}
|
||||
279
fixtures/plugin_api/README.md
Normal file
279
fixtures/plugin_api/README.md
Normal file
@@ -0,0 +1,279 @@
|
||||
# Plugin API Test Fixtures
|
||||
|
||||
This directory contains fixtures for generating E2E tests for plugin/config/utility APIs across all language bindings.
|
||||
|
||||
## Purpose
|
||||
|
||||
Unlike the document extraction fixtures (in the parent `fixtures/` directory), these fixtures test:
|
||||
|
||||
- Plugin management APIs (validators, post-processors, OCR backends, document extractors)
|
||||
- Configuration loading APIs (`from_file`, `discover`)
|
||||
- MIME utility APIs (`detect_mime_type`, `get_extensions_for_mime`, etc.)
|
||||
|
||||
## Schema
|
||||
|
||||
See `schema.json` for the complete JSON schema definition.
|
||||
|
||||
## Fixture Structure
|
||||
|
||||
Each fixture is a JSON file defining:
|
||||
|
||||
- **id**: Unique identifier (e.g., `validators_list`)
|
||||
- **api_category**: Category of API (`validator_management`, `configuration`, `mime_utilities`, etc.)
|
||||
- **api_function**: Function name being tested (snake_case format)
|
||||
- **test_spec**: Test specification including:
|
||||
- **pattern**: Test pattern type (see patterns below)
|
||||
- **setup**: Optional setup steps (temp files, directories, etc.)
|
||||
- **function_call**: Function to call with arguments
|
||||
- **assertions**: Expected behavior and values
|
||||
- **teardown**: Optional cleanup steps
|
||||
|
||||
## Test Patterns
|
||||
|
||||
### 1. `simple_list`
|
||||
|
||||
Lists items from a registry. No setup required.
|
||||
|
||||
**Example**: `validators_list.json`
|
||||
|
||||
```json
|
||||
{
|
||||
"pattern": "simple_list",
|
||||
"function_call": { "name": "list_validators", "args": [] },
|
||||
"assertions": { "return_type": "list", "list_item_type": "string" }
|
||||
}
|
||||
```
|
||||
|
||||
### 2. `clear_registry`
|
||||
|
||||
Clears a registry and verifies it's empty.
|
||||
|
||||
**Example**: `validators_clear.json`
|
||||
|
||||
```json
|
||||
{
|
||||
"pattern": "clear_registry",
|
||||
"function_call": { "name": "clear_validators", "args": [] },
|
||||
"assertions": { "return_type": "void", "verify_cleanup": true }
|
||||
}
|
||||
```
|
||||
|
||||
### 3. `graceful_unregister`
|
||||
|
||||
Attempts to unregister a nonexistent item and verifies that no error is raised.
|
||||
|
||||
**Example**: `ocr_backends_unregister.json`
|
||||
|
||||
```json
|
||||
{
|
||||
"pattern": "graceful_unregister",
|
||||
"function_call": { "name": "unregister_ocr_backend", "args": ["nonexistent-backend-xyz"] },
|
||||
"assertions": { "does_not_throw": true }
|
||||
}
|
||||
```
|
||||
|
||||
### 4. `config_from_file`
|
||||
|
||||
Creates a temp TOML file, loads config, verifies properties.
|
||||
|
||||
**Example**: `config_from_file.json`
|
||||
|
||||
```json
|
||||
{
|
||||
"pattern": "config_from_file",
|
||||
"setup": {
|
||||
"create_temp_file": true,
|
||||
"temp_file_name": "test_config.toml",
|
||||
"temp_file_content": "[chunking]\nmax_chars = 100\n"
|
||||
},
|
||||
"function_call": {
|
||||
"name": "from_file",
|
||||
"is_method": true,
|
||||
"class_name": "ExtractionConfig",
|
||||
"args": ["${temp_file_path}"]
|
||||
},
|
||||
"assertions": {
|
||||
"object_properties": [{ "path": "chunking.max_chars", "value": 100 }]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 5. `config_discover`
|
||||
|
||||
Creates a config file in a parent directory, changes into a subdirectory, and verifies that the config is discovered from the parent.
|
||||
|
||||
**Example**: `config_discover.json`
|
||||
|
||||
- Creates `kreuzberg.toml` in temp dir
|
||||
- Creates subdirectory and changes to it
|
||||
- Calls `ExtractionConfig.discover()`
|
||||
- Verifies config was found from parent
|
||||
|
||||
### 6. `mime_from_bytes`
|
||||
|
||||
Detects MIME type from byte content.
|
||||
|
||||
**Example**: `mime_detect_bytes.json`
|
||||
|
||||
```json
|
||||
{
|
||||
"pattern": "mime_from_bytes",
|
||||
"setup": { "test_data": "%PDF-1.4\n" },
|
||||
"function_call": { "name": "detect_mime_type", "args": ["${test_data_bytes}"] },
|
||||
"assertions": { "string_contains": "pdf" }
|
||||
}
|
||||
```
|
||||
|
||||
### 7. `mime_from_path`
|
||||
|
||||
Creates temp file, detects MIME from path.
|
||||
|
||||
**Example**: `mime_detect_path.json`
|
||||
|
||||
### 8. `mime_extension_lookup`
|
||||
|
||||
Queries extensions for a MIME type.
|
||||
|
||||
**Example**: `mime_get_extensions.json`
|
||||
|
||||
## Variable Substitution
|
||||
|
||||
Fixtures can use variables in `args`:
|
||||
|
||||
- `${temp_file_path}` - Path to created temp file
|
||||
- `${temp_dir_path}` - Path to created temp directory
|
||||
- `${test_data_bytes}` - Byte data from `setup.test_data`
|
||||
|
||||
## Language-Specific Handling
|
||||
|
||||
The generator translates fixtures to language-specific code:
|
||||
|
||||
### Function Names
|
||||
|
||||
- Fixture: `list_validators` (snake_case)
|
||||
- Python: `list_validators()`
|
||||
- TypeScript: `listValidators()`
|
||||
- Ruby: `list_validators`
|
||||
- Java: `listValidators()`
|
||||
- Go: `ListValidators()`
|
||||
|
||||
### Class Methods
|
||||
|
||||
- Fixture: `ExtractionConfig.from_file`
|
||||
- Python: `ExtractionConfig.from_file()`
|
||||
- TypeScript: `ExtractionConfig.fromFile()`
|
||||
- Ruby: `Config::Extraction.from_file`
|
||||
- Java: `ExtractionConfig.fromFile()`
|
||||
- Go: `ConfigFromFile()`
|
||||
|
||||
### Temp File Handling
|
||||
|
||||
- Python: `tmp_path` fixture (pytest)
|
||||
- TypeScript: `fs.mkdtempSync()` + `fs.rmSync()`
|
||||
- Ruby: `Dir.mktmpdir { }` block
|
||||
- Java: `@TempDir` annotation
|
||||
- Go: `t.TempDir()`
|
||||
|
||||
### Assertions
|
||||
|
||||
- Python: `assert` statements
|
||||
- TypeScript: `expect().toBe()` (Vitest)
|
||||
- Ruby: `expect().to` (RSpec)
|
||||
- Java: `assertEquals()` (JUnit)
|
||||
- Go: `if err != nil` checks
|
||||
|
||||
## Special Cases
|
||||
|
||||
### Go Lazy Initialization
|
||||
|
||||
Document extractors in Go are lazily initialized. The fixture `extractors_list.json` includes:
|
||||
|
||||
```json
|
||||
{
|
||||
"setup": {
|
||||
"lazy_init_required": {
|
||||
"languages": ["go"],
|
||||
"init_action": "extract_file_sync",
|
||||
"init_data": {
|
||||
"create_temp_file": true,
|
||||
"temp_file_name": "test.pdf",
|
||||
"temp_file_content": "%PDF-1.4\n%EOF\n"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
The generator will produce Go-specific setup code to extract a PDF before listing extractors.
|
||||
|
||||
## Fixture Inventory
|
||||
|
||||
### Validator Management (2 fixtures)
|
||||
|
||||
- `validators_list.json` - List all validators
|
||||
- `validators_clear.json` - Clear validators
|
||||
|
||||
### Post-Processor Management (2 fixtures)
|
||||
|
||||
- `post_processors_list.json` - List all post-processors
|
||||
- `post_processors_clear.json` - Clear post-processors
|
||||
|
||||
### OCR Backend Management (3 fixtures)
|
||||
|
||||
- `ocr_backends_list.json` - List all OCR backends
|
||||
- `ocr_backends_unregister.json` - Unregister nonexistent backend
|
||||
- `ocr_backends_clear.json` - Clear OCR backends
|
||||
|
||||
### Document Extractor Management (3 fixtures)
|
||||
|
||||
- `extractors_list.json` - List all extractors (with Go lazy init)
|
||||
- `extractors_unregister.json` - Unregister nonexistent extractor
|
||||
- `extractors_clear.json` - Clear extractors
|
||||
|
||||
### Configuration APIs (2 fixtures)
|
||||
|
||||
- `config_from_file.json` - Load config from TOML file
|
||||
- `config_discover.json` - Discover config from directory tree
|
||||
|
||||
### MIME Utilities (3 fixtures)
|
||||
|
||||
- `mime_detect_bytes.json` - Detect MIME from bytes
|
||||
- `mime_detect_path.json` - Detect MIME from file path
|
||||
- `mime_get_extensions.json` - Get extensions for MIME type
|
||||
|
||||
**Total**: 15 fixtures → 75 generated tests (15 per language × 5 languages)
|
||||
|
||||
## Regenerating Tests
|
||||
|
||||
After modifying fixtures, regenerate tests:
|
||||
|
||||
```bash
|
||||
# Regenerate for all languages
|
||||
cargo run -p kreuzberg-e2e-generator -- generate --lang python
|
||||
cargo run -p kreuzberg-e2e-generator -- generate --lang typescript
|
||||
cargo run -p kreuzberg-e2e-generator -- generate --lang ruby
|
||||
cargo run -p kreuzberg-e2e-generator -- generate --lang java
|
||||
cargo run -p kreuzberg-e2e-generator -- generate --lang go
|
||||
```
|
||||
|
||||
Or use the task runner:
|
||||
|
||||
```bash
|
||||
task e2e:generate
|
||||
```
|
||||
|
||||
## Adding New Fixtures
|
||||
|
||||
1. Create JSON file following `schema.json`
|
||||
2. Choose appropriate test pattern
|
||||
3. Define setup/teardown if needed
|
||||
4. Specify assertions
|
||||
5. Regenerate tests
|
||||
6. Verify tests compile and pass
|
||||
|
||||
## Notes
|
||||
|
||||
- **DO NOT** write E2E tests by hand
|
||||
- **ALL** E2E tests must be generated from fixtures
|
||||
- This is a non-negotiable architectural rule
|
||||
- Hand-written tests will be rejected by CI
|
||||
17
fixtures/plugin_api/document_extractors_clear.json
Normal file
17
fixtures/plugin_api/document_extractors_clear.json
Normal file
@@ -0,0 +1,17 @@
|
||||
{
|
||||
"id": "document_extractors_clear",
|
||||
"category": "document_extractor_management",
|
||||
"description": "Clear all document extractors and verify list is empty",
|
||||
"tags": [
|
||||
"document_extractor",
|
||||
"plugin_management",
|
||||
"clear",
|
||||
"trait-bridge"
|
||||
],
|
||||
"call": "clear_document_extractors",
|
||||
"assertions": [
|
||||
{
|
||||
"type": "not_error"
|
||||
}
|
||||
]
|
||||
}
|
||||
17
fixtures/plugin_api/embedding_backends_clear.json
Normal file
17
fixtures/plugin_api/embedding_backends_clear.json
Normal file
@@ -0,0 +1,17 @@
|
||||
{
|
||||
"id": "embedding_backends_clear",
|
||||
"category": "embedding_backend_management",
|
||||
"description": "Clear all embedding backends and verify list is empty",
|
||||
"tags": [
|
||||
"embedding",
|
||||
"plugin_management",
|
||||
"clear",
|
||||
"trait-bridge"
|
||||
],
|
||||
"call": "clear_embedding_backends",
|
||||
"assertions": [
|
||||
{
|
||||
"type": "not_error"
|
||||
}
|
||||
]
|
||||
}
|
||||
12
fixtures/plugin_api/embedding_backends_list.json
Normal file
12
fixtures/plugin_api/embedding_backends_list.json
Normal file
@@ -0,0 +1,12 @@
|
||||
{
|
||||
"id": "embedding_backends_list",
|
||||
"category": "embedding_backend_management",
|
||||
"description": "List all registered embedding backends",
|
||||
"tags": ["embedding", "plugin_management", "list"],
|
||||
"call": "list_embedding_backends",
|
||||
"assertions": [
|
||||
{
|
||||
"type": "not_error"
|
||||
}
|
||||
]
|
||||
}
|
||||
25
fixtures/plugin_api/extractors_list.json
Normal file
25
fixtures/plugin_api/extractors_list.json
Normal file
@@ -0,0 +1,25 @@
|
||||
{
|
||||
"id": "extractors_list",
|
||||
"category": "document_extractor_management",
|
||||
"description": "List all registered document extractors",
|
||||
"tags": ["extractors", "plugin_management", "list"],
|
||||
"call": "list_document_extractors",
|
||||
"input": {
|
||||
"setup": {
|
||||
"lazy_init_required": {
|
||||
"languages": ["go"],
|
||||
"init_action": "extract_file_sync",
|
||||
"init_data": {
|
||||
"create_temp_file": true,
|
||||
"temp_file_name": "test.pdf",
|
||||
"temp_file_content": "%PDF-1.4\n%EOF\n"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"assertions": [
|
||||
{
|
||||
"type": "not_error"
|
||||
}
|
||||
]
|
||||
}
|
||||
17
fixtures/plugin_api/mime_detect_bytes.json
Normal file
17
fixtures/plugin_api/mime_detect_bytes.json
Normal file
@@ -0,0 +1,17 @@
|
||||
{
|
||||
"id": "mime_detect_bytes",
|
||||
"category": "mime_utilities",
|
||||
"description": "Detect MIME type from file bytes",
|
||||
"tags": ["mime", "detection", "bytes"],
|
||||
"call": "detect_mime_type_from_bytes",
|
||||
"input": {
|
||||
"data": "pdf/fake_memo.pdf"
|
||||
},
|
||||
"assertions": [
|
||||
{
|
||||
"type": "contains",
|
||||
"field": "result",
|
||||
"value": "pdf"
|
||||
}
|
||||
]
|
||||
}
|
||||
17
fixtures/plugin_api/mime_detect_image.json
Normal file
17
fixtures/plugin_api/mime_detect_image.json
Normal file
@@ -0,0 +1,17 @@
|
||||
{
|
||||
"id": "mime_detect_image",
|
||||
"category": "mime_utilities",
|
||||
"description": "Detect MIME type from PNG image bytes",
|
||||
"tags": ["mime", "detection", "image", "bytes"],
|
||||
"call": "detect_mime_type_from_bytes",
|
||||
"input": {
|
||||
"data": "images/test_hello_world.png"
|
||||
},
|
||||
"assertions": [
|
||||
{
|
||||
"type": "contains",
|
||||
"field": "result",
|
||||
"value": "png"
|
||||
}
|
||||
]
|
||||
}
|
||||
17
fixtures/plugin_api/mime_get_extensions.json
Normal file
17
fixtures/plugin_api/mime_get_extensions.json
Normal file
@@ -0,0 +1,17 @@
|
||||
{
|
||||
"id": "mime_get_extensions",
|
||||
"category": "mime_utilities",
|
||||
"description": "Get file extensions for a MIME type",
|
||||
"tags": ["mime", "extensions", "lookup"],
|
||||
"call": "get_extensions_for_mime",
|
||||
"input": {
|
||||
"mime_type": "application/pdf"
|
||||
},
|
||||
"assertions": [
|
||||
{
|
||||
"type": "contains",
|
||||
"field": "result",
|
||||
"value": "pdf"
|
||||
}
|
||||
]
|
||||
}
|
||||
17
fixtures/plugin_api/ocr_backends_clear.json
Normal file
17
fixtures/plugin_api/ocr_backends_clear.json
Normal file
@@ -0,0 +1,17 @@
|
||||
{
|
||||
"id": "ocr_backends_clear",
|
||||
"category": "ocr_backend_management",
|
||||
"description": "Clear all OCR backends and verify list is empty",
|
||||
"tags": [
|
||||
"ocr",
|
||||
"plugin_management",
|
||||
"clear",
|
||||
"trait-bridge"
|
||||
],
|
||||
"call": "clear_ocr_backends",
|
||||
"assertions": [
|
||||
{
|
||||
"type": "not_error"
|
||||
}
|
||||
]
|
||||
}
|
||||
12
fixtures/plugin_api/ocr_backends_list.json
Normal file
12
fixtures/plugin_api/ocr_backends_list.json
Normal file
@@ -0,0 +1,12 @@
|
||||
{
|
||||
"id": "ocr_backends_list",
|
||||
"category": "ocr_backend_management",
|
||||
"description": "List all registered OCR backends",
|
||||
"tags": ["ocr", "plugin_management", "list"],
|
||||
"call": "list_ocr_backends",
|
||||
"assertions": [
|
||||
{
|
||||
"type": "not_error"
|
||||
}
|
||||
]
|
||||
}
|
||||
20
fixtures/plugin_api/ocr_backends_unregister.json
Normal file
20
fixtures/plugin_api/ocr_backends_unregister.json
Normal file
@@ -0,0 +1,20 @@
|
||||
{
|
||||
"id": "ocr_backends_unregister",
|
||||
"category": "ocr_backend_management",
|
||||
"description": "Unregister nonexistent OCR backend gracefully",
|
||||
"tags": [
|
||||
"ocr",
|
||||
"plugin_management",
|
||||
"unregister",
|
||||
"trait-bridge"
|
||||
],
|
||||
"call": "unregister_ocr_backend",
|
||||
"input": {
|
||||
"name": "nonexistent-backend-xyz"
|
||||
},
|
||||
"assertions": [
|
||||
{
|
||||
"type": "not_error"
|
||||
}
|
||||
]
|
||||
}
|
||||
17
fixtures/plugin_api/post_processors_clear.json
Normal file
17
fixtures/plugin_api/post_processors_clear.json
Normal file
@@ -0,0 +1,17 @@
|
||||
{
|
||||
"id": "post_processors_clear",
|
||||
"category": "post_processor_management",
|
||||
"description": "Clear all post-processors and verify list is empty",
|
||||
"tags": [
|
||||
"post_processors",
|
||||
"plugin_management",
|
||||
"clear",
|
||||
"trait-bridge"
|
||||
],
|
||||
"call": "clear_post_processors",
|
||||
"assertions": [
|
||||
{
|
||||
"type": "not_error"
|
||||
}
|
||||
]
|
||||
}
|
||||
12
fixtures/plugin_api/post_processors_list.json
Normal file
12
fixtures/plugin_api/post_processors_list.json
Normal file
@@ -0,0 +1,12 @@
|
||||
{
|
||||
"id": "post_processors_list",
|
||||
"category": "post_processor_management",
|
||||
"description": "List all registered post-processors",
|
||||
"tags": ["post_processors", "plugin_management", "list"],
|
||||
"call": "list_post_processors",
|
||||
"assertions": [
|
||||
{
|
||||
"type": "not_error"
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,28 @@
|
||||
{
|
||||
"id": "register_document_extractor_trait_bridge",
|
||||
"category": "plugin_api",
|
||||
"description": "register_document_extractor: trait bridge",
|
||||
"tags": [
|
||||
"trait-bridge"
|
||||
],
|
||||
"call": "register_document_extractor",
|
||||
"input": {
|
||||
"extractor": {
|
||||
"type": "test",
|
||||
"name": "test-extractor"
|
||||
}
|
||||
},
|
||||
"args": [
|
||||
{
|
||||
"name": "extractor",
|
||||
"field": "extractor",
|
||||
"arg_type": "test_backend",
|
||||
"trait": "DocumentExtractor"
|
||||
}
|
||||
],
|
||||
"assertions": [
|
||||
{
|
||||
"type": "not_error"
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,29 @@
|
||||
{
|
||||
"id": "register_embedding_backend_trait_bridge",
|
||||
"category": "plugin_api",
|
||||
"description": "register_embedding_backend: trait bridge",
|
||||
"tags": [
|
||||
"trait-bridge"
|
||||
],
|
||||
"call": "register_embedding_backend",
|
||||
"input": {
|
||||
"backend": {
|
||||
"type": "test",
|
||||
"name": "test-embedding-backend",
|
||||
"dimensions": 768
|
||||
}
|
||||
},
|
||||
"args": [
|
||||
{
|
||||
"name": "backend",
|
||||
"field": "backend",
|
||||
"arg_type": "test_backend",
|
||||
"trait": "EmbeddingBackend"
|
||||
}
|
||||
],
|
||||
"assertions": [
|
||||
{
|
||||
"type": "not_error"
|
||||
}
|
||||
]
|
||||
}
|
||||
28
fixtures/plugin_api/register_ocr_backend_trait_bridge.json
Normal file
28
fixtures/plugin_api/register_ocr_backend_trait_bridge.json
Normal file
@@ -0,0 +1,28 @@
|
||||
{
|
||||
"id": "register_ocr_backend_trait_bridge",
|
||||
"category": "plugin_api",
|
||||
"description": "register_ocr_backend: trait bridge",
|
||||
"tags": [
|
||||
"trait-bridge"
|
||||
],
|
||||
"call": "register_ocr_backend",
|
||||
"input": {
|
||||
"backend": {
|
||||
"type": "test",
|
||||
"name": "test-backend"
|
||||
}
|
||||
},
|
||||
"args": [
|
||||
{
|
||||
"name": "backend",
|
||||
"field": "backend",
|
||||
"arg_type": "test_backend",
|
||||
"trait": "OcrBackend"
|
||||
}
|
||||
],
|
||||
"assertions": [
|
||||
{
|
||||
"type": "not_error"
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,28 @@
|
||||
{
|
||||
"id": "register_post_processor_trait_bridge",
|
||||
"category": "plugin_api",
|
||||
"description": "register_post_processor: trait bridge",
|
||||
"tags": [
|
||||
"trait-bridge"
|
||||
],
|
||||
"call": "register_post_processor",
|
||||
"input": {
|
||||
"processor": {
|
||||
"type": "test",
|
||||
"name": "test-processor"
|
||||
}
|
||||
},
|
||||
"args": [
|
||||
{
|
||||
"name": "processor",
|
||||
"field": "processor",
|
||||
"arg_type": "test_backend",
|
||||
"trait": "PostProcessor"
|
||||
}
|
||||
],
|
||||
"assertions": [
|
||||
{
|
||||
"type": "not_error"
|
||||
}
|
||||
]
|
||||
}
|
||||
28
fixtures/plugin_api/register_renderer_trait_bridge.json
Normal file
28
fixtures/plugin_api/register_renderer_trait_bridge.json
Normal file
@@ -0,0 +1,28 @@
|
||||
{
|
||||
"id": "register_renderer_trait_bridge",
|
||||
"category": "plugin_api",
|
||||
"description": "register_renderer: trait bridge",
|
||||
"tags": [
|
||||
"trait-bridge"
|
||||
],
|
||||
"call": "register_renderer",
|
||||
"input": {
|
||||
"renderer": {
|
||||
"type": "test",
|
||||
"name": "test-renderer"
|
||||
}
|
||||
},
|
||||
"args": [
|
||||
{
|
||||
"name": "renderer",
|
||||
"field": "renderer",
|
||||
"arg_type": "test_backend",
|
||||
"trait": "Renderer"
|
||||
}
|
||||
],
|
||||
"assertions": [
|
||||
{
|
||||
"type": "not_error"
|
||||
}
|
||||
]
|
||||
}
|
||||
28
fixtures/plugin_api/register_validator_trait_bridge.json
Normal file
28
fixtures/plugin_api/register_validator_trait_bridge.json
Normal file
@@ -0,0 +1,28 @@
|
||||
{
|
||||
"id": "register_validator_trait_bridge",
|
||||
"category": "plugin_api",
|
||||
"description": "register_validator: trait bridge",
|
||||
"tags": [
|
||||
"trait-bridge"
|
||||
],
|
||||
"call": "register_validator",
|
||||
"input": {
|
||||
"validator": {
|
||||
"type": "test",
|
||||
"name": "test-validator"
|
||||
}
|
||||
},
|
||||
"args": [
|
||||
{
|
||||
"name": "validator",
|
||||
"field": "validator",
|
||||
"arg_type": "test_backend",
|
||||
"trait": "Validator"
|
||||
}
|
||||
],
|
||||
"assertions": [
|
||||
{
|
||||
"type": "not_error"
|
||||
}
|
||||
]
|
||||
}
|
||||
17
fixtures/plugin_api/renderers_clear.json
Normal file
17
fixtures/plugin_api/renderers_clear.json
Normal file
@@ -0,0 +1,17 @@
|
||||
{
|
||||
"id": "renderers_clear",
|
||||
"category": "renderer_management",
|
||||
"description": "Clear all renderers and verify list is empty",
|
||||
"tags": [
|
||||
"renderer",
|
||||
"plugin_management",
|
||||
"clear",
|
||||
"trait-bridge"
|
||||
],
|
||||
"call": "clear_renderers",
|
||||
"assertions": [
|
||||
{
|
||||
"type": "not_error"
|
||||
}
|
||||
]
|
||||
}
|
||||
12
fixtures/plugin_api/renderers_list.json
Normal file
12
fixtures/plugin_api/renderers_list.json
Normal file
@@ -0,0 +1,12 @@
|
||||
{
|
||||
"id": "renderers_list",
|
||||
"category": "renderer_management",
|
||||
"description": "List all registered renderers",
|
||||
"tags": ["renderer", "plugin_management", "list"],
|
||||
"call": "list_renderers",
|
||||
"assertions": [
|
||||
{
|
||||
"type": "not_error"
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,18 @@
|
||||
{
|
||||
"id": "unregister_document_extractor_after_register",
|
||||
"category": "plugin_api",
|
||||
"description": "unregister_document_extractor",
|
||||
"call": "unregister_document_extractor",
|
||||
"input": {
|
||||
"name": "test-extractor"
|
||||
},
|
||||
"assertions": [
|
||||
{
|
||||
"type": "not_error"
|
||||
}
|
||||
],
|
||||
"tags": [
|
||||
"plugin-lifecycle",
|
||||
"trait-bridge"
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,18 @@
|
||||
{
|
||||
"id": "unregister_embedding_backend_after_register",
|
||||
"category": "plugin_api",
|
||||
"description": "unregister_embedding_backend",
|
||||
"call": "unregister_embedding_backend",
|
||||
"input": {
|
||||
"name": "test-embedding-backend"
|
||||
},
|
||||
"assertions": [
|
||||
{
|
||||
"type": "not_error"
|
||||
}
|
||||
],
|
||||
"tags": [
|
||||
"plugin-lifecycle",
|
||||
"trait-bridge"
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,18 @@
|
||||
{
|
||||
"id": "unregister_post_processor_after_register",
|
||||
"category": "plugin_api",
|
||||
"description": "unregister_post_processor",
|
||||
"call": "unregister_post_processor",
|
||||
"input": {
|
||||
"name": "test-processor"
|
||||
},
|
||||
"assertions": [
|
||||
{
|
||||
"type": "not_error"
|
||||
}
|
||||
],
|
||||
"tags": [
|
||||
"plugin-lifecycle",
|
||||
"trait-bridge"
|
||||
]
|
||||
}
|
||||
18
fixtures/plugin_api/unregister_renderer_after_register.json
Normal file
18
fixtures/plugin_api/unregister_renderer_after_register.json
Normal file
@@ -0,0 +1,18 @@
|
||||
{
|
||||
"id": "unregister_renderer_after_register",
|
||||
"category": "plugin_api",
|
||||
"description": "unregister_renderer",
|
||||
"call": "unregister_renderer",
|
||||
"input": {
|
||||
"name": "test-renderer"
|
||||
},
|
||||
"assertions": [
|
||||
{
|
||||
"type": "not_error"
|
||||
}
|
||||
],
|
||||
"tags": [
|
||||
"plugin-lifecycle",
|
||||
"trait-bridge"
|
||||
]
|
||||
}
|
||||
18
fixtures/plugin_api/unregister_validator_after_register.json
Normal file
18
fixtures/plugin_api/unregister_validator_after_register.json
Normal file
@@ -0,0 +1,18 @@
|
||||
{
|
||||
"id": "unregister_validator_after_register",
|
||||
"category": "plugin_api",
|
||||
"description": "unregister_validator",
|
||||
"call": "unregister_validator",
|
||||
"input": {
|
||||
"name": "test-validator"
|
||||
},
|
||||
"assertions": [
|
||||
{
|
||||
"type": "not_error"
|
||||
}
|
||||
],
|
||||
"tags": [
|
||||
"plugin-lifecycle",
|
||||
"trait-bridge"
|
||||
]
|
||||
}
|
||||
17
fixtures/plugin_api/validators_clear.json
Normal file
17
fixtures/plugin_api/validators_clear.json
Normal file
@@ -0,0 +1,17 @@
|
||||
{
|
||||
"id": "validators_clear",
|
||||
"category": "validator_management",
|
||||
"description": "Clear all validators and verify list is empty",
|
||||
"tags": [
|
||||
"validators",
|
||||
"plugin_management",
|
||||
"clear",
|
||||
"trait-bridge"
|
||||
],
|
||||
"call": "clear_validators",
|
||||
"assertions": [
|
||||
{
|
||||
"type": "not_error"
|
||||
}
|
||||
]
|
||||
}
|
||||
12
fixtures/plugin_api/validators_list.json
Normal file
12
fixtures/plugin_api/validators_list.json
Normal file
@@ -0,0 +1,12 @@
|
||||
{
|
||||
"id": "validators_list",
|
||||
"category": "validator_management",
|
||||
"description": "List all registered validators",
|
||||
"tags": ["validators", "plugin_management", "list"],
|
||||
"call": "list_validators",
|
||||
"assertions": [
|
||||
{
|
||||
"type": "not_error"
|
||||
}
|
||||
]
|
||||
}
|
||||
15
fixtures/registry/extensions_docx.json
Normal file
15
fixtures/registry/extensions_docx.json
Normal file
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"id": "extensions_docx",
|
||||
"category": "registry_operations",
|
||||
"description": "Get file extensions for DOCX MIME type",
|
||||
"tags": ["registry", "extensions", "docx"],
|
||||
"call": "get_extensions_for_mime",
|
||||
"input": {
|
||||
"mime_type": "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
|
||||
},
|
||||
"assertions": [
|
||||
{
|
||||
"type": "not_error"
|
||||
}
|
||||
]
|
||||
}
|
||||
15
fixtures/registry/extensions_html.json
Normal file
15
fixtures/registry/extensions_html.json
Normal file
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"id": "extensions_html",
|
||||
"category": "registry_operations",
|
||||
"description": "Get file extensions for HTML MIME type",
|
||||
"tags": ["registry", "extensions", "html"],
|
||||
"call": "get_extensions_for_mime",
|
||||
"input": {
|
||||
"mime_type": "text/html"
|
||||
},
|
||||
"assertions": [
|
||||
{
|
||||
"type": "not_error"
|
||||
}
|
||||
]
|
||||
}
|
||||
15
fixtures/registry/extensions_pdf.json
Normal file
15
fixtures/registry/extensions_pdf.json
Normal file
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"id": "extensions_pdf",
|
||||
"category": "registry_operations",
|
||||
"description": "Get file extensions for PDF MIME type",
|
||||
"tags": ["registry", "extensions", "pdf"],
|
||||
"call": "get_extensions_for_mime",
|
||||
"input": {
|
||||
"mime_type": "application/pdf"
|
||||
},
|
||||
"assertions": [
|
||||
{
|
||||
"type": "not_error"
|
||||
}
|
||||
]
|
||||
}
|
||||
9
fixtures/registry/list_document_extractors.json
Normal file
9
fixtures/registry/list_document_extractors.json
Normal file
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"id": "list_document_extractors",
|
||||
"category": "registry",
|
||||
"description": "List document extractors",
|
||||
"tags": ["registry"],
|
||||
"call": "list_document_extractors",
|
||||
"input": {},
|
||||
"assertions": [{ "type": "not_error" }]
|
||||
}
|
||||
9
fixtures/registry/list_embedding_backends.json
Normal file
9
fixtures/registry/list_embedding_backends.json
Normal file
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"id": "list_embedding_backends",
|
||||
"category": "registry",
|
||||
"description": "List embedding backends",
|
||||
"tags": ["registry"],
|
||||
"call": "list_embedding_backends",
|
||||
"input": {},
|
||||
"assertions": [{ "type": "not_error" }]
|
||||
}
|
||||
9
fixtures/registry/list_ocr_backends.json
Normal file
9
fixtures/registry/list_ocr_backends.json
Normal file
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"id": "list_ocr_backends",
|
||||
"category": "registry",
|
||||
"description": "List OCR backends",
|
||||
"tags": ["registry"],
|
||||
"call": "list_ocr_backends",
|
||||
"input": {},
|
||||
"assertions": [{ "type": "not_error" }]
|
||||
}
|
||||
9
fixtures/registry/list_post_processors.json
Normal file
9
fixtures/registry/list_post_processors.json
Normal file
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"id": "list_post_processors",
|
||||
"category": "registry",
|
||||
"description": "List post-processors",
|
||||
"tags": ["registry"],
|
||||
"call": "list_post_processors",
|
||||
"input": {},
|
||||
"assertions": [{ "type": "not_error" }]
|
||||
}
|
||||
9
fixtures/registry/list_renderers.json
Normal file
9
fixtures/registry/list_renderers.json
Normal file
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"id": "list_renderers",
|
||||
"category": "registry",
|
||||
"description": "List renderers",
|
||||
"tags": ["registry"],
|
||||
"call": "list_renderers",
|
||||
"input": {},
|
||||
"assertions": [{ "type": "not_error" }]
|
||||
}
|
||||
9
fixtures/registry/list_validators.json
Normal file
9
fixtures/registry/list_validators.json
Normal file
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"id": "list_validators",
|
||||
"category": "registry",
|
||||
"description": "List validators",
|
||||
"tags": ["registry"],
|
||||
"call": "list_validators",
|
||||
"input": {},
|
||||
"assertions": [{ "type": "not_error" }]
|
||||
}
|
||||
32
fixtures/smoke/docx_basic.json
Normal file
32
fixtures/smoke/docx_basic.json
Normal file
@@ -0,0 +1,32 @@
|
||||
{
|
||||
"id": "smoke_docx_basic",
|
||||
"category": "smoke",
|
||||
"description": "Smoke test: DOCX with formatted text",
|
||||
"tags": ["smoke", "office", "docx"],
|
||||
"input": {
|
||||
"path": "docx/fake.docx",
|
||||
"mime_type": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||
"config": {}
|
||||
},
|
||||
"assertions": [
|
||||
{
|
||||
"type": "equals",
|
||||
"field": "mime_type",
|
||||
"value": "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
|
||||
},
|
||||
{
|
||||
"type": "min_length",
|
||||
"field": "content",
|
||||
"value": 20
|
||||
},
|
||||
{
|
||||
"type": "contains_any",
|
||||
"field": "content",
|
||||
"values": ["Lorem", "ipsum", "document", "text"]
|
||||
}
|
||||
],
|
||||
"skip": {
|
||||
"languages": ["wasm"],
|
||||
"reason": "WASM cannot access filesystem; use extractBytes with file content instead"
|
||||
}
|
||||
}
|
||||
32
fixtures/smoke/html_basic.json
Normal file
32
fixtures/smoke/html_basic.json
Normal file
@@ -0,0 +1,32 @@
|
||||
{
|
||||
"id": "smoke_html_basic",
|
||||
"category": "smoke",
|
||||
"description": "Smoke test: HTML table extraction",
|
||||
"tags": ["smoke", "html"],
|
||||
"input": {
|
||||
"path": "html/simple_table.html",
|
||||
"mime_type": "text/html",
|
||||
"config": {}
|
||||
},
|
||||
"assertions": [
|
||||
{
|
||||
"type": "equals",
|
||||
"field": "mime_type",
|
||||
"value": "text/html"
|
||||
},
|
||||
{
|
||||
"type": "min_length",
|
||||
"field": "content",
|
||||
"value": 10
|
||||
},
|
||||
{
|
||||
"type": "contains_any",
|
||||
"field": "content",
|
||||
"values": ["Sample Data Table", "Laptop", "Electronics", "Product"]
|
||||
}
|
||||
],
|
||||
"skip": {
|
||||
"languages": ["wasm"],
|
||||
"reason": "WASM cannot access filesystem; use extractBytes with file content instead"
|
||||
}
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user