This commit is contained in:
21
fixtures/format_specific/format_docx_standalone.json
Normal file
21
fixtures/format_specific/format_docx_standalone.json
Normal file
@@ -0,0 +1,21 @@
|
||||
{
|
||||
"id": "format_docx_standalone",
|
||||
"category": "format_specific",
|
||||
"description": "Standalone DOCX extraction using extract_bytes_sync",
|
||||
"tags": ["format_specific", "docx", "text_extraction"],
|
||||
"call": "extract_bytes_sync",
|
||||
"input": {
|
||||
"data": "docx/fake.docx",
|
||||
"mime_type": "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
|
||||
},
|
||||
"assertions": [
|
||||
{
|
||||
"type": "not_error"
|
||||
},
|
||||
{
|
||||
"type": "min_length",
|
||||
"field": "content",
|
||||
"value": 20
|
||||
}
|
||||
]
|
||||
}
|
||||
26
fixtures/format_specific/format_hwpx_standalone.json
Normal file
26
fixtures/format_specific/format_hwpx_standalone.json
Normal file
@@ -0,0 +1,26 @@
|
||||
{
|
||||
"id": "format_hwpx_standalone",
|
||||
"category": "format_specific",
|
||||
"description": "Standalone HWPX extraction using extract_bytes_sync",
|
||||
"tags": ["format_specific", "hwpx", "text_extraction"],
|
||||
"call": "extract_bytes_sync",
|
||||
"input": {
|
||||
"data": "hwpx/simple.hwpx",
|
||||
"mime_type": "application/haansofthwpx"
|
||||
},
|
||||
"assertions": [
|
||||
{
|
||||
"type": "not_error"
|
||||
},
|
||||
{
|
||||
"type": "min_length",
|
||||
"field": "content",
|
||||
"value": 20
|
||||
},
|
||||
{
|
||||
"type": "contains",
|
||||
"field": "content",
|
||||
"value": "Hello from HWPX"
|
||||
}
|
||||
]
|
||||
}
|
||||
26
fixtures/format_specific/format_pdf_text.json
Normal file
26
fixtures/format_specific/format_pdf_text.json
Normal file
@@ -0,0 +1,26 @@
|
||||
{
|
||||
"id": "format_pdf_text",
|
||||
"category": "format_specific",
|
||||
"description": "Standalone PDF text extraction using extract_bytes_sync",
|
||||
"tags": ["format_specific", "pdf", "text_extraction"],
|
||||
"call": "extract_bytes_sync",
|
||||
"input": {
|
||||
"data": "pdf/fake_memo.pdf",
|
||||
"mime_type": "application/pdf"
|
||||
},
|
||||
"assertions": [
|
||||
{
|
||||
"type": "not_error"
|
||||
},
|
||||
{
|
||||
"type": "min_length",
|
||||
"field": "content",
|
||||
"value": 50
|
||||
},
|
||||
{
|
||||
"type": "contains_any",
|
||||
"field": "content",
|
||||
"values": ["Mallori", "May"]
|
||||
}
|
||||
]
|
||||
}
|
||||
26
fixtures/format_specific/format_pptx.json
Normal file
26
fixtures/format_specific/format_pptx.json
Normal file
@@ -0,0 +1,26 @@
|
||||
{
|
||||
"id": "format_pptx",
|
||||
"category": "format_specific",
|
||||
"description": "PPTX presentation extraction using extract_file_sync",
|
||||
"tags": [
|
||||
"format_specific",
|
||||
"pptx",
|
||||
"text_extraction"
|
||||
],
|
||||
"call": "extract_file_sync",
|
||||
"input": {
|
||||
"path": "pptx/simple.pptx",
|
||||
"mime_type": "application/vnd.openxmlformats-officedocument.presentationml.presentation"
|
||||
},
|
||||
"assertions": [
|
||||
{
|
||||
"type": "not_error"
|
||||
}
|
||||
],
|
||||
"skip": {
|
||||
"languages": [
|
||||
"wasm"
|
||||
],
|
||||
"reason": "WASM cannot access filesystem; use extractBytes with file content instead"
|
||||
}
|
||||
}
|
||||
26
fixtures/format_specific/format_xlsx.json
Normal file
26
fixtures/format_specific/format_xlsx.json
Normal file
@@ -0,0 +1,26 @@
|
||||
{
|
||||
"id": "format_xlsx",
|
||||
"category": "format_specific",
|
||||
"description": "XLSX spreadsheet extraction using extract_file_sync",
|
||||
"tags": [
|
||||
"format_specific",
|
||||
"xlsx",
|
||||
"text_extraction"
|
||||
],
|
||||
"call": "extract_file_sync",
|
||||
"input": {
|
||||
"path": "xlsx/stanley_cups.xlsx",
|
||||
"mime_type": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
|
||||
},
|
||||
"assertions": [
|
||||
{
|
||||
"type": "not_error"
|
||||
}
|
||||
],
|
||||
"skip": {
|
||||
"languages": [
|
||||
"wasm"
|
||||
],
|
||||
"reason": "WASM cannot access filesystem; use extractBytes with file content instead"
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user