Nomad changes
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s

This commit is contained in:
Henrik Jess Nielsen
2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions

View File

@@ -0,0 +1,32 @@
{
"id": "smoke_docx_basic",
"category": "smoke",
"description": "Smoke test: DOCX with formatted text",
"tags": ["smoke", "office", "docx"],
"input": {
"path": "docx/fake.docx",
"mime_type": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
"config": {}
},
"assertions": [
{
"type": "equals",
"field": "mime_type",
"value": "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
},
{
"type": "min_length",
"field": "content",
"value": 20
},
{
"type": "contains_any",
"field": "content",
"values": ["Lorem", "ipsum", "document", "text"]
}
],
"skip": {
"languages": ["wasm"],
"reason": "WASM cannot access filesystem; use extractBytes with file content instead"
}
}

View File

@@ -0,0 +1,32 @@
{
"id": "smoke_html_basic",
"category": "smoke",
"description": "Smoke test: HTML table extraction",
"tags": ["smoke", "html"],
"input": {
"path": "html/simple_table.html",
"mime_type": "text/html",
"config": {}
},
"assertions": [
{
"type": "equals",
"field": "mime_type",
"value": "text/html"
},
{
"type": "min_length",
"field": "content",
"value": 10
},
{
"type": "contains_any",
"field": "content",
"values": ["Sample Data Table", "Laptop", "Electronics", "Product"]
}
],
"skip": {
"languages": ["wasm"],
"reason": "WASM cannot access filesystem; use extractBytes with file content instead"
}
}

View File

@@ -0,0 +1,24 @@
{
"id": "smoke_image_png",
"category": "smoke",
"description": "Smoke test: PNG image (without OCR, metadata only)",
"tags": ["smoke", "image", "png"],
"input": {
"path": "images/sample.png",
"mime_type": "image/png",
"config": {
"disable_ocr": true
}
},
"assertions": [
{
"type": "equals",
"field": "mime_type",
"value": "image/png"
}
],
"skip": {
"languages": ["wasm"],
"reason": "WASM cannot access filesystem; use extractBytes with file content instead"
}
}

View File

@@ -0,0 +1,27 @@
{
"id": "smoke_json_basic",
"category": "smoke",
"description": "Smoke test: JSON file extraction",
"tags": ["smoke", "structured", "json"],
"input": {
"path": "json/simple.json",
"mime_type": "application/json",
"config": {}
},
"assertions": [
{
"type": "equals",
"field": "mime_type",
"value": "application/json"
},
{
"type": "min_length",
"field": "content",
"value": 5
}
],
"skip": {
"languages": ["wasm"],
"reason": "WASM cannot access filesystem; use extractBytes with file content instead"
}
}

View File

@@ -0,0 +1,29 @@
{
"id": "ocr_image_png",
"category": "smoke",
"description": "OCR: PNG image extraction with OCR enabled. In WASM this exercises the Uint8Array bridge parameter and Promise await in the generated OcrBackend bridge.",
"tags": ["smoke", "ocr", "image", "png"],
"call": "extract_bytes",
"input": {
"data": "images/test_hello_world.png",
"mime_type": "image/png",
"config": {}
},
"assertions": [
{
"type": "equals",
"field": "mime_type",
"value": "image/png"
},
{
"type": "min_length",
"field": "content",
"value": 1
},
{
"type": "contains_any",
"field": "content",
"values": ["Hello", "World", "hello", "world"]
}
]
}

View File

@@ -0,0 +1,32 @@
{
"id": "smoke_pdf_basic",
"category": "smoke",
"description": "Smoke test: PDF with simple text extraction",
"tags": ["smoke", "pdf", "basic"],
"input": {
"path": "pdf/fake_memo.pdf",
"mime_type": "application/pdf",
"config": {}
},
"assertions": [
{
"type": "equals",
"field": "mime_type",
"value": "application/pdf"
},
{
"type": "min_length",
"field": "content",
"value": 50
},
{
"type": "contains_any",
"field": "content",
"values": ["May 5, 2023", "To Whom it May Concern"]
}
],
"skip": {
"languages": ["wasm"],
"reason": "WASM cannot access filesystem; use extractBytes with file content instead"
}
}

View File

@@ -0,0 +1,27 @@
{
"id": "smoke_txt_basic",
"category": "smoke",
"description": "Smoke test: Plain text file",
"tags": ["smoke", "text", "plaintext"],
"input": {
"path": "text/report.txt",
"mime_type": "text/plain",
"config": {}
},
"assertions": [
{
"type": "equals",
"field": "mime_type",
"value": "text/plain"
},
{
"type": "min_length",
"field": "content",
"value": 5
}
],
"skip": {
"languages": ["wasm"],
"reason": "WASM cannot access filesystem; use extractBytes with file content instead"
}
}

View File

@@ -0,0 +1,57 @@
{
"id": "smoke_xlsx_basic",
"category": "smoke",
"description": "Smoke test: XLSX with basic spreadsheet data including tables",
"tags": ["smoke", "office", "xlsx", "tables"],
"input": {
"path": "xlsx/stanley_cups.xlsx",
"mime_type": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
"config": {}
},
"assertions": [
{
"type": "equals",
"field": "mime_type",
"value": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
},
{
"type": "min_length",
"field": "content",
"value": 100
},
{
"type": "contains_all",
"field": "content",
"values": [
"Team",
"Location",
"Stanley Cups",
"Blues",
"Flyers",
"Maple Leafs",
"STL",
"PHI",
"TOR"
]
},
{
"type": "count_min",
"field": "tables",
"value": 1
},
{
"type": "greater_than_or_equal",
"field": "metadata.format.excel.sheet_count",
"value": 2
},
{
"type": "contains_all",
"field": "metadata.format.excel.sheet_names",
"values": ["Stanley Cups"]
}
],
"skip": {
"languages": ["wasm"],
"reason": "WASM cannot access filesystem; use extractBytes with file content instead"
}
}