Nomad changes
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s

This commit is contained in:
Henrik Jess Nielsen
2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions

View File

@@ -0,0 +1,21 @@
{
  "id": "format_docx_standalone",
  "category": "format_specific",
  "description": "Standalone DOCX extraction using extract_bytes_sync",
  "tags": [
    "format_specific",
    "docx",
    "text_extraction"
  ],
  "call": "extract_bytes_sync",
  "input": {
    "data": "docx/fake.docx",
    "mime_type": "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
  },
  "assertions": [
    {
      "type": "not_error"
    },
    {
      "type": "min_length",
      "field": "content",
      "value": 20
    }
  ]
}

View File

@@ -0,0 +1,26 @@
{
  "id": "format_hwpx_standalone",
  "category": "format_specific",
  "description": "Standalone HWPX extraction using extract_bytes_sync",
  "tags": [
    "format_specific",
    "hwpx",
    "text_extraction"
  ],
  "call": "extract_bytes_sync",
  "input": {
    "data": "hwpx/simple.hwpx",
    "mime_type": "application/haansofthwpx"
  },
  "assertions": [
    {
      "type": "not_error"
    },
    {
      "type": "min_length",
      "field": "content",
      "value": 20
    },
    {
      "type": "contains",
      "field": "content",
      "value": "Hello from HWPX"
    }
  ]
}

View File

@@ -0,0 +1,26 @@
{
  "id": "format_pdf_text",
  "category": "format_specific",
  "description": "Standalone PDF text extraction using extract_bytes_sync",
  "tags": [
    "format_specific",
    "pdf",
    "text_extraction"
  ],
  "call": "extract_bytes_sync",
  "input": {
    "data": "pdf/fake_memo.pdf",
    "mime_type": "application/pdf"
  },
  "assertions": [
    {
      "type": "not_error"
    },
    {
      "type": "min_length",
      "field": "content",
      "value": 50
    },
    {
      "type": "contains_any",
      "field": "content",
      "values": [
        "Mallori",
        "May"
      ]
    }
  ]
}

View File

@@ -0,0 +1,26 @@
{
  "id": "format_pptx",
  "category": "format_specific",
  "description": "PPTX presentation extraction using extract_file_sync",
  "tags": [
    "format_specific",
    "pptx",
    "text_extraction"
  ],
  "call": "extract_file_sync",
  "input": {
    "path": "pptx/simple.pptx",
    "mime_type": "application/vnd.openxmlformats-officedocument.presentationml.presentation"
  },
  "assertions": [
    {
      "type": "not_error"
    }
  ],
  "skip": {
    "languages": [
      "wasm"
    ],
    "reason": "WASM cannot access filesystem; use extractBytes with file content instead"
  }
}

View File

@@ -0,0 +1,26 @@
{
  "id": "format_xlsx",
  "category": "format_specific",
  "description": "XLSX spreadsheet extraction using extract_file_sync",
  "tags": [
    "format_specific",
    "xlsx",
    "text_extraction"
  ],
  "call": "extract_file_sync",
  "input": {
    "path": "xlsx/stanley_cups.xlsx",
    "mime_type": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
  },
  "assertions": [
    {
      "type": "not_error"
    }
  ],
  "skip": {
    "languages": [
      "wasm"
    ],
    "reason": "WASM cannot access filesystem; use extractBytes with file content instead"
  }
}