Nomad changes
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s

This commit is contained in:
Henrik Jess Nielsen
2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions

View File

@@ -0,0 +1,16 @@
{
"document": "../../../../test_documents/xlsx/excel_multi_sheet.xlsx",
"file_type": "xlsx",
"file_size": 6166,
"expected_frameworks": ["kreuzberg"],
"metadata": {
"description": "xlsx test: excel_multi_sheet",
"source": "pandoc-generated",
"size_category": "small"
},
"ground_truth": {
"markdown_file": "../../../../test_documents/ground_truth/xlsx/excel_multi_sheet.md",
"source": "pandoc",
"text_file": "../../../../test_documents/ground_truth/xlsx/excel_multi_sheet.txt"
}
}

View File

@@ -0,0 +1,16 @@
{
"document": "../../../../test_documents/xlsx/stanley_cups.xlsx",
"file_type": "xlsx",
"file_size": 6339,
"expected_frameworks": ["kreuzberg"],
"metadata": {
"description": "xlsx test: stanley_cups",
"source": "pandoc-generated",
"size_category": "small"
},
"ground_truth": {
"markdown_file": "../../../../test_documents/ground_truth/xlsx/stanley_cups.md",
"source": "pandoc",
"text_file": "../../../../test_documents/ground_truth/xlsx/stanley_cups.txt"
}
}

View File

@@ -0,0 +1,16 @@
{
"document": "../../../../test_documents/vendored/markitdown/xlsx/test.xlsx",
"file_type": "xlsx",
"file_size": 11562,
"expected_frameworks": ["kreuzberg", "docling", "markitdown", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from markitdown test suite",
"source": "markitdown",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/xlsx/test.txt",
"source": "pandoc",
"markdown_file": "../../../../test_documents/ground_truth/xlsx/test.md"
}
}

View File

@@ -0,0 +1,16 @@
{
"document": "../../../../test_documents/xlsx/test_01.xlsx",
"file_type": "xlsx",
"file_size": 170052,
"expected_frameworks": ["kreuzberg"],
"metadata": {
"description": "xlsx test: test_01",
"source": "pandoc-generated",
"size_category": "small"
},
"ground_truth": {
"markdown_file": "../../../../test_documents/ground_truth/xlsx/test_01.md",
"source": "pandoc",
"text_file": "../../../../test_documents/ground_truth/xlsx/test_01.txt"
}
}

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,17 @@
{
"document": "../../../../test_documents/xlsx/test_01.xlsm",
"file_type": "xlsm",
"file_size": 162186,
"expected_frameworks": ["kreuzberg", "tika"],
"metadata": {
"description": "Excel macro-enabled workbook (.xlsm format) - converted from test_01.xlsx",
"category": "structured",
"size_category": "medium",
"excel_variant": "macro-enabled",
"notes": "XLSM files support VBA macros and advanced Excel features"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/xlsm/xlsm_macro_enabled.txt",
"source": "openpyxl"
}
}

View File

@@ -0,0 +1,16 @@
{
"document": "../../../../test_documents/vendored/docling/xlsx/xlsx_01.xlsx",
"file_type": "xlsx",
"file_size": 170934,
"expected_frameworks": ["kreuzberg", "docling", "markitdown", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/xlsx/xlsx_01.txt",
"source": "pandoc",
"markdown_file": "../../../../test_documents/ground_truth/xlsx/xlsx_01.md"
}
}

View File

@@ -0,0 +1,16 @@
{
"document": "../../../../test_documents/vendored/docling/xlsx/xlsx_03_chartsheet.xlsx",
"file_type": "xlsx",
"file_size": 10491,
"expected_frameworks": ["kreuzberg", "docling", "markitdown", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/xlsx/xlsx_03_chartsheet.txt",
"source": "pandoc",
"markdown_file": "../../../../test_documents/ground_truth/xlsx/xlsx_03_chartsheet.md"
}
}

View File

@@ -0,0 +1,16 @@
{
"document": "../../../../test_documents/vendored/docling/xlsx/xlsx_04_inflated.xlsx",
"file_type": "xlsx",
"file_size": 171916,
"expected_frameworks": ["kreuzberg", "docling", "markitdown", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/xlsx/xlsx_04_inflated.txt",
"source": "pandoc",
"markdown_file": "../../../../test_documents/ground_truth/xlsx/xlsx_04_inflated.md"
}
}

View File

@@ -0,0 +1,16 @@
{
"document": "../../../../test_documents/vendored/docling/xlsx/xlsx_05_table_with_title.xlsx",
"file_type": "xlsx",
"file_size": 6335,
"expected_frameworks": ["kreuzberg", "docling", "markitdown", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/xlsx/xlsx_05_table_with_title.txt",
"source": "pandoc",
"markdown_file": "../../../../test_documents/ground_truth/xlsx/xlsx_05_table_with_title.md"
}
}

View File

@@ -0,0 +1,16 @@
{
"document": "../../../../test_documents/vendored/docling/xlsx/xlsx_06_edge_cases_.xlsx",
"file_type": "xlsx",
"file_size": 9504,
"expected_frameworks": ["kreuzberg", "docling", "markitdown", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/xlsx/xlsx_06_edge_cases_.txt",
"source": "pandoc",
"markdown_file": "../../../../test_documents/ground_truth/xlsx/xlsx_06_edge_cases_.md"
}
}

View File

@@ -0,0 +1,16 @@
{
"document": "../../../../test_documents/vendored/docling/xlsx/xlsx_07_gap_tolerance_.xlsx",
"file_type": "xlsx",
"file_size": 16217,
"expected_frameworks": ["kreuzberg", "docling", "markitdown", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/xlsx/xlsx_07_gap_tolerance_.txt",
"source": "pandoc",
"markdown_file": "../../../../test_documents/ground_truth/xlsx/xlsx_07_gap_tolerance_.md"
}
}