Nomad changes
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s

This commit is contained in:
Henrik Jess Nielsen
2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions

View File

@@ -0,0 +1,16 @@
{
"document": "../../../../test_documents/vendored/docling/docx/docx_grouped_images.docx",
"file_type": "docx",
"file_size": 207463,
"expected_frameworks": ["kreuzberg", "docling", "markitdown", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/docx/docx_grouped_images.txt",
"source": "pandoc",
"markdown_file": "../../../../test_documents/ground_truth/docx/docx_grouped_images.md"
}
}

View File

@@ -0,0 +1,16 @@
{
"document": "../../../../test_documents/vendored/docling/docx/docx_rich_cells.docx",
"file_type": "docx",
"file_size": 24320,
"expected_frameworks": ["kreuzberg", "docling", "markitdown", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/docx/docx_rich_cells.txt",
"source": "pandoc",
"markdown_file": "../../../../test_documents/ground_truth/docx/docx_rich_cells.md"
}
}

View File

@@ -0,0 +1,16 @@
{
"document": "../../../../test_documents/docx/docx_tables.docx",
"file_type": "docx",
"file_size": 12725,
"expected_frameworks": ["kreuzberg"],
"metadata": {
"description": "docx test: docx_tables",
"source": "pandoc-generated",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/docx/docx_tables.txt",
"markdown_file": "../../../../test_documents/ground_truth/docx/docx_tables.md",
"source": "pandoc"
}
}

View File

@@ -0,0 +1,16 @@
{
"document": "../../../../test_documents/vendored/docling/docx/equations.docx",
"file_type": "docx",
"file_size": 15814,
"expected_frameworks": ["kreuzberg"],
"metadata": {
"description": "DOCX test document: equations",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"markdown_file": "../../../../test_documents/ground_truth/docx/equations.md",
"source": "pandoc",
"text_file": "../../../../test_documents/ground_truth/docx/equations.txt"
}
}

View File

@@ -0,0 +1,16 @@
{
"document": "../../../../test_documents/docx/extraction_test.docx",
"file_type": "docx",
"file_size": 11296,
"expected_frameworks": ["kreuzberg"],
"metadata": {
"description": "docx test: extraction_test",
"source": "pandoc-generated",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/docx/extraction_test.txt",
"markdown_file": "../../../../test_documents/ground_truth/docx/extraction_test.md",
"source": "pandoc"
}
}

View File

@@ -0,0 +1,16 @@
{
"document": "../../../../test_documents/docx/fake.docx",
"file_type": "docx",
"file_size": 36602,
"expected_frameworks": ["kreuzberg"],
"metadata": {
"description": "docx test: fake",
"source": "pandoc-generated",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/docx/fake.txt",
"markdown_file": "../../../../test_documents/ground_truth/docx/fake.md",
"source": "pandoc"
}
}

View File

@@ -0,0 +1,16 @@
{
"document": "../../../../test_documents/docx/issue_359_list_whitespace.docx",
"file_type": "docx",
"file_size": 9170,
"expected_frameworks": ["kreuzberg"],
"metadata": {
"description": "docx test: issue_359_list_whitespace",
"source": "pandoc-generated",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/docx/issue_359_list_whitespace.txt",
"markdown_file": "../../../../test_documents/ground_truth/docx/issue_359_list_whitespace.md",
"source": "pandoc"
}
}

View File

@@ -0,0 +1,16 @@
{
"document": "../../../../test_documents/vendored/docling/docx/list_after_num_headers.docx",
"file_type": "docx",
"file_size": 15698,
"expected_frameworks": ["kreuzberg", "docling", "markitdown", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/docx/list_after_num_headers.txt",
"source": "pandoc",
"markdown_file": "../../../../test_documents/ground_truth/docx/list_after_num_headers.md"
}
}

View File

@@ -0,0 +1,16 @@
{
"document": "../../../../test_documents/vendored/docling/docx/lorem_ipsum.docx",
"file_type": "docx",
"file_size": 14817,
"expected_frameworks": ["kreuzberg", "docling", "markitdown", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/docx/lorem_ipsum.txt",
"source": "pandoc",
"markdown_file": "../../../../test_documents/ground_truth/docx/lorem_ipsum.md"
}
}

View File

@@ -0,0 +1,16 @@
{
"document": "../../../../test_documents/vendored/markitdown/docx/rlink.docx",
"file_type": "docx",
"file_size": 13708,
"expected_frameworks": ["kreuzberg", "docling", "markitdown", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from markitdown test suite",
"source": "markitdown",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/docx/rlink.txt",
"source": "pandoc",
"markdown_file": "../../../../test_documents/ground_truth/docx/rlink.md"
}
}

View File

@@ -0,0 +1,16 @@
{
"document": "../../../../test_documents/docx/sample_document.docx",
"file_type": "docx",
"file_size": 103966,
"expected_frameworks": ["kreuzberg"],
"metadata": {
"description": "docx test: sample_document",
"source": "pandoc-generated",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/docx/sample_document.txt",
"markdown_file": "../../../../test_documents/ground_truth/docx/sample_document.md",
"source": "pandoc"
}
}

View File

@@ -0,0 +1,16 @@
{
"document": "../../../../test_documents/vendored/docling/docx/table_with_equations.docx",
"file_type": "docx",
"file_size": 14228,
"expected_frameworks": ["kreuzberg", "docling", "markitdown", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/docx/table_with_equations.txt",
"source": "pandoc",
"markdown_file": "../../../../test_documents/ground_truth/docx/table_with_equations.md"
}
}

View File

@@ -0,0 +1,16 @@
{
"document": "../../../../test_documents/vendored/docling/docx/tablecell.docx",
"file_type": "docx",
"file_size": 15180,
"expected_frameworks": ["kreuzberg", "docling", "markitdown", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/docx/tablecell.txt",
"source": "pandoc",
"markdown_file": "../../../../test_documents/ground_truth/docx/tablecell.md"
}
}

View File

@@ -0,0 +1,16 @@
{
"document": "../../../../test_documents/vendored/markitdown/docx/test.docx",
"file_type": "docx",
"file_size": 135824,
"expected_frameworks": ["kreuzberg", "docling", "markitdown", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from markitdown test suite",
"source": "markitdown",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/docx/test.txt",
"source": "pandoc",
"markdown_file": "../../../../test_documents/ground_truth/docx/test.md"
}
}

View File

@@ -0,0 +1,16 @@
{
"document": "../../../../test_documents/vendored/docling/docx/test_emf_docx.docx",
"file_type": "docx",
"file_size": 426097,
"expected_frameworks": ["kreuzberg", "docling", "markitdown", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/docx/test_emf_docx.txt",
"source": "pandoc",
"markdown_file": "../../../../test_documents/ground_truth/docx/test_emf_docx.md"
}
}

View File

@@ -0,0 +1,16 @@
{
"document": "../../../../test_documents/vendored/markitdown/docx/test_with_comment.docx",
"file_type": "docx",
"file_size": 12971,
"expected_frameworks": ["kreuzberg", "docling", "markitdown", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from markitdown test suite",
"source": "markitdown",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/docx/test_with_comment.txt",
"source": "pandoc",
"markdown_file": "../../../../test_documents/ground_truth/docx/test_with_comment.md"
}
}

View File

@@ -0,0 +1,16 @@
{
"document": "../../../../test_documents/vendored/docling/docx/textbox.docx",
"file_type": "docx",
"file_size": 49206,
"expected_frameworks": ["kreuzberg", "docling", "markitdown", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/docx/textbox.txt",
"source": "pandoc",
"markdown_file": "../../../../test_documents/ground_truth/docx/textbox.md"
}
}

View File

@@ -0,0 +1,16 @@
{
"document": "../../../../test_documents/vendored/docling/docx/unit_test_formatting.docx",
"file_type": "docx",
"file_size": 29099,
"expected_frameworks": ["kreuzberg", "docling", "markitdown", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/docx/unit_test_formatting.txt",
"source": "pandoc",
"markdown_file": "../../../../test_documents/ground_truth/docx/unit_test_formatting.md"
}
}

View File

@@ -0,0 +1,16 @@
{
"document": "../../../../test_documents/vendored/docling/docx/unit_test_headers.docx",
"file_type": "docx",
"file_size": 13903,
"expected_frameworks": ["kreuzberg", "docling", "markitdown", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/docx/unit_test_headers.txt",
"source": "pandoc",
"markdown_file": "../../../../test_documents/ground_truth/docx/unit_test_headers.md"
}
}

View File

@@ -0,0 +1,16 @@
{
"document": "../../../../test_documents/vendored/docling/docx/unit_test_headers_numbered.docx",
"file_type": "docx",
"file_size": 16880,
"expected_frameworks": ["kreuzberg", "docling", "markitdown", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/docx/unit_test_headers_numbered.txt",
"source": "pandoc",
"markdown_file": "../../../../test_documents/ground_truth/docx/unit_test_headers_numbered.md"
}
}

View File

@@ -0,0 +1,16 @@
{
"document": "../../../../test_documents/vendored/docling/docx/unit_test_lists.docx",
"file_type": "docx",
"file_size": 15769,
"expected_frameworks": ["kreuzberg", "docling", "markitdown", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/docx/unit_test_lists.txt",
"source": "pandoc",
"markdown_file": "../../../../test_documents/ground_truth/docx/unit_test_lists.md"
}
}

View File

@@ -0,0 +1,16 @@
{
"document": "../../../../test_documents/vendored/docling/docx/word_comments.docx",
"file_type": "docx",
"file_size": 37399,
"expected_frameworks": ["kreuzberg", "docling", "markitdown", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/docx/word_comments.txt",
"source": "pandoc",
"markdown_file": "../../../../test_documents/ground_truth/docx/word_comments.md"
}
}

View File

@@ -0,0 +1,16 @@
{
"document": "../../../../test_documents/vendored/docling/docx/word_image_anchors.docx",
"file_type": "docx",
"file_size": 18560,
"expected_frameworks": ["kreuzberg", "docling", "markitdown", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/docx/word_image_anchors.txt",
"source": "pandoc",
"markdown_file": "../../../../test_documents/ground_truth/docx/word_image_anchors.md"
}
}

View File

@@ -0,0 +1,16 @@
{
"document": "../../../../test_documents/vendored/docling/docx/word_sample.docx",
"file_type": "docx",
"file_size": 103966,
"expected_frameworks": ["kreuzberg", "docling", "markitdown", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/docx/word_sample.txt",
"source": "pandoc",
"markdown_file": "../../../../test_documents/ground_truth/docx/word_sample.md"
}
}

View File

@@ -0,0 +1,16 @@
{
"document": "../../../../test_documents/vendored/docling/docx/word_tables.docx",
"file_type": "docx",
"file_size": 16404,
"expected_frameworks": ["kreuzberg", "docling", "markitdown", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/docx/word_tables.txt",
"source": "pandoc",
"markdown_file": "../../../../test_documents/ground_truth/docx/word_tables.md"
}
}