Nomad changes
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s

This commit is contained in:
Henrik Jess Nielsen
2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/2203.01017v2.md",
"file_type": "md",
"file_size": 54216,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/2203.01017v2.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/2206.01062.md",
"file_type": "md",
"file_size": 51516,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/2206.01062.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/2305.03393v1-pg9.md",
"file_type": "md",
"file_size": 2830,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/2305.03393v1-pg9.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/2305.03393v1.md",
"file_type": "md",
"file_size": 32105,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/2305.03393v1.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/amt_handbook_sample.md",
"file_type": "md",
"file_size": 3620,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/amt_handbook_sample.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/blocks.md.md",
"file_type": "md",
"file_size": 388,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/blocks.md.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/code_and_formula.md",
"file_type": "md",
"file_size": 5538,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/code_and_formula.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/csv-comma-in-cell.csv.md",
"file_type": "md",
"file_size": 156,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/csv-comma-in-cell.csv.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/csv-comma.csv.md",
"file_type": "md",
"file_size": 1911,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/csv-comma.csv.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/csv-inconsistent-header.csv.md",
"file_type": "md",
"file_size": 150,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/csv-inconsistent-header.csv.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/csv-pipe.csv.md",
"file_type": "md",
"file_size": 1939,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/csv-pipe.csv.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/csv-semicolon.csv.md",
"file_type": "md",
"file_size": 1904,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/csv-semicolon.csv.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/csv-tab.csv.md",
"file_type": "md",
"file_size": 1854,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/csv-tab.csv.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/csv-too-few-columns.csv.md",
"file_type": "md",
"file_size": 156,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/csv-too-few-columns.csv.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/csv-too-many-columns.csv.md",
"file_type": "md",
"file_size": 186,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/csv-too-many-columns.csv.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/deepseek_example.md.md",
"file_type": "md",
"file_size": 3039,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/deepseek_example.md.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/deepseek_simple.md.md",
"file_type": "md",
"file_size": 1342,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/deepseek_simple.md.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/deepseek_title.md.md",
"file_type": "md",
"file_size": 2950,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/deepseek_title.md.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/docx_grouped_images.docx.md",
"file_type": "md",
"file_size": 335,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/docx_grouped_images.docx.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/docx_rich_cells.docx.md",
"file_type": "md",
"file_size": 2574,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/docx_rich_cells.docx.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/drawingml.docx.md",
"file_type": "md",
"file_size": 47,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/drawingml.docx.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/duck.md.md",
"file_type": "md",
"file_size": 1041,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/duck.md.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/elife-56337.nxml.md",
"file_type": "md",
"file_size": 77781,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/elife-56337.nxml.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/ending_with_table.md.md",
"file_type": "md",
"file_size": 462,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/ending_with_table.md.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/equations.docx.md",
"file_type": "md",
"file_size": 2267,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/equations.docx.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/escaped_characters.md.md",
"file_type": "md",
"file_size": 729,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/escaped_characters.md.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/example_01.html.md",
"file_type": "md",
"file_size": 358,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/example_01.html.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/example_01_images.html.md",
"file_type": "md",
"file_size": 358,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/example_01_images.html.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/example_02.html.md",
"file_type": "md",
"file_size": 241,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/example_02.html.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/example_03.html.md",
"file_type": "md",
"file_size": 611,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/example_03.html.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/example_04.html.md",
"file_type": "md",
"file_size": 484,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/example_04.html.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/example_05.html.md",
"file_type": "md",
"file_size": 475,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/example_05.html.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/example_06.html.md",
"file_type": "md",
"file_size": 245,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/example_06.html.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/example_07.html.md",
"file_type": "md",
"file_size": 202,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/example_07.html.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/example_08.html.md",
"file_type": "md",
"file_size": 1174,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/example_08.html.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/formatting.html.md",
"file_type": "md",
"file_size": 934,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/formatting.html.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/html_code_snippets.html.md",
"file_type": "md",
"file_size": 1051,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/html_code_snippets.html.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/html_heading_in_p.html.md",
"file_type": "md",
"file_size": 265,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/html_heading_in_p.html.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/html_rich_table_cells.html.md",
"file_type": "md",
"file_size": 3000,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/html_rich_table_cells.html.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/hyperlink_01.html.md",
"file_type": "md",
"file_size": 57,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/hyperlink_01.html.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/hyperlink_02.html.md",
"file_type": "md",
"file_size": 22,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/hyperlink_02.html.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/hyperlink_03.html.md",
"file_type": "md",
"file_size": 303,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/hyperlink_03.html.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/hyperlink_04.html.md",
"file_type": "md",
"file_size": 34,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/hyperlink_04.html.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/hyperlink_05.html.md",
"file_type": "md",
"file_size": 152,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/hyperlink_05.html.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/inline_and_formatting.md.md",
"file_type": "md",
"file_size": 941,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/inline_and_formatting.md.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/ipa20180000016.md",
"file_type": "md",
"file_size": 67171,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/ipa20180000016.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/ipa20200022300.md",
"file_type": "md",
"file_size": 48801,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/ipa20200022300.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/list_after_num_headers.docx.md",
"file_type": "md",
"file_size": 164,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/list_after_num_headers.docx.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/lorem_ipsum.docx.md",
"file_type": "md",
"file_size": 3487,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/lorem_ipsum.docx.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/mixed.md.md",
"file_type": "md",
"file_size": 610,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/mixed.md.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/mixed_without_h1.md.md",
"file_type": "md",
"file_size": 108,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/mixed_without_h1.md.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/multi_page.md",
"file_type": "md",
"file_size": 9393,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/multi_page.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/nested.md.md",
"file_type": "md",
"file_size": 477,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/nested.md.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/pa20010031492.md",
"file_type": "md",
"file_size": 26311,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/pa20010031492.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/pftaps057006474.md",
"file_type": "md",
"file_size": 25649,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/pftaps057006474.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/pg06442728.md",
"file_type": "md",
"file_size": 29728,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/pg06442728.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/picture_classification.md",
"file_type": "md",
"file_size": 3458,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/picture_classification.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/pntd.0008301.nxml.md",
"file_type": "md",
"file_size": 55251,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/pntd.0008301.nxml.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/pone.0234687.nxml.md",
"file_type": "md",
"file_size": 69786,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/pone.0234687.nxml.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/powerpoint_bad_text.pptx.md",
"file_type": "md",
"file_size": 121,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/powerpoint_bad_text.pptx.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/powerpoint_issue_2663.pptx.md",
"file_type": "md",
"file_size": 635,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/powerpoint_issue_2663.pptx.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/powerpoint_sample.pptx.md",
"file_type": "md",
"file_size": 1184,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/powerpoint_sample.pptx.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/powerpoint_with_image.pptx.md",
"file_type": "md",
"file_size": 60,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/powerpoint_with_image.pptx.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/redp5110_sampled.md",
"file_type": "md",
"file_size": 40112,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/redp5110_sampled.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/right_to_left_01.md",
"file_type": "md",
"file_size": 2514,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/right_to_left_01.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/right_to_left_02.md",
"file_type": "md",
"file_size": 2004,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/right_to_left_02.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/right_to_left_03.md",
"file_type": "md",
"file_size": 1359,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/right_to_left_03.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/table_01.html.md",
"file_type": "md",
"file_size": 101,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/table_01.html.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/table_02.html.md",
"file_type": "md",
"file_size": 239,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/table_02.html.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/table_03.html.md",
"file_type": "md",
"file_size": 206,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/table_03.html.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/table_04.html.md",
"file_type": "md",
"file_size": 275,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/table_04.html.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/table_05.html.md",
"file_type": "md",
"file_size": 293,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/table_05.html.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/table_06.html.md",
"file_type": "md",
"file_size": 2858,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/table_06.html.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/table_with_equations.docx.md",
"file_type": "md",
"file_size": 240,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/table_with_equations.docx.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/table_with_heading_01.html.md",
"file_type": "md",
"file_size": 83,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/table_with_heading_01.html.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/table_with_heading_02.html.md",
"file_type": "md",
"file_size": 139,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/table_with_heading_02.html.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/tablecell.docx.md",
"file_type": "md",
"file_size": 176,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/tablecell.docx.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/test_01.asciidoc.md",
"file_type": "md",
"file_size": 376,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/test_01.asciidoc.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/test_02.asciidoc.md",
"file_type": "md",
"file_size": 1987,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/test_02.asciidoc.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/test_03.asciidoc.md",
"file_type": "md",
"file_size": 646,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/test_03.asciidoc.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/test_emf_docx.docx.md",
"file_type": "md",
"file_size": 139,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/test_emf_docx.docx.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/textbox.docx.md",
"file_type": "md",
"file_size": 1959,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/textbox.docx.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/unit_test_01.html.md",
"file_type": "md",
"file_size": 107,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/unit_test_01.html.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/unit_test_formatting.docx.md",
"file_type": "md",
"file_size": 500,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/unit_test_formatting.docx.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/unit_test_headers.docx.md",
"file_type": "md",
"file_size": 373,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/unit_test_headers.docx.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/unit_test_headers_numbered.docx.md",
"file_type": "md",
"file_size": 401,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/unit_test_headers_numbered.docx.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/unit_test_lists.docx.md",
"file_type": "md",
"file_size": 560,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/unit_test_lists.docx.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/webvtt_example_01.vtt.md",
"file_type": "md",
"file_size": 515,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/webvtt_example_01.vtt.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/webvtt_example_02.vtt.md",
"file_type": "md",
"file_size": 117,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/webvtt_example_02.vtt.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/webvtt_example_03.vtt.md",
"file_type": "md",
"file_size": 348,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/webvtt_example_03.vtt.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/webvtt_example_04.vtt.md",
"file_type": "md",
"file_size": 272,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/webvtt_example_04.vtt.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/wiki.md.md",
"file_type": "md",
"file_size": 4790,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/wiki.md.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/wiki_duck.html.md",
"file_type": "md",
"file_size": 59879,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/wiki_duck.html.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/word_comments.md",
"file_type": "md",
"file_size": 240,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/word_comments.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/word_image_anchors.docx.md",
"file_type": "md",
"file_size": 175,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/word_image_anchors.docx.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/word_sample.docx.md",
"file_type": "md",
"file_size": 1003,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/word_sample.docx.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/word_sample.md",
"file_type": "md",
"file_size": 997,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/word_sample.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/word_tables.docx.md",
"file_type": "md",
"file_size": 2117,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/word_tables.docx.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/xlsx_01.xlsx.md",
"file_type": "md",
"file_size": 1753,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/xlsx_01.xlsx.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/md/xlsx_02_sample_sales_data.xlsm.md",
"file_type": "md",
"file_size": 1342,
"expected_frameworks": ["kreuzberg", "docling", "pandoc", "tika", "unstructured"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/md/xlsx_02_sample_sales_data.xlsm.txt",
"source": "vision"
}
}

Some files were not shown because too many files have changed in this diff Show More