Nomad changes
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s

This commit is contained in:
Henrik Jess Nielsen
2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/2203.01017v2.json",
"file_type": "json",
"file_size": 663249,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/2203.01017v2.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/2203.01017v2.pages.meta.json",
"file_type": "json",
"file_size": 477,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/2203.01017v2.pages.meta.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/2206.01062.json",
"file_type": "json",
"file_size": 939124,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/2206.01062.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/2206.01062.pages.meta.json",
"file_type": "json",
"file_size": 272,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/2206.01062.pages.meta.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/2305.03393v1-pg9.json",
"file_type": "json",
"file_size": 64383,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/2305.03393v1-pg9.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/2305.03393v1-pg9.pages.meta.json",
"file_type": "json",
"file_size": 33,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/2305.03393v1-pg9.pages.meta.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/2305.03393v1.json",
"file_type": "json",
"file_size": 403019,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/2305.03393v1.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/2305.03393v1.pages.meta.json",
"file_type": "json",
"file_size": 412,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/2305.03393v1.pages.meta.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/amt_handbook_sample.json",
"file_type": "json",
"file_size": 24952,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/amt_handbook_sample.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/amt_handbook_sample.pages.meta.json",
"file_type": "json",
"file_size": 33,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/amt_handbook_sample.pages.meta.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/code_and_formula.json",
"file_type": "json",
"file_size": 21078,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/code_and_formula.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/code_and_formula.pages.meta.json",
"file_type": "json",
"file_size": 61,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/code_and_formula.pages.meta.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/csv-comma-in-cell.csv.json",
"file_type": "json",
"file_size": 17304,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/csv-comma-in-cell.csv.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/csv-comma.csv.json",
"file_type": "json",
"file_size": 60861,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/csv-comma.csv.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/csv-inconsistent-header.csv.json",
"file_type": "json",
"file_size": 16927,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/csv-inconsistent-header.csv.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/csv-pipe.csv.json",
"file_type": "json",
"file_size": 60848,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/csv-pipe.csv.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/csv-semicolon.csv.json",
"file_type": "json",
"file_size": 60859,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/csv-semicolon.csv.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/csv-tab.csv.json",
"file_type": "json",
"file_size": 60850,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/csv-tab.csv.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/csv-too-few-columns.csv.json",
"file_type": "json",
"file_size": 16921,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/csv-too-few-columns.csv.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/csv-too-many-columns.csv.json",
"file_type": "json",
"file_size": 19770,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/csv-too-many-columns.csv.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/deepseek_example.md.json",
"file_type": "json",
"file_size": 71809,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/deepseek_example.md.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/deepseek_simple.md.json",
"file_type": "json",
"file_size": 10372,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/deepseek_simple.md.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/deepseek_title.md.json",
"file_type": "json",
"file_size": 19823,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/deepseek_title.md.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/docx_grouped_images.docx.json",
"file_type": "json",
"file_size": 152115,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/docx_grouped_images.docx.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/docx_rich_cells.docx.json",
"file_type": "json",
"file_size": 82392,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/docx_rich_cells.docx.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/drawingml.docx.json",
"file_type": "json",
"file_size": 88806,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/drawingml.docx.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/elife-56337.nxml.json",
"file_type": "json",
"file_size": 352303,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/elife-56337.nxml.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/equations.docx.json",
"file_type": "json",
"file_size": 20636,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/equations.docx.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/escaped_characters.md.json",
"file_type": "json",
"file_size": 19472,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/escaped_characters.md.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/example_01.html.json",
"file_type": "json",
"file_size": 5851,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/example_01.html.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/example_01_images.html.json",
"file_type": "json",
"file_size": 717612,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/example_01_images.html.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/example_02.html.json",
"file_type": "json",
"file_size": 4056,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/example_02.html.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/example_03.html.json",
"file_type": "json",
"file_size": 17188,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/example_03.html.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/example_04.html.json",
"file_type": "json",
"file_size": 10366,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/example_04.html.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/example_05.html.json",
"file_type": "json",
"file_size": 10348,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/example_05.html.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/example_06.html.json",
"file_type": "json",
"file_size": 4814,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/example_06.html.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/example_07.html.json",
"file_type": "json",
"file_size": 7263,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/example_07.html.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/example_08.html.json",
"file_type": "json",
"file_size": 64838,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/example_08.html.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/formatting.html.json",
"file_type": "json",
"file_size": 25210,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/formatting.html.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/html_code_snippets.html.json",
"file_type": "json",
"file_size": 14947,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/html_code_snippets.html.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/html_heading_in_p.html.json",
"file_type": "json",
"file_size": 20611,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/html_heading_in_p.html.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/html_rich_table_cells.html.json",
"file_type": "json",
"file_size": 63348,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/html_rich_table_cells.html.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/hyperlink_01.html.json",
"file_type": "json",
"file_size": 2131,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/hyperlink_01.html.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/hyperlink_02.html.json",
"file_type": "json",
"file_size": 1951,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/hyperlink_02.html.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/hyperlink_03.html.json",
"file_type": "json",
"file_size": 6996,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/hyperlink_03.html.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/hyperlink_04.html.json",
"file_type": "json",
"file_size": 980,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/hyperlink_04.html.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/hyperlink_05.html.json",
"file_type": "json",
"file_size": 3265,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/hyperlink_05.html.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/ipa20180000016.json",
"file_type": "json",
"file_size": 291123,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/ipa20180000016.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/ipa20200022300.json",
"file_type": "json",
"file_size": 120441,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/ipa20200022300.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/list_after_num_headers.docx.json",
"file_type": "json",
"file_size": 3681,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/list_after_num_headers.docx.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/lorem_ipsum.docx.json",
"file_type": "json",
"file_size": 10926,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/lorem_ipsum.docx.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/multi_page.json",
"file_type": "json",
"file_size": 53493,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/multi_page.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/multi_page.pages.meta.json",
"file_type": "json",
"file_size": 147,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/multi_page.pages.meta.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/pa20010031492.json",
"file_type": "json",
"file_size": 99911,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/pa20010031492.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/pftaps057006474.json",
"file_type": "json",
"file_size": 68409,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/pftaps057006474.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/pg06442728.json",
"file_type": "json",
"file_size": 93078,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/pg06442728.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/picture_classification.json",
"file_type": "json",
"file_size": 14219,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/picture_classification.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/picture_classification.pages.meta.json",
"file_type": "json",
"file_size": 61,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/picture_classification.pages.meta.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/pntd.0008301.nxml.json",
"file_type": "json",
"file_size": 311704,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/pntd.0008301.nxml.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/pone.0234687.nxml.json",
"file_type": "json",
"file_size": 559991,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/pone.0234687.nxml.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/powerpoint_bad_text.pptx.json",
"file_type": "json",
"file_size": 1869,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/powerpoint_bad_text.pptx.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/powerpoint_issue_2663.pptx.json",
"file_type": "json",
"file_size": 8989,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/powerpoint_issue_2663.pptx.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/powerpoint_sample.pptx.json",
"file_type": "json",
"file_size": 61244,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/powerpoint_sample.pptx.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/powerpoint_with_image.pptx.json",
"file_type": "json",
"file_size": 56295,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/powerpoint_with_image.pptx.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/redp5110_sampled.json",
"file_type": "json",
"file_size": 465656,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/redp5110_sampled.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/redp5110_sampled.pages.meta.json",
"file_type": "json",
"file_size": 524,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/redp5110_sampled.pages.meta.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/right_to_left_01.json",
"file_type": "json",
"file_size": 15772,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/right_to_left_01.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/right_to_left_01.pages.meta.json",
"file_type": "json",
"file_size": 32,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/right_to_left_01.pages.meta.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/right_to_left_02.json",
"file_type": "json",
"file_size": 15601,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/right_to_left_02.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/right_to_left_02.pages.meta.json",
"file_type": "json",
"file_size": 32,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/right_to_left_02.pages.meta.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/right_to_left_03.json",
"file_type": "json",
"file_size": 27176,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/right_to_left_03.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/right_to_left_03.pages.meta.json",
"file_type": "json",
"file_size": 32,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/right_to_left_03.pages.meta.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/table_01.html.json",
"file_type": "json",
"file_size": 5307,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/table_01.html.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/table_02.html.json",
"file_type": "json",
"file_size": 6691,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/table_02.html.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/table_03.html.json",
"file_type": "json",
"file_size": 7038,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/table_03.html.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/table_04.html.json",
"file_type": "json",
"file_size": 7442,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/table_04.html.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/table_05.html.json",
"file_type": "json",
"file_size": 11053,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/table_05.html.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/table_06.html.json",
"file_type": "json",
"file_size": 22830,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/table_06.html.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/table_with_equations.docx.json",
"file_type": "json",
"file_size": 4943,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/table_with_equations.docx.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/table_with_heading_01.html.json",
"file_type": "json",
"file_size": 5001,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/table_with_heading_01.html.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/table_with_heading_02.html.json",
"file_type": "json",
"file_size": 6633,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/table_with_heading_02.html.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/tablecell.docx.json",
"file_type": "json",
"file_size": 11513,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/tablecell.docx.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/markitdown/json/test.json",
"file_type": "json",
"file_size": 229,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from markitdown test suite",
"source": "markitdown",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/test.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/test_emf_docx.docx.json",
"file_type": "json",
"file_size": 137152,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/test_emf_docx.docx.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/textbox.docx.json",
"file_type": "json",
"file_size": 31501,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/textbox.docx.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/unit_test_01.html.json",
"file_type": "json",
"file_size": 3174,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/unit_test_01.html.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/unit_test_formatting.docx.json",
"file_type": "json",
"file_size": 21104,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/unit_test_formatting.docx.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/unit_test_headers.docx.json",
"file_type": "json",
"file_size": 17101,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/unit_test_headers.docx.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/unit_test_headers_numbered.docx.json",
"file_type": "json",
"file_size": 18102,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/unit_test_headers_numbered.docx.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/unit_test_lists.docx.json",
"file_type": "json",
"file_size": 24099,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/unit_test_lists.docx.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/webvtt_example_01.vtt.json",
"file_type": "json",
"file_size": 7472,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/webvtt_example_01.vtt.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/webvtt_example_02.vtt.json",
"file_type": "json",
"file_size": 5704,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/webvtt_example_02.vtt.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/webvtt_example_03.vtt.json",
"file_type": "json",
"file_size": 9873,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/webvtt_example_03.vtt.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/webvtt_example_04.vtt.json",
"file_type": "json",
"file_size": 7851,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/webvtt_example_04.vtt.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/wiki_duck.html.json",
"file_type": "json",
"file_size": 636994,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/wiki_duck.html.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/word_comments.json",
"file_type": "json",
"file_size": 5686,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/word_comments.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/word_image_anchors.docx.json",
"file_type": "json",
"file_size": 13525,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/word_image_anchors.docx.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/word_sample.docx.json",
"file_type": "json",
"file_size": 109876,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/word_sample.docx.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/word_sample.json",
"file_type": "json",
"file_size": 103166,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/word_sample.txt",
"source": "vision"
}
}

View File

@@ -0,0 +1,15 @@
{
"document": "../../../../test_documents/vendored/docling/json/word_tables.docx.json",
"file_type": "json",
"file_size": 76177,
"expected_frameworks": ["kreuzberg", "markitdown", "tika"],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "small"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/json/word_tables.docx.txt",
"source": "vision"
}
}

Some files were not shown because too many files have changed in this diff Show More