This commit is contained in:
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"document": "../../../../test_documents/vendored/docling/txt/2203.01017v2.doctags.txt",
|
||||
"file_type": "txt",
|
||||
"file_size": 66446,
|
||||
"expected_frameworks": ["kreuzberg", "pandoc", "pymupdf4llm", "tika", "unstructured"],
|
||||
"metadata": {
|
||||
"description": "Document from docling test suite",
|
||||
"source": "docling",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"text_file": "../../../../test_documents/ground_truth/txt/2203.01017v2.doctags.txt",
|
||||
"source": "vision"
|
||||
}
|
||||
}
|
||||
15
tools/benchmark-harness/fixtures/txt/2206.01062.doctags.json
Normal file
15
tools/benchmark-harness/fixtures/txt/2206.01062.doctags.json
Normal file
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"document": "../../../../test_documents/vendored/docling/txt/2206.01062.doctags.txt",
|
||||
"file_type": "txt",
|
||||
"file_size": 55990,
|
||||
"expected_frameworks": ["kreuzberg", "pandoc", "pymupdf4llm", "tika", "unstructured"],
|
||||
"metadata": {
|
||||
"description": "Document from docling test suite",
|
||||
"source": "docling",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"text_file": "../../../../test_documents/ground_truth/txt/2206.01062.doctags.txt",
|
||||
"source": "vision"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"document": "../../../../test_documents/vendored/docling/txt/2305.03393v1-pg9.doctags.txt",
|
||||
"file_type": "txt",
|
||||
"file_size": 3210,
|
||||
"expected_frameworks": ["kreuzberg", "pandoc", "pymupdf4llm", "tika", "unstructured"],
|
||||
"metadata": {
|
||||
"description": "Document from docling test suite",
|
||||
"source": "docling",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"text_file": "../../../../test_documents/ground_truth/txt/2305.03393v1-pg9.doctags.txt",
|
||||
"source": "vision"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"document": "../../../../test_documents/vendored/docling/txt/2305.03393v1.doctags.txt",
|
||||
"file_type": "txt",
|
||||
"file_size": 39960,
|
||||
"expected_frameworks": ["kreuzberg", "pandoc", "pymupdf4llm", "tika", "unstructured"],
|
||||
"metadata": {
|
||||
"description": "Document from docling test suite",
|
||||
"source": "docling",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"text_file": "../../../../test_documents/ground_truth/txt/2305.03393v1.doctags.txt",
|
||||
"source": "vision"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"document": "../../../../test_documents/vendored/docling/txt/amt_handbook_sample.doctags.txt",
|
||||
"file_type": "txt",
|
||||
"file_size": 4597,
|
||||
"expected_frameworks": ["kreuzberg", "pandoc", "pymupdf4llm", "tika", "unstructured"],
|
||||
"metadata": {
|
||||
"description": "Document from docling test suite",
|
||||
"source": "docling",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"text_file": "../../../../test_documents/ground_truth/txt/amt_handbook_sample.doctags.txt",
|
||||
"source": "vision"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"document": "../../../../test_documents/vendored/docling/txt/code_and_formula.doctags.txt",
|
||||
"file_type": "txt",
|
||||
"file_size": 6418,
|
||||
"expected_frameworks": ["kreuzberg", "pandoc", "pymupdf4llm", "tika", "unstructured"],
|
||||
"metadata": {
|
||||
"description": "Document from docling test suite",
|
||||
"source": "docling",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"text_file": "../../../../test_documents/ground_truth/txt/code_and_formula.doctags.txt",
|
||||
"source": "vision"
|
||||
}
|
||||
}
|
||||
15
tools/benchmark-harness/fixtures/txt/multi_page.doctags.json
Normal file
15
tools/benchmark-harness/fixtures/txt/multi_page.doctags.json
Normal file
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"document": "../../../../test_documents/vendored/docling/txt/multi_page.doctags.txt",
|
||||
"file_type": "txt",
|
||||
"file_size": 12778,
|
||||
"expected_frameworks": ["kreuzberg", "pandoc", "pymupdf4llm", "tika", "unstructured"],
|
||||
"metadata": {
|
||||
"description": "Document from docling test suite",
|
||||
"source": "docling",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"text_file": "../../../../test_documents/ground_truth/txt/multi_page.doctags.txt",
|
||||
"source": "vision"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"document": "../../../../test_documents/vendored/docling/txt/picture_classification.doctags.txt",
|
||||
"file_type": "txt",
|
||||
"file_size": 4075,
|
||||
"expected_frameworks": ["kreuzberg", "pandoc", "pymupdf4llm", "tika", "unstructured"],
|
||||
"metadata": {
|
||||
"description": "Document from docling test suite",
|
||||
"source": "docling",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"text_file": "../../../../test_documents/ground_truth/txt/picture_classification.doctags.txt",
|
||||
"source": "vision"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"document": "../../../../test_documents/vendored/docling/txt/redp5110_sampled.doctags.txt",
|
||||
"file_type": "txt",
|
||||
"file_size": 44745,
|
||||
"expected_frameworks": ["kreuzberg", "pandoc", "pymupdf4llm", "tika", "unstructured"],
|
||||
"metadata": {
|
||||
"description": "Document from docling test suite",
|
||||
"source": "docling",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"text_file": "../../../../test_documents/ground_truth/txt/redp5110_sampled.doctags.txt",
|
||||
"source": "vision"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"document": "../../../../test_documents/vendored/docling/txt/right_to_left_01.doctags.txt",
|
||||
"file_type": "txt",
|
||||
"file_size": 2658,
|
||||
"expected_frameworks": ["kreuzberg", "pandoc", "pymupdf4llm", "tika", "unstructured"],
|
||||
"metadata": {
|
||||
"description": "Document from docling test suite",
|
||||
"source": "docling",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"text_file": "../../../../test_documents/ground_truth/txt/right_to_left_01.doctags.txt",
|
||||
"source": "vision"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"document": "../../../../test_documents/vendored/docling/txt/right_to_left_02.doctags.txt",
|
||||
"file_type": "txt",
|
||||
"file_size": 2235,
|
||||
"expected_frameworks": ["kreuzberg", "pandoc", "pymupdf4llm", "tika", "unstructured"],
|
||||
"metadata": {
|
||||
"description": "Document from docling test suite",
|
||||
"source": "docling",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"text_file": "../../../../test_documents/ground_truth/txt/right_to_left_02.doctags.txt",
|
||||
"source": "vision"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"document": "../../../../test_documents/vendored/docling/txt/right_to_left_03.doctags.txt",
|
||||
"file_type": "txt",
|
||||
"file_size": 3199,
|
||||
"expected_frameworks": ["kreuzberg", "pandoc", "pymupdf4llm", "tika", "unstructured"],
|
||||
"metadata": {
|
||||
"description": "Document from docling test suite",
|
||||
"source": "docling",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"text_file": "../../../../test_documents/ground_truth/txt/right_to_left_03.doctags.txt",
|
||||
"source": "vision"
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user