This commit is contained in:
16
tools/benchmark-harness/fixtures/csv/csv-comma-in-cell.json
Normal file
16
tools/benchmark-harness/fixtures/csv/csv-comma-in-cell.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"document": "../../../../test_documents/vendored/docling/csv/csv-comma-in-cell.csv",
|
||||
"file_type": "csv",
|
||||
"file_size": 46,
|
||||
"expected_frameworks": ["kreuzberg", "docling", "markitdown", "pandoc", "tika", "unstructured"],
|
||||
"metadata": {
|
||||
"description": "Document from docling test suite",
|
||||
"source": "docling",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"text_file": "../../../../test_documents/ground_truth/csv/csv-comma-in-cell.txt",
|
||||
"source": "pandoc",
|
||||
"markdown_file": "../../../../test_documents/ground_truth/csv/csv-comma-in-cell.md"
|
||||
}
|
||||
}
|
||||
16
tools/benchmark-harness/fixtures/csv/csv-comma.json
Normal file
16
tools/benchmark-harness/fixtures/csv/csv-comma.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"document": "../../../../test_documents/vendored/docling/csv/csv-comma.csv",
|
||||
"file_type": "csv",
|
||||
"file_size": 1005,
|
||||
"expected_frameworks": ["kreuzberg", "docling", "markitdown", "pandoc", "tika", "unstructured"],
|
||||
"metadata": {
|
||||
"description": "Document from docling test suite",
|
||||
"source": "docling",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"text_file": "../../../../test_documents/ground_truth/csv/csv-comma.txt",
|
||||
"source": "pandoc",
|
||||
"markdown_file": "../../../../test_documents/ground_truth/csv/csv-comma.md"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"document": "../../../../test_documents/vendored/docling/csv/csv-inconsistent-header.csv",
|
||||
"file_type": "csv",
|
||||
"file_size": 42,
|
||||
"expected_frameworks": ["kreuzberg", "docling", "markitdown", "pandoc", "tika", "unstructured"],
|
||||
"metadata": {
|
||||
"description": "Document from docling test suite",
|
||||
"source": "docling",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"text_file": "../../../../test_documents/ground_truth/csv/csv-inconsistent-header.txt",
|
||||
"source": "pandoc",
|
||||
"markdown_file": "../../../../test_documents/ground_truth/csv/csv-inconsistent-header.md"
|
||||
}
|
||||
}
|
||||
16
tools/benchmark-harness/fixtures/csv/csv-pipe.json
Normal file
16
tools/benchmark-harness/fixtures/csv/csv-pipe.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"document": "../../../../test_documents/vendored/docling/csv/csv-pipe.csv",
|
||||
"file_type": "csv",
|
||||
"file_size": 997,
|
||||
"expected_frameworks": ["kreuzberg", "docling", "markitdown", "pandoc", "tika", "unstructured"],
|
||||
"metadata": {
|
||||
"description": "Document from docling test suite",
|
||||
"source": "docling",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"text_file": "../../../../test_documents/ground_truth/csv/csv-pipe.txt",
|
||||
"source": "pandoc",
|
||||
"markdown_file": "../../../../test_documents/ground_truth/csv/csv-pipe.md"
|
||||
}
|
||||
}
|
||||
16
tools/benchmark-harness/fixtures/csv/csv-semicolon.json
Normal file
16
tools/benchmark-harness/fixtures/csv/csv-semicolon.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"document": "../../../../test_documents/vendored/docling/csv/csv-semicolon.csv",
|
||||
"file_type": "csv",
|
||||
"file_size": 997,
|
||||
"expected_frameworks": ["kreuzberg", "docling", "markitdown", "pandoc", "tika", "unstructured"],
|
||||
"metadata": {
|
||||
"description": "Document from docling test suite",
|
||||
"source": "docling",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"text_file": "../../../../test_documents/ground_truth/csv/csv-semicolon.txt",
|
||||
"source": "pandoc",
|
||||
"markdown_file": "../../../../test_documents/ground_truth/csv/csv-semicolon.md"
|
||||
}
|
||||
}
|
||||
16
tools/benchmark-harness/fixtures/csv/csv-tab.json
Normal file
16
tools/benchmark-harness/fixtures/csv/csv-tab.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"document": "../../../../test_documents/vendored/docling/csv/csv-tab.csv",
|
||||
"file_type": "csv",
|
||||
"file_size": 997,
|
||||
"expected_frameworks": ["kreuzberg", "docling", "markitdown", "pandoc", "tika", "unstructured"],
|
||||
"metadata": {
|
||||
"description": "Document from docling test suite",
|
||||
"source": "docling",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"text_file": "../../../../test_documents/ground_truth/csv/csv-tab.txt",
|
||||
"source": "pandoc",
|
||||
"markdown_file": "../../../../test_documents/ground_truth/csv/csv-tab.md"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"document": "../../../../test_documents/vendored/docling/csv/csv-too-few-columns.csv",
|
||||
"file_type": "csv",
|
||||
"file_size": 44,
|
||||
"expected_frameworks": ["kreuzberg", "docling", "markitdown", "pandoc", "tika", "unstructured"],
|
||||
"metadata": {
|
||||
"description": "Document from docling test suite",
|
||||
"source": "docling",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"text_file": "../../../../test_documents/ground_truth/csv/csv-too-few-columns.txt",
|
||||
"source": "pandoc",
|
||||
"markdown_file": "../../../../test_documents/ground_truth/csv/csv-too-few-columns.md"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"document": "../../../../test_documents/vendored/docling/csv/csv-too-many-columns.csv",
|
||||
"file_type": "csv",
|
||||
"file_size": 46,
|
||||
"expected_frameworks": ["kreuzberg", "docling", "markitdown", "pandoc", "tika", "unstructured"],
|
||||
"metadata": {
|
||||
"description": "Document from docling test suite",
|
||||
"source": "docling",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"text_file": "../../../../test_documents/ground_truth/csv/csv-too-many-columns.txt",
|
||||
"source": "pandoc",
|
||||
"markdown_file": "../../../../test_documents/ground_truth/csv/csv-too-many-columns.md"
|
||||
}
|
||||
}
|
||||
16
tools/benchmark-harness/fixtures/csv/data_table.json
Normal file
16
tools/benchmark-harness/fixtures/csv/data_table.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"document": "../../../../test_documents/csv/data_table.csv",
|
||||
"file_type": "csv",
|
||||
"file_size": 476,
|
||||
"expected_frameworks": ["kreuzberg"],
|
||||
"metadata": {
|
||||
"description": "csv test: data_table",
|
||||
"source": "pandoc-generated",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"text_file": "../../../../test_documents/ground_truth/csv/data_table.txt",
|
||||
"markdown_file": "../../../../test_documents/ground_truth/csv/data_table.md",
|
||||
"source": "pandoc"
|
||||
}
|
||||
}
|
||||
16
tools/benchmark-harness/fixtures/csv/stanley_cups.json
Normal file
16
tools/benchmark-harness/fixtures/csv/stanley_cups.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"document": "../../../../test_documents/csv/stanley_cups.csv",
|
||||
"file_type": "csv",
|
||||
"file_size": 91,
|
||||
"expected_frameworks": ["kreuzberg"],
|
||||
"metadata": {
|
||||
"description": "csv test: stanley_cups",
|
||||
"source": "pandoc-generated",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"text_file": "../../../../test_documents/ground_truth/csv/stanley_cups.txt",
|
||||
"markdown_file": "../../../../test_documents/ground_truth/csv/stanley_cups.md",
|
||||
"source": "pandoc"
|
||||
}
|
||||
}
|
||||
16
tools/benchmark-harness/fixtures/csv/test_mskanji.json
Normal file
16
tools/benchmark-harness/fixtures/csv/test_mskanji.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"document": "../../../../test_documents/vendored/markitdown/csv/test_mskanji.csv",
|
||||
"file_type": "csv",
|
||||
"file_size": 70,
|
||||
"expected_frameworks": ["kreuzberg", "docling", "markitdown", "pandoc", "tika", "unstructured"],
|
||||
"metadata": {
|
||||
"description": "Document from markitdown test suite",
|
||||
"source": "markitdown",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"text_file": "../../../../test_documents/ground_truth/csv/test_mskanji.txt",
|
||||
"source": "pandoc",
|
||||
"markdown_file": "../../../../test_documents/ground_truth/csv/test_mskanji.md"
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user