This commit is contained in:
16
tools/benchmark-harness/fixtures/epub/epub2_cover.json
Normal file
16
tools/benchmark-harness/fixtures/epub/epub2_cover.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"document": "../../../../test_documents/epub/epub2_cover.epub",
|
||||
"file_type": "epub",
|
||||
"file_size": 11794,
|
||||
"expected_frameworks": ["kreuzberg"],
|
||||
"metadata": {
|
||||
"description": "epub test: epub2_cover",
|
||||
"source": "pandoc-generated",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"text_file": "../../../../test_documents/ground_truth/epub/epub2_cover.txt",
|
||||
"markdown_file": "../../../../test_documents/ground_truth/epub/epub2_cover.md",
|
||||
"source": "pandoc"
|
||||
}
|
||||
}
|
||||
16
tools/benchmark-harness/fixtures/epub/epub2_no_cover.json
Normal file
16
tools/benchmark-harness/fixtures/epub/epub2_no_cover.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"document": "../../../../test_documents/epub/epub2_no_cover.epub",
|
||||
"file_type": "epub",
|
||||
"file_size": 3584,
|
||||
"expected_frameworks": ["kreuzberg"],
|
||||
"metadata": {
|
||||
"description": "epub test: epub2_no_cover",
|
||||
"source": "pandoc-generated",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"text_file": "../../../../test_documents/ground_truth/epub/epub2_no_cover.txt",
|
||||
"markdown_file": "../../../../test_documents/ground_truth/epub/epub2_no_cover.md",
|
||||
"source": "pandoc"
|
||||
}
|
||||
}
|
||||
16
tools/benchmark-harness/fixtures/epub/features.json
Normal file
16
tools/benchmark-harness/fixtures/epub/features.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"document": "../../../../test_documents/epub/features.epub",
|
||||
"file_type": "epub",
|
||||
"file_size": 8970,
|
||||
"expected_frameworks": ["kreuzberg"],
|
||||
"metadata": {
|
||||
"description": "EPUB test document: features",
|
||||
"source": "pandoc-generated",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"markdown_file": "../../../../test_documents/ground_truth/epub/features.md",
|
||||
"source": "pandoc",
|
||||
"text_file": "../../../../test_documents/ground_truth/epub/features.txt"
|
||||
}
|
||||
}
|
||||
16
tools/benchmark-harness/fixtures/epub/img.json
Normal file
16
tools/benchmark-harness/fixtures/epub/img.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"document": "../../../../test_documents/epub/img.epub",
|
||||
"file_type": "epub",
|
||||
"file_size": 20478,
|
||||
"expected_frameworks": ["kreuzberg"],
|
||||
"metadata": {
|
||||
"description": "epub test: img",
|
||||
"source": "pandoc-generated",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"text_file": "../../../../test_documents/ground_truth/epub/img.txt",
|
||||
"markdown_file": "../../../../test_documents/ground_truth/epub/img.md",
|
||||
"source": "pandoc"
|
||||
}
|
||||
}
|
||||
16
tools/benchmark-harness/fixtures/epub/simple.json
Normal file
16
tools/benchmark-harness/fixtures/epub/simple.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"document": "../../../../test_documents/epub/simple.epub",
|
||||
"file_type": "epub",
|
||||
"file_size": 31662481,
|
||||
"expected_frameworks": ["kreuzberg"],
|
||||
"metadata": {
|
||||
"description": "epub test: simple",
|
||||
"source": "pandoc-generated",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"text_file": "../../../../test_documents/ground_truth/epub/simple.txt",
|
||||
"markdown_file": "../../../../test_documents/ground_truth/epub/simple.md",
|
||||
"source": "pandoc"
|
||||
}
|
||||
}
|
||||
23
tools/benchmark-harness/fixtures/epub/test.json
Normal file
23
tools/benchmark-harness/fixtures/epub/test.json
Normal file
@@ -0,0 +1,23 @@
|
||||
{
|
||||
"document": "../../../../test_documents/vendored/markitdown/epub/test.epub",
|
||||
"file_type": "epub",
|
||||
"file_size": 2677,
|
||||
"expected_frameworks": [
|
||||
"kreuzberg",
|
||||
"markitdown",
|
||||
"pandoc",
|
||||
"pymupdf4llm",
|
||||
"tika",
|
||||
"unstructured"
|
||||
],
|
||||
"metadata": {
|
||||
"description": "Document from markitdown test suite",
|
||||
"source": "markitdown",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"text_file": "../../../../test_documents/ground_truth/epub/test.txt",
|
||||
"source": "pandoc",
|
||||
"markdown_file": "../../../../test_documents/ground_truth/epub/test.md"
|
||||
}
|
||||
}
|
||||
16
tools/benchmark-harness/fixtures/epub/wasteland.json
Normal file
16
tools/benchmark-harness/fixtures/epub/wasteland.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"document": "../../../../test_documents/epub/wasteland.epub",
|
||||
"file_type": "epub",
|
||||
"file_size": 25840,
|
||||
"expected_frameworks": ["kreuzberg"],
|
||||
"metadata": {
|
||||
"description": "EPUB test document: wasteland",
|
||||
"source": "pandoc-generated",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"markdown_file": "../../../../test_documents/ground_truth/epub/wasteland.md",
|
||||
"source": "pandoc",
|
||||
"text_file": "../../../../test_documents/ground_truth/epub/wasteland.txt"
|
||||
}
|
||||
}
|
||||
16
tools/benchmark-harness/fixtures/epub/winter_sports.json
Normal file
16
tools/benchmark-harness/fixtures/epub/winter_sports.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"document": "../../../../test_documents/epub/winter_sports.epub",
|
||||
"file_type": "epub",
|
||||
"file_size": 210376,
|
||||
"expected_frameworks": ["kreuzberg"],
|
||||
"metadata": {
|
||||
"description": "epub test: winter_sports",
|
||||
"source": "pandoc-generated",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"text_file": "../../../../test_documents/ground_truth/epub/winter_sports.txt",
|
||||
"markdown_file": "../../../../test_documents/ground_truth/epub/winter_sports.md",
|
||||
"source": "pandoc"
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user