This commit is contained in:
16
tools/benchmark-harness/fixtures/rtf/accent.json
Normal file
16
tools/benchmark-harness/fixtures/rtf/accent.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"document": "../../../../test_documents/rtf/accent.rtf",
|
||||
"file_type": "rtf",
|
||||
"file_size": 50,
|
||||
"expected_frameworks": ["kreuzberg"],
|
||||
"metadata": {
|
||||
"description": "rtf test: accent",
|
||||
"source": "pandoc-generated",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"text_file": "../../../../test_documents/ground_truth/rtf/accent.txt",
|
||||
"markdown_file": "../../../../test_documents/ground_truth/rtf/accent.md",
|
||||
"source": "pandoc"
|
||||
}
|
||||
}
|
||||
16
tools/benchmark-harness/fixtures/rtf/bookmark.json
Normal file
16
tools/benchmark-harness/fixtures/rtf/bookmark.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"document": "../../../../test_documents/rtf/bookmark.rtf",
|
||||
"file_type": "rtf",
|
||||
"file_size": 168,
|
||||
"expected_frameworks": ["kreuzberg"],
|
||||
"metadata": {
|
||||
"description": "rtf test: bookmark",
|
||||
"source": "pandoc-generated",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"text_file": "../../../../test_documents/ground_truth/rtf/bookmark.txt",
|
||||
"markdown_file": "../../../../test_documents/ground_truth/rtf/bookmark.md",
|
||||
"source": "pandoc"
|
||||
}
|
||||
}
|
||||
16
tools/benchmark-harness/fixtures/rtf/extraction_test.json
Normal file
16
tools/benchmark-harness/fixtures/rtf/extraction_test.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"document": "../../../../test_documents/rtf/extraction_test.rtf",
|
||||
"file_type": "rtf",
|
||||
"file_size": 4262,
|
||||
"expected_frameworks": ["kreuzberg"],
|
||||
"metadata": {
|
||||
"description": "rtf test: extraction_test",
|
||||
"source": "pandoc-generated",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"text_file": "../../../../test_documents/ground_truth/rtf/extraction_test.txt",
|
||||
"markdown_file": "../../../../test_documents/ground_truth/rtf/extraction_test.md",
|
||||
"source": "pandoc"
|
||||
}
|
||||
}
|
||||
16
tools/benchmark-harness/fixtures/rtf/fake-doc.json
Normal file
16
tools/benchmark-harness/fixtures/rtf/fake-doc.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"document": "../../../../test_documents/vendored/unstructured/rtf/fake-doc.rtf",
|
||||
"file_type": "rtf",
|
||||
"file_size": 408,
|
||||
"expected_frameworks": ["kreuzberg", "pandoc", "tika", "unstructured"],
|
||||
"metadata": {
|
||||
"description": "Document from unstructured test suite",
|
||||
"source": "unstructured",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"text_file": "../../../../test_documents/ground_truth/rtf/fake-doc.txt",
|
||||
"source": "pandoc",
|
||||
"markdown_file": "../../../../test_documents/ground_truth/rtf/fake-doc.md"
|
||||
}
|
||||
}
|
||||
16
tools/benchmark-harness/fixtures/rtf/footnote.json
Normal file
16
tools/benchmark-harness/fixtures/rtf/footnote.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"document": "../../../../test_documents/rtf/footnote.rtf",
|
||||
"file_type": "rtf",
|
||||
"file_size": 563,
|
||||
"expected_frameworks": ["kreuzberg"],
|
||||
"metadata": {
|
||||
"description": "rtf test: footnote",
|
||||
"source": "pandoc-generated",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"text_file": "../../../../test_documents/ground_truth/rtf/footnote.txt",
|
||||
"markdown_file": "../../../../test_documents/ground_truth/rtf/footnote.md",
|
||||
"source": "pandoc"
|
||||
}
|
||||
}
|
||||
16
tools/benchmark-harness/fixtures/rtf/formatting.json
Normal file
16
tools/benchmark-harness/fixtures/rtf/formatting.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"document": "../../../../test_documents/rtf/formatting.rtf",
|
||||
"file_type": "rtf",
|
||||
"file_size": 40193,
|
||||
"expected_frameworks": ["kreuzberg"],
|
||||
"metadata": {
|
||||
"description": "rtf test: formatting",
|
||||
"source": "pandoc-generated",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"text_file": "../../../../test_documents/ground_truth/rtf/formatting.txt",
|
||||
"markdown_file": "../../../../test_documents/ground_truth/rtf/formatting.md",
|
||||
"source": "pandoc"
|
||||
}
|
||||
}
|
||||
16
tools/benchmark-harness/fixtures/rtf/heading.json
Normal file
16
tools/benchmark-harness/fixtures/rtf/heading.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"document": "../../../../test_documents/rtf/heading.rtf",
|
||||
"file_type": "rtf",
|
||||
"file_size": 45457,
|
||||
"expected_frameworks": ["kreuzberg"],
|
||||
"metadata": {
|
||||
"description": "RTF test document: heading",
|
||||
"source": "pandoc-generated",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"markdown_file": "../../../../test_documents/ground_truth/rtf/heading.md",
|
||||
"source": "pandoc",
|
||||
"text_file": "../../../../test_documents/ground_truth/rtf/heading.txt"
|
||||
}
|
||||
}
|
||||
16
tools/benchmark-harness/fixtures/rtf/image.json
Normal file
16
tools/benchmark-harness/fixtures/rtf/image.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"document": "../../../../test_documents/rtf/image.rtf",
|
||||
"file_type": "rtf",
|
||||
"file_size": 32795,
|
||||
"expected_frameworks": ["kreuzberg"],
|
||||
"metadata": {
|
||||
"description": "rtf test: image",
|
||||
"source": "pandoc-generated",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"text_file": "../../../../test_documents/ground_truth/rtf/image.txt",
|
||||
"markdown_file": "../../../../test_documents/ground_truth/rtf/image.md",
|
||||
"source": "pandoc"
|
||||
}
|
||||
}
|
||||
16
tools/benchmark-harness/fixtures/rtf/link.json
Normal file
16
tools/benchmark-harness/fixtures/rtf/link.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"document": "../../../../test_documents/rtf/link.rtf",
|
||||
"file_type": "rtf",
|
||||
"file_size": 261,
|
||||
"expected_frameworks": ["kreuzberg"],
|
||||
"metadata": {
|
||||
"description": "rtf test: link",
|
||||
"source": "pandoc-generated",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"text_file": "../../../../test_documents/ground_truth/rtf/link.txt",
|
||||
"markdown_file": "../../../../test_documents/ground_truth/rtf/link.md",
|
||||
"source": "pandoc"
|
||||
}
|
||||
}
|
||||
16
tools/benchmark-harness/fixtures/rtf/list_complex.json
Normal file
16
tools/benchmark-harness/fixtures/rtf/list_complex.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"document": "../../../../test_documents/rtf/list_complex.rtf",
|
||||
"file_type": "rtf",
|
||||
"file_size": 63224,
|
||||
"expected_frameworks": ["kreuzberg"],
|
||||
"metadata": {
|
||||
"description": "RTF test document: list_complex",
|
||||
"source": "pandoc-generated",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"markdown_file": "../../../../test_documents/ground_truth/rtf/list_complex.md",
|
||||
"source": "pandoc",
|
||||
"text_file": "../../../../test_documents/ground_truth/rtf/list_complex.txt"
|
||||
}
|
||||
}
|
||||
16
tools/benchmark-harness/fixtures/rtf/list_simple.json
Normal file
16
tools/benchmark-harness/fixtures/rtf/list_simple.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"document": "../../../../test_documents/rtf/list_simple.rtf",
|
||||
"file_type": "rtf",
|
||||
"file_size": 1712,
|
||||
"expected_frameworks": ["kreuzberg"],
|
||||
"metadata": {
|
||||
"description": "rtf test: list_simple",
|
||||
"source": "pandoc-generated",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"text_file": "../../../../test_documents/ground_truth/rtf/list_simple.txt",
|
||||
"markdown_file": "../../../../test_documents/ground_truth/rtf/list_simple.md",
|
||||
"source": "pandoc"
|
||||
}
|
||||
}
|
||||
16
tools/benchmark-harness/fixtures/rtf/lorem_ipsum.json
Normal file
16
tools/benchmark-harness/fixtures/rtf/lorem_ipsum.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"document": "../../../../test_documents/rtf/lorem_ipsum.rtf",
|
||||
"file_type": "rtf",
|
||||
"file_size": 3956,
|
||||
"expected_frameworks": ["kreuzberg"],
|
||||
"metadata": {
|
||||
"description": "rtf test: lorem_ipsum",
|
||||
"source": "pandoc-generated",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"text_file": "../../../../test_documents/ground_truth/rtf/lorem_ipsum.txt",
|
||||
"markdown_file": "../../../../test_documents/ground_truth/rtf/lorem_ipsum.md",
|
||||
"source": "pandoc"
|
||||
}
|
||||
}
|
||||
16
tools/benchmark-harness/fixtures/rtf/table_error_codes.json
Normal file
16
tools/benchmark-harness/fixtures/rtf/table_error_codes.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"document": "../../../../test_documents/rtf/table_error_codes.rtf",
|
||||
"file_type": "rtf",
|
||||
"file_size": 5300,
|
||||
"expected_frameworks": ["kreuzberg"],
|
||||
"metadata": {
|
||||
"description": "RTF test document: table_error_codes",
|
||||
"source": "pandoc-generated",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"markdown_file": "../../../../test_documents/ground_truth/rtf/table_error_codes.md",
|
||||
"source": "pandoc",
|
||||
"text_file": "../../../../test_documents/ground_truth/rtf/table_error_codes.txt"
|
||||
}
|
||||
}
|
||||
16
tools/benchmark-harness/fixtures/rtf/table_simple.json
Normal file
16
tools/benchmark-harness/fixtures/rtf/table_simple.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"document": "../../../../test_documents/rtf/table_simple.rtf",
|
||||
"file_type": "rtf",
|
||||
"file_size": 241,
|
||||
"expected_frameworks": ["kreuzberg"],
|
||||
"metadata": {
|
||||
"description": "RTF test document: table_simple",
|
||||
"source": "pandoc-generated",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"markdown_file": "../../../../test_documents/ground_truth/rtf/table_simple.md",
|
||||
"source": "pandoc",
|
||||
"text_file": "../../../../test_documents/ground_truth/rtf/table_simple.txt"
|
||||
}
|
||||
}
|
||||
16
tools/benchmark-harness/fixtures/rtf/tables.json
Normal file
16
tools/benchmark-harness/fixtures/rtf/tables.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"document": "../../../../test_documents/rtf/tables.rtf",
|
||||
"file_type": "rtf",
|
||||
"file_size": 8499,
|
||||
"expected_frameworks": ["kreuzberg"],
|
||||
"metadata": {
|
||||
"description": "rtf test: tables",
|
||||
"source": "pandoc-generated",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"text_file": "../../../../test_documents/ground_truth/rtf/tables.txt",
|
||||
"markdown_file": "../../../../test_documents/ground_truth/rtf/tables.md",
|
||||
"source": "pandoc"
|
||||
}
|
||||
}
|
||||
16
tools/benchmark-harness/fixtures/rtf/unicode.json
Normal file
16
tools/benchmark-harness/fixtures/rtf/unicode.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"document": "../../../../test_documents/rtf/unicode.rtf",
|
||||
"file_type": "rtf",
|
||||
"file_size": 92,
|
||||
"expected_frameworks": ["kreuzberg"],
|
||||
"metadata": {
|
||||
"description": "RTF test document: unicode",
|
||||
"source": "pandoc-generated",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"markdown_file": "../../../../test_documents/ground_truth/rtf/unicode.md",
|
||||
"source": "pandoc",
|
||||
"text_file": "../../../../test_documents/ground_truth/rtf/unicode.txt"
|
||||
}
|
||||
}
|
||||
16
tools/benchmark-harness/fixtures/rtf/word_sample.json
Normal file
16
tools/benchmark-harness/fixtures/rtf/word_sample.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"document": "../../../../test_documents/rtf/word_sample.rtf",
|
||||
"file_type": "rtf",
|
||||
"file_size": 163580,
|
||||
"expected_frameworks": ["kreuzberg"],
|
||||
"metadata": {
|
||||
"description": "rtf test: word_sample",
|
||||
"source": "pandoc-generated",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"text_file": "../../../../test_documents/ground_truth/rtf/word_sample.txt",
|
||||
"markdown_file": "../../../../test_documents/ground_truth/rtf/word_sample.md",
|
||||
"source": "pandoc"
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user