17 lines
575 B
JSON
17 lines
575 B
JSON
|
|
{
|
||
|
|
"document": "../../../../test_documents/vendored/unstructured/doc/duplicate-paragraphs.doc",
|
||
|
|
"file_type": "doc",
|
||
|
|
"file_size": 18432,
|
||
|
|
"expected_frameworks": ["kreuzberg", "tika", "unstructured"],
|
||
|
|
"metadata": {
|
||
|
|
"description": "Document from unstructured test suite",
|
||
|
|
"source": "unstructured",
|
||
|
|
"size_category": "small"
|
||
|
|
},
|
||
|
|
"ground_truth": {
|
||
|
|
"text_file": "../../../../test_documents/ground_truth/doc/duplicate-paragraphs.txt",
|
||
|
|
"source": "pandoc",
|
||
|
|
"markdown_file": "../../../../test_documents/ground_truth/doc/duplicate-paragraphs.md"
|
||
|
|
}
|
||
|
|
}
|