This commit is contained in:
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"document": "../../../../test_documents/vendored/unstructured/eml/email-equals-attachment-filename.eml",
|
||||
"file_type": "eml",
|
||||
"file_size": 3297,
|
||||
"expected_frameworks": ["kreuzberg", "tika", "unstructured"],
|
||||
"metadata": {
|
||||
"description": "Document from unstructured test suite",
|
||||
"source": "unstructured",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"text_file": "../../../../test_documents/ground_truth/eml/email-equals-attachment-filename.txt",
|
||||
"source": "python_email"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"document": "../../../../test_documents/vendored/unstructured/eml/email-inline-content-disposition.eml",
|
||||
"file_type": "eml",
|
||||
"file_size": 657,
|
||||
"expected_frameworks": ["kreuzberg", "tika", "unstructured"],
|
||||
"metadata": {
|
||||
"description": "Document from unstructured test suite",
|
||||
"source": "unstructured",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"text_file": "../../../../test_documents/ground_truth/eml/email-inline-content-disposition.txt",
|
||||
"source": "python_email"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"document": "../../../../test_documents/vendored/unstructured/eml/email-no-html-content-1.eml",
|
||||
"file_type": "eml",
|
||||
"file_size": 7721,
|
||||
"expected_frameworks": ["kreuzberg", "tika", "unstructured"],
|
||||
"metadata": {
|
||||
"description": "Document from unstructured test suite",
|
||||
"source": "unstructured",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"text_file": "../../../../test_documents/ground_truth/eml/email-no-html-content-1.txt",
|
||||
"source": "python_email"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"document": "../../../../test_documents/vendored/unstructured/eml/email-no-utf8-2008-07-16.062410.eml",
|
||||
"file_type": "eml",
|
||||
"file_size": 31978,
|
||||
"expected_frameworks": ["kreuzberg", "tika", "unstructured"],
|
||||
"metadata": {
|
||||
"description": "Document from unstructured test suite",
|
||||
"source": "unstructured",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"text_file": "../../../../test_documents/ground_truth/eml/email-no-utf8-2008-07-16.062410.txt",
|
||||
"source": "python_email"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"document": "../../../../test_documents/vendored/unstructured/eml/email-no-utf8-2014-03-17.111517.eml",
|
||||
"file_type": "eml",
|
||||
"file_size": 14954,
|
||||
"expected_frameworks": ["kreuzberg", "tika", "unstructured"],
|
||||
"metadata": {
|
||||
"description": "Document from unstructured test suite",
|
||||
"source": "unstructured",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"text_file": "../../../../test_documents/ground_truth/eml/email-no-utf8-2014-03-17.111517.txt",
|
||||
"source": "python_email"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"document": "../../../../test_documents/vendored/unstructured/eml/email-replace-mime-encodings-error-1.eml",
|
||||
"file_type": "eml",
|
||||
"file_size": 16085,
|
||||
"expected_frameworks": ["kreuzberg", "tika", "unstructured"],
|
||||
"metadata": {
|
||||
"description": "Document from unstructured test suite",
|
||||
"source": "unstructured",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"text_file": "../../../../test_documents/ground_truth/eml/email-replace-mime-encodings-error-1.txt",
|
||||
"source": "python_email"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"document": "../../../../test_documents/vendored/unstructured/eml/email-replace-mime-encodings-error-2.eml",
|
||||
"file_type": "eml",
|
||||
"file_size": 26271,
|
||||
"expected_frameworks": ["kreuzberg", "tika", "unstructured"],
|
||||
"metadata": {
|
||||
"description": "Document from unstructured test suite",
|
||||
"source": "unstructured",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"text_file": "../../../../test_documents/ground_truth/eml/email-replace-mime-encodings-error-2.txt",
|
||||
"source": "python_email"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"document": "../../../../test_documents/vendored/unstructured/eml/email-replace-mime-encodings-error-3.eml",
|
||||
"file_type": "eml",
|
||||
"file_size": 56028,
|
||||
"expected_frameworks": ["kreuzberg", "tika", "unstructured"],
|
||||
"metadata": {
|
||||
"description": "Document from unstructured test suite",
|
||||
"source": "unstructured",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"text_file": "../../../../test_documents/ground_truth/eml/email-replace-mime-encodings-error-3.txt",
|
||||
"source": "python_email"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"document": "../../../../test_documents/vendored/unstructured/eml/email-replace-mime-encodings-error-4.eml",
|
||||
"file_type": "eml",
|
||||
"file_size": 34433,
|
||||
"expected_frameworks": ["kreuzberg", "tika", "unstructured"],
|
||||
"metadata": {
|
||||
"description": "Document from unstructured test suite",
|
||||
"source": "unstructured",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"text_file": "../../../../test_documents/ground_truth/eml/email-replace-mime-encodings-error-4.txt",
|
||||
"source": "python_email"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"document": "../../../../test_documents/vendored/unstructured/eml/email-replace-mime-encodings-error-5.eml",
|
||||
"file_type": "eml",
|
||||
"file_size": 14567,
|
||||
"expected_frameworks": ["kreuzberg", "tika", "unstructured"],
|
||||
"metadata": {
|
||||
"description": "Document from unstructured test suite",
|
||||
"source": "unstructured",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"text_file": "../../../../test_documents/ground_truth/eml/email-replace-mime-encodings-error-5.txt",
|
||||
"source": "python_email"
|
||||
}
|
||||
}
|
||||
15
tools/benchmark-harness/fixtures/eml/email-with-image.json
Normal file
15
tools/benchmark-harness/fixtures/eml/email-with-image.json
Normal file
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"document": "../../../../test_documents/vendored/unstructured/eml/email-with-image.eml",
|
||||
"file_type": "eml",
|
||||
"file_size": 296696,
|
||||
"expected_frameworks": ["kreuzberg", "tika", "unstructured"],
|
||||
"metadata": {
|
||||
"description": "Document from unstructured test suite",
|
||||
"source": "unstructured",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"text_file": "../../../../test_documents/ground_truth/eml/email-with-image.txt",
|
||||
"source": "python_email"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"document": "../../../../test_documents/vendored/unstructured/eml/fake-email-attachment.eml",
|
||||
"file_type": "eml",
|
||||
"file_size": 1704,
|
||||
"expected_frameworks": ["kreuzberg", "tika", "unstructured"],
|
||||
"metadata": {
|
||||
"description": "Document from unstructured test suite",
|
||||
"source": "unstructured",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"text_file": "../../../../test_documents/ground_truth/eml/fake-email-attachment.txt",
|
||||
"source": "python_email"
|
||||
}
|
||||
}
|
||||
15
tools/benchmark-harness/fixtures/eml/fake-email-b64.json
Normal file
15
tools/benchmark-harness/fixtures/eml/fake-email-b64.json
Normal file
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"document": "../../../../test_documents/vendored/unstructured/eml/fake-email-b64.eml",
|
||||
"file_type": "eml",
|
||||
"file_size": 979,
|
||||
"expected_frameworks": ["kreuzberg", "tika", "unstructured"],
|
||||
"metadata": {
|
||||
"description": "Document from unstructured test suite",
|
||||
"source": "unstructured",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"text_file": "../../../../test_documents/ground_truth/eml/fake-email-b64.txt",
|
||||
"source": "python_email"
|
||||
}
|
||||
}
|
||||
15
tools/benchmark-harness/fixtures/eml/fake-email-header.json
Normal file
15
tools/benchmark-harness/fixtures/eml/fake-email-header.json
Normal file
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"document": "../../../../test_documents/vendored/unstructured/eml/fake-email-header.eml",
|
||||
"file_type": "eml",
|
||||
"file_size": 1207,
|
||||
"expected_frameworks": ["kreuzberg", "tika", "unstructured"],
|
||||
"metadata": {
|
||||
"description": "Document from unstructured test suite",
|
||||
"source": "unstructured",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"text_file": "../../../../test_documents/ground_truth/eml/fake-email-header.txt",
|
||||
"source": "python_email"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"document": "../../../../test_documents/vendored/unstructured/eml/fake-email-image-embedded.eml",
|
||||
"file_type": "eml",
|
||||
"file_size": 297126,
|
||||
"expected_frameworks": ["kreuzberg", "tika", "unstructured"],
|
||||
"metadata": {
|
||||
"description": "Document from unstructured test suite",
|
||||
"source": "unstructured",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"text_file": "../../../../test_documents/ground_truth/eml/fake-email-image-embedded.txt",
|
||||
"source": "python_email"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"document": "../../../../test_documents/vendored/unstructured/eml/fake-email-malformed-encoding.eml",
|
||||
"file_type": "eml",
|
||||
"file_size": 898,
|
||||
"expected_frameworks": ["kreuzberg", "tika", "unstructured"],
|
||||
"metadata": {
|
||||
"description": "Document from unstructured test suite",
|
||||
"source": "unstructured",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"text_file": "../../../../test_documents/ground_truth/eml/fake-email-malformed-encoding.txt",
|
||||
"source": "python_email"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"document": "../../../../test_documents/vendored/unstructured/eml/fake-email-utf-16-be.eml",
|
||||
"file_type": "eml",
|
||||
"file_size": 1614,
|
||||
"expected_frameworks": ["kreuzberg", "tika", "unstructured"],
|
||||
"metadata": {
|
||||
"description": "Document from unstructured test suite",
|
||||
"source": "unstructured",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"text_file": "../../../../test_documents/ground_truth/eml/fake-email-utf-16-be.txt",
|
||||
"source": "python_email"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"document": "../../../../test_documents/vendored/unstructured/eml/fake-email-utf-16-le.eml",
|
||||
"file_type": "eml",
|
||||
"file_size": 1614,
|
||||
"expected_frameworks": ["kreuzberg", "tika", "unstructured"],
|
||||
"metadata": {
|
||||
"description": "Document from unstructured test suite",
|
||||
"source": "unstructured",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"text_file": "../../../../test_documents/ground_truth/eml/fake-email-utf-16-le.txt",
|
||||
"source": "python_email"
|
||||
}
|
||||
}
|
||||
15
tools/benchmark-harness/fixtures/eml/fake-email-utf-16.json
Normal file
15
tools/benchmark-harness/fixtures/eml/fake-email-utf-16.json
Normal file
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"document": "../../../../test_documents/vendored/unstructured/eml/fake-email-utf-16.eml",
|
||||
"file_type": "eml",
|
||||
"file_size": 1616,
|
||||
"expected_frameworks": ["kreuzberg", "tika", "unstructured"],
|
||||
"metadata": {
|
||||
"description": "Document from unstructured test suite",
|
||||
"source": "unstructured",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"text_file": "../../../../test_documents/ground_truth/eml/fake-email-utf-16.txt",
|
||||
"source": "python_email"
|
||||
}
|
||||
}
|
||||
15
tools/benchmark-harness/fixtures/eml/fake-email.json
Normal file
15
tools/benchmark-harness/fixtures/eml/fake-email.json
Normal file
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"document": "../../../../test_documents/vendored/unstructured/eml/fake-email.eml",
|
||||
"file_type": "eml",
|
||||
"file_size": 807,
|
||||
"expected_frameworks": ["kreuzberg", "tika", "unstructured"],
|
||||
"metadata": {
|
||||
"description": "Document from unstructured test suite",
|
||||
"source": "unstructured",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"text_file": "../../../../test_documents/ground_truth/eml/fake-email.txt",
|
||||
"source": "python_email"
|
||||
}
|
||||
}
|
||||
15
tools/benchmark-harness/fixtures/eml/fake-encrypted.json
Normal file
15
tools/benchmark-harness/fixtures/eml/fake-encrypted.json
Normal file
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"document": "../../../../test_documents/vendored/unstructured/eml/fake-encrypted.eml",
|
||||
"file_type": "eml",
|
||||
"file_size": 669,
|
||||
"expected_frameworks": ["kreuzberg", "tika", "unstructured"],
|
||||
"metadata": {
|
||||
"description": "Document from unstructured test suite",
|
||||
"source": "unstructured",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"text_file": "../../../../test_documents/ground_truth/eml/fake-encrypted.txt",
|
||||
"source": "python_email"
|
||||
}
|
||||
}
|
||||
15
tools/benchmark-harness/fixtures/eml/family-day.json
Normal file
15
tools/benchmark-harness/fixtures/eml/family-day.json
Normal file
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"document": "../../../../test_documents/vendored/unstructured/eml/family-day.eml",
|
||||
"file_type": "eml",
|
||||
"file_size": 1291,
|
||||
"expected_frameworks": ["kreuzberg", "tika", "unstructured"],
|
||||
"metadata": {
|
||||
"description": "Document from unstructured test suite",
|
||||
"source": "unstructured",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"text_file": "../../../../test_documents/ground_truth/eml/family-day.txt",
|
||||
"source": "python_email"
|
||||
}
|
||||
}
|
||||
15
tools/benchmark-harness/fixtures/eml/mime-attach-mp3.json
Normal file
15
tools/benchmark-harness/fixtures/eml/mime-attach-mp3.json
Normal file
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"document": "../../../../test_documents/vendored/unstructured/eml/mime-attach-mp3.eml",
|
||||
"file_type": "eml",
|
||||
"file_size": 70911,
|
||||
"expected_frameworks": ["kreuzberg", "tika", "unstructured"],
|
||||
"metadata": {
|
||||
"description": "Document from unstructured test suite",
|
||||
"source": "unstructured",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"text_file": "../../../../test_documents/ground_truth/eml/mime-attach-mp3.txt",
|
||||
"source": "python_email"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"document": "../../../../test_documents/vendored/unstructured/eml/mime-different-plain-html.eml",
|
||||
"file_type": "eml",
|
||||
"file_size": 1397,
|
||||
"expected_frameworks": ["kreuzberg", "tika", "unstructured"],
|
||||
"metadata": {
|
||||
"description": "Document from unstructured test suite",
|
||||
"source": "unstructured",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"text_file": "../../../../test_documents/ground_truth/eml/mime-different-plain-html.txt",
|
||||
"source": "python_email"
|
||||
}
|
||||
}
|
||||
15
tools/benchmark-harness/fixtures/eml/mime-html-only.json
Normal file
15
tools/benchmark-harness/fixtures/eml/mime-html-only.json
Normal file
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"document": "../../../../test_documents/vendored/unstructured/eml/mime-html-only.eml",
|
||||
"file_type": "eml",
|
||||
"file_size": 640,
|
||||
"expected_frameworks": ["kreuzberg", "tika", "unstructured"],
|
||||
"metadata": {
|
||||
"description": "Document from unstructured test suite",
|
||||
"source": "unstructured",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"text_file": "../../../../test_documents/ground_truth/eml/mime-html-only.txt",
|
||||
"source": "python_email"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"document": "../../../../test_documents/vendored/unstructured/eml/mime-multi-to-cc-bcc.eml",
|
||||
"file_type": "eml",
|
||||
"file_size": 350,
|
||||
"expected_frameworks": ["kreuzberg", "tika", "unstructured"],
|
||||
"metadata": {
|
||||
"description": "Document from unstructured test suite",
|
||||
"source": "unstructured",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"text_file": "../../../../test_documents/ground_truth/eml/mime-multi-to-cc-bcc.txt",
|
||||
"source": "python_email"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"document": "../../../../test_documents/vendored/unstructured/eml/mime-multipart-digest.eml",
|
||||
"file_type": "eml",
|
||||
"file_size": 721,
|
||||
"expected_frameworks": ["kreuzberg", "tika", "unstructured"],
|
||||
"metadata": {
|
||||
"description": "Document from unstructured test suite",
|
||||
"source": "unstructured",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"text_file": "../../../../test_documents/ground_truth/eml/mime-multipart-digest.txt",
|
||||
"source": "python_email"
|
||||
}
|
||||
}
|
||||
15
tools/benchmark-harness/fixtures/eml/mime-no-body.json
Normal file
15
tools/benchmark-harness/fixtures/eml/mime-no-body.json
Normal file
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"document": "../../../../test_documents/vendored/unstructured/eml/mime-no-body.eml",
|
||||
"file_type": "eml",
|
||||
"file_size": 985,
|
||||
"expected_frameworks": ["kreuzberg", "tika", "unstructured"],
|
||||
"metadata": {
|
||||
"description": "Document from unstructured test suite",
|
||||
"source": "unstructured",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"text_file": "../../../../test_documents/ground_truth/eml/mime-no-body.txt",
|
||||
"source": "python_email"
|
||||
}
|
||||
}
|
||||
15
tools/benchmark-harness/fixtures/eml/mime-no-subject.json
Normal file
15
tools/benchmark-harness/fixtures/eml/mime-no-subject.json
Normal file
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"document": "../../../../test_documents/vendored/unstructured/eml/mime-no-subject.eml",
|
||||
"file_type": "eml",
|
||||
"file_size": 162,
|
||||
"expected_frameworks": ["kreuzberg", "tika", "unstructured"],
|
||||
"metadata": {
|
||||
"description": "Document from unstructured test suite",
|
||||
"source": "unstructured",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"text_file": "../../../../test_documents/ground_truth/eml/mime-no-subject.txt",
|
||||
"source": "python_email"
|
||||
}
|
||||
}
|
||||
15
tools/benchmark-harness/fixtures/eml/mime-no-to.json
Normal file
15
tools/benchmark-harness/fixtures/eml/mime-no-to.json
Normal file
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"document": "../../../../test_documents/vendored/unstructured/eml/mime-no-to.eml",
|
||||
"file_type": "eml",
|
||||
"file_size": 264,
|
||||
"expected_frameworks": ["kreuzberg", "tika", "unstructured"],
|
||||
"metadata": {
|
||||
"description": "Document from unstructured test suite",
|
||||
"source": "unstructured",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"text_file": "../../../../test_documents/ground_truth/eml/mime-no-to.txt",
|
||||
"source": "python_email"
|
||||
}
|
||||
}
|
||||
15
tools/benchmark-harness/fixtures/eml/mime-simple.json
Normal file
15
tools/benchmark-harness/fixtures/eml/mime-simple.json
Normal file
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"document": "../../../../test_documents/vendored/unstructured/eml/mime-simple.eml",
|
||||
"file_type": "eml",
|
||||
"file_size": 452,
|
||||
"expected_frameworks": ["kreuzberg", "tika", "unstructured"],
|
||||
"metadata": {
|
||||
"description": "Document from unstructured test suite",
|
||||
"source": "unstructured",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"text_file": "../../../../test_documents/ground_truth/eml/mime-simple.txt",
|
||||
"source": "python_email"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"document": "../../../../test_documents/vendored/unstructured/eml/mime-word-encoded-subject.eml",
|
||||
"file_type": "eml",
|
||||
"file_size": 261,
|
||||
"expected_frameworks": ["kreuzberg", "tika", "unstructured"],
|
||||
"metadata": {
|
||||
"description": "Document from unstructured test suite",
|
||||
"source": "unstructured",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"text_file": "../../../../test_documents/ground_truth/eml/mime-word-encoded-subject.txt",
|
||||
"source": "python_email"
|
||||
}
|
||||
}
|
||||
15
tools/benchmark-harness/fixtures/eml/rfc822-no-date.json
Normal file
15
tools/benchmark-harness/fixtures/eml/rfc822-no-date.json
Normal file
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"document": "../../../../test_documents/vendored/unstructured/eml/rfc822-no-date.eml",
|
||||
"file_type": "eml",
|
||||
"file_size": 232,
|
||||
"expected_frameworks": ["kreuzberg", "tika", "unstructured"],
|
||||
"metadata": {
|
||||
"description": "Document from unstructured test suite",
|
||||
"source": "unstructured",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"text_file": "../../../../test_documents/ground_truth/eml/rfc822-no-date.txt",
|
||||
"source": "python_email"
|
||||
}
|
||||
}
|
||||
15
tools/benchmark-harness/fixtures/eml/simple-rfc-822.json
Normal file
15
tools/benchmark-harness/fixtures/eml/simple-rfc-822.json
Normal file
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"document": "../../../../test_documents/vendored/unstructured/eml/simple-rfc-822.eml",
|
||||
"file_type": "eml",
|
||||
"file_size": 679,
|
||||
"expected_frameworks": ["kreuzberg", "tika", "unstructured"],
|
||||
"metadata": {
|
||||
"description": "Document from unstructured test suite",
|
||||
"source": "unstructured",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"text_file": "../../../../test_documents/ground_truth/eml/simple-rfc-822.txt",
|
||||
"source": "python_email"
|
||||
}
|
||||
}
|
||||
15
tools/benchmark-harness/fixtures/eml/test-invalid-date.json
Normal file
15
tools/benchmark-harness/fixtures/eml/test-invalid-date.json
Normal file
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"document": "../../../../test_documents/vendored/unstructured/eml/test-invalid-date.eml",
|
||||
"file_type": "eml",
|
||||
"file_size": 161,
|
||||
"expected_frameworks": ["kreuzberg", "tika", "unstructured"],
|
||||
"metadata": {
|
||||
"description": "Document from unstructured test suite",
|
||||
"source": "unstructured",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"text_file": "../../../../test_documents/ground_truth/eml/test-invalid-date.txt",
|
||||
"source": "python_email"
|
||||
}
|
||||
}
|
||||
15
tools/benchmark-harness/fixtures/eml/test-iso-8601-date.json
Normal file
15
tools/benchmark-harness/fixtures/eml/test-iso-8601-date.json
Normal file
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"document": "../../../../test_documents/vendored/unstructured/eml/test-iso-8601-date.eml",
|
||||
"file_type": "eml",
|
||||
"file_size": 135,
|
||||
"expected_frameworks": ["kreuzberg", "tika", "unstructured"],
|
||||
"metadata": {
|
||||
"description": "Document from unstructured test suite",
|
||||
"source": "unstructured",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"text_file": "../../../../test_documents/ground_truth/eml/test-iso-8601-date.txt",
|
||||
"source": "python_email"
|
||||
}
|
||||
}
|
||||
15
tools/benchmark-harness/fixtures/eml/test-rfc2822-date.json
Normal file
15
tools/benchmark-harness/fixtures/eml/test-rfc2822-date.json
Normal file
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"document": "../../../../test_documents/vendored/unstructured/eml/test-rfc2822-date.eml",
|
||||
"file_type": "eml",
|
||||
"file_size": 151,
|
||||
"expected_frameworks": ["kreuzberg", "tika", "unstructured"],
|
||||
"metadata": {
|
||||
"description": "Document from unstructured test suite",
|
||||
"source": "unstructured",
|
||||
"size_category": "small"
|
||||
},
|
||||
"ground_truth": {
|
||||
"text_file": "../../../../test_documents/ground_truth/eml/test-rfc2822-date.txt",
|
||||
"source": "python_email"
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user