This commit is contained in:
31
tools/benchmark-harness/fixtures/pdf_ocr_rotated.json
Normal file
31
tools/benchmark-harness/fixtures/pdf_ocr_rotated.json
Normal file
@@ -0,0 +1,31 @@
|
||||
{
|
||||
"document": "../../../test_documents/pdf/ocr_test_rotated_180.pdf",
|
||||
"file_type": "pdf",
|
||||
"file_size": 94703,
|
||||
"expected_frameworks": [
|
||||
"kreuzberg",
|
||||
"docling",
|
||||
"markitdown",
|
||||
"mineru",
|
||||
"pdfminer",
|
||||
"pdfplumber",
|
||||
"pdftotext",
|
||||
"pymupdf4llm",
|
||||
"pypdf",
|
||||
"tika",
|
||||
"unstructured"
|
||||
],
|
||||
"metadata": {
|
||||
"description": "Rotated OCR test PDF - 92KB scanned document with rotation to test OCR engine handling of page orientation",
|
||||
"category": "pdf-ocr",
|
||||
"size_class": "small",
|
||||
"requires_ocr": true,
|
||||
"special_handling": "rotated_pages"
|
||||
},
|
||||
"ground_truth": {
|
||||
"text_file": "../../../test_documents/ground_truth/pdf/pdf_ocr_rotated_180.md",
|
||||
"source": "vision",
|
||||
"markdown_file": "../../../test_documents/ground_truth/pdf/pdf_ocr_rotated_180.md"
|
||||
},
|
||||
"ground_truth_text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package"
|
||||
}
|
||||
Reference in New Issue
Block a user