Files
fil/tools/benchmark-harness/fixtures/pdf_ocr_rotated.json

32 lines
938 B
JSON
Raw Normal View History

2026-06-01 23:40:55 +02:00
{
"document": "../../../test_documents/pdf/ocr_test_rotated_180.pdf",
"file_type": "pdf",
"file_size": 94703,
"expected_frameworks": [
"kreuzberg",
"docling",
"markitdown",
"mineru",
"pdfminer",
"pdfplumber",
"pdftotext",
"pymupdf4llm",
"pypdf",
"tika",
"unstructured"
],
"metadata": {
"description": "Rotated OCR test PDF - 92KB scanned document with rotation to test OCR engine handling of page orientation",
"category": "pdf-ocr",
"size_class": "small",
"requires_ocr": true,
"special_handling": "rotated_pages"
},
"ground_truth": {
"text_file": "../../../test_documents/ground_truth/pdf/pdf_ocr_rotated_180.md",
"source": "vision",
"markdown_file": "../../../test_documents/ground_truth/pdf/pdf_ocr_rotated_180.md"
},
"ground_truth_text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package"
}