Files
fil/tools/benchmark-harness/fixtures/image_scanned.json

26 lines
631 B
JSON
Raw Normal View History

2026-06-01 23:40:55 +02:00
{
"document": "../../../test_documents/images/english_and_korean.png",
"file_type": "png",
"file_size": 305401,
"expected_frameworks": [
"kreuzberg",
"docling",
"markitdown",
"mineru",
"pymupdf4llm",
"tika",
"unstructured"
],
"metadata": {
"description": "Scanned multilingual document with English and Korean text - 305KB PNG",
"category": "image-scanned",
"size_class": "medium",
"requires_ocr": true,
"languages": ["en", "ko"]
},
"ground_truth": {
"text_file": "../../../test_documents/ground_truth/png/image_scanned.md",
"source": "mistral-pixtral"
}
}