Files

29 lines
706 B
JSON
Raw Permalink Normal View History

2026-06-01 23:40:55 +02:00
{
"document": "../../../../test_documents/vendored/docling/pdf/redp5110_sampled.pdf",
"file_type": "pdf",
"file_size": 1277938,
"expected_frameworks": [
"kreuzberg",
"docling",
"markitdown",
"mineru",
"pdfminer",
"pdfplumber",
"pdftotext",
"pymupdf4llm",
"pypdf",
"tika",
"unstructured"
],
"metadata": {
"description": "Document from docling test suite",
"source": "docling",
"size_category": "medium"
},
"ground_truth": {
"text_file": "../../../../test_documents/ground_truth/pdf/redp5110_sampled.txt",
"source": "mistral-pixtral",
"markdown_file": "../../../../test_documents/ground_truth/pdf/redp5110_sampled.md"
}
}