This commit is contained in:
21
crates/kreuzberg/test_data/hocr/word_confidence.hocr
Normal file
21
crates/kreuzberg/test_data/hocr/word_confidence.hocr
Normal file
@@ -0,0 +1,21 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
||||
<head>
|
||||
<title></title>
|
||||
<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
|
||||
<meta name='ocr-system' content='tesseract 4.0.0-beta.1' />
|
||||
<meta name='ocr-capabilities' content='ocr_page ocr_carea ocr_par ocr_line ocrx_word'/>
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<p id="no_confidence">Foo</p>
|
||||
<p id="x_wconf_given" title="x_wconf 80">Foo</p>
|
||||
<p id="malformed_x_wconf" title="x_wconf eighty">Foo</p>
|
||||
<p id="x_confs_given" title="x_confs 20 7 90">Foo</p>
|
||||
<p id="malformed_x_confs" title="x_confs a b c">Foo</p>
|
||||
<p id="x_wconf_and_x_confs" title="x_wconf 80; x_confs 20 5 90">Foo</p>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
Reference in New Issue
Block a user