This commit is contained in:
97
crates/kreuzberg/test_data/hocr/english_pdf_default.hocr
Normal file
97
crates/kreuzberg/test_data/hocr/english_pdf_default.hocr
Normal file
@@ -0,0 +1,97 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
||||
<head>
|
||||
<title></title>
|
||||
<meta http-equiv="Content-Type" content="text/html;charset=utf-8"/>
|
||||
<meta name='ocr-system' content='tesseract 5.5.1' />
|
||||
<meta name='ocr-capabilities' content='ocr_page ocr_carea ocr_par ocr_line ocrx_word ocrp_dir ocrp_lang ocrp_wconf'/>
|
||||
</head>
|
||||
<body>
|
||||
<div class='ocr_page' id='page_1' title='image "/var/folders/f8/_s_ks96d60x_6g__y7vft2wc0000gn/T/tmpy65g4kro.png"; bbox 0 0 1275 1651; ppageno 0; scan_res 70 70'>
|
||||
<div class='ocr_carea' id='block_1_1' title="bbox 915 100 1199 152">
|
||||
<p class='ocr_par' id='par_1_1' lang='eng' title="bbox 915 100 1199 152">
|
||||
<span class='ocr_line' id='line_1_1' title="bbox 915 100 982 119; baseline 0 0; x_size 23.75; x_descenders 4.75; x_ascenders 6.477273">
|
||||
<span class='ocrx_word' id='word_1_1' title='bbox 915 100 982 119; x_wconf 90'>IDRH</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_2' title="bbox 915 131 1199 152; baseline -0.004 0; x_size 26.310345; x_descenders 5.3103447; x_ascenders 7">
|
||||
<span class='ocrx_word' id='word_1_2' title='bbox 915 131 1140 152; x_wconf 92'>Non-text-searchable</span>
|
||||
<span class='ocrx_word' id='word_1_3' title='bbox 1149 132 1199 151; x_wconf 96'>PDF</span>
|
||||
</span>
|
||||
</p>
|
||||
</div>
|
||||
<div class='ocr_carea' id='block_1_2' title="bbox 137 180 1040 303">
|
||||
<p class='ocr_par' id='par_1_2' lang='eng' title="bbox 137 180 1040 303">
|
||||
<span class='ocr_line' id='line_1_3' title="bbox 158 180 1036 206; baseline 0.001 -6; x_size 27; x_descenders 6; x_ascenders 7">
|
||||
<span class='ocrx_word' id='word_1_4' title='bbox 158 180 208 201; x_wconf 96'>This</span>
|
||||
<span class='ocrx_word' id='word_1_5' title='bbox 217 180 234 201; x_wconf 96'>is</span>
|
||||
<span class='ocrx_word' id='word_1_6' title='bbox 242 187 267 201; x_wconf 96'>an</span>
|
||||
<span class='ocrx_word' id='word_1_7' title='bbox 276 180 369 206; x_wconf 96'>example</span>
|
||||
<span class='ocrx_word' id='word_1_8' title='bbox 377 180 403 201; x_wconf 96'>of</span>
|
||||
<span class='ocrx_word' id='word_1_9' title='bbox 408 187 420 201; x_wconf 93'>a</span>
|
||||
<span class='ocrx_word' id='word_1_10' title='bbox 427 180 647 201; x_wconf 93'>non-text-searchable</span>
|
||||
<span class='ocrx_word' id='word_1_11' title='bbox 655 181 711 201; x_wconf 96'>PDF.</span>
|
||||
<span class='ocrx_word' id='word_1_12' title='bbox 721 181 813 201; x_wconf 96'>Because</span>
|
||||
<span class='ocrx_word' id='word_1_13' title='bbox 822 180 836 201; x_wconf 96'>it</span>
|
||||
<span class='ocrx_word' id='word_1_14' title='bbox 844 187 886 201; x_wconf 96'>was</span>
|
||||
<span class='ocrx_word' id='word_1_15' title='bbox 895 180 975 201; x_wconf 96'>created</span>
|
||||
<span class='ocrx_word' id='word_1_16' title='bbox 983 180 1036 201; x_wconf 96'>from</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_4' title="bbox 137 212 1023 239; baseline 0 -6; x_size 26; x_descenders 6; x_ascenders 6">
|
||||
<span class='ocrx_word' id='word_1_17' title='bbox 137 219 162 233; x_wconf 96'>an</span>
|
||||
<span class='ocrx_word' id='word_1_18' title='bbox 171 213 238 239; x_wconf 96'>image</span>
|
||||
<span class='ocrx_word' id='word_1_19' title='bbox 246 213 311 233; x_wconf 96'>rather</span>
|
||||
<span class='ocrx_word' id='word_1_20' title='bbox 318 213 366 233; x_wconf 95'>than</span>
|
||||
<span class='ocrx_word' id='word_1_21' title='bbox 374 219 386 233; x_wconf 95'>a</span>
|
||||
<span class='ocrx_word' id='word_1_22' title='bbox 394 216 435 233; x_wconf 96'>text</span>
|
||||
<span class='ocrx_word' id='word_1_23' title='bbox 443 213 559 237; x_wconf 96'>document,</span>
|
||||
<span class='ocrx_word' id='word_1_24' title='bbox 569 213 583 233; x_wconf 96'>it</span>
|
||||
<span class='ocrx_word' id='word_1_25' title='bbox 591 216 665 233; x_wconf 96'>cannot</span>
|
||||
<span class='ocrx_word' id='word_1_26' title='bbox 673 213 699 233; x_wconf 96'>be</span>
|
||||
<span class='ocrx_word' id='word_1_27' title='bbox 707 213 804 233; x_wconf 96'>rendered</span>
|
||||
<span class='ocrx_word' id='word_1_28' title='bbox 812 219 834 233; x_wconf 96'>as</span>
|
||||
<span class='ocrx_word' id='word_1_29' title='bbox 842 212 898 239; x_wconf 96'>plain</span>
|
||||
<span class='ocrx_word' id='word_1_30' title='bbox 906 216 947 233; x_wconf 96'>text</span>
|
||||
<span class='ocrx_word' id='word_1_31' title='bbox 955 213 982 239; x_wconf 96'>by</span>
|
||||
<span class='ocrx_word' id='word_1_32' title='bbox 990 213 1023 233; x_wconf 96'>the</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_5' title="bbox 137 245 1019 271; baseline 0 -6; x_size 26; x_descenders 6; x_ascenders 6">
|
||||
<span class='ocrx_word' id='word_1_33' title='bbox 137 246 187 265; x_wconf 96'>PDF</span>
|
||||
<span class='ocrx_word' id='word_1_34' title='bbox 195 245 270 265; x_wconf 96'>reader.</span>
|
||||
<span class='ocrx_word' id='word_1_35' title='bbox 280 245 342 269; x_wconf 96'>Thus,</span>
|
||||
<span class='ocrx_word' id='word_1_36' title='bbox 351 245 470 271; x_wconf 95'>attempting</span>
|
||||
<span class='ocrx_word' id='word_1_37' title='bbox 478 248 499 265; x_wconf 96'>to</span>
|
||||
<span class='ocrx_word' id='word_1_38' title='bbox 508 245 571 265; x_wconf 96'>select</span>
|
||||
<span class='ocrx_word' id='word_1_39' title='bbox 578 245 612 265; x_wconf 96'>the</span>
|
||||
<span class='ocrx_word' id='word_1_40' title='bbox 620 248 662 265; x_wconf 96'>text</span>
|
||||
<span class='ocrx_word' id='word_1_41' title='bbox 670 251 697 265; x_wconf 96'>on</span>
|
||||
<span class='ocrx_word' id='word_1_42' title='bbox 705 245 738 265; x_wconf 96'>the</span>
|
||||
<span class='ocrx_word' id='word_1_43' title='bbox 746 251 798 271; x_wconf 97'>page</span>
|
||||
<span class='ocrx_word' id='word_1_44' title='bbox 807 251 829 265; x_wconf 97'>as</span>
|
||||
<span class='ocrx_word' id='word_1_45' title='bbox 837 245 915 271; x_wconf 96'>though</span>
|
||||
<span class='ocrx_word' id='word_1_46' title='bbox 923 245 938 265; x_wconf 96'>it</span>
|
||||
<span class='ocrx_word' id='word_1_47' title='bbox 946 251 999 265; x_wconf 96'>were</span>
|
||||
<span class='ocrx_word' id='word_1_48' title='bbox 1007 251 1019 265; x_wconf 96'>a</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_6' title="bbox 137 277 1040 303; baseline -0.001 -5; x_size 26; x_descenders 5; x_ascenders 7">
|
||||
<span class='ocrx_word' id='word_1_49' title='bbox 137 281 179 298; x_wconf 96'>text</span>
|
||||
<span class='ocrx_word' id='word_1_50' title='bbox 187 277 297 298; x_wconf 96'>document</span>
|
||||
<span class='ocrx_word' id='word_1_51' title='bbox 305 284 328 298; x_wconf 96'>or</span>
|
||||
<span class='ocrx_word' id='word_1_52' title='bbox 335 277 420 298; x_wconf 96'>website</span>
|
||||
<span class='ocrx_word' id='word_1_53' title='bbox 428 277 470 297; x_wconf 96'>will</span>
|
||||
<span class='ocrx_word' id='word_1_54' title='bbox 478 281 514 298; x_wconf 97'>not</span>
|
||||
<span class='ocrx_word' id='word_1_55' title='bbox 522 277 585 302; x_wconf 96'>work,</span>
|
||||
<span class='ocrx_word' id='word_1_56' title='bbox 594 277 706 303; x_wconf 96'>regardless</span>
|
||||
<span class='ocrx_word' id='word_1_57' title='bbox 715 277 741 298; x_wconf 96'>of</span>
|
||||
<span class='ocrx_word' id='word_1_58' title='bbox 745 277 793 298; x_wconf 96'>how</span>
|
||||
<span class='ocrx_word' id='word_1_59' title='bbox 800 277 867 303; x_wconf 96'>neatly</span>
|
||||
<span class='ocrx_word' id='word_1_60' title='bbox 876 277 891 298; x_wconf 96'>it</span>
|
||||
<span class='ocrx_word' id='word_1_61' title='bbox 899 277 916 298; x_wconf 96'>is</span>
|
||||
<span class='ocrx_word' id='word_1_62' title='bbox 925 277 1040 303; x_wconf 96'>organized.</span>
|
||||
</span>
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
492
crates/kreuzberg/test_data/hocr/german_pdf_default.hocr
Normal file
492
crates/kreuzberg/test_data/hocr/german_pdf_default.hocr
Normal file
@@ -0,0 +1,492 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
||||
<head>
|
||||
<title></title>
|
||||
<meta http-equiv="Content-Type" content="text/html;charset=utf-8"/>
|
||||
<meta name='ocr-system' content='tesseract 5.5.1' />
|
||||
<meta name='ocr-capabilities' content='ocr_page ocr_carea ocr_par ocr_line ocrx_word ocrp_dir ocrp_lang ocrp_wconf'/>
|
||||
</head>
|
||||
<body>
|
||||
<div class='ocr_page' id='page_1' title='image "/var/folders/f8/_s_ks96d60x_6g__y7vft2wc0000gn/T/tmp5mkdsq_s.png"; bbox 0 0 1241 1754; ppageno 0; scan_res 70 70'>
|
||||
<div class='ocr_carea' id='block_1_1' title="bbox 728 64 1175 163">
|
||||
<p class='ocr_par' id='par_1_1' lang='eng' title="bbox 728 64 1175 163">
|
||||
<span class='ocr_line' id='line_1_1' title="bbox 728 64 1175 163; baseline -0.002 -27; x_size 56; x_descenders 12; x_ascenders 16">
|
||||
<span class='ocrx_word' id='word_1_1' title='bbox 728 64 805 163; x_wconf 67'>M</span>
|
||||
<span class='ocrx_word' id='word_1_2' title='bbox 819 96 992 160; x_wconf 96'>Heimat</span>
|
||||
<span class='ocrx_word' id='word_1_3' title='bbox 1012 98 1175 147; x_wconf 96'>Bayern</span>
|
||||
</span>
|
||||
</p>
|
||||
</div>
|
||||
<div class='ocr_carea' id='block_1_2' title="bbox 819 152 1171 167">
|
||||
<p class='ocr_par' id='par_1_2' lang='eng' title="bbox 819 152 1171 167">
|
||||
<span class='ocr_line' id='line_1_2' title="bbox 819 152 1171 167; baseline 0.003 -4; x_size 20; x_descenders 5; x_ascenders 5">
|
||||
<span class='ocrx_word' id='word_1_4' title='bbox 819 152 895 167; x_wconf 87'>Bayerischer</span>
|
||||
<span class='ocrx_word' id='word_1_5' title='bbox 927 155 1001 164; x_wconf 93'>Landesverein</span>
|
||||
<span class='ocrx_word' id='word_1_6' title='bbox 1014 152 1043 164; x_wconf 71'>fiir</span>
|
||||
<span class='ocrx_word' id='word_1_7' title='bbox 1053 152 1141 167; x_wconf 90'>Heimatpflege</span>
|
||||
<span class='ocrx_word' id='word_1_8' title='bbox 1148 153 1171 164; x_wconf 85'>e.V.</span>
|
||||
</span>
|
||||
</p>
|
||||
</div>
|
||||
<div class='ocr_photo' id='block_1_3' title="bbox 910 158 944 164"></div>
|
||||
<div class='ocr_carea' id='block_1_4' title="bbox 897 200 1089 265">
|
||||
<p class='ocr_par' id='par_1_3' lang='eng' title="bbox 897 200 1089 265">
|
||||
<span class='ocr_line' id='line_1_3' title="bbox 898 200 1027 217; baseline 0 0; x_size 22.279999; x_descenders 5.2799997; x_ascenders 5">
|
||||
<span class='ocrx_word' id='word_1_9' title='bbox 898 200 1027 217; x_wconf 57'>Geschéftsstelle</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_4' title="bbox 898 224 1089 245; baseline 0 -4; x_size 21; x_descenders 4; x_ascenders 5">
|
||||
<span class='ocrx_word' id='word_1_10' title='bbox 898 224 1015 245; x_wconf 92'>LudwigstraBe</span>
|
||||
<span class='ocrx_word' id='word_1_11' title='bbox 1022 225 1043 241; x_wconf 93'>23</span>
|
||||
<span class='ocrx_word' id='word_1_12' title='bbox 1051 224 1089 245; x_wconf 91'>Rgb.</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_5' title="bbox 897 248 1043 265; baseline 0 0; x_size 22.279999; x_descenders 5.2799997; x_ascenders 5">
|
||||
<span class='ocrx_word' id='word_1_13' title='bbox 897 249 953 265; x_wconf 96'>80539</span>
|
||||
<span class='ocrx_word' id='word_1_14' title='bbox 961 248 1043 265; x_wconf 55'>Munchen</span>
|
||||
</span>
|
||||
</p>
|
||||
</div>
|
||||
<div class='ocr_carea' id='block_1_5' title="bbox 896 296 1133 366">
|
||||
<p class='ocr_par' id='par_1_4' lang='eng' title="bbox 896 296 1133 366">
|
||||
<span class='ocr_line' id='line_1_6' title="bbox 896 296 1123 314; baseline 0.004 -1; x_size 22.26087; x_descenders 5.2608695; x_ascenders 6">
|
||||
<span class='ocrx_word' id='word_1_15' title='bbox 896 296 961 313; x_wconf 92'>Telefon</span>
|
||||
<span class='ocrx_word' id='word_1_16' title='bbox 985 298 1018 314; x_wconf 32'>089</span>
|
||||
<span class='ocrx_word' id='word_1_17' title='bbox 1024 298 1123 314; x_wconf 32'>/286629-0</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_7' title="bbox 896 321 1133 338; baseline 0.004 -1; x_size 20.888889; x_descenders 4.8888888; x_ascenders 5.7777777">
|
||||
<span class='ocrx_word' id='word_1_18' title='bbox 896 321 958 338; x_wconf 88'>Telefax</span>
|
||||
<span class='ocrx_word' id='word_1_19' title='bbox 984 322 1017 338; x_wconf 78'>089</span>
|
||||
<span class='ocrx_word' id='word_1_20' title='bbox 1024 322 1030 338; x_wconf 60'>/</span>
|
||||
<span class='ocrx_word' id='word_1_21' title='bbox 1036 322 1133 338; x_wconf 60'>286629-28</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_8' title="bbox 897 345 1102 366; baseline 0.005 -5; x_size 21; x_descenders 4; x_ascenders 6">
|
||||
<span class='ocrx_word' id='word_1_22' title='bbox 897 345 1102 366; x_wconf 91'>info@heimat-bayern.de</span>
|
||||
</span>
|
||||
</p>
|
||||
</div>
|
||||
<div class='ocr_carea' id='block_1_6' title="bbox 139 480 705 525">
|
||||
<p class='ocr_par' id='par_1_5' lang='eng' title="bbox 139 480 705 525">
|
||||
<span class='ocr_line' id='line_1_9' title="bbox 139 480 705 504; baseline 0 -5; x_size 23; x_descenders 5; x_ascenders 5">
|
||||
<span class='ocrx_word' id='word_1_23' title='bbox 139 481 249 504; x_wconf 91'>Bayerisches</span>
|
||||
<span class='ocrx_word' id='word_1_24' title='bbox 257 481 427 499; x_wconf 92'>Staatsministerium</span>
|
||||
<span class='ocrx_word' id='word_1_25' title='bbox 435 480 463 499; x_wconf 84'>fiir</span>
|
||||
<span class='ocrx_word' id='word_1_26' title='bbox 469 480 597 499; x_wconf 96'>Wissenschaft</span>
|
||||
<span class='ocrx_word' id='word_1_27' title='bbox 605 481 641 499; x_wconf 97'>und</span>
|
||||
<span class='ocrx_word' id='word_1_28' title='bbox 651 482 705 499; x_wconf 96'>Kunst</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_10' title="bbox 139 507 234 525; baseline 0 0; x_size 22.906475; x_descenders 4.9064751; x_ascenders 6">
|
||||
<span class='ocrx_word' id='word_1_29' title='bbox 139 509 168 525; x_wconf 95'>Per</span>
|
||||
<span class='ocrx_word' id='word_1_30' title='bbox 176 507 234 525; x_wconf 93'>E-Mail</span>
|
||||
</span>
|
||||
</p>
|
||||
</div>
|
||||
<div class='ocr_carea' id='block_1_7' title="bbox 814 640 1082 663">
|
||||
<p class='ocr_par' id='par_1_6' lang='eng' title="bbox 814 640 1082 663">
|
||||
<span class='ocr_line' id='line_1_11' title="bbox 814 640 1082 663; baseline 0.004 -5; x_size 23; x_descenders 5; x_ascenders 6">
|
||||
<span class='ocrx_word' id='word_1_31' title='bbox 814 640 910 661; x_wconf 84'>Miinchen,</span>
|
||||
<span class='ocrx_word' id='word_1_32' title='bbox 919 642 947 658; x_wconf 96'>01.</span>
|
||||
<span class='ocrx_word' id='word_1_33' title='bbox 956 642 1026 663; x_wconf 96'>August</span>
|
||||
<span class='ocrx_word' id='word_1_34' title='bbox 1033 642 1082 659; x_wconf 96'>2025</span>
|
||||
</span>
|
||||
</p>
|
||||
</div>
|
||||
<div class='ocr_carea' id='block_1_8' title="bbox 137 748 669 771">
|
||||
<p class='ocr_par' id='par_1_7' lang='eng' title="bbox 137 748 669 771">
|
||||
<span class='ocr_line' id='line_1_12' title="bbox 137 748 669 771; baseline 0 -5; x_size 23; x_descenders 5; x_ascenders 5">
|
||||
<span class='ocrx_word' id='word_1_35' title='bbox 137 748 300 771; x_wconf 90'>Stellungnahme</span>
|
||||
<span class='ocrx_word' id='word_1_36' title='bbox 308 748 451 771; x_wconf 90'>Archivgesetz;</span>
|
||||
<span class='ocrx_word' id='word_1_37' title='bbox 459 748 669 771; x_wconf 59'>Verbandeanhorung</span>
|
||||
</span>
|
||||
</p>
|
||||
</div>
|
||||
<div class='ocr_carea' id='block_1_9' title="bbox 137 828 460 851">
|
||||
<p class='ocr_par' id='par_1_8' lang='eng' title="bbox 137 828 460 851">
|
||||
<span class='ocr_line' id='line_1_13' title="bbox 137 828 460 851; baseline 0 -5; x_size 21; x_descenders 3; x_ascenders 5">
|
||||
<span class='ocrx_word' id='word_1_38' title='bbox 137 828 179 846; x_wconf 93'>Sehr</span>
|
||||
<span class='ocrx_word' id='word_1_39' title='bbox 187 828 262 851; x_wconf 92'>geehrte</span>
|
||||
<span class='ocrx_word' id='word_1_40' title='bbox 271 829 338 846; x_wconf 96'>Damen</span>
|
||||
<span class='ocrx_word' id='word_1_41' title='bbox 347 828 382 846; x_wconf 96'>und</span>
|
||||
<span class='ocrx_word' id='word_1_42' title='bbox 391 829 460 849; x_wconf 96'>Herren,</span>
|
||||
</span>
|
||||
</p>
|
||||
</div>
|
||||
<div class='ocr_carea' id='block_1_10' title="bbox 136 879 1080 1011">
|
||||
<p class='ocr_par' id='par_1_9' lang='eng' title="bbox 136 879 1080 1011">
|
||||
<span class='ocr_line' id='line_1_14' title="bbox 137 879 1076 904; baseline 0.001 -5; x_size 23; x_descenders 5; x_ascenders 5">
|
||||
<span class='ocrx_word' id='word_1_43' title='bbox 137 881 200 899; x_wconf 96'>besten</span>
|
||||
<span class='ocrx_word' id='word_1_44' title='bbox 209 881 259 899; x_wconf 96'>Dank</span>
|
||||
<span class='ocrx_word' id='word_1_45' title='bbox 266 881 294 899; x_wconf 84'>fiir</span>
|
||||
<span class='ocrx_word' id='word_1_46' title='bbox 302 881 329 899; x_wconf 93'>die</span>
|
||||
<span class='ocrx_word' id='word_1_47' title='bbox 338 881 459 904; x_wconf 93'>Gelegenheit,</span>
|
||||
<span class='ocrx_word' id='word_1_48' title='bbox 467 886 488 899; x_wconf 96'>zu</span>
|
||||
<span class='ocrx_word' id='word_1_49' title='bbox 497 881 531 899; x_wconf 93'>den</span>
|
||||
<span class='ocrx_word' id='word_1_50' title='bbox 540 881 637 904; x_wconf 92'>geplanten</span>
|
||||
<span class='ocrx_word' id='word_1_51' title='bbox 645 879 766 904; x_wconf 93'>Anderungen</span>
|
||||
<span class='ocrx_word' id='word_1_52' title='bbox 775 881 806 899; x_wconf 93'>des</span>
|
||||
<span class='ocrx_word' id='word_1_53' title='bbox 815 881 925 904; x_wconf 92'>Bayerischen</span>
|
||||
<span class='ocrx_word' id='word_1_54' title='bbox 934 881 1076 904; x_wconf 92'>Archivgesetzes</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_15' title="bbox 137 907 1080 931; baseline 0 -5; x_size 23; x_descenders 4; x_ascenders 6">
|
||||
<span class='ocrx_word' id='word_1_55' title='bbox 137 907 214 930; x_wconf 92'>Stellung</span>
|
||||
<span class='ocrx_word' id='word_1_56' title='bbox 222 913 244 926; x_wconf 93'>zu</span>
|
||||
<span class='ocrx_word' id='word_1_57' title='bbox 253 907 344 926; x_wconf 83'>beziehen.</span>
|
||||
<span class='ocrx_word' id='word_1_58' title='bbox 354 909 386 926; x_wconf 96'>Der</span>
|
||||
<span class='ocrx_word' id='word_1_59' title='bbox 395 907 494 930; x_wconf 93'>Bayerische</span>
|
||||
<span class='ocrx_word' id='word_1_60' title='bbox 502 907 626 926; x_wconf 93'>Landesverein</span>
|
||||
<span class='ocrx_word' id='word_1_61' title='bbox 635 907 662 926; x_wconf 90'>ftir</span>
|
||||
<span class='ocrx_word' id='word_1_62' title='bbox 671 907 800 930; x_wconf 92'>Heimatpflege</span>
|
||||
<span class='ocrx_word' id='word_1_63' title='bbox 808 909 842 926; x_wconf 87'>e.V.</span>
|
||||
<span class='ocrx_word' id='word_1_64' title='bbox 852 908 872 926; x_wconf 96'>ist</span>
|
||||
<span class='ocrx_word' id='word_1_65' title='bbox 879 907 933 926; x_wconf 96'>durch</span>
|
||||
<span class='ocrx_word' id='word_1_66' title='bbox 941 908 989 926; x_wconf 93'>seine</span>
|
||||
<span class='ocrx_word' id='word_1_67' title='bbox 997 907 1080 931; x_wconf 91'>Tatigkeit</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_16' title="bbox 137 934 996 957; baseline -0.001 -4; x_size 23; x_descenders 4; x_ascenders 5">
|
||||
<span class='ocrx_word' id='word_1_68' title='bbox 137 934 173 953; x_wconf 96'>und</span>
|
||||
<span class='ocrx_word' id='word_1_69' title='bbox 181 935 230 953; x_wconf 93'>seine</span>
|
||||
<span class='ocrx_word' id='word_1_70' title='bbox 238 934 333 957; x_wconf 92'>Aufgaben</span>
|
||||
<span class='ocrx_word' id='word_1_71' title='bbox 342 934 357 952; x_wconf 96'>in</span>
|
||||
<span class='ocrx_word' id='word_1_72' title='bbox 366 934 397 953; x_wconf 92'>der</span>
|
||||
<span class='ocrx_word' id='word_1_73' title='bbox 405 934 570 957; x_wconf 92'>Heimatforschung</span>
|
||||
<span class='ocrx_word' id='word_1_74' title='bbox 577 934 680 953; x_wconf 45'>tatsachlich</span>
|
||||
<span class='ocrx_word' id='word_1_75' title='bbox 689 934 751 953; x_wconf 95'>immer</span>
|
||||
<span class='ocrx_word' id='word_1_76' title='bbox 757 934 823 953; x_wconf 96'>wieder</span>
|
||||
<span class='ocrx_word' id='word_1_77' title='bbox 830 934 860 953; x_wconf 93'>mit</span>
|
||||
<span class='ocrx_word' id='word_1_78' title='bbox 869 934 955 957; x_wconf 92'>Belangen</span>
|
||||
<span class='ocrx_word' id='word_1_79' title='bbox 965 934 996 953; x_wconf 96'>des</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_17' title="bbox 136 960 1053 984; baseline 0 -5; x_size 23; x_descenders 5; x_ascenders 5">
|
||||
<span class='ocrx_word' id='word_1_80' title='bbox 136 961 280 984; x_wconf 91'>Archivgesetzes</span>
|
||||
<span class='ocrx_word' id='word_1_81' title='bbox 289 960 360 979; x_wconf 93'>befasst.</span>
|
||||
<span class='ocrx_word' id='word_1_82' title='bbox 369 961 403 979; x_wconf 96'>Wir</span>
|
||||
<span class='ocrx_word' id='word_1_83' title='bbox 411 961 468 979; x_wconf 96'>haben</span>
|
||||
<span class='ocrx_word' id='word_1_84' title='bbox 477 961 512 979; x_wconf 93'>den</span>
|
||||
<span class='ocrx_word' id='word_1_85' title='bbox 520 961 635 984; x_wconf 93'>vorgelegten</span>
|
||||
<span class='ocrx_word' id='word_1_86' title='bbox 644 960 787 979; x_wconf 92'>Gesetzentwurf</span>
|
||||
<span class='ocrx_word' id='word_1_87' title='bbox 795 960 865 984; x_wconf 88'>gepriift</span>
|
||||
<span class='ocrx_word' id='word_1_88' title='bbox 874 961 908 979; x_wconf 96'>und</span>
|
||||
<span class='ocrx_word' id='word_1_89' title='bbox 917 961 954 979; x_wconf 96'>sind</span>
|
||||
<span class='ocrx_word' id='word_1_90' title='bbox 963 961 1014 979; x_wconf 95'>dabei</span>
|
||||
<span class='ocrx_word' id='word_1_91' title='bbox 1022 961 1053 979; x_wconf 95'>auf</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_18' title="bbox 136 987 920 1011; baseline 0 -5; x_size 24; x_descenders 5; x_ascenders 6">
|
||||
<span class='ocrx_word' id='word_1_92' title='bbox 136 987 221 1011; x_wconf 92'>geplante</span>
|
||||
<span class='ocrx_word' id='word_1_93' title='bbox 230 989 348 1011; x_wconf 92'>Neuerungen</span>
|
||||
<span class='ocrx_word' id='word_1_94' title='bbox 357 987 450 1011; x_wconf 93'>gestoBen,</span>
|
||||
<span class='ocrx_word' id='word_1_95' title='bbox 460 988 487 1006; x_wconf 96'>die</span>
|
||||
<span class='ocrx_word' id='word_1_96' title='bbox 495 993 526 1006; x_wconf 96'>aus</span>
|
||||
<span class='ocrx_word' id='word_1_97' title='bbox 535 993 607 1006; x_wconf 93'>unserer</span>
|
||||
<span class='ocrx_word' id='word_1_98' title='bbox 614 987 661 1006; x_wconf 93'>Sicht</span>
|
||||
<span class='ocrx_word' id='word_1_99' title='bbox 669 987 767 1006; x_wconf 0'>tberdacht</span>
|
||||
<span class='ocrx_word' id='word_1_100' title='bbox 773 987 845 1006; x_wconf 93'>werden</span>
|
||||
<span class='ocrx_word' id='word_1_101' title='bbox 854 987 920 1006; x_wconf 90'>sollten.</span>
|
||||
</span>
|
||||
</p>
|
||||
</div>
|
||||
<div class='ocr_carea' id='block_1_11' title="bbox 135 1041 1075 1329">
|
||||
<p class='ocr_par' id='par_1_10' lang='eng' title="bbox 135 1041 1075 1329">
|
||||
<span class='ocr_line' id='line_1_19' title="bbox 138 1041 1006 1064; baseline 0 -5; x_size 21; x_descenders 3; x_ascenders 5">
|
||||
<span class='ocrx_word' id='word_1_102' title='bbox 138 1042 152 1059; x_wconf 86'>1.</span>
|
||||
<span class='ocrx_word' id='word_1_103' title='bbox 161 1042 197 1059; x_wconf 96'>Aus</span>
|
||||
<span class='ocrx_word' id='word_1_104' title='bbox 205 1046 278 1059; x_wconf 93'>unserer</span>
|
||||
<span class='ocrx_word' id='word_1_105' title='bbox 286 1041 331 1059; x_wconf 93'>Sicht</span>
|
||||
<span class='ocrx_word' id='word_1_106' title='bbox 339 1041 359 1059; x_wconf 96'>ist</span>
|
||||
<span class='ocrx_word' id='word_1_107' title='bbox 367 1046 386 1059; x_wconf 96'>es</span>
|
||||
<span class='ocrx_word' id='word_1_108' title='bbox 394 1041 440 1059; x_wconf 93'>nicht</span>
|
||||
<span class='ocrx_word' id='word_1_109' title='bbox 448 1041 615 1064; x_wconf 89'>unproblematisch,</span>
|
||||
<span class='ocrx_word' id='word_1_110' title='bbox 624 1041 658 1059; x_wconf 96'>wie</span>
|
||||
<span class='ocrx_word' id='word_1_111' title='bbox 665 1042 700 1059; x_wconf 96'>Art.</span>
|
||||
<span class='ocrx_word' id='word_1_112' title='bbox 709 1042 721 1059; x_wconf 96'>4</span>
|
||||
<span class='ocrx_word' id='word_1_113' title='bbox 727 1041 769 1059; x_wconf 94'>Abs.</span>
|
||||
<span class='ocrx_word' id='word_1_114' title='bbox 779 1042 788 1059; x_wconf 94'>5</span>
|
||||
<span class='ocrx_word' id='word_1_115' title='bbox 797 1041 833 1059; x_wconf 96'>und</span>
|
||||
<span class='ocrx_word' id='word_1_116' title='bbox 841 1042 875 1059; x_wconf 95'>Art.</span>
|
||||
<span class='ocrx_word' id='word_1_117' title='bbox 884 1042 894 1059; x_wconf 95'>5</span>
|
||||
<span class='ocrx_word' id='word_1_118' title='bbox 902 1041 943 1059; x_wconf 95'>Abs.</span>
|
||||
<span class='ocrx_word' id='word_1_119' title='bbox 954 1042 960 1059; x_wconf 95'>1</span>
|
||||
<span class='ocrx_word' id='word_1_120' title='bbox 972 1046 1006 1059; x_wconf 96'>neu</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_20' title="bbox 136 1067 1022 1091; baseline 0 -6; x_size 23; x_descenders 5; x_ascenders 6">
|
||||
<span class='ocrx_word' id='word_1_121' title='bbox 136 1067 204 1091; x_wconf 92'>gefasst</span>
|
||||
<span class='ocrx_word' id='word_1_122' title='bbox 211 1068 290 1085; x_wconf 96'>werden.</span>
|
||||
<span class='ocrx_word' id='word_1_123' title='bbox 300 1069 317 1085; x_wconf 93'>Er</span>
|
||||
<span class='ocrx_word' id='word_1_124' title='bbox 324 1067 373 1085; x_wconf 93'>wirkt</span>
|
||||
<span class='ocrx_word' id='word_1_125' title='bbox 381 1068 514 1090; x_wconf 92'>beispielsweise</span>
|
||||
<span class='ocrx_word' id='word_1_126' title='bbox 523 1067 633 1088; x_wconf 93'>bedenklich,</span>
|
||||
<span class='ocrx_word' id='word_1_127' title='bbox 642 1067 683 1085; x_wconf 96'>dass</span>
|
||||
<span class='ocrx_word' id='word_1_128' title='bbox 691 1067 719 1085; x_wconf 96'>die</span>
|
||||
<span class='ocrx_word' id='word_1_129' title='bbox 728 1069 814 1090; x_wconf 93'>Beratung</span>
|
||||
<span class='ocrx_word' id='word_1_130' title='bbox 823 1067 943 1085; x_wconf 92'>kommunaler</span>
|
||||
<span class='ocrx_word' id='word_1_131' title='bbox 950 1068 1022 1086; x_wconf 96'>Archive</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_21' title="bbox 136 1094 1048 1117; baseline 0 -5; x_size 23; x_descenders 5; x_ascenders 5">
|
||||
<span class='ocrx_word' id='word_1_132' title='bbox 136 1094 190 1112; x_wconf 93'>durch</span>
|
||||
<span class='ocrx_word' id='word_1_133' title='bbox 198 1094 337 1112; x_wconf 91'>ehrenamtliche</span>
|
||||
<span class='ocrx_word' id='word_1_134' title='bbox 344 1094 473 1117; x_wconf 93'>Archivpfleger</span>
|
||||
<span class='ocrx_word' id='word_1_135' title='bbox 481 1094 611 1117; x_wconf 51'>gewahrleistet</span>
|
||||
<span class='ocrx_word' id='word_1_136' title='bbox 618 1094 655 1112; x_wconf 96'>sein</span>
|
||||
<span class='ocrx_word' id='word_1_137' title='bbox 663 1094 700 1115; x_wconf 91'>soll,</span>
|
||||
<span class='ocrx_word' id='word_1_138' title='bbox 708 1094 828 1117; x_wconf 91'>wohingegen</span>
|
||||
<span class='ocrx_word' id='word_1_139' title='bbox 837 1094 926 1112; x_wconf 92'>staatliche</span>
|
||||
<span class='ocrx_word' id='word_1_140' title='bbox 934 1094 1004 1112; x_wconf 96'>Archive</span>
|
||||
<span class='ocrx_word' id='word_1_141' title='bbox 1013 1094 1048 1112; x_wconf 96'>hier</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_22' title="bbox 136 1121 995 1143; baseline 0 -4; x_size 22; x_descenders 4; x_ascenders 5">
|
||||
<span class='ocrx_word' id='word_1_142' title='bbox 136 1126 168 1139; x_wconf 95'>nur</span>
|
||||
<span class='ocrx_word' id='word_1_143' title='bbox 176 1121 197 1139; x_wconf 93'>im</span>
|
||||
<span class='ocrx_word' id='word_1_144' title='bbox 207 1121 286 1139; x_wconf 90'>Rahmen</span>
|
||||
<span class='ocrx_word' id='word_1_145' title='bbox 295 1121 338 1139; x_wconf 93'>ihrer</span>
|
||||
<span class='ocrx_word' id='word_1_146' title='bbox 346 1121 518 1143; x_wconf 90'>Leistungsfahigkeit</span>
|
||||
<span class='ocrx_word' id='word_1_147' title='bbox 525 1121 569 1143; x_wconf 92'>tatig</span>
|
||||
<span class='ocrx_word' id='word_1_148' title='bbox 576 1121 650 1139; x_wconf 93'>werden</span>
|
||||
<span class='ocrx_word' id='word_1_149' title='bbox 658 1121 718 1139; x_wconf 91'>sollen.</span>
|
||||
<span class='ocrx_word' id='word_1_150' title='bbox 728 1121 782 1139; x_wconf 92'>Somit</span>
|
||||
<span class='ocrx_word' id='word_1_151' title='bbox 790 1121 810 1139; x_wconf 96'>ist</span>
|
||||
<span class='ocrx_word' id='word_1_152' title='bbox 817 1121 856 1139; x_wconf 93'>eine</span>
|
||||
<span class='ocrx_word' id='word_1_153' title='bbox 865 1121 995 1143; x_wconf 92'>professionelle</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_23' title="bbox 136 1147 1034 1170; baseline 0 -5; x_size 23; x_descenders 5; x_ascenders 5">
|
||||
<span class='ocrx_word' id='word_1_154' title='bbox 136 1149 237 1170; x_wconf 91'>Betreuung</span>
|
||||
<span class='ocrx_word' id='word_1_155' title='bbox 246 1147 293 1165; x_wconf 93'>nicht</span>
|
||||
<span class='ocrx_word' id='word_1_156' title='bbox 300 1148 399 1170; x_wconf 91'>garantiert.</span>
|
||||
<span class='ocrx_word' id='word_1_157' title='bbox 408 1148 454 1165; x_wconf 93'>Fasst</span>
|
||||
<span class='ocrx_word' id='word_1_158' title='bbox 462 1152 502 1165; x_wconf 96'>man</span>
|
||||
<span class='ocrx_word' id='word_1_159' title='bbox 511 1148 536 1165; x_wconf 96'>ins</span>
|
||||
<span class='ocrx_word' id='word_1_160' title='bbox 543 1148 600 1170; x_wconf 96'>Auge,</span>
|
||||
<span class='ocrx_word' id='word_1_161' title='bbox 609 1147 651 1165; x_wconf 96'>dass</span>
|
||||
<span class='ocrx_word' id='word_1_162' title='bbox 658 1152 688 1165; x_wconf 96'>vor</span>
|
||||
<span class='ocrx_word' id='word_1_163' title='bbox 695 1147 746 1165; x_wconf 96'>allem</span>
|
||||
<span class='ocrx_word' id='word_1_164' title='bbox 754 1147 825 1165; x_wconf 93'>Archive</span>
|
||||
<span class='ocrx_word' id='word_1_165' title='bbox 834 1147 914 1165; x_wconf 92'>kleinerer</span>
|
||||
<span class='ocrx_word' id='word_1_166' title='bbox 923 1148 1034 1165; x_wconf 85'>Kommunen</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_24' title="bbox 135 1174 1002 1197; baseline 0 -5; x_size 23; x_descenders 5; x_ascenders 5">
|
||||
<span class='ocrx_word' id='word_1_167' title='bbox 135 1174 218 1192; x_wconf 92'>allenfalls</span>
|
||||
<span class='ocrx_word' id='word_1_168' title='bbox 227 1174 370 1192; x_wconf 40'>Gberschaubare</span>
|
||||
<span class='ocrx_word' id='word_1_169' title='bbox 378 1174 495 1197; x_wconf 92'>Erfahrungen</span>
|
||||
<span class='ocrx_word' id='word_1_170' title='bbox 504 1175 535 1192; x_wconf 93'>mit</span>
|
||||
<span class='ocrx_word' id='word_1_171' title='bbox 542 1174 621 1197; x_wconf 93'>digitaler</span>
|
||||
<span class='ocrx_word' id='word_1_172' title='bbox 629 1174 828 1197; x_wconf 92'>Langzeitarchivierung</span>
|
||||
<span class='ocrx_word' id='word_1_173' title='bbox 837 1174 913 1192; x_wconf 92'>besitzen</span>
|
||||
<span class='ocrx_word' id='word_1_174' title='bbox 922 1174 958 1192; x_wconf 96'>und</span>
|
||||
<span class='ocrx_word' id='word_1_175' title='bbox 968 1174 1002 1192; x_wconf 96'>hier</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_25' title="bbox 136 1200 1062 1223; baseline 0 -5; x_size 23; x_descenders 5; x_ascenders 6">
|
||||
<span class='ocrx_word' id='word_1_176' title='bbox 136 1200 262 1223; x_wconf 91'>Fachpersonal</span>
|
||||
<span class='ocrx_word' id='word_1_177' title='bbox 270 1200 298 1218; x_wconf 94'>fiir</span>
|
||||
<span class='ocrx_word' id='word_1_178' title='bbox 305 1200 333 1218; x_wconf 93'>die</span>
|
||||
<span class='ocrx_word' id='word_1_179' title='bbox 342 1202 440 1223; x_wconf 92'>Betreuung</span>
|
||||
<span class='ocrx_word' id='word_1_180' title='bbox 449 1200 534 1223; x_wconf 91'>dringend</span>
|
||||
<span class='ocrx_word' id='word_1_181' title='bbox 543 1200 647 1223; x_wconf 92'>notwendig</span>
|
||||
<span class='ocrx_word' id='word_1_182' title='bbox 656 1202 708 1221; x_wconf 96'>ware,</span>
|
||||
<span class='ocrx_word' id='word_1_183' title='bbox 718 1201 738 1218; x_wconf 96'>ist</span>
|
||||
<span class='ocrx_word' id='word_1_184' title='bbox 746 1200 776 1218; x_wconf 93'>der</span>
|
||||
<span class='ocrx_word' id='word_1_185' title='bbox 782 1200 932 1223; x_wconf 92'>vorgeschlagene</span>
|
||||
<span class='ocrx_word' id='word_1_186' title='bbox 940 1202 1062 1218; x_wconf 91'>Gesetzestext</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_26' title="bbox 136 1227 1075 1250; baseline 0 -5; x_size 23; x_descenders 5; x_ascenders 5">
|
||||
<span class='ocrx_word' id='word_1_187' title='bbox 136 1227 183 1245; x_wconf 93'>nicht</span>
|
||||
<span class='ocrx_word' id='word_1_188' title='bbox 191 1227 319 1250; x_wconf 92'>befriedigend.</span>
|
||||
<span class='ocrx_word' id='word_1_189' title='bbox 329 1228 371 1245; x_wconf 93'>Dem</span>
|
||||
<span class='ocrx_word' id='word_1_190' title='bbox 381 1227 473 1245; x_wconf 92'>Ehrenamt</span>
|
||||
<span class='ocrx_word' id='word_1_191' title='bbox 479 1227 521 1245; x_wconf 96'>wird</span>
|
||||
<span class='ocrx_word' id='word_1_192' title='bbox 530 1227 566 1245; x_wconf 96'>hier</span>
|
||||
<span class='ocrx_word' id='word_1_193' title='bbox 573 1233 594 1245; x_wconf 96'>zu</span>
|
||||
<span class='ocrx_word' id='word_1_194' title='bbox 602 1227 633 1245; x_wconf 93'>viel</span>
|
||||
<span class='ocrx_word' id='word_1_195' title='bbox 641 1228 788 1250; x_wconf 92'>Verantwortung</span>
|
||||
<span class='ocrx_word' id='word_1_196' title='bbox 797 1229 905 1250; x_wconf 91'>zugemutet.</span>
|
||||
<span class='ocrx_word' id='word_1_197' title='bbox 914 1227 979 1245; x_wconf 92'>Zudem</span>
|
||||
<span class='ocrx_word' id='word_1_198' title='bbox 989 1230 1045 1245; x_wconf 91'>mutet</span>
|
||||
<span class='ocrx_word' id='word_1_199' title='bbox 1054 1228 1075 1245; x_wconf 96'>im</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_27' title="bbox 136 1253 1026 1276; baseline 0.001 -5; x_size 23; x_descenders 5; x_ascenders 5">
|
||||
<span class='ocrx_word' id='word_1_200' title='bbox 136 1253 213 1272; x_wconf 92'>Entwurf</span>
|
||||
<span class='ocrx_word' id='word_1_201' title='bbox 219 1255 255 1272; x_wconf 95'>Art.</span>
|
||||
<span class='ocrx_word' id='word_1_202' title='bbox 264 1255 276 1271; x_wconf 95'>4</span>
|
||||
<span class='ocrx_word' id='word_1_203' title='bbox 283 1254 324 1272; x_wconf 94'>Abs.</span>
|
||||
<span class='ocrx_word' id='word_1_204' title='bbox 334 1255 344 1272; x_wconf 94'>5</span>
|
||||
<span class='ocrx_word' id='word_1_205' title='bbox 353 1254 391 1272; x_wconf 96'>Satz</span>
|
||||
<span class='ocrx_word' id='word_1_206' title='bbox 399 1255 410 1271; x_wconf 96'>4</span>
|
||||
<span class='ocrx_word' id='word_1_207' title='bbox 417 1253 457 1274; x_wconf 95'>(wie</span>
|
||||
<span class='ocrx_word' id='word_1_208' title='bbox 463 1254 479 1276; x_wconf 96'>ja</span>
|
||||
<span class='ocrx_word' id='word_1_209' title='bbox 488 1254 532 1272; x_wconf 96'>auch</span>
|
||||
<span class='ocrx_word' id='word_1_210' title='bbox 542 1254 557 1271; x_wconf 93'>in</span>
|
||||
<span class='ocrx_word' id='word_1_211' title='bbox 565 1254 655 1272; x_wconf 81'>ahnlicher</span>
|
||||
<span class='ocrx_word' id='word_1_212' title='bbox 661 1254 719 1272; x_wconf 96'>Weise</span>
|
||||
<span class='ocrx_word' id='word_1_213' title='bbox 727 1255 761 1272; x_wconf 96'>Art.</span>
|
||||
<span class='ocrx_word' id='word_1_214' title='bbox 770 1255 782 1271; x_wconf 96'>4</span>
|
||||
<span class='ocrx_word' id='word_1_215' title='bbox 788 1254 829 1272; x_wconf 96'>Abs.</span>
|
||||
<span class='ocrx_word' id='word_1_216' title='bbox 840 1255 849 1271; x_wconf 96'>5</span>
|
||||
<span class='ocrx_word' id='word_1_217' title='bbox 858 1254 897 1272; x_wconf 96'>Satz</span>
|
||||
<span class='ocrx_word' id='word_1_218' title='bbox 904 1254 914 1272; x_wconf 96'>3</span>
|
||||
<span class='ocrx_word' id='word_1_219' title='bbox 923 1254 954 1272; x_wconf 93'>der</span>
|
||||
<span class='ocrx_word' id='word_1_220' title='bbox 962 1254 1026 1272; x_wconf 92'>derzeit</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_28' title="bbox 136 1280 1065 1303; baseline 0 -5; x_size 23; x_descenders 5; x_ascenders 5">
|
||||
<span class='ocrx_word' id='word_1_221' title='bbox 136 1280 182 1298; x_wconf 93'>noch</span>
|
||||
<span class='ocrx_word' id='word_1_222' title='bbox 190 1280 288 1303; x_wconf 92'>geltenden</span>
|
||||
<span class='ocrx_word' id='word_1_223' title='bbox 297 1280 378 1303; x_wconf 93'>Fassung)</span>
|
||||
<span class='ocrx_word' id='word_1_224' title='bbox 386 1283 441 1298; x_wconf 96'>etwas</span>
|
||||
<span class='ocrx_word' id='word_1_225' title='bbox 450 1280 550 1298; x_wconf 96'>redundant</span>
|
||||
<span class='ocrx_word' id='word_1_226' title='bbox 557 1285 586 1301; x_wconf 96'>an,</span>
|
||||
<span class='ocrx_word' id='word_1_227' title='bbox 595 1280 617 1298; x_wconf 93'>da</span>
|
||||
<span class='ocrx_word' id='word_1_228' title='bbox 626 1280 738 1298; x_wconf 75'>6ffentliches</span>
|
||||
<span class='ocrx_word' id='word_1_229' title='bbox 747 1281 830 1298; x_wconf 91'>Interesse</span>
|
||||
<span class='ocrx_word' id='word_1_230' title='bbox 838 1280 914 1298; x_wconf 92'>ohnehin</span>
|
||||
<span class='ocrx_word' id='word_1_231' title='bbox 922 1281 1065 1303; x_wconf 92'>Voraussetzung</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_29' title="bbox 135 1307 566 1329; baseline 0 -4; x_size 22; x_descenders 4; x_ascenders 5">
|
||||
<span class='ocrx_word' id='word_1_232' title='bbox 135 1307 163 1325; x_wconf 93'>fiir</span>
|
||||
<span class='ocrx_word' id='word_1_233' title='bbox 170 1307 275 1329; x_wconf 93'>Leistungen</span>
|
||||
<span class='ocrx_word' id='word_1_234' title='bbox 283 1307 381 1325; x_wconf 93'>staatlicher</span>
|
||||
<span class='ocrx_word' id='word_1_235' title='bbox 387 1307 459 1325; x_wconf 96'>Archive</span>
|
||||
<span class='ocrx_word' id='word_1_236' title='bbox 467 1307 502 1325; x_wconf 96'>sein</span>
|
||||
<span class='ocrx_word' id='word_1_237' title='bbox 511 1307 566 1325; x_wconf 96'>sollte.</span>
|
||||
</span>
|
||||
</p>
|
||||
</div>
|
||||
<div class='ocr_carea' id='block_1_12' title="bbox 134 1358 1078 1622">
|
||||
<p class='ocr_par' id='par_1_11' lang='eng' title="bbox 135 1358 1078 1516">
|
||||
<span class='ocr_line' id='line_1_30' title="bbox 135 1358 1047 1383; baseline 0 -5; x_size 23; x_descenders 5; x_ascenders 5">
|
||||
<span class='ocrx_word' id='word_1_238' title='bbox 135 1361 152 1378; x_wconf 93'>2.</span>
|
||||
<span class='ocrx_word' id='word_1_239' title='bbox 162 1361 238 1378; x_wconf 93'>Heimat-</span>
|
||||
<span class='ocrx_word' id='word_1_240' title='bbox 246 1360 282 1378; x_wconf 92'>und</span>
|
||||
<span class='ocrx_word' id='word_1_241' title='bbox 291 1360 466 1383; x_wconf 92'>Familienforschung</span>
|
||||
<span class='ocrx_word' id='word_1_242' title='bbox 475 1361 495 1378; x_wconf 96'>ist</span>
|
||||
<span class='ocrx_word' id='word_1_243' title='bbox 502 1360 533 1378; x_wconf 95'>auf</span>
|
||||
<span class='ocrx_word' id='word_1_244' title='bbox 540 1360 568 1378; x_wconf 93'>die</span>
|
||||
<span class='ocrx_word' id='word_1_245' title='bbox 577 1358 710 1383; x_wconf 92'>Uberlieferung</span>
|
||||
<span class='ocrx_word' id='word_1_246' title='bbox 716 1360 765 1383; x_wconf 96'>jeder</span>
|
||||
<span class='ocrx_word' id='word_1_247' title='bbox 772 1361 802 1378; x_wconf 96'>Art</span>
|
||||
<span class='ocrx_word' id='word_1_248' title='bbox 809 1365 842 1378; x_wconf 93'>von</span>
|
||||
<span class='ocrx_word' id='word_1_249' title='bbox 851 1360 1047 1383; x_wconf 92'>personenbezogenen</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_31' title="bbox 136 1386 1078 1409; baseline 0 -5; x_size 23; x_descenders 5; x_ascenders 6">
|
||||
<span class='ocrx_word' id='word_1_250' title='bbox 136 1388 191 1404; x_wconf 96'>Daten</span>
|
||||
<span class='ocrx_word' id='word_1_251' title='bbox 201 1387 216 1404; x_wconf 93'>in</span>
|
||||
<span class='ocrx_word' id='word_1_252' title='bbox 224 1386 319 1409; x_wconf 92'>Archivgut</span>
|
||||
<span class='ocrx_word' id='word_1_253' title='bbox 326 1387 444 1409; x_wconf 92'>angewiesen.</span>
|
||||
<span class='ocrx_word' id='word_1_254' title='bbox 455 1388 486 1404; x_wconf 95'>Um</span>
|
||||
<span class='ocrx_word' id='word_1_255' title='bbox 495 1386 531 1404; x_wconf 95'>dies</span>
|
||||
<span class='ocrx_word' id='word_1_256' title='bbox 539 1392 560 1404; x_wconf 93'>zu</span>
|
||||
<span class='ocrx_word' id='word_1_257' title='bbox 569 1386 710 1409; x_wconf 73'>gewahrleisten,</span>
|
||||
<span class='ocrx_word' id='word_1_258' title='bbox 719 1388 790 1404; x_wconf 81'>mtissen</span>
|
||||
<span class='ocrx_word' id='word_1_259' title='bbox 799 1386 826 1404; x_wconf 93'>die</span>
|
||||
<span class='ocrx_word' id='word_1_260' title='bbox 835 1386 964 1409; x_wconf 90'>einschlagigen</span>
|
||||
<span class='ocrx_word' id='word_1_261' title='bbox 973 1386 1078 1409; x_wconf 92'>Unterlagen</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_32' title="bbox 135 1411 1050 1436; baseline 0 -5; x_size 23; x_descenders 5; x_ascenders 5">
|
||||
<span class='ocrx_word' id='word_1_262' title='bbox 135 1413 170 1431; x_wconf 93'>den</span>
|
||||
<span class='ocrx_word' id='word_1_263' title='bbox 178 1413 263 1431; x_wconf 92'>Archiven</span>
|
||||
<span class='ocrx_word' id='word_1_264' title='bbox 271 1413 373 1436; x_wconf 88'>vollstandig</span>
|
||||
<span class='ocrx_word' id='word_1_265' title='bbox 381 1418 411 1431; x_wconf 93'>zur</span>
|
||||
<span class='ocrx_word' id='word_1_266' title='bbox 419 1411 531 1431; x_wconf 92'>Ubernahme</span>
|
||||
<span class='ocrx_word' id='word_1_267' title='bbox 539 1413 644 1436; x_wconf 92'>angeboten</span>
|
||||
<span class='ocrx_word' id='word_1_268' title='bbox 651 1413 730 1431; x_wconf 96'>werden.</span>
|
||||
<span class='ocrx_word' id='word_1_269' title='bbox 739 1415 775 1431; x_wconf 96'>Aus</span>
|
||||
<span class='ocrx_word' id='word_1_270' title='bbox 783 1418 853 1431; x_wconf 93'>unserer</span>
|
||||
<span class='ocrx_word' id='word_1_271' title='bbox 861 1413 907 1431; x_wconf 93'>Sicht</span>
|
||||
<span class='ocrx_word' id='word_1_272' title='bbox 914 1413 968 1431; x_wconf 96'>bleibt</span>
|
||||
<span class='ocrx_word' id='word_1_273' title='bbox 975 1413 1010 1431; x_wconf 96'>hier</span>
|
||||
<span class='ocrx_word' id='word_1_274' title='bbox 1018 1413 1050 1431; x_wconf 96'>das</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_33' title="bbox 135 1437 1015 1463; baseline 0 -5; x_size 22; x_descenders 4; x_ascenders 5">
|
||||
<span class='ocrx_word' id='word_1_275' title='bbox 135 1440 236 1463; x_wconf 92'>bayerische</span>
|
||||
<span class='ocrx_word' id='word_1_276' title='bbox 243 1437 412 1462; x_wconf 92'>Anderungsgesetz</span>
|
||||
<span class='ocrx_word' id='word_1_277' title='bbox 420 1440 476 1458; x_wconf 93'>hinter</span>
|
||||
<span class='ocrx_word' id='word_1_278' title='bbox 483 1440 517 1458; x_wconf 93'>den</span>
|
||||
<span class='ocrx_word' id='word_1_279' title='bbox 527 1440 641 1462; x_wconf 92'>Regelungen</span>
|
||||
<span class='ocrx_word' id='word_1_280' title='bbox 649 1440 719 1460; x_wconf 39'>zurtck,</span>
|
||||
<span class='ocrx_word' id='word_1_281' title='bbox 728 1440 755 1458; x_wconf 92'>die</span>
|
||||
<span class='ocrx_word' id='word_1_282' title='bbox 764 1440 779 1457; x_wconf 92'>in</span>
|
||||
<span class='ocrx_word' id='word_1_283' title='bbox 787 1440 865 1458; x_wconf 93'>anderen</span>
|
||||
<span class='ocrx_word' id='word_1_284' title='bbox 874 1440 1015 1458; x_wconf 82'>Bundeslandern</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_34' title="bbox 135 1466 1066 1489; baseline 0 -5; x_size 23; x_descenders 5; x_ascenders 6">
|
||||
<span class='ocrx_word' id='word_1_285' title='bbox 135 1466 198 1484; x_wconf 93'>bereits</span>
|
||||
<span class='ocrx_word' id='word_1_286' title='bbox 206 1466 357 1489; x_wconf 92'>archivgesetzlich</span>
|
||||
<span class='ocrx_word' id='word_1_287' title='bbox 367 1467 449 1484; x_wconf 92'>normiert</span>
|
||||
<span class='ocrx_word' id='word_1_288' title='bbox 456 1466 534 1484; x_wconf 93'>wurden.</span>
|
||||
<span class='ocrx_word' id='word_1_289' title='bbox 543 1467 643 1484; x_wconf 92'>Verwiesen</span>
|
||||
<span class='ocrx_word' id='word_1_290' title='bbox 652 1467 676 1484; x_wconf 96'>sei</span>
|
||||
<span class='ocrx_word' id='word_1_291' title='bbox 684 1466 736 1484; x_wconf 93'>dabei</span>
|
||||
<span class='ocrx_word' id='word_1_292' title='bbox 745 1466 871 1484; x_wconf 92'>insbesondere</span>
|
||||
<span class='ocrx_word' id='word_1_293' title='bbox 878 1466 909 1484; x_wconf 97'>auf</span>
|
||||
<span class='ocrx_word' id='word_1_294' title='bbox 916 1466 947 1484; x_wconf 93'>das</span>
|
||||
<span class='ocrx_word' id='word_1_295' title='bbox 955 1466 1023 1484; x_wconf 93'>soeben</span>
|
||||
<span class='ocrx_word' id='word_1_296' title='bbox 1032 1472 1066 1485; x_wconf 96'>neu</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_35' title="bbox 135 1493 672 1516; baseline 0 -5; x_size 23; x_descenders 5; x_ascenders 5">
|
||||
<span class='ocrx_word' id='word_1_297' title='bbox 135 1493 214 1516; x_wconf 92'>gefasste</span>
|
||||
<span class='ocrx_word' id='word_1_298' title='bbox 221 1493 345 1516; x_wconf 92'>Archivgesetz</span>
|
||||
<span class='ocrx_word' id='word_1_299' title='bbox 353 1493 384 1511; x_wconf 96'>des</span>
|
||||
<span class='ocrx_word' id='word_1_300' title='bbox 392 1493 457 1511; x_wconf 93'>Landes</span>
|
||||
<span class='ocrx_word' id='word_1_301' title='bbox 466 1493 672 1516; x_wconf 26'>Baden-Wiurttemberg!</span>
|
||||
</span>
|
||||
</p>
|
||||
|
||||
<p class='ocr_par' id='par_1_12' lang='eng' title="bbox 134 1517 1045 1622">
|
||||
<span class='ocr_line' id='line_1_36' title="bbox 135 1517 1014 1543; baseline 0 -5; x_size 25; x_descenders 6; x_ascenders 5">
|
||||
<span class='ocrx_word' id='word_1_302' title='bbox 135 1521 167 1538; x_wconf 94'>Um</span>
|
||||
<span class='ocrx_word' id='word_1_303' title='bbox 175 1520 215 1538; x_wconf 93'>eine</span>
|
||||
<span class='ocrx_word' id='word_1_304' title='bbox 222 1522 345 1542; x_wconf 90'>transparente</span>
|
||||
<span class='ocrx_word' id='word_1_305' title='bbox 354 1519 389 1538; x_wconf 96'>und</span>
|
||||
<span class='ocrx_word' id='word_1_306' title='bbox 398 1519 429 1538; x_wconf 91'>auf</span>
|
||||
<span class='ocrx_word' id='word_1_307' title='bbox 435 1520 558 1543; x_wconf 90'>vollstandiger</span>
|
||||
<span class='ocrx_word' id='word_1_308' title='bbox 566 1519 788 1543; x_wconf 88'>Informationsgrundlage</span>
|
||||
<span class='ocrx_word' id='word_1_309' title='bbox 795 1519 875 1538; x_wconf 91'>fuBende</span>
|
||||
<span class='ocrx_word' id='word_1_310' title='bbox 884 1517 1014 1543; x_wconf 92'>Uberlieferung</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_37' title="bbox 135 1546 1045 1569; baseline 0 -5; x_size 23; x_descenders 5; x_ascenders 5">
|
||||
<span class='ocrx_word' id='word_1_311' title='bbox 135 1546 329 1569; x_wconf 92'>personenbezogener</span>
|
||||
<span class='ocrx_word' id='word_1_312' title='bbox 337 1546 372 1564; x_wconf 93'>und</span>
|
||||
<span class='ocrx_word' id='word_1_313' title='bbox 380 1546 646 1569; x_wconf 36'>geheimhaltungsbedirftiger</span>
|
||||
<span class='ocrx_word' id='word_1_314' title='bbox 654 1546 759 1569; x_wconf 91'>Unterlagen</span>
|
||||
<span class='ocrx_word' id='word_1_315' title='bbox 768 1546 799 1564; x_wconf 93'>der</span>
|
||||
<span class='ocrx_word' id='word_1_316' title='bbox 806 1546 919 1565; x_wconf 61'>éffentlichen</span>
|
||||
<span class='ocrx_word' id='word_1_317' title='bbox 929 1546 978 1565; x_wconf 96'>Hand</span>
|
||||
<span class='ocrx_word' id='word_1_318' title='bbox 987 1547 1002 1564; x_wconf 95'>in</span>
|
||||
<span class='ocrx_word' id='word_1_319' title='bbox 1011 1546 1045 1565; x_wconf 96'>den</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_38' title="bbox 134 1572 1040 1596; baseline 0 -5; x_size 22; x_descenders 3; x_ascenders 6">
|
||||
<span class='ocrx_word' id='word_1_320' title='bbox 134 1573 237 1591; x_wconf 91'>staatlichen</span>
|
||||
<span class='ocrx_word' id='word_1_321' title='bbox 246 1572 330 1591; x_wconf 92'>Archiven</span>
|
||||
<span class='ocrx_word' id='word_1_322' title='bbox 338 1578 359 1591; x_wconf 93'>zu</span>
|
||||
<span class='ocrx_word' id='word_1_323' title='bbox 368 1572 506 1596; x_wconf 82'>gewahrleisten,</span>
|
||||
<span class='ocrx_word' id='word_1_324' title='bbox 516 1573 536 1591; x_wconf 96'>ist</span>
|
||||
<span class='ocrx_word' id='word_1_325' title='bbox 543 1578 563 1591; x_wconf 96'>es</span>
|
||||
<span class='ocrx_word' id='word_1_326' title='bbox 570 1578 602 1591; x_wconf 96'>aus</span>
|
||||
<span class='ocrx_word' id='word_1_327' title='bbox 611 1578 664 1591; x_wconf 93'>unser</span>
|
||||
<span class='ocrx_word' id='word_1_328' title='bbox 671 1572 718 1591; x_wconf 92'>Sicht</span>
|
||||
<span class='ocrx_word' id='word_1_329' title='bbox 725 1572 840 1594; x_wconf 51'>unerlasslich,</span>
|
||||
<span class='ocrx_word' id='word_1_330' title='bbox 849 1573 907 1591; x_wconf 92'>neben</span>
|
||||
<span class='ocrx_word' id='word_1_331' title='bbox 916 1572 946 1591; x_wconf 95'>der</span>
|
||||
<span class='ocrx_word' id='word_1_332' title='bbox 953 1572 1016 1591; x_wconf 95'>aktuell</span>
|
||||
<span class='ocrx_word' id='word_1_333' title='bbox 1025 1573 1040 1591; x_wconf 96'>in</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_39' title="bbox 134 1599 1038 1622; baseline 0 -4; x_size 23; x_descenders 4; x_ascenders 6">
|
||||
<span class='ocrx_word' id='word_1_334' title='bbox 134 1601 168 1618; x_wconf 96'>Art.</span>
|
||||
<span class='ocrx_word' id='word_1_335' title='bbox 178 1601 189 1618; x_wconf 96'>6</span>
|
||||
<span class='ocrx_word' id='word_1_336' title='bbox 196 1599 238 1618; x_wconf 69'>Abs.</span>
|
||||
<span class='ocrx_word' id='word_1_337' title='bbox 249 1601 255 1618; x_wconf 96'>1</span>
|
||||
<span class='ocrx_word' id='word_1_338' title='bbox 267 1600 306 1618; x_wconf 96'>Satz</span>
|
||||
<span class='ocrx_word' id='word_1_339' title='bbox 314 1600 324 1618; x_wconf 96'>3</span>
|
||||
<span class='ocrx_word' id='word_1_340' title='bbox 334 1601 359 1617; x_wconf 95'>Nr.</span>
|
||||
<span class='ocrx_word' id='word_1_341' title='bbox 371 1601 376 1618; x_wconf 92'>1</span>
|
||||
<span class='ocrx_word' id='word_1_342' title='bbox 387 1599 409 1620; x_wconf 92'>(in</span>
|
||||
<span class='ocrx_word' id='word_1_343' title='bbox 418 1599 449 1618; x_wconf 93'>der</span>
|
||||
<span class='ocrx_word' id='word_1_344' title='bbox 457 1599 575 1622; x_wconf 49'>Begrtindung</span>
|
||||
<span class='ocrx_word' id='word_1_345' title='bbox 584 1601 610 1618; x_wconf 86'>Nr.</span>
|
||||
<span class='ocrx_word' id='word_1_346' title='bbox 620 1601 631 1618; x_wconf 96'>5</span>
|
||||
<span class='ocrx_word' id='word_1_347' title='bbox 638 1599 656 1621; x_wconf 96'>a)</span>
|
||||
<span class='ocrx_word' id='word_1_348' title='bbox 663 1599 695 1618; x_wconf 93'>des</span>
|
||||
<span class='ocrx_word' id='word_1_349' title='bbox 704 1599 788 1618; x_wconf 92'>Entwurfs</span>
|
||||
<span class='ocrx_word' id='word_1_350' title='bbox 795 1599 929 1622; x_wconf 92'>vorgesehenen</span>
|
||||
<span class='ocrx_word' id='word_1_351' title='bbox 937 1599 1038 1622; x_wconf 90'>Anbietung</span>
|
||||
</span>
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
72
crates/kreuzberg/test_data/hocr/invoice_image_default.hocr
Normal file
72
crates/kreuzberg/test_data/hocr/invoice_image_default.hocr
Normal file
@@ -0,0 +1,72 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
||||
<head>
|
||||
<title></title>
|
||||
<meta http-equiv="Content-Type" content="text/html;charset=utf-8"/>
|
||||
<meta name='ocr-system' content='tesseract 5.5.1' />
|
||||
<meta name='ocr-capabilities' content='ocr_page ocr_carea ocr_par ocr_line ocrx_word ocrp_dir ocrp_lang ocrp_wconf'/>
|
||||
</head>
|
||||
<body>
|
||||
<div class='ocr_page' id='page_1' title='image "/var/folders/f8/_s_ks96d60x_6g__y7vft2wc0000gn/T/tmp80a1e_jo.png"; bbox 0 0 800 1000; ppageno 0; scan_res 70 70'>
|
||||
<div class='ocr_carea' id='block_1_1' title="bbox 51 52 90 60">
|
||||
<p class='ocr_par' id='par_1_1' lang='eng' title="bbox 51 52 90 60">
|
||||
<span class='ocr_line' id='line_1_1' title="bbox 51 52 90 60; baseline 0 0; x_size 20; x_descenders 5; x_ascenders 5">
|
||||
<span class='ocrx_word' id='word_1_1' title='bbox 51 52 90 60; x_wconf 49'>INVOICE</span>
|
||||
</span>
|
||||
</p>
|
||||
</div>
|
||||
<div class='ocr_carea' id='block_1_2' title="bbox 51 152 79 160">
|
||||
<p class='ocr_par' id='par_1_2' lang='eng' title="bbox 51 152 79 160">
|
||||
<span class='ocr_line' id='line_1_2' title="bbox 51 152 79 160; baseline 0 0; x_size 20; x_descenders 5; x_ascenders 5">
|
||||
<span class='ocrx_word' id='word_1_2' title='bbox 51 152 79 160; x_wconf 26'>Billa</span>
|
||||
</span>
|
||||
</p>
|
||||
</div>
|
||||
<div class='ocr_carea' id='block_1_3' title="bbox 51 202 94 210">
|
||||
<p class='ocr_par' id='par_1_3' lang='eng' title="bbox 51 202 94 210">
|
||||
<span class='ocr_line' id='line_1_3' title="bbox 51 202 94 210; baseline 0 0; x_size 20; x_descenders 5; x_ascenders 5">
|
||||
<span class='ocrx_word' id='word_1_3' title='bbox 51 202 73 210; x_wconf 30'>Jahn</span>
|
||||
<span class='ocrx_word' id='word_1_4' title='bbox 79 191 99 219; x_wconf 66'>Doe</span>
|
||||
</span>
|
||||
</p>
|
||||
</div>
|
||||
<div class='ocr_carea' id='block_1_4' title="bbox 51 252 105 260">
|
||||
<p class='ocr_par' id='par_1_4' lang='eng' title="bbox 51 252 105 260">
|
||||
<span class='ocr_line' id='line_1_4' title="bbox 51 252 105 260; baseline 0 0; x_size 20; x_descenders 5; x_ascenders 5">
|
||||
<span class='ocrx_word' id='word_1_5' title='bbox 51 252 67 260; x_wconf 88'>123</span>
|
||||
<span class='ocrx_word' id='word_1_6' title='bbox 71 252 93 260; x_wconf 91'>Main</span>
|
||||
<span class='ocrx_word' id='word_1_7' title='bbox 97 252 105 260; x_wconf 96'>st</span>
|
||||
</span>
|
||||
</p>
|
||||
</div>
|
||||
<div class='ocr_carea' id='block_1_5' title="bbox 50 302 145 312">
|
||||
<p class='ocr_par' id='par_1_5' lang='eng' title="bbox 50 302 145 312">
|
||||
<span class='ocr_line' id='line_1_5' title="bbox 50 302 145 312; baseline 0 -2; x_size 20; x_descenders 5; x_ascenders 5">
|
||||
<span class='ocrx_word' id='word_1_8' title='bbox 50 302 92 312; x_wconf 71'>Anytown,</span>
|
||||
<span class='ocrx_word' id='word_1_9' title='bbox 97 292 114 320; x_wconf 73'>USA</span>
|
||||
<span class='ocrx_word' id='word_1_10' title='bbox 117 292 145 320; x_wconf 57'>12345</span>
|
||||
</span>
|
||||
</p>
|
||||
</div>
|
||||
<div class='ocr_carea' id='block_1_6' title="bbox 401 152 463 160">
|
||||
<p class='ocr_par' id='par_1_6' lang='eng' title="bbox 401 152 463 160">
|
||||
<span class='ocr_line' id='line_1_6' title="bbox 401 152 463 160; baseline 0 0; x_size 20; x_descenders 5; x_ascenders 5">
|
||||
<span class='ocrx_word' id='word_1_11' title='bbox 401 152 433 160; x_wconf 93'>Invoice</span>
|
||||
<span class='ocrx_word' id='word_1_12' title='bbox 439 152 441 160; x_wconf 76'>#</span>
|
||||
<span class='ocrx_word' id='word_1_13' title='bbox 447 152 463 160; x_wconf 59'>123</span>
|
||||
</span>
|
||||
</p>
|
||||
</div>
|
||||
<div class='ocr_carea' id='block_1_7' title="bbox 401 202 478 210">
|
||||
<p class='ocr_par' id='par_1_7' lang='eng' title="bbox 401 202 478 210">
|
||||
<span class='ocr_line' id='line_1_7' title="bbox 401 202 478 210; baseline 0 0; x_size 20; x_descenders 5; x_ascenders 5">
|
||||
<span class='ocrx_word' id='word_1_14' title='bbox 401 202 420 210; x_wconf 87'>Date:</span>
|
||||
<span class='ocrx_word' id='word_1_15' title='bbox 426 202 478 210; x_wconf 87'>2025-07-10</span>
|
||||
</span>
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
12
crates/kreuzberg/test_data/hocr/utf8_encoding.hocr
Normal file
12
crates/kreuzberg/test_data/hocr/utf8_encoding.hocr
Normal file
@@ -0,0 +1,12 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
||||
<head>
|
||||
<title>utf-8 + correct declaraction</title>
|
||||
<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
|
||||
</head>
|
||||
<body>
|
||||
<p>fööbär</p>
|
||||
</body>
|
||||
</html>
|
||||
541
crates/kreuzberg/test_data/hocr/v4_code_formula.hocr
Normal file
541
crates/kreuzberg/test_data/hocr/v4_code_formula.hocr
Normal file
@@ -0,0 +1,541 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
||||
<head>
|
||||
<title></title>
|
||||
<meta http-equiv="Content-Type" content="text/html;charset=utf-8"/>
|
||||
<meta name='ocr-system' content='tesseract 5.5.1' />
|
||||
<meta name='ocr-capabilities' content='ocr_page ocr_carea ocr_par ocr_line ocrx_word ocrp_dir ocrp_lang ocrp_wconf'/>
|
||||
</head>
|
||||
<body>
|
||||
<div class='ocr_page' id='page_1' title='image "/var/folders/88/hg20130j1lv82jbjnvw0yq5m0000gn/T/tess_npffyz68_input.PNG"; bbox 0 0 2550 3301; ppageno 0; scan_res 70 70'>
|
||||
<div class='ocr_carea' id='block_1_1' title="bbox 559 520 1314 573">
|
||||
<p class='ocr_par' id='par_1_1' lang='eng' title="bbox 559 520 1314 573">
|
||||
<span class='ocr_line' id='line_1_1' title="bbox 559 520 1314 573; baseline 0 -11; x_size 53; x_descenders 11; x_ascenders 15">
|
||||
<span class='ocrx_word' id='word_1_1' title='bbox 559 520 860 573; x_wconf 95'>JavaScript</span>
|
||||
<span class='ocrx_word' id='word_1_2' title='bbox 890 520 1037 562; x_wconf 95'>Code</span>
|
||||
<span class='ocrx_word' id='word_1_3' title='bbox 1063 520 1314 573; x_wconf 95'>Example</span>
|
||||
</span>
|
||||
</p>
|
||||
</div>
|
||||
<div class='ocr_carea' id='block_1_2' title="bbox 558 624 1988 1409">
|
||||
<p class='ocr_par' id='par_1_2' lang='eng' title="bbox 558 624 1988 1209">
|
||||
<span class='ocr_line' id='line_1_2' title="bbox 559 624 1986 662; baseline 0 -8; x_size 37; x_descenders 7; x_ascenders 10">
|
||||
<span class='ocrx_word' id='word_1_4' title='bbox 559 625 673 654; x_wconf 96'>Lorem</span>
|
||||
<span class='ocrx_word' id='word_1_5' title='bbox 689 625 796 661; x_wconf 96'>ipsum</span>
|
||||
<span class='ocrx_word' id='word_1_6' title='bbox 812 624 902 654; x_wconf 96'>dolor</span>
|
||||
<span class='ocrx_word' id='word_1_7' title='bbox 919 625 959 654; x_wconf 96'>sit</span>
|
||||
<span class='ocrx_word' id='word_1_8' title='bbox 978 627 1074 661; x_wconf 96'>amet,</span>
|
||||
<span class='ocrx_word' id='word_1_9' title='bbox 1093 627 1278 654; x_wconf 96'>consetetur</span>
|
||||
<span class='ocrx_word' id='word_1_10' title='bbox 1295 624 1478 662; x_wconf 96'>sadipscing</span>
|
||||
<span class='ocrx_word' id='word_1_11' title='bbox 1494 624 1575 661; x_wconf 96'>elitr,</span>
|
||||
<span class='ocrx_word' id='word_1_12' title='bbox 1594 624 1649 654; x_wconf 96'>sed</span>
|
||||
<span class='ocrx_word' id='word_1_13' title='bbox 1666 624 1754 654; x_wconf 95'>diam</span>
|
||||
<span class='ocrx_word' id='word_1_14' title='bbox 1771 634 1913 662; x_wconf 93'>nonumy</span>
|
||||
<span class='ocrx_word' id='word_1_15' title='bbox 1929 625 1986 654; x_wconf 92'>eir-</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_3' title="bbox 559 673 1986 711; baseline 0 -8; x_size 38; x_descenders 8; x_ascenders 10">
|
||||
<span class='ocrx_word' id='word_1_16' title='bbox 559 673 636 703; x_wconf 96'>mod</span>
|
||||
<span class='ocrx_word' id='word_1_17' title='bbox 650 676 778 710; x_wconf 96'>tempor</span>
|
||||
<span class='ocrx_word' id='word_1_18' title='bbox 793 673 940 703; x_wconf 96'>invidunt</span>
|
||||
<span class='ocrx_word' id='word_1_19' title='bbox 956 677 991 703; x_wconf 95'>ut</span>
|
||||
<span class='ocrx_word' id='word_1_20' title='bbox 1007 673 1116 703; x_wconf 96'>labore</span>
|
||||
<span class='ocrx_word' id='word_1_21' title='bbox 1130 676 1162 703; x_wconf 96'>et</span>
|
||||
<span class='ocrx_word' id='word_1_22' title='bbox 1177 673 1286 703; x_wconf 96'>dolore</span>
|
||||
<span class='ocrx_word' id='word_1_23' title='bbox 1300 683 1419 711; x_wconf 95'>magna</span>
|
||||
<span class='ocrx_word' id='word_1_24' title='bbox 1433 673 1595 711; x_wconf 95'>aliquyam</span>
|
||||
<span class='ocrx_word' id='word_1_25' title='bbox 1609 677 1688 710; x_wconf 96'>erat,</span>
|
||||
<span class='ocrx_word' id='word_1_26' title='bbox 1705 673 1760 703; x_wconf 96'>sed</span>
|
||||
<span class='ocrx_word' id='word_1_27' title='bbox 1775 673 1862 703; x_wconf 93'>diam</span>
|
||||
<span class='ocrx_word' id='word_1_28' title='bbox 1876 673 1986 710; x_wconf 92'>volup-</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_4' title="bbox 558 722 1987 761; baseline 0 -8; x_size 38; x_descenders 8; x_ascenders 10">
|
||||
<span class='ocrx_word' id='word_1_29' title='bbox 558 727 625 753; x_wconf 96'>tua.</span>
|
||||
<span class='ocrx_word' id='word_1_30' title='bbox 653 722 696 753; x_wconf 96'>At</span>
|
||||
<span class='ocrx_word' id='word_1_31' title='bbox 714 733 788 753; x_wconf 96'>vero</span>
|
||||
<span class='ocrx_word' id='word_1_32' title='bbox 805 733 859 753; x_wconf 96'>eos</span>
|
||||
<span class='ocrx_word' id='word_1_33' title='bbox 876 726 908 753; x_wconf 96'>et</span>
|
||||
<span class='ocrx_word' id='word_1_34' title='bbox 927 733 1077 753; x_wconf 96'>accusam</span>
|
||||
<span class='ocrx_word' id='word_1_35' title='bbox 1094 727 1125 753; x_wconf 96'>et</span>
|
||||
<span class='ocrx_word' id='word_1_36' title='bbox 1141 724 1231 761; x_wconf 96'>justo</span>
|
||||
<span class='ocrx_word' id='word_1_37' title='bbox 1248 723 1313 753; x_wconf 96'>duo</span>
|
||||
<span class='ocrx_word' id='word_1_38' title='bbox 1331 723 1455 753; x_wconf 96'>dolores</span>
|
||||
<span class='ocrx_word' id='word_1_39' title='bbox 1473 726 1504 753; x_wconf 96'>et</span>
|
||||
<span class='ocrx_word' id='word_1_40' title='bbox 1522 733 1561 753; x_wconf 95'>ea</span>
|
||||
<span class='ocrx_word' id='word_1_41' title='bbox 1577 723 1699 753; x_wconf 95'>rebum.</span>
|
||||
<span class='ocrx_word' id='word_1_42' title='bbox 1728 723 1797 753; x_wconf 95'>Stet</span>
|
||||
<span class='ocrx_word' id='word_1_43' title='bbox 1816 723 1893 753; x_wconf 94'>clita</span>
|
||||
<span class='ocrx_word' id='word_1_44' title='bbox 1909 723 1987 753; x_wconf 94'>kasd</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_5' title="bbox 558 773 1986 811; baseline -0.001 -8; x_size 38; x_descenders 8; x_ascenders 10">
|
||||
<span class='ocrx_word' id='word_1_45' title='bbox 558 773 747 811; x_wconf 95'>gubergren,</span>
|
||||
<span class='ocrx_word' id='word_1_46' title='bbox 765 783 806 803; x_wconf 95'>no</span>
|
||||
<span class='ocrx_word' id='word_1_47' title='bbox 822 783 876 803; x_wconf 96'>sea</span>
|
||||
<span class='ocrx_word' id='word_1_48' title='bbox 890 773 1052 803; x_wconf 96'>takimata</span>
|
||||
<span class='ocrx_word' id='word_1_49' title='bbox 1066 776 1198 803; x_wconf 96'>sanctus</span>
|
||||
<span class='ocrx_word' id='word_1_50' title='bbox 1213 777 1261 803; x_wconf 95'>est</span>
|
||||
<span class='ocrx_word' id='word_1_51' title='bbox 1278 774 1392 803; x_wconf 95'>Lorem</span>
|
||||
<span class='ocrx_word' id='word_1_52' title='bbox 1407 774 1514 810; x_wconf 96'>ipsum</span>
|
||||
<span class='ocrx_word' id='word_1_53' title='bbox 1529 773 1619 803; x_wconf 96'>dolor</span>
|
||||
<span class='ocrx_word' id='word_1_54' title='bbox 1635 774 1675 803; x_wconf 96'>sit</span>
|
||||
<span class='ocrx_word' id='word_1_55' title='bbox 1692 776 1789 803; x_wconf 96'>amet.</span>
|
||||
<span class='ocrx_word' id='word_1_56' title='bbox 1812 774 1926 803; x_wconf 93'>Lorem</span>
|
||||
<span class='ocrx_word' id='word_1_57' title='bbox 1941 774 1986 810; x_wconf 91'>ip-</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_6' title="bbox 559 823 1987 861; baseline -0.001 -8; x_size 37; x_descenders 7; x_ascenders 10">
|
||||
<span class='ocrx_word' id='word_1_58' title='bbox 559 833 631 853; x_wconf 96'>sum</span>
|
||||
<span class='ocrx_word' id='word_1_59' title='bbox 643 823 733 853; x_wconf 96'>dolor</span>
|
||||
<span class='ocrx_word' id='word_1_60' title='bbox 745 824 786 853; x_wconf 96'>sit</span>
|
||||
<span class='ocrx_word' id='word_1_61' title='bbox 800 826 897 860; x_wconf 96'>amet,</span>
|
||||
<span class='ocrx_word' id='word_1_62' title='bbox 912 826 1097 853; x_wconf 96'>consetetur</span>
|
||||
<span class='ocrx_word' id='word_1_63' title='bbox 1109 823 1292 861; x_wconf 96'>sadipscing</span>
|
||||
<span class='ocrx_word' id='word_1_64' title='bbox 1304 823 1385 860; x_wconf 96'>elitr,</span>
|
||||
<span class='ocrx_word' id='word_1_65' title='bbox 1400 823 1456 853; x_wconf 96'>sed</span>
|
||||
<span class='ocrx_word' id='word_1_66' title='bbox 1469 823 1556 853; x_wconf 96'>diam</span>
|
||||
<span class='ocrx_word' id='word_1_67' title='bbox 1569 833 1710 861; x_wconf 96'>nonumy</span>
|
||||
<span class='ocrx_word' id='word_1_68' title='bbox 1723 823 1846 853; x_wconf 96'>eirmod</span>
|
||||
<span class='ocrx_word' id='word_1_69' title='bbox 1859 826 1987 860; x_wconf 96'>tempor</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_7' title="bbox 559 872 1987 911; baseline 0 -8; x_size 37; x_descenders 7; x_ascenders 10">
|
||||
<span class='ocrx_word' id='word_1_70' title='bbox 559 873 706 903; x_wconf 96'>invidunt</span>
|
||||
<span class='ocrx_word' id='word_1_71' title='bbox 724 877 759 903; x_wconf 96'>ut</span>
|
||||
<span class='ocrx_word' id='word_1_72' title='bbox 777 873 886 903; x_wconf 96'>labore</span>
|
||||
<span class='ocrx_word' id='word_1_73' title='bbox 903 876 934 903; x_wconf 96'>et</span>
|
||||
<span class='ocrx_word' id='word_1_74' title='bbox 952 873 1060 903; x_wconf 96'>dolore</span>
|
||||
<span class='ocrx_word' id='word_1_75' title='bbox 1077 883 1195 911; x_wconf 96'>magna</span>
|
||||
<span class='ocrx_word' id='word_1_76' title='bbox 1211 873 1373 911; x_wconf 96'>aliquyam</span>
|
||||
<span class='ocrx_word' id='word_1_77' title='bbox 1389 876 1468 910; x_wconf 96'>erat,</span>
|
||||
<span class='ocrx_word' id='word_1_78' title='bbox 1487 873 1542 903; x_wconf 96'>sed</span>
|
||||
<span class='ocrx_word' id='word_1_79' title='bbox 1559 873 1647 903; x_wconf 96'>diam</span>
|
||||
<span class='ocrx_word' id='word_1_80' title='bbox 1662 873 1829 910; x_wconf 95'>voluptua.</span>
|
||||
<span class='ocrx_word' id='word_1_81' title='bbox 1853 872 1896 903; x_wconf 96'>At</span>
|
||||
<span class='ocrx_word' id='word_1_82' title='bbox 1913 883 1987 903; x_wconf 96'>vero</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_8' title="bbox 558 923 1985 961; baseline 0 -8; x_size 39; x_descenders 9; x_ascenders 10">
|
||||
<span class='ocrx_word' id='word_1_83' title='bbox 558 933 611 953; x_wconf 96'>eos</span>
|
||||
<span class='ocrx_word' id='word_1_84' title='bbox 632 926 663 953; x_wconf 95'>et</span>
|
||||
<span class='ocrx_word' id='word_1_85' title='bbox 685 933 835 953; x_wconf 96'>accusam</span>
|
||||
<span class='ocrx_word' id='word_1_86' title='bbox 855 926 886 953; x_wconf 96'>et</span>
|
||||
<span class='ocrx_word' id='word_1_87' title='bbox 904 924 994 961; x_wconf 95'>justo</span>
|
||||
<span class='ocrx_word' id='word_1_88' title='bbox 1014 923 1079 953; x_wconf 96'>duo</span>
|
||||
<span class='ocrx_word' id='word_1_89' title='bbox 1099 923 1223 953; x_wconf 96'>dolores</span>
|
||||
<span class='ocrx_word' id='word_1_90' title='bbox 1244 926 1275 953; x_wconf 96'>et</span>
|
||||
<span class='ocrx_word' id='word_1_91' title='bbox 1296 933 1334 953; x_wconf 95'>ea</span>
|
||||
<span class='ocrx_word' id='word_1_92' title='bbox 1353 923 1476 953; x_wconf 96'>rebum.</span>
|
||||
<span class='ocrx_word' id='word_1_93' title='bbox 1512 923 1582 953; x_wconf 96'>Stet</span>
|
||||
<span class='ocrx_word' id='word_1_94' title='bbox 1603 923 1680 953; x_wconf 96'>clita</span>
|
||||
<span class='ocrx_word' id='word_1_95' title='bbox 1699 923 1776 953; x_wconf 96'>kasd</span>
|
||||
<span class='ocrx_word' id='word_1_96' title='bbox 1797 923 1985 961; x_wconf 96'>gubergren,</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_9' title="bbox 559 972 1987 1009; baseline -0.001 -7; x_size 38; x_descenders 8; x_ascenders 10">
|
||||
<span class='ocrx_word' id='word_1_97' title='bbox 559 982 600 1002; x_wconf 97'>no</span>
|
||||
<span class='ocrx_word' id='word_1_98' title='bbox 619 982 673 1002; x_wconf 96'>sea</span>
|
||||
<span class='ocrx_word' id='word_1_99' title='bbox 690 972 852 1002; x_wconf 96'>takimata</span>
|
||||
<span class='ocrx_word' id='word_1_100' title='bbox 869 975 1001 1002; x_wconf 96'>sanctus</span>
|
||||
<span class='ocrx_word' id='word_1_101' title='bbox 1019 975 1067 1002; x_wconf 96'>est</span>
|
||||
<span class='ocrx_word' id='word_1_102' title='bbox 1087 973 1201 1002; x_wconf 96'>Lorem</span>
|
||||
<span class='ocrx_word' id='word_1_103' title='bbox 1219 973 1326 1009; x_wconf 96'>ipsum</span>
|
||||
<span class='ocrx_word' id='word_1_104' title='bbox 1344 972 1434 1002; x_wconf 96'>dolor</span>
|
||||
<span class='ocrx_word' id='word_1_105' title='bbox 1453 973 1493 1002; x_wconf 96'>sit</span>
|
||||
<span class='ocrx_word' id='word_1_106' title='bbox 1513 976 1609 1002; x_wconf 96'>amet.</span>
|
||||
<span class='ocrx_word' id='word_1_107' title='bbox 1640 973 1754 1002; x_wconf 96'>Lorem</span>
|
||||
<span class='ocrx_word' id='word_1_108' title='bbox 1772 973 1879 1009; x_wconf 96'>ipsum</span>
|
||||
<span class='ocrx_word' id='word_1_109' title='bbox 1897 972 1987 1002; x_wconf 96'>dolor</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_10' title="bbox 559 1022 1986 1060; baseline -0.001 -8; x_size 37; x_descenders 7; x_ascenders 10">
|
||||
<span class='ocrx_word' id='word_1_110' title='bbox 559 1023 599 1052; x_wconf 96'>sit</span>
|
||||
<span class='ocrx_word' id='word_1_111' title='bbox 616 1025 713 1059; x_wconf 96'>amet,</span>
|
||||
<span class='ocrx_word' id='word_1_112' title='bbox 730 1025 915 1052; x_wconf 96'>consetetur</span>
|
||||
<span class='ocrx_word' id='word_1_113' title='bbox 930 1022 1113 1060; x_wconf 96'>sadipscing</span>
|
||||
<span class='ocrx_word' id='word_1_114' title='bbox 1127 1022 1209 1059; x_wconf 96'>elitr,</span>
|
||||
<span class='ocrx_word' id='word_1_115' title='bbox 1226 1022 1281 1052; x_wconf 95'>sed</span>
|
||||
<span class='ocrx_word' id='word_1_116' title='bbox 1297 1022 1385 1052; x_wconf 95'>diam</span>
|
||||
<span class='ocrx_word' id='word_1_117' title='bbox 1400 1032 1541 1060; x_wconf 96'>nonumy</span>
|
||||
<span class='ocrx_word' id='word_1_118' title='bbox 1556 1022 1680 1052; x_wconf 96'>eirmod</span>
|
||||
<span class='ocrx_word' id='word_1_119' title='bbox 1695 1025 1823 1059; x_wconf 96'>tempor</span>
|
||||
<span class='ocrx_word' id='word_1_120' title='bbox 1838 1022 1986 1052; x_wconf 96'>invidunt</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_11' title="bbox 559 1071 1986 1110; baseline 0 -8; x_size 37; x_descenders 7; x_ascenders 10">
|
||||
<span class='ocrx_word' id='word_1_121' title='bbox 559 1076 594 1102; x_wconf 95'>ut</span>
|
||||
<span class='ocrx_word' id='word_1_122' title='bbox 615 1072 724 1102; x_wconf 95'>labore</span>
|
||||
<span class='ocrx_word' id='word_1_123' title='bbox 744 1075 775 1102; x_wconf 95'>et</span>
|
||||
<span class='ocrx_word' id='word_1_124' title='bbox 796 1072 904 1102; x_wconf 96'>dolore</span>
|
||||
<span class='ocrx_word' id='word_1_125' title='bbox 924 1082 1042 1110; x_wconf 96'>magna</span>
|
||||
<span class='ocrx_word' id='word_1_126' title='bbox 1061 1072 1224 1110; x_wconf 95'>aliquyam</span>
|
||||
<span class='ocrx_word' id='word_1_127' title='bbox 1243 1075 1322 1109; x_wconf 95'>erat,</span>
|
||||
<span class='ocrx_word' id='word_1_128' title='bbox 1344 1072 1399 1102; x_wconf 95'>sed</span>
|
||||
<span class='ocrx_word' id='word_1_129' title='bbox 1419 1072 1507 1102; x_wconf 96'>diam</span>
|
||||
<span class='ocrx_word' id='word_1_130' title='bbox 1526 1072 1692 1109; x_wconf 96'>voluptua.</span>
|
||||
<span class='ocrx_word' id='word_1_131' title='bbox 1725 1071 1768 1102; x_wconf 96'>At</span>
|
||||
<span class='ocrx_word' id='word_1_132' title='bbox 1788 1082 1862 1102; x_wconf 96'>vero</span>
|
||||
<span class='ocrx_word' id='word_1_133' title='bbox 1882 1082 1935 1102; x_wconf 96'>eos</span>
|
||||
<span class='ocrx_word' id='word_1_134' title='bbox 1955 1075 1986 1102; x_wconf 96'>et</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_12' title="bbox 559 1122 1988 1160; baseline 0 -8; x_size 38; x_descenders 8; x_ascenders 10">
|
||||
<span class='ocrx_word' id='word_1_135' title='bbox 559 1132 709 1152; x_wconf 96'>accusam</span>
|
||||
<span class='ocrx_word' id='word_1_136' title='bbox 728 1126 759 1152; x_wconf 96'>et</span>
|
||||
<span class='ocrx_word' id='word_1_137' title='bbox 777 1123 866 1160; x_wconf 96'>justo</span>
|
||||
<span class='ocrx_word' id='word_1_138' title='bbox 886 1122 951 1152; x_wconf 95'>duo</span>
|
||||
<span class='ocrx_word' id='word_1_139' title='bbox 970 1122 1095 1152; x_wconf 96'>dolores</span>
|
||||
<span class='ocrx_word' id='word_1_140' title='bbox 1114 1125 1146 1152; x_wconf 95'>et</span>
|
||||
<span class='ocrx_word' id='word_1_141' title='bbox 1166 1132 1204 1152; x_wconf 95'>ea</span>
|
||||
<span class='ocrx_word' id='word_1_142' title='bbox 1223 1122 1345 1152; x_wconf 95'>rebum.</span>
|
||||
<span class='ocrx_word' id='word_1_143' title='bbox 1380 1122 1449 1152; x_wconf 96'>Stet</span>
|
||||
<span class='ocrx_word' id='word_1_144' title='bbox 1470 1122 1546 1152; x_wconf 96'>clita</span>
|
||||
<span class='ocrx_word' id='word_1_145' title='bbox 1565 1122 1642 1152; x_wconf 96'>kasd</span>
|
||||
<span class='ocrx_word' id='word_1_146' title='bbox 1662 1122 1851 1160; x_wconf 95'>gubergren,</span>
|
||||
<span class='ocrx_word' id='word_1_147' title='bbox 1873 1132 1914 1152; x_wconf 96'>no</span>
|
||||
<span class='ocrx_word' id='word_1_148' title='bbox 1934 1132 1988 1152; x_wconf 96'>sea</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_13' title="bbox 558 1172 1460 1209; baseline 0 -7; x_size 37; x_descenders 7; x_ascenders 10">
|
||||
<span class='ocrx_word' id='word_1_149' title='bbox 558 1172 720 1202; x_wconf 96'>takimata</span>
|
||||
<span class='ocrx_word' id='word_1_150' title='bbox 735 1175 866 1202; x_wconf 96'>sanctus</span>
|
||||
<span class='ocrx_word' id='word_1_151' title='bbox 882 1176 930 1202; x_wconf 96'>est</span>
|
||||
<span class='ocrx_word' id='word_1_152' title='bbox 947 1173 1061 1202; x_wconf 96'>Lorem</span>
|
||||
<span class='ocrx_word' id='word_1_153' title='bbox 1077 1173 1184 1209; x_wconf 96'>ipsum</span>
|
||||
<span class='ocrx_word' id='word_1_154' title='bbox 1200 1172 1290 1202; x_wconf 96'>dolor</span>
|
||||
<span class='ocrx_word' id='word_1_155' title='bbox 1306 1173 1346 1202; x_wconf 96'>sit</span>
|
||||
<span class='ocrx_word' id='word_1_156' title='bbox 1364 1175 1460 1202; x_wconf 96'>amet.</span>
|
||||
</span>
|
||||
</p>
|
||||
|
||||
<p class='ocr_par' id='par_1_3' lang='eng' title="bbox 558 1222 1987 1409">
|
||||
<span class='ocr_line' id='line_1_14' title="bbox 621 1222 1987 1259; baseline 0.001 -8; x_size 37; x_descenders 7; x_ascenders 10">
|
||||
<span class='ocrx_word' id='word_1_157' title='bbox 621 1223 701 1252; x_wconf 96'>Duis</span>
|
||||
<span class='ocrx_word' id='word_1_158' title='bbox 716 1225 827 1252; x_wconf 96'>autem</span>
|
||||
<span class='ocrx_word' id='word_1_159' title='bbox 841 1222 890 1252; x_wconf 96'>vel</span>
|
||||
<span class='ocrx_word' id='word_1_160' title='bbox 905 1232 979 1252; x_wconf 96'>eum</span>
|
||||
<span class='ocrx_word' id='word_1_161' title='bbox 994 1223 1088 1252; x_wconf 96'>iriure</span>
|
||||
<span class='ocrx_word' id='word_1_162' title='bbox 1103 1222 1193 1252; x_wconf 96'>dolor</span>
|
||||
<span class='ocrx_word' id='word_1_163' title='bbox 1208 1223 1241 1251; x_wconf 96'>in</span>
|
||||
<span class='ocrx_word' id='word_1_164' title='bbox 1256 1222 1418 1252; x_wconf 95'>hendrerit</span>
|
||||
<span class='ocrx_word' id='word_1_165' title='bbox 1434 1223 1467 1251; x_wconf 95'>in</span>
|
||||
<span class='ocrx_word' id='word_1_166' title='bbox 1481 1222 1653 1259; x_wconf 96'>vulputate</span>
|
||||
<span class='ocrx_word' id='word_1_167' title='bbox 1668 1222 1743 1252; x_wconf 96'>velit</span>
|
||||
<span class='ocrx_word' id='word_1_168' title='bbox 1759 1232 1826 1252; x_wconf 96'>esse</span>
|
||||
<span class='ocrx_word' id='word_1_169' title='bbox 1841 1222 1987 1252; x_wconf 96'>molestie</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_15' title="bbox 559 1271 1987 1309; baseline 0 -8; x_size 37; x_descenders 7; x_ascenders 10">
|
||||
<span class='ocrx_word' id='word_1_170' title='bbox 559 1274 745 1308; x_wconf 96'>consequat,</span>
|
||||
<span class='ocrx_word' id='word_1_171' title='bbox 766 1271 815 1301; x_wconf 95'>vel</span>
|
||||
<span class='ocrx_word' id='word_1_172' title='bbox 835 1271 925 1301; x_wconf 96'>illum</span>
|
||||
<span class='ocrx_word' id='word_1_173' title='bbox 944 1271 1053 1301; x_wconf 96'>dolore</span>
|
||||
<span class='ocrx_word' id='word_1_174' title='bbox 1072 1281 1112 1301; x_wconf 96'>eu</span>
|
||||
<span class='ocrx_word' id='word_1_175' title='bbox 1131 1271 1251 1309; x_wconf 96'>feugiat</span>
|
||||
<span class='ocrx_word' id='word_1_176' title='bbox 1272 1271 1359 1301; x_wconf 96'>nulla</span>
|
||||
<span class='ocrx_word' id='word_1_177' title='bbox 1378 1271 1505 1301; x_wconf 96'>facilisis</span>
|
||||
<span class='ocrx_word' id='word_1_178' title='bbox 1526 1274 1559 1301; x_wconf 96'>at</span>
|
||||
<span class='ocrx_word' id='word_1_179' title='bbox 1579 1281 1653 1301; x_wconf 96'>vero</span>
|
||||
<span class='ocrx_word' id='word_1_180' title='bbox 1673 1281 1742 1301; x_wconf 96'>eros</span>
|
||||
<span class='ocrx_word' id='word_1_181' title='bbox 1762 1274 1793 1301; x_wconf 96'>et</span>
|
||||
<span class='ocrx_word' id='word_1_182' title='bbox 1814 1281 1987 1301; x_wconf 96'>accumsan</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_16' title="bbox 558 1321 1986 1359; baseline 0 -8; x_size 38; x_descenders 8; x_ascenders 10">
|
||||
<span class='ocrx_word' id='word_1_183' title='bbox 558 1324 590 1351; x_wconf 96'>et</span>
|
||||
<span class='ocrx_word' id='word_1_184' title='bbox 609 1322 695 1351; x_wconf 96'>iusto</span>
|
||||
<span class='ocrx_word' id='word_1_185' title='bbox 713 1321 788 1351; x_wconf 96'>odio</span>
|
||||
<span class='ocrx_word' id='word_1_186' title='bbox 807 1321 974 1359; x_wconf 96'>dignissim</span>
|
||||
<span class='ocrx_word' id='word_1_187' title='bbox 992 1322 1046 1358; x_wconf 96'>qui</span>
|
||||
<span class='ocrx_word' id='word_1_188' title='bbox 1064 1321 1190 1351; x_wconf 96'>blandit</span>
|
||||
<span class='ocrx_word' id='word_1_189' title='bbox 1210 1324 1358 1358; x_wconf 96'>praesent</span>
|
||||
<span class='ocrx_word' id='word_1_190' title='bbox 1378 1321 1544 1358; x_wconf 95'>luptatum</span>
|
||||
<span class='ocrx_word' id='word_1_191' title='bbox 1562 1321 1636 1350; x_wconf 95'>zzril</span>
|
||||
<span class='ocrx_word' id='word_1_192' title='bbox 1655 1321 1773 1351; x_wconf 96'>delenit</span>
|
||||
<span class='ocrx_word' id='word_1_193' title='bbox 1793 1331 1896 1359; x_wconf 96'>augue</span>
|
||||
<span class='ocrx_word' id='word_1_194' title='bbox 1915 1321 1986 1351; x_wconf 96'>duis</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_17' title="bbox 559 1371 1628 1409; baseline 0 -8; x_size 38; x_descenders 8; x_ascenders 10">
|
||||
<span class='ocrx_word' id='word_1_195' title='bbox 559 1371 667 1401; x_wconf 96'>dolore</span>
|
||||
<span class='ocrx_word' id='word_1_196' title='bbox 683 1374 715 1401; x_wconf 96'>te</span>
|
||||
<span class='ocrx_word' id='word_1_197' title='bbox 731 1371 851 1409; x_wconf 96'>feugait</span>
|
||||
<span class='ocrx_word' id='word_1_198' title='bbox 869 1371 956 1401; x_wconf 96'>nulla</span>
|
||||
<span class='ocrx_word' id='word_1_199' title='bbox 971 1371 1092 1401; x_wconf 96'>facilisi.</span>
|
||||
<span class='ocrx_word' id='word_1_200' title='bbox 1115 1372 1229 1401; x_wconf 96'>Lorem</span>
|
||||
<span class='ocrx_word' id='word_1_201' title='bbox 1245 1372 1352 1408; x_wconf 96'>ipsum</span>
|
||||
<span class='ocrx_word' id='word_1_202' title='bbox 1367 1371 1457 1401; x_wconf 96'>dolor</span>
|
||||
<span class='ocrx_word' id='word_1_203' title='bbox 1473 1372 1514 1401; x_wconf 96'>sit</span>
|
||||
<span class='ocrx_word' id='word_1_204' title='bbox 1532 1374 1628 1408; x_wconf 96'>amet,</span>
|
||||
</span>
|
||||
</p>
|
||||
</div>
|
||||
<div class='ocr_carea' id='block_1_3' title="bbox 931 1458 1616 1496">
|
||||
<p class='ocr_par' id='par_1_4' lang='eng' title="bbox 931 1458 1616 1496">
|
||||
<span class='ocr_line' id='line_1_18' title="bbox 931 1458 1616 1496; baseline 0.001 -9; x_size 37; x_descenders 8; x_ascenders 10">
|
||||
<span class='ocrx_word' id='word_1_205' title='bbox 931 1459 1054 1496; x_wconf 96'>Listing</span>
|
||||
<span class='ocrx_word' id='word_1_206' title='bbox 1072 1459 1097 1487; x_wconf 96'>1:</span>
|
||||
<span class='ocrx_word' id='word_1_207' title='bbox 1122 1458 1240 1495; x_wconf 96'>Simple</span>
|
||||
<span class='ocrx_word' id='word_1_208' title='bbox 1257 1458 1443 1495; x_wconf 96'>JavaScript</span>
|
||||
<span class='ocrx_word' id='word_1_209' title='bbox 1460 1459 1616 1496; x_wconf 96'>Program</span>
|
||||
</span>
|
||||
</p>
|
||||
</div>
|
||||
<div class='ocr_separator' id='block_1_4' title="bbox 543 1504 2004 1506"></div>
|
||||
<div class='ocr_carea' id='block_1_5' title="bbox 561 1524 1089 1696">
|
||||
<p class='ocr_par' id='par_1_5' lang='eng' title="bbox 562 1524 1024 1601">
|
||||
<span class='ocr_line' id='line_1_19' title="bbox 562 1524 1024 1555; baseline 0 -5; x_size 31; x_descenders 5; x_ascenders 9">
|
||||
<span class='ocrx_word' id='word_1_210' title='bbox 562 1527 742 1550; x_wconf 93'>function</span>
|
||||
<span class='ocrx_word' id='word_1_211' title='bbox 774 1524 902 1555; x_wconf 90'>add(a,</span>
|
||||
<span class='ocrx_word' id='word_1_212' title='bbox 936 1524 973 1553; x_wconf 92'>b)</span>
|
||||
<span class='ocrx_word' id='word_1_213' title='bbox 1008 1524 1024 1553; x_wconf 86'>{</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_20' title="bbox 656 1573 948 1601; baseline 0 -5; x_size 28; x_descenders 5; x_ascenders 7">
|
||||
<span class='ocrx_word' id='word_1_214' title='bbox 656 1575 789 1596; x_wconf 96'>return</span>
|
||||
<span class='ocrx_word' id='word_1_215' title='bbox 820 1573 948 1601; x_wconf 58'>a+b;</span>
|
||||
</span>
|
||||
</p>
|
||||
|
||||
<p class='ocr_par' id='par_1_6' lang='eng' title="bbox 561 1615 577 1644">
|
||||
<span class='ocr_line' id='line_1_21' title="bbox 561 1615 577 1644; baseline 0 0; x_size 31.900002; x_descenders 5.5; x_ascenders 9.1999998">
|
||||
<span class='ocrx_word' id='word_1_216' title='bbox 561 1615 577 1644; x_wconf 85'>}</span>
|
||||
</span>
|
||||
</p>
|
||||
|
||||
<p class='ocr_par' id='par_1_7' lang='eng' title="bbox 563 1661 1089 1696">
|
||||
<span class='ocr_line' id='line_1_22' title="bbox 563 1661 1089 1696; baseline 0 -9; x_size 31; x_descenders 5; x_ascenders 9">
|
||||
<span class='ocrx_word' id='word_1_217' title='bbox 563 1661 972 1696; x_wconf 90'>console.log(add(3,</span>
|
||||
<span class='ocrx_word' id='word_1_218' title='bbox 1009 1661 1089 1692; x_wconf 89'>5));</span>
|
||||
</span>
|
||||
</p>
|
||||
</div>
|
||||
<div class='ocr_separator' id='block_1_6' title="bbox 543 1713 2004 1715"></div>
|
||||
<div class='ocr_carea' id='block_1_7' title="bbox 558 1746 1988 2532">
|
||||
<p class='ocr_par' id='par_1_8' lang='eng' title="bbox 558 1746 1988 2332">
|
||||
<span class='ocr_line' id='line_1_23' title="bbox 621 1746 1987 1784; baseline 0 -8; x_size 38; x_descenders 8; x_ascenders 10">
|
||||
<span class='ocrx_word' id='word_1_219' title='bbox 621 1747 735 1776; x_wconf 96'>Lorem</span>
|
||||
<span class='ocrx_word' id='word_1_220' title='bbox 752 1747 859 1783; x_wconf 95'>ipsum</span>
|
||||
<span class='ocrx_word' id='word_1_221' title='bbox 877 1746 967 1776; x_wconf 95'>dolor</span>
|
||||
<span class='ocrx_word' id='word_1_222' title='bbox 985 1747 1025 1776; x_wconf 96'>sit</span>
|
||||
<span class='ocrx_word' id='word_1_223' title='bbox 1045 1749 1141 1783; x_wconf 96'>amet,</span>
|
||||
<span class='ocrx_word' id='word_1_224' title='bbox 1162 1749 1347 1776; x_wconf 96'>consetetur</span>
|
||||
<span class='ocrx_word' id='word_1_225' title='bbox 1364 1746 1547 1784; x_wconf 96'>sadipscing</span>
|
||||
<span class='ocrx_word' id='word_1_226' title='bbox 1565 1746 1646 1783; x_wconf 96'>elitr,</span>
|
||||
<span class='ocrx_word' id='word_1_227' title='bbox 1666 1746 1722 1776; x_wconf 96'>sed</span>
|
||||
<span class='ocrx_word' id='word_1_228' title='bbox 1740 1746 1828 1776; x_wconf 96'>diam</span>
|
||||
<span class='ocrx_word' id='word_1_229' title='bbox 1845 1756 1987 1784; x_wconf 96'>nonumy</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_24' title="bbox 558 1796 1987 1834; baseline 0 -8; x_size 38; x_descenders 8; x_ascenders 10">
|
||||
<span class='ocrx_word' id='word_1_230' title='bbox 558 1796 682 1826; x_wconf 96'>eirmod</span>
|
||||
<span class='ocrx_word' id='word_1_231' title='bbox 703 1799 831 1833; x_wconf 96'>tempor</span>
|
||||
<span class='ocrx_word' id='word_1_232' title='bbox 853 1796 1000 1826; x_wconf 96'>invidunt</span>
|
||||
<span class='ocrx_word' id='word_1_233' title='bbox 1023 1799 1059 1826; x_wconf 96'>ut</span>
|
||||
<span class='ocrx_word' id='word_1_234' title='bbox 1081 1796 1191 1826; x_wconf 96'>labore</span>
|
||||
<span class='ocrx_word' id='word_1_235' title='bbox 1212 1800 1243 1826; x_wconf 96'>et</span>
|
||||
<span class='ocrx_word' id='word_1_236' title='bbox 1266 1796 1374 1826; x_wconf 96'>dolore</span>
|
||||
<span class='ocrx_word' id='word_1_237' title='bbox 1396 1806 1514 1834; x_wconf 96'>magna</span>
|
||||
<span class='ocrx_word' id='word_1_238' title='bbox 1535 1796 1698 1834; x_wconf 95'>aliquyam</span>
|
||||
<span class='ocrx_word' id='word_1_239' title='bbox 1719 1799 1798 1833; x_wconf 96'>erat,</span>
|
||||
<span class='ocrx_word' id='word_1_240' title='bbox 1823 1796 1878 1826; x_wconf 96'>sed</span>
|
||||
<span class='ocrx_word' id='word_1_241' title='bbox 1900 1796 1987 1826; x_wconf 96'>diam</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_25' title="bbox 558 1845 1988 1884; baseline 0 -8; x_size 38; x_descenders 8; x_ascenders 10">
|
||||
<span class='ocrx_word' id='word_1_242' title='bbox 558 1846 724 1883; x_wconf 96'>voluptua.</span>
|
||||
<span class='ocrx_word' id='word_1_243' title='bbox 751 1845 794 1876; x_wconf 96'>At</span>
|
||||
<span class='ocrx_word' id='word_1_244' title='bbox 812 1856 886 1876; x_wconf 96'>vero</span>
|
||||
<span class='ocrx_word' id='word_1_245' title='bbox 903 1856 957 1876; x_wconf 96'>eos</span>
|
||||
<span class='ocrx_word' id='word_1_246' title='bbox 974 1850 1005 1876; x_wconf 96'>et</span>
|
||||
<span class='ocrx_word' id='word_1_247' title='bbox 1024 1856 1174 1876; x_wconf 96'>accusam</span>
|
||||
<span class='ocrx_word' id='word_1_248' title='bbox 1191 1849 1223 1876; x_wconf 96'>et</span>
|
||||
<span class='ocrx_word' id='word_1_249' title='bbox 1238 1847 1328 1884; x_wconf 96'>justo</span>
|
||||
<span class='ocrx_word' id='word_1_250' title='bbox 1345 1846 1410 1876; x_wconf 96'>duo</span>
|
||||
<span class='ocrx_word' id='word_1_251' title='bbox 1427 1846 1551 1876; x_wconf 96'>dolores</span>
|
||||
<span class='ocrx_word' id='word_1_252' title='bbox 1569 1850 1600 1876; x_wconf 95'>et</span>
|
||||
<span class='ocrx_word' id='word_1_253' title='bbox 1619 1856 1657 1876; x_wconf 94'>ea</span>
|
||||
<span class='ocrx_word' id='word_1_254' title='bbox 1673 1846 1795 1876; x_wconf 95'>rebum.</span>
|
||||
<span class='ocrx_word' id='word_1_255' title='bbox 1823 1846 1893 1876; x_wconf 96'>Stet</span>
|
||||
<span class='ocrx_word' id='word_1_256' title='bbox 1911 1846 1988 1876; x_wconf 96'>clita</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_26' title="bbox 558 1896 1988 1934; baseline -0.001 -8; x_size 38; x_descenders 8; x_ascenders 10">
|
||||
<span class='ocrx_word' id='word_1_257' title='bbox 558 1896 636 1926; x_wconf 96'>kasd</span>
|
||||
<span class='ocrx_word' id='word_1_258' title='bbox 648 1896 837 1934; x_wconf 96'>gubergren,</span>
|
||||
<span class='ocrx_word' id='word_1_259' title='bbox 853 1906 894 1926; x_wconf 96'>no</span>
|
||||
<span class='ocrx_word' id='word_1_260' title='bbox 907 1906 961 1926; x_wconf 96'>sea</span>
|
||||
<span class='ocrx_word' id='word_1_261' title='bbox 973 1896 1134 1926; x_wconf 96'>takimata</span>
|
||||
<span class='ocrx_word' id='word_1_262' title='bbox 1146 1900 1277 1926; x_wconf 96'>sanctus</span>
|
||||
<span class='ocrx_word' id='word_1_263' title='bbox 1290 1899 1338 1926; x_wconf 96'>est</span>
|
||||
<span class='ocrx_word' id='word_1_264' title='bbox 1352 1897 1466 1926; x_wconf 95'>Lorem</span>
|
||||
<span class='ocrx_word' id='word_1_265' title='bbox 1478 1897 1585 1933; x_wconf 96'>ipsum</span>
|
||||
<span class='ocrx_word' id='word_1_266' title='bbox 1598 1896 1688 1926; x_wconf 95'>dolor</span>
|
||||
<span class='ocrx_word' id='word_1_267' title='bbox 1700 1897 1741 1926; x_wconf 96'>sit</span>
|
||||
<span class='ocrx_word' id='word_1_268' title='bbox 1755 1900 1851 1926; x_wconf 96'>amet.</span>
|
||||
<span class='ocrx_word' id='word_1_269' title='bbox 1874 1897 1988 1926; x_wconf 96'>Lorem</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_27' title="bbox 559 1946 1987 1984; baseline 0 -8; x_size 38; x_descenders 8; x_ascenders 10">
|
||||
<span class='ocrx_word' id='word_1_270' title='bbox 559 1947 665 1983; x_wconf 96'>ipsum</span>
|
||||
<span class='ocrx_word' id='word_1_271' title='bbox 688 1946 778 1976; x_wconf 96'>dolor</span>
|
||||
<span class='ocrx_word' id='word_1_272' title='bbox 801 1947 841 1976; x_wconf 96'>sit</span>
|
||||
<span class='ocrx_word' id='word_1_273' title='bbox 866 1949 963 1983; x_wconf 96'>amet,</span>
|
||||
<span class='ocrx_word' id='word_1_274' title='bbox 989 1949 1174 1976; x_wconf 96'>consetetur</span>
|
||||
<span class='ocrx_word' id='word_1_275' title='bbox 1197 1946 1380 1984; x_wconf 96'>sadipscing</span>
|
||||
<span class='ocrx_word' id='word_1_276' title='bbox 1402 1946 1483 1983; x_wconf 96'>elitr,</span>
|
||||
<span class='ocrx_word' id='word_1_277' title='bbox 1510 1946 1565 1976; x_wconf 96'>sed</span>
|
||||
<span class='ocrx_word' id='word_1_278' title='bbox 1588 1946 1676 1976; x_wconf 96'>diam</span>
|
||||
<span class='ocrx_word' id='word_1_279' title='bbox 1699 1956 1841 1984; x_wconf 96'>nonumy</span>
|
||||
<span class='ocrx_word' id='word_1_280' title='bbox 1863 1946 1987 1976; x_wconf 96'>eirmod</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_28' title="bbox 558 1995 1986 2034; baseline 0 -8; x_size 38; x_descenders 8; x_ascenders 10">
|
||||
<span class='ocrx_word' id='word_1_281' title='bbox 558 2000 687 2033; x_wconf 95'>tempor</span>
|
||||
<span class='ocrx_word' id='word_1_282' title='bbox 698 1996 846 2026; x_wconf 96'>invidunt</span>
|
||||
<span class='ocrx_word' id='word_1_283' title='bbox 858 2000 894 2026; x_wconf 96'>ut</span>
|
||||
<span class='ocrx_word' id='word_1_284' title='bbox 907 1996 1016 2026; x_wconf 96'>labore</span>
|
||||
<span class='ocrx_word' id='word_1_285' title='bbox 1028 2000 1059 2026; x_wconf 96'>et</span>
|
||||
<span class='ocrx_word' id='word_1_286' title='bbox 1072 1996 1181 2026; x_wconf 96'>dolore</span>
|
||||
<span class='ocrx_word' id='word_1_287' title='bbox 1192 2006 1311 2034; x_wconf 95'>magna</span>
|
||||
<span class='ocrx_word' id='word_1_288' title='bbox 1322 1996 1485 2034; x_wconf 95'>aliquyam</span>
|
||||
<span class='ocrx_word' id='word_1_289' title='bbox 1496 1999 1575 2033; x_wconf 96'>erat,</span>
|
||||
<span class='ocrx_word' id='word_1_290' title='bbox 1589 1996 1645 2026; x_wconf 96'>sed</span>
|
||||
<span class='ocrx_word' id='word_1_291' title='bbox 1656 1996 1744 2026; x_wconf 96'>diam</span>
|
||||
<span class='ocrx_word' id='word_1_292' title='bbox 1755 1996 1921 2033; x_wconf 96'>voluptua.</span>
|
||||
<span class='ocrx_word' id='word_1_293' title='bbox 1943 1995 1986 2026; x_wconf 96'>At</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_29' title="bbox 558 2045 1985 2083; baseline 0 -8; x_size 38; x_descenders 8; x_ascenders 10">
|
||||
<span class='ocrx_word' id='word_1_294' title='bbox 558 2055 632 2075; x_wconf 96'>vero</span>
|
||||
<span class='ocrx_word' id='word_1_295' title='bbox 646 2055 699 2075; x_wconf 96'>eos</span>
|
||||
<span class='ocrx_word' id='word_1_296' title='bbox 713 2048 745 2075; x_wconf 95'>et</span>
|
||||
<span class='ocrx_word' id='word_1_297' title='bbox 760 2055 910 2075; x_wconf 95'>accusam</span>
|
||||
<span class='ocrx_word' id='word_1_298' title='bbox 924 2048 955 2075; x_wconf 96'>et</span>
|
||||
<span class='ocrx_word' id='word_1_299' title='bbox 967 2046 1057 2083; x_wconf 96'>justo</span>
|
||||
<span class='ocrx_word' id='word_1_300' title='bbox 1071 2045 1135 2075; x_wconf 96'>duo</span>
|
||||
<span class='ocrx_word' id='word_1_301' title='bbox 1149 2045 1274 2075; x_wconf 96'>dolores</span>
|
||||
<span class='ocrx_word' id='word_1_302' title='bbox 1288 2049 1319 2075; x_wconf 96'>et</span>
|
||||
<span class='ocrx_word' id='word_1_303' title='bbox 1334 2055 1372 2075; x_wconf 96'>ea</span>
|
||||
<span class='ocrx_word' id='word_1_304' title='bbox 1385 2045 1507 2075; x_wconf 96'>rebum.</span>
|
||||
<span class='ocrx_word' id='word_1_305' title='bbox 1531 2045 1600 2075; x_wconf 96'>Stet</span>
|
||||
<span class='ocrx_word' id='word_1_306' title='bbox 1615 2045 1692 2075; x_wconf 96'>clita</span>
|
||||
<span class='ocrx_word' id='word_1_307' title='bbox 1705 2045 1783 2075; x_wconf 96'>kasd</span>
|
||||
<span class='ocrx_word' id='word_1_308' title='bbox 1796 2045 1985 2083; x_wconf 96'>gubergren,</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_30' title="bbox 559 2095 1987 2132; baseline -0.001 -7; x_size 38; x_descenders 8; x_ascenders 10">
|
||||
<span class='ocrx_word' id='word_1_309' title='bbox 559 2105 600 2125; x_wconf 97'>no</span>
|
||||
<span class='ocrx_word' id='word_1_310' title='bbox 619 2105 673 2125; x_wconf 96'>sea</span>
|
||||
<span class='ocrx_word' id='word_1_311' title='bbox 690 2095 852 2125; x_wconf 96'>takimata</span>
|
||||
<span class='ocrx_word' id='word_1_312' title='bbox 869 2098 1001 2125; x_wconf 96'>sanctus</span>
|
||||
<span class='ocrx_word' id='word_1_313' title='bbox 1019 2098 1067 2125; x_wconf 96'>est</span>
|
||||
<span class='ocrx_word' id='word_1_314' title='bbox 1087 2096 1201 2125; x_wconf 96'>Lorem</span>
|
||||
<span class='ocrx_word' id='word_1_315' title='bbox 1219 2096 1326 2132; x_wconf 96'>ipsum</span>
|
||||
<span class='ocrx_word' id='word_1_316' title='bbox 1344 2095 1434 2125; x_wconf 96'>dolor</span>
|
||||
<span class='ocrx_word' id='word_1_317' title='bbox 1453 2096 1493 2125; x_wconf 96'>sit</span>
|
||||
<span class='ocrx_word' id='word_1_318' title='bbox 1513 2099 1609 2125; x_wconf 96'>amet.</span>
|
||||
<span class='ocrx_word' id='word_1_319' title='bbox 1640 2096 1754 2125; x_wconf 96'>Lorem</span>
|
||||
<span class='ocrx_word' id='word_1_320' title='bbox 1772 2096 1879 2132; x_wconf 96'>ipsum</span>
|
||||
<span class='ocrx_word' id='word_1_321' title='bbox 1897 2095 1987 2125; x_wconf 96'>dolor</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_31' title="bbox 559 2145 1986 2183; baseline -0.001 -8; x_size 37; x_descenders 7; x_ascenders 10">
|
||||
<span class='ocrx_word' id='word_1_322' title='bbox 559 2146 599 2175; x_wconf 96'>sit</span>
|
||||
<span class='ocrx_word' id='word_1_323' title='bbox 616 2148 713 2182; x_wconf 96'>amet,</span>
|
||||
<span class='ocrx_word' id='word_1_324' title='bbox 730 2148 915 2175; x_wconf 96'>consetetur</span>
|
||||
<span class='ocrx_word' id='word_1_325' title='bbox 930 2145 1113 2183; x_wconf 96'>sadipscing</span>
|
||||
<span class='ocrx_word' id='word_1_326' title='bbox 1127 2145 1209 2182; x_wconf 96'>elitr,</span>
|
||||
<span class='ocrx_word' id='word_1_327' title='bbox 1226 2145 1281 2175; x_wconf 95'>sed</span>
|
||||
<span class='ocrx_word' id='word_1_328' title='bbox 1297 2145 1385 2175; x_wconf 95'>diam</span>
|
||||
<span class='ocrx_word' id='word_1_329' title='bbox 1400 2155 1541 2183; x_wconf 96'>nonumy</span>
|
||||
<span class='ocrx_word' id='word_1_330' title='bbox 1556 2145 1680 2175; x_wconf 96'>eirmod</span>
|
||||
<span class='ocrx_word' id='word_1_331' title='bbox 1695 2148 1823 2182; x_wconf 96'>tempor</span>
|
||||
<span class='ocrx_word' id='word_1_332' title='bbox 1838 2145 1986 2175; x_wconf 96'>invidunt</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_32' title="bbox 559 2194 1986 2233; baseline 0 -8; x_size 37; x_descenders 7; x_ascenders 10">
|
||||
<span class='ocrx_word' id='word_1_333' title='bbox 559 2199 594 2225; x_wconf 95'>ut</span>
|
||||
<span class='ocrx_word' id='word_1_334' title='bbox 615 2195 724 2225; x_wconf 95'>labore</span>
|
||||
<span class='ocrx_word' id='word_1_335' title='bbox 744 2198 775 2225; x_wconf 95'>et</span>
|
||||
<span class='ocrx_word' id='word_1_336' title='bbox 796 2195 904 2225; x_wconf 96'>dolore</span>
|
||||
<span class='ocrx_word' id='word_1_337' title='bbox 924 2205 1042 2233; x_wconf 96'>magna</span>
|
||||
<span class='ocrx_word' id='word_1_338' title='bbox 1061 2195 1224 2233; x_wconf 95'>aliquyam</span>
|
||||
<span class='ocrx_word' id='word_1_339' title='bbox 1243 2198 1322 2232; x_wconf 95'>erat,</span>
|
||||
<span class='ocrx_word' id='word_1_340' title='bbox 1344 2195 1399 2225; x_wconf 95'>sed</span>
|
||||
<span class='ocrx_word' id='word_1_341' title='bbox 1419 2195 1507 2225; x_wconf 96'>diam</span>
|
||||
<span class='ocrx_word' id='word_1_342' title='bbox 1526 2195 1692 2232; x_wconf 96'>voluptua.</span>
|
||||
<span class='ocrx_word' id='word_1_343' title='bbox 1725 2194 1768 2225; x_wconf 96'>At</span>
|
||||
<span class='ocrx_word' id='word_1_344' title='bbox 1788 2205 1862 2225; x_wconf 96'>vero</span>
|
||||
<span class='ocrx_word' id='word_1_345' title='bbox 1882 2205 1935 2225; x_wconf 96'>eos</span>
|
||||
<span class='ocrx_word' id='word_1_346' title='bbox 1955 2198 1986 2225; x_wconf 96'>et</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_33' title="bbox 559 2245 1988 2283; baseline 0 -8; x_size 38; x_descenders 8; x_ascenders 10">
|
||||
<span class='ocrx_word' id='word_1_347' title='bbox 559 2255 709 2275; x_wconf 96'>accusam</span>
|
||||
<span class='ocrx_word' id='word_1_348' title='bbox 728 2249 759 2275; x_wconf 96'>et</span>
|
||||
<span class='ocrx_word' id='word_1_349' title='bbox 777 2246 866 2283; x_wconf 96'>justo</span>
|
||||
<span class='ocrx_word' id='word_1_350' title='bbox 886 2245 951 2275; x_wconf 95'>duo</span>
|
||||
<span class='ocrx_word' id='word_1_351' title='bbox 970 2245 1095 2275; x_wconf 96'>dolores</span>
|
||||
<span class='ocrx_word' id='word_1_352' title='bbox 1114 2248 1146 2275; x_wconf 95'>et</span>
|
||||
<span class='ocrx_word' id='word_1_353' title='bbox 1166 2255 1204 2275; x_wconf 95'>ea</span>
|
||||
<span class='ocrx_word' id='word_1_354' title='bbox 1223 2245 1345 2275; x_wconf 95'>rebum.</span>
|
||||
<span class='ocrx_word' id='word_1_355' title='bbox 1380 2245 1449 2275; x_wconf 96'>Stet</span>
|
||||
<span class='ocrx_word' id='word_1_356' title='bbox 1470 2245 1546 2275; x_wconf 96'>clita</span>
|
||||
<span class='ocrx_word' id='word_1_357' title='bbox 1565 2245 1642 2275; x_wconf 96'>kasd</span>
|
||||
<span class='ocrx_word' id='word_1_358' title='bbox 1662 2245 1851 2283; x_wconf 95'>gubergren,</span>
|
||||
<span class='ocrx_word' id='word_1_359' title='bbox 1873 2255 1914 2275; x_wconf 96'>no</span>
|
||||
<span class='ocrx_word' id='word_1_360' title='bbox 1934 2255 1988 2275; x_wconf 96'>sea</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_34' title="bbox 558 2295 1460 2332; baseline 0 -7; x_size 37; x_descenders 7; x_ascenders 10">
|
||||
<span class='ocrx_word' id='word_1_361' title='bbox 558 2295 720 2325; x_wconf 96'>takimata</span>
|
||||
<span class='ocrx_word' id='word_1_362' title='bbox 735 2298 866 2325; x_wconf 96'>sanctus</span>
|
||||
<span class='ocrx_word' id='word_1_363' title='bbox 882 2299 930 2325; x_wconf 96'>est</span>
|
||||
<span class='ocrx_word' id='word_1_364' title='bbox 947 2296 1061 2325; x_wconf 96'>Lorem</span>
|
||||
<span class='ocrx_word' id='word_1_365' title='bbox 1077 2296 1184 2332; x_wconf 96'>ipsum</span>
|
||||
<span class='ocrx_word' id='word_1_366' title='bbox 1200 2295 1290 2325; x_wconf 96'>dolor</span>
|
||||
<span class='ocrx_word' id='word_1_367' title='bbox 1306 2296 1346 2325; x_wconf 96'>sit</span>
|
||||
<span class='ocrx_word' id='word_1_368' title='bbox 1364 2298 1460 2325; x_wconf 96'>amet.</span>
|
||||
</span>
|
||||
</p>
|
||||
|
||||
<p class='ocr_par' id='par_1_9' lang='eng' title="bbox 558 2344 1987 2532">
|
||||
<span class='ocr_line' id='line_1_35' title="bbox 621 2344 1987 2381; baseline 0.001 -8; x_size 37; x_descenders 7; x_ascenders 10">
|
||||
<span class='ocrx_word' id='word_1_369' title='bbox 621 2345 701 2374; x_wconf 96'>Duis</span>
|
||||
<span class='ocrx_word' id='word_1_370' title='bbox 716 2347 827 2374; x_wconf 96'>autem</span>
|
||||
<span class='ocrx_word' id='word_1_371' title='bbox 841 2344 890 2374; x_wconf 96'>vel</span>
|
||||
<span class='ocrx_word' id='word_1_372' title='bbox 905 2354 979 2374; x_wconf 96'>eum</span>
|
||||
<span class='ocrx_word' id='word_1_373' title='bbox 994 2345 1088 2374; x_wconf 96'>iriure</span>
|
||||
<span class='ocrx_word' id='word_1_374' title='bbox 1103 2344 1193 2374; x_wconf 96'>dolor</span>
|
||||
<span class='ocrx_word' id='word_1_375' title='bbox 1208 2345 1241 2373; x_wconf 96'>in</span>
|
||||
<span class='ocrx_word' id='word_1_376' title='bbox 1256 2344 1418 2374; x_wconf 95'>hendrerit</span>
|
||||
<span class='ocrx_word' id='word_1_377' title='bbox 1434 2345 1467 2373; x_wconf 95'>in</span>
|
||||
<span class='ocrx_word' id='word_1_378' title='bbox 1481 2344 1653 2381; x_wconf 96'>vulputate</span>
|
||||
<span class='ocrx_word' id='word_1_379' title='bbox 1668 2344 1743 2374; x_wconf 96'>velit</span>
|
||||
<span class='ocrx_word' id='word_1_380' title='bbox 1759 2354 1826 2374; x_wconf 96'>esse</span>
|
||||
<span class='ocrx_word' id='word_1_381' title='bbox 1841 2344 1987 2374; x_wconf 96'>molestie</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_36' title="bbox 559 2394 1987 2432; baseline 0 -8; x_size 38; x_descenders 8; x_ascenders 10">
|
||||
<span class='ocrx_word' id='word_1_382' title='bbox 559 2397 745 2431; x_wconf 96'>consequat,</span>
|
||||
<span class='ocrx_word' id='word_1_383' title='bbox 766 2394 815 2424; x_wconf 95'>vel</span>
|
||||
<span class='ocrx_word' id='word_1_384' title='bbox 835 2394 925 2424; x_wconf 96'>illum</span>
|
||||
<span class='ocrx_word' id='word_1_385' title='bbox 944 2394 1053 2424; x_wconf 96'>dolore</span>
|
||||
<span class='ocrx_word' id='word_1_386' title='bbox 1072 2404 1112 2424; x_wconf 96'>eu</span>
|
||||
<span class='ocrx_word' id='word_1_387' title='bbox 1131 2394 1251 2432; x_wconf 96'>feugiat</span>
|
||||
<span class='ocrx_word' id='word_1_388' title='bbox 1272 2394 1359 2424; x_wconf 96'>nulla</span>
|
||||
<span class='ocrx_word' id='word_1_389' title='bbox 1378 2394 1505 2424; x_wconf 96'>facilisis</span>
|
||||
<span class='ocrx_word' id='word_1_390' title='bbox 1526 2397 1559 2424; x_wconf 96'>at</span>
|
||||
<span class='ocrx_word' id='word_1_391' title='bbox 1579 2404 1653 2424; x_wconf 96'>vero</span>
|
||||
<span class='ocrx_word' id='word_1_392' title='bbox 1673 2404 1742 2424; x_wconf 96'>eros</span>
|
||||
<span class='ocrx_word' id='word_1_393' title='bbox 1762 2397 1793 2424; x_wconf 96'>et</span>
|
||||
<span class='ocrx_word' id='word_1_394' title='bbox 1814 2404 1987 2424; x_wconf 96'>accumsan</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_37' title="bbox 558 2444 1986 2482; baseline 0 -8; x_size 38; x_descenders 8; x_ascenders 10">
|
||||
<span class='ocrx_word' id='word_1_395' title='bbox 558 2447 590 2474; x_wconf 96'>et</span>
|
||||
<span class='ocrx_word' id='word_1_396' title='bbox 609 2445 695 2474; x_wconf 96'>iusto</span>
|
||||
<span class='ocrx_word' id='word_1_397' title='bbox 713 2444 788 2474; x_wconf 96'>odio</span>
|
||||
<span class='ocrx_word' id='word_1_398' title='bbox 807 2444 974 2482; x_wconf 96'>dignissim</span>
|
||||
<span class='ocrx_word' id='word_1_399' title='bbox 992 2445 1046 2481; x_wconf 96'>qui</span>
|
||||
<span class='ocrx_word' id='word_1_400' title='bbox 1064 2444 1190 2474; x_wconf 96'>blandit</span>
|
||||
<span class='ocrx_word' id='word_1_401' title='bbox 1210 2447 1358 2481; x_wconf 96'>praesent</span>
|
||||
<span class='ocrx_word' id='word_1_402' title='bbox 1378 2444 1544 2481; x_wconf 95'>luptatum</span>
|
||||
<span class='ocrx_word' id='word_1_403' title='bbox 1562 2444 1636 2473; x_wconf 95'>zzril</span>
|
||||
<span class='ocrx_word' id='word_1_404' title='bbox 1655 2444 1773 2474; x_wconf 96'>delenit</span>
|
||||
<span class='ocrx_word' id='word_1_405' title='bbox 1793 2454 1896 2482; x_wconf 96'>augue</span>
|
||||
<span class='ocrx_word' id='word_1_406' title='bbox 1915 2444 1986 2474; x_wconf 96'>duis</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_38' title="bbox 559 2494 1628 2532; baseline 0 -8; x_size 37; x_descenders 7; x_ascenders 10">
|
||||
<span class='ocrx_word' id='word_1_407' title='bbox 559 2494 667 2524; x_wconf 96'>dolore</span>
|
||||
<span class='ocrx_word' id='word_1_408' title='bbox 683 2497 715 2524; x_wconf 96'>te</span>
|
||||
<span class='ocrx_word' id='word_1_409' title='bbox 731 2494 851 2532; x_wconf 96'>feugait</span>
|
||||
<span class='ocrx_word' id='word_1_410' title='bbox 869 2494 956 2524; x_wconf 96'>nulla</span>
|
||||
<span class='ocrx_word' id='word_1_411' title='bbox 971 2494 1092 2524; x_wconf 96'>facilisi.</span>
|
||||
<span class='ocrx_word' id='word_1_412' title='bbox 1115 2495 1229 2524; x_wconf 96'>Lorem</span>
|
||||
<span class='ocrx_word' id='word_1_413' title='bbox 1245 2495 1352 2531; x_wconf 96'>ipsum</span>
|
||||
<span class='ocrx_word' id='word_1_414' title='bbox 1367 2494 1457 2524; x_wconf 96'>dolor</span>
|
||||
<span class='ocrx_word' id='word_1_415' title='bbox 1473 2495 1514 2524; x_wconf 96'>sit</span>
|
||||
<span class='ocrx_word' id='word_1_416' title='bbox 1532 2497 1628 2531; x_wconf 96'>amet,</span>
|
||||
</span>
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
420
crates/kreuzberg/test_data/hocr/v4_embedded_tables.hocr
Normal file
420
crates/kreuzberg/test_data/hocr/v4_embedded_tables.hocr
Normal file
@@ -0,0 +1,420 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
||||
<head>
|
||||
<title></title>
|
||||
<meta http-equiv="Content-Type" content="text/html;charset=utf-8"/>
|
||||
<meta name='ocr-system' content='tesseract 5.5.1' />
|
||||
<meta name='ocr-capabilities' content='ocr_page ocr_carea ocr_par ocr_line ocrx_word ocrp_dir ocrp_lang ocrp_wconf'/>
|
||||
</head>
|
||||
<body>
|
||||
<div class='ocr_page' id='page_1' title='image "/var/folders/88/hg20130j1lv82jbjnvw0yq5m0000gn/T/tess_it24i44o_input.PNG"; bbox 0 0 1949 2835; ppageno 0; scan_res 70 70'>
|
||||
<div class='ocr_carea' id='block_1_1' title="bbox 166 187 1315 214">
|
||||
<p class='ocr_par' id='par_1_1' lang='eng' title="bbox 166 187 1315 214">
|
||||
<span class='ocr_line' id='line_1_1' title="bbox 166 187 1315 214; baseline 0 -6; x_size 27; x_descenders 6; x_ascenders 6">
|
||||
<span class='ocrx_word' id='word_1_1' title='bbox 166 188 212 208; x_wconf 96'>454</span>
|
||||
<span class='ocrx_word' id='word_1_2' title='bbox 611 188 633 208; x_wconf 91'>O.</span>
|
||||
<span class='ocrx_word' id='word_1_3' title='bbox 644 188 716 211; x_wconf 93'>Sanni,</span>
|
||||
<span class='ocrx_word' id='word_1_4' title='bbox 726 188 781 208; x_wconf 57'>A.P.I.</span>
|
||||
<span class='ocrx_word' id='word_1_5' title='bbox 793 187 887 214; x_wconf 91'>Popoola</span>
|
||||
<span class='ocrx_word' id='word_1_6' title='bbox 896 187 907 214; x_wconf 93'>/</span>
|
||||
<span class='ocrx_word' id='word_1_7' title='bbox 916 188 972 208; x_wconf 96'>Data</span>
|
||||
<span class='ocrx_word' id='word_1_8' title='bbox 984 187 1005 208; x_wconf 96'>in</span>
|
||||
<span class='ocrx_word' id='word_1_9' title='bbox 1016 187 1110 214; x_wconf 94'>Brief</span>
|
||||
<span class='ocrx_word' id='word_1_10' title='bbox 1085 183 1112 218; x_wconf 94'>22</span>
|
||||
<span class='ocrx_word' id='word_1_11' title='bbox 1121 187 1198 213; x_wconf 96'>(2019)</span>
|
||||
<span class='ocrx_word' id='word_1_12' title='bbox 1208 188 1315 208; x_wconf 96'>451-457</span>
|
||||
</span>
|
||||
</p>
|
||||
</div>
|
||||
<div class='ocr_separator' id='block_1_2' title="bbox 582 291 586 834"></div>
|
||||
<div class='ocr_separator' id='block_1_3' title="bbox 586 834 1281 837"></div>
|
||||
<div class='ocr_carea' id='block_1_4' title="bbox 466 282 1459 900">
|
||||
<p class='ocr_par' id='par_1_2' lang='eng' title="bbox 466 282 1459 891">
|
||||
<span class='ocr_line' id='line_1_2' title="bbox 521 282 583 304; baseline 0 0; x_size 26.321428; x_descenders 4.3214288; x_ascenders 9.0357141">
|
||||
<span class='ocrx_word' id='word_1_13' title='bbox 521 282 583 304; x_wconf 62'>154</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_3' title="bbox 544 349 583 370; baseline 0 -8; x_size 15.553572; x_descenders 2.5535715; x_ascenders 5.3392859">
|
||||
<span class='ocrx_word' id='word_1_14' title='bbox 544 349 556 370; x_wconf 84'>1</span>
|
||||
<span class='ocrx_word' id='word_1_15' title='bbox 576 359 583 362; x_wconf 94'>4</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_4' title="bbox 518 418 583 440; baseline 0 0; x_size 26.321428; x_descenders 4.3214288; x_ascenders 9.0357141">
|
||||
<span class='ocrx_word' id='word_1_16' title='bbox 518 418 556 440; x_wconf 76'>05</span>
|
||||
<span class='ocrx_word' id='word_1_17' title='bbox 575 426 583 429; x_wconf 78'>4</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_5' title="bbox 466 464 1414 514; baseline 0 0; x_size 59.333336; x_descenders 9.333334; x_ascenders 22">
|
||||
<span class='ocrx_word' id='word_1_18' title='bbox 466 486 493 514; x_wconf 72'>S</span>
|
||||
<span class='ocrx_word' id='word_1_19' title='bbox 541 485 583 506; x_wconf 62'>of</span>
|
||||
<span class='ocrx_word' id='word_1_20' title='bbox 1399 464 1414 486; x_wconf 5'>°</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_6' title="bbox 466 536 584 586; baseline 0 -11; x_size 46.636364; x_descenders 11; x_ascenders 14.636363">
|
||||
<span class='ocrx_word' id='word_1_21' title='bbox 466 536 554 586; x_wconf 59'>2-05</span>
|
||||
<span class='ocrx_word' id='word_1_22' title='bbox 576 562 584 565; x_wconf 85'>|</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_7' title="bbox 468 582 1397 642; baseline 0 -23; x_size 33.5; x_descenders 5.5; x_ascenders 11.5">
|
||||
<span class='ocrx_word' id='word_1_23' title='bbox 468 590 488 619; x_wconf 85'>2</span>
|
||||
<span class='ocrx_word' id='word_1_24' title='bbox 544 621 556 642; x_wconf 0'>,</span>
|
||||
<span class='ocrx_word' id='word_1_25' title='bbox 1290 588 1356 596; x_wconf 64'>——</span>
|
||||
<span class='ocrx_word' id='word_1_26' title='bbox 1368 582 1397 609; x_wconf 76'>6</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_8' title="bbox 467 621 1397 671; baseline 0 -11; x_size 22; x_descenders 5; x_ascenders 6">
|
||||
<span class='ocrx_word' id='word_1_27' title='bbox 467 621 488 652; x_wconf 72'>é</span>
|
||||
<span class='ocrx_word' id='word_1_28' title='bbox 1290 652 1339 660; x_wconf 78'>—</span>
|
||||
<span class='ocrx_word' id='word_1_29' title='bbox 1369 643 1397 671; x_wconf 92'>2g</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_9' title="bbox 507 690 1459 727; baseline -0.001 0; x_size 32.333332; x_descenders 5.3333335; x_ascenders 11">
|
||||
<span class='ocrx_word' id='word_1_30' title='bbox 507 690 554 712; x_wconf 68'>15</span>
|
||||
<span class='ocrx_word' id='word_1_31' title='bbox 575 698 583 701; x_wconf 68'>4</span>
|
||||
<span class='ocrx_word' id='word_1_32' title='bbox 1290 714 1356 721; x_wconf 84'>—</span>
|
||||
<span class='ocrx_word' id='word_1_33' title='bbox 1368 703 1459 727; x_wconf 89'>Control</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_10' title="bbox 507 823 1185 846; baseline 0.001 -1; x_size 26.321428; x_descenders 4.3214288; x_ascenders 9.0357141">
|
||||
<span class='ocrx_word' id='word_1_34' title='bbox 507 823 554 845; x_wconf 84'>-2.5</span>
|
||||
<span class='ocrx_word' id='word_1_35' title='bbox 575 834 586 846; x_wconf 87'>+</span>
|
||||
<span class='ocrx_word' id='word_1_36' title='bbox 779 836 785 846; x_wconf 59'>T</span>
|
||||
<span class='ocrx_word' id='word_1_37' title='bbox 975 836 985 845; x_wconf 85'>+</span>
|
||||
<span class='ocrx_word' id='word_1_38' title='bbox 1174 836 1185 846; x_wconf 85'>+</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_11' title="bbox 518 866 1200 900; baseline 0 -9; x_size 25.125; x_descenders 4.125; x_ascenders 8.625">
|
||||
<span class='ocrx_word' id='word_1_39' title='bbox 518 866 650 900; x_wconf 90'>0.0000001</span>
|
||||
<span class='ocrx_word' id='word_1_40' title='bbox 735 866 834 900; x_wconf 96'>0.00001</span>
|
||||
<span class='ocrx_word' id='word_1_41' title='bbox 949 866 1019 900; x_wconf 96'>0.001</span>
|
||||
<span class='ocrx_word' id='word_1_42' title='bbox 1163 870 1200 891; x_wconf 95'>0.1</span>
|
||||
</span>
|
||||
</p>
|
||||
</div>
|
||||
<div class='ocr_carea' id='block_1_5' title="bbox 776 918 1086 948">
|
||||
<p class='ocr_par' id='par_1_3' lang='eng' title="bbox 776 918 1086 948">
|
||||
<span class='ocr_line' id='line_1_12' title="bbox 776 918 1086 948; baseline 0 -6; x_size 29; x_descenders 5; x_ascenders 8">
|
||||
<span class='ocrx_word' id='word_1_43' title='bbox 776 921 874 943; x_wconf 96'>Current</span>
|
||||
<span class='ocrx_word' id='word_1_44' title='bbox 880 919 974 948; x_wconf 93'>Density</span>
|
||||
<span class='ocrx_word' id='word_1_45' title='bbox 984 918 1086 947; x_wconf 92'>(A/cm2)</span>
|
||||
</span>
|
||||
</p>
|
||||
</div>
|
||||
<div class='ocr_carea' id='block_1_6' title="bbox 176 982 1748 1009">
|
||||
<p class='ocr_par' id='par_1_4' lang='eng' title="bbox 176 982 1748 1009">
|
||||
<span class='ocr_line' id='line_1_13' title="bbox 176 982 1748 1009; baseline 0 -6; x_size 27; x_descenders 6; x_ascenders 6">
|
||||
<span class='ocrx_word' id='word_1_46' title='bbox 176 982 221 1009; x_wconf 96'>Fig.</span>
|
||||
<span class='ocrx_word' id='word_1_47' title='bbox 231 983 253 1003; x_wconf 95'>4.</span>
|
||||
<span class='ocrx_word' id='word_1_48' title='bbox 267 982 355 1003; x_wconf 96'>Anodic</span>
|
||||
<span class='ocrx_word' id='word_1_49' title='bbox 365 982 410 1003; x_wconf 96'>and</span>
|
||||
<span class='ocrx_word' id='word_1_50' title='bbox 422 982 527 1003; x_wconf 96'>cathodic</span>
|
||||
<span class='ocrx_word' id='word_1_51' title='bbox 538 982 688 1009; x_wconf 96'>polarization</span>
|
||||
<span class='ocrx_word' id='word_1_52' title='bbox 699 988 766 1003; x_wconf 96'>curve</span>
|
||||
<span class='ocrx_word' id='word_1_53' title='bbox 778 982 803 1003; x_wconf 96'>of</span>
|
||||
<span class='ocrx_word' id='word_1_54' title='bbox 812 982 917 1003; x_wconf 96'>stainless</span>
|
||||
<span class='ocrx_word' id='word_1_55' title='bbox 929 982 985 1003; x_wconf 96'>steel</span>
|
||||
<span class='ocrx_word' id='word_1_56' title='bbox 997 982 1019 1003; x_wconf 96'>in</span>
|
||||
<span class='ocrx_word' id='word_1_57' title='bbox 1030 983 1066 1004; x_wconf 92'>0.5</span>
|
||||
<span class='ocrx_word' id='word_1_58' title='bbox 1074 983 1097 1003; x_wconf 92'>M</span>
|
||||
<span class='ocrx_word' id='word_1_59' title='bbox 1109 983 1186 1008; x_wconf 87'>H2SO,</span>
|
||||
<span class='ocrx_word' id='word_1_60' title='bbox 1197 982 1297 1003; x_wconf 96'>solution</span>
|
||||
<span class='ocrx_word' id='word_1_61' title='bbox 1308 982 1331 1003; x_wconf 96'>in</span>
|
||||
<span class='ocrx_word' id='word_1_62' title='bbox 1342 982 1381 1003; x_wconf 96'>the</span>
|
||||
<span class='ocrx_word' id='word_1_63' title='bbox 1393 988 1503 1009; x_wconf 96'>presence</span>
|
||||
<span class='ocrx_word' id='word_1_64' title='bbox 1514 982 1559 1003; x_wconf 95'>and</span>
|
||||
<span class='ocrx_word' id='word_1_65' title='bbox 1571 982 1669 1003; x_wconf 96'>absence</span>
|
||||
<span class='ocrx_word' id='word_1_66' title='bbox 1680 982 1706 1003; x_wconf 96'>of</span>
|
||||
<span class='ocrx_word' id='word_1_67' title='bbox 1714 983 1748 1004; x_wconf 96'>ES.</span>
|
||||
</span>
|
||||
</p>
|
||||
</div>
|
||||
<div class='ocr_carea' id='block_1_7' title="bbox 165 1074 1734 1488">
|
||||
<p class='ocr_par' id='par_1_5' lang='eng' title="bbox 165 1074 1734 1488">
|
||||
<span class='ocr_line' id='line_1_14' title="bbox 165 1074 257 1095; baseline 0 0; x_size 26.789474; x_descenders 5.789474; x_ascenders 6">
|
||||
<span class='ocrx_word' id='word_1_68' title='bbox 165 1074 234 1095; x_wconf 96'>Table</span>
|
||||
<span class='ocrx_word' id='word_1_69' title='bbox 246 1075 257 1095; x_wconf 96'>1</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_15' title="bbox 167 1110 1596 1137; baseline 0 -6; x_size 27; x_descenders 6; x_ascenders 6">
|
||||
<span class='ocrx_word' id='word_1_70' title='bbox 167 1110 379 1137; x_wconf 91'>Potentiodynamic</span>
|
||||
<span class='ocrx_word' id='word_1_71' title='bbox 390 1110 539 1137; x_wconf 96'>polarization</span>
|
||||
<span class='ocrx_word' id='word_1_72' title='bbox 550 1110 603 1131; x_wconf 96'>data</span>
|
||||
<span class='ocrx_word' id='word_1_73' title='bbox 614 1110 649 1131; x_wconf 97'>for</span>
|
||||
<span class='ocrx_word' id='word_1_74' title='bbox 659 1110 764 1131; x_wconf 96'>stainless</span>
|
||||
<span class='ocrx_word' id='word_1_75' title='bbox 776 1110 833 1131; x_wconf 96'>steel</span>
|
||||
<span class='ocrx_word' id='word_1_76' title='bbox 844 1110 867 1131; x_wconf 96'>in</span>
|
||||
<span class='ocrx_word' id='word_1_77' title='bbox 878 1110 917 1131; x_wconf 96'>the</span>
|
||||
<span class='ocrx_word' id='word_1_78' title='bbox 929 1110 1027 1131; x_wconf 96'>absence</span>
|
||||
<span class='ocrx_word' id='word_1_79' title='bbox 1038 1110 1083 1131; x_wconf 96'>and</span>
|
||||
<span class='ocrx_word' id='word_1_80' title='bbox 1095 1116 1205 1137; x_wconf 96'>presence</span>
|
||||
<span class='ocrx_word' id='word_1_81' title='bbox 1216 1110 1242 1131; x_wconf 96'>of</span>
|
||||
<span class='ocrx_word' id='word_1_82' title='bbox 1250 1111 1278 1131; x_wconf 96'>ES</span>
|
||||
<span class='ocrx_word' id='word_1_83' title='bbox 1290 1110 1312 1131; x_wconf 96'>in</span>
|
||||
<span class='ocrx_word' id='word_1_84' title='bbox 1323 1111 1359 1132; x_wconf 91'>0.5</span>
|
||||
<span class='ocrx_word' id='word_1_85' title='bbox 1367 1111 1390 1131; x_wconf 91'>M</span>
|
||||
<span class='ocrx_word' id='word_1_86' title='bbox 1402 1111 1478 1135; x_wconf 66'>H2SO,</span>
|
||||
<span class='ocrx_word' id='word_1_87' title='bbox 1489 1110 1596 1132; x_wconf 96'>solution.</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_16' title="bbox 191 1177 1665 1211; baseline 0 -9; x_size 26; x_descenders 5; x_ascenders 6">
|
||||
<span class='ocrx_word' id='word_1_88' title='bbox 191 1177 310 1211; x_wconf 96'>Inhibitor</span>
|
||||
<span class='ocrx_word' id='word_1_89' title='bbox 484 1181 514 1202; x_wconf 93'>be</span>
|
||||
<span class='ocrx_word' id='word_1_90' title='bbox 525 1181 615 1208; x_wconf 91'>(V/dec)</span>
|
||||
<span class='ocrx_word' id='word_1_91' title='bbox 678 1181 709 1202; x_wconf 93'>ba</span>
|
||||
<span class='ocrx_word' id='word_1_92' title='bbox 720 1181 810 1208; x_wconf 92'>(V/dec)</span>
|
||||
<span class='ocrx_word' id='word_1_93' title='bbox 874 1182 945 1202; x_wconf 90'>Ecorr</span>
|
||||
<span class='ocrx_word' id='word_1_94' title='bbox 955 1181 992 1207; x_wconf 92'>(V)</span>
|
||||
<span class='ocrx_word' id='word_1_95' title='bbox 1056 1181 1119 1202; x_wconf 93'>icorr</span>
|
||||
<span class='ocrx_word' id='word_1_96' title='bbox 1129 1177 1226 1208; x_wconf 53'>(A/cm)</span>
|
||||
<span class='ocrx_word' id='word_1_97' title='bbox 1290 1177 1451 1211; x_wconf 96'>Polarization</span>
|
||||
<span class='ocrx_word' id='word_1_98' title='bbox 1536 1181 1665 1202; x_wconf 96'>Corrosion</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_17' title="bbox 191 1216 1734 1243; baseline 0 -6; x_size 26; x_descenders 5; x_ascenders 6">
|
||||
<span class='ocrx_word' id='word_1_99' title='bbox 191 1216 376 1237; x_wconf 93'>concentration</span>
|
||||
<span class='ocrx_word' id='word_1_100' title='bbox 387 1216 421 1243; x_wconf 92'>(g)</span>
|
||||
<span class='ocrx_word' id='word_1_101' title='bbox 1290 1216 1421 1237; x_wconf 92'>resistance</span>
|
||||
<span class='ocrx_word' id='word_1_102' title='bbox 1432 1216 1472 1242; x_wconf 92'>(Q)</span>
|
||||
<span class='ocrx_word' id='word_1_103' title='bbox 1536 1219 1587 1237; x_wconf 93'>rate</span>
|
||||
<span class='ocrx_word' id='word_1_104' title='bbox 1598 1216 1734 1243; x_wconf 85'>(mm/year)</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_18' title="bbox 191 1288 1617 1309; baseline 0 -1; x_size 27.982456; x_descenders 5.9824562; x_ascenders 6.5">
|
||||
<span class='ocrx_word' id='word_1_105' title='bbox 191 1288 205 1308; x_wconf 88'>0</span>
|
||||
<span class='ocrx_word' id='word_1_106' title='bbox 485 1288 568 1309; x_wconf 96'>0.0335</span>
|
||||
<span class='ocrx_word' id='word_1_107' title='bbox 679 1288 762 1309; x_wconf 95'>0.0409</span>
|
||||
<span class='ocrx_word' id='word_1_108' title='bbox 878 1301 895 1302; x_wconf 0'>—</span>
|
||||
<span class='ocrx_word' id='word_1_109' title='bbox 901 1288 984 1309; x_wconf 0'>0.9393</span>
|
||||
<span class='ocrx_word' id='word_1_110' title='bbox 1056 1288 1140 1309; x_wconf 96'>0.0003</span>
|
||||
<span class='ocrx_word' id='word_1_111' title='bbox 1307 1288 1403 1309; x_wconf 96'>24.0910</span>
|
||||
<span class='ocrx_word' id='word_1_112' title='bbox 1536 1288 1617 1309; x_wconf 96'>2.8163</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_19' title="bbox 191 1324 1620 1345; baseline 0 -1; x_size 27.982456; x_descenders 5.9824562; x_ascenders 6.5">
|
||||
<span class='ocrx_word' id='word_1_113' title='bbox 191 1324 204 1344; x_wconf 96'>2</span>
|
||||
<span class='ocrx_word' id='word_1_114' title='bbox 487 1324 569 1345; x_wconf 96'>1.9460</span>
|
||||
<span class='ocrx_word' id='word_1_115' title='bbox 679 1324 762 1345; x_wconf 96'>0.0596</span>
|
||||
<span class='ocrx_word' id='word_1_116' title='bbox 878 1337 895 1338; x_wconf 0'>—</span>
|
||||
<span class='ocrx_word' id='word_1_117' title='bbox 901 1324 983 1345; x_wconf 0'>0.8276</span>
|
||||
<span class='ocrx_word' id='word_1_118' title='bbox 1056 1324 1141 1345; x_wconf 96'>0.0002</span>
|
||||
<span class='ocrx_word' id='word_1_119' title='bbox 1294 1324 1390 1345; x_wconf 96'>121.440</span>
|
||||
<span class='ocrx_word' id='word_1_120' title='bbox 1538 1324 1620 1345; x_wconf 95'>1.5054</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_20' title="bbox 190 1360 1617 1381; baseline 0 -1; x_size 27.982456; x_descenders 5.9824562; x_ascenders 6.5">
|
||||
<span class='ocrx_word' id='word_1_121' title='bbox 190 1360 205 1380; x_wconf 96'>4</span>
|
||||
<span class='ocrx_word' id='word_1_122' title='bbox 485 1360 565 1381; x_wconf 96'>0.0163</span>
|
||||
<span class='ocrx_word' id='word_1_123' title='bbox 679 1360 762 1381; x_wconf 96'>0.2369</span>
|
||||
<span class='ocrx_word' id='word_1_124' title='bbox 878 1373 895 1374; x_wconf 0'>—</span>
|
||||
<span class='ocrx_word' id='word_1_125' title='bbox 901 1360 984 1381; x_wconf 0'>0.8825</span>
|
||||
<span class='ocrx_word' id='word_1_126' title='bbox 1056 1360 1137 1381; x_wconf 96'>0.0001</span>
|
||||
<span class='ocrx_word' id='word_1_127' title='bbox 1305 1360 1382 1381; x_wconf 89'>42.121</span>
|
||||
<span class='ocrx_word' id='word_1_128' title='bbox 1536 1360 1617 1381; x_wconf 96'>0.9476</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_21' title="bbox 191 1395 1616 1416; baseline 0 -1; x_size 27.982456; x_descenders 5.9824562; x_ascenders 6.5">
|
||||
<span class='ocrx_word' id='word_1_129' title='bbox 191 1395 205 1415; x_wconf 96'>6</span>
|
||||
<span class='ocrx_word' id='word_1_130' title='bbox 485 1395 568 1416; x_wconf 96'>0.3233</span>
|
||||
<span class='ocrx_word' id='word_1_131' title='bbox 679 1395 762 1416; x_wconf 95'>0.0540</span>
|
||||
<span class='ocrx_word' id='word_1_132' title='bbox 878 1408 895 1409; x_wconf 0'>—</span>
|
||||
<span class='ocrx_word' id='word_1_133' title='bbox 901 1395 984 1416; x_wconf 0'>0.8027</span>
|
||||
<span class='ocrx_word' id='word_1_134' title='bbox 1056 1395 1166 1416; x_wconf 92'>5.39E-05</span>
|
||||
<span class='ocrx_word' id='word_1_135' title='bbox 1292 1395 1386 1416; x_wconf 96'>373.180</span>
|
||||
<span class='ocrx_word' id='word_1_136' title='bbox 1536 1395 1616 1416; x_wconf 96'>0.4318</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_22' title="bbox 191 1431 1617 1452; baseline 0 -1; x_size 27.982456; x_descenders 5.9824562; x_ascenders 6.5">
|
||||
<span class='ocrx_word' id='word_1_137' title='bbox 191 1431 204 1451; x_wconf 96'>8</span>
|
||||
<span class='ocrx_word' id='word_1_138' title='bbox 485 1431 566 1452; x_wconf 96'>0.1240</span>
|
||||
<span class='ocrx_word' id='word_1_139' title='bbox 679 1431 762 1452; x_wconf 96'>0.0556</span>
|
||||
<span class='ocrx_word' id='word_1_140' title='bbox 878 1444 895 1445; x_wconf 0'>—</span>
|
||||
<span class='ocrx_word' id='word_1_141' title='bbox 901 1431 985 1452; x_wconf 0'>0.5896</span>
|
||||
<span class='ocrx_word' id='word_1_142' title='bbox 1056 1431 1166 1452; x_wconf 92'>5.46E-05</span>
|
||||
<span class='ocrx_word' id='word_1_143' title='bbox 1290 1431 1390 1452; x_wconf 96'>305.650</span>
|
||||
<span class='ocrx_word' id='word_1_144' title='bbox 1536 1431 1617 1452; x_wconf 96'>0.3772</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_23' title="bbox 193 1467 1617 1488; baseline 0 -1; x_size 27.982456; x_descenders 5.9824562; x_ascenders 6.5">
|
||||
<span class='ocrx_word' id='word_1_145' title='bbox 193 1467 219 1487; x_wconf 96'>10</span>
|
||||
<span class='ocrx_word' id='word_1_146' title='bbox 485 1467 568 1488; x_wconf 96'>0.0382</span>
|
||||
<span class='ocrx_word' id='word_1_147' title='bbox 679 1467 763 1488; x_wconf 96'>0.0086</span>
|
||||
<span class='ocrx_word' id='word_1_148' title='bbox 878 1467 985 1488; x_wconf 87'>—0.5356</span>
|
||||
<span class='ocrx_word' id='word_1_149' title='bbox 1058 1467 1163 1488; x_wconf 90'>1.24E-05</span>
|
||||
<span class='ocrx_word' id='word_1_150' title='bbox 1291 1467 1390 1488; x_wconf 96'>246.080</span>
|
||||
<span class='ocrx_word' id='word_1_151' title='bbox 1536 1467 1617 1488; x_wconf 96'>0.0919</span>
|
||||
</span>
|
||||
</p>
|
||||
</div>
|
||||
<div class='ocr_carea' id='block_1_8' title="bbox 165 1576 1759 1940">
|
||||
<p class='ocr_par' id='par_1_6' lang='eng' title="bbox 165 1576 1759 1940">
|
||||
<span class='ocr_line' id='line_1_24' title="bbox 215 1576 1758 1609; baseline 0 -7; x_size 33; x_descenders 7; x_ascenders 8">
|
||||
<span class='ocrx_word' id='word_1_152' title='bbox 215 1576 272 1602; x_wconf 96'>The</span>
|
||||
<span class='ocrx_word' id='word_1_153' title='bbox 285 1576 346 1609; x_wconf 96'>plot</span>
|
||||
<span class='ocrx_word' id='word_1_154' title='bbox 357 1576 389 1602; x_wconf 96'>of</span>
|
||||
<span class='ocrx_word' id='word_1_155' title='bbox 398 1576 534 1602; x_wconf 96'>inhibitor</span>
|
||||
<span class='ocrx_word' id='word_1_156' title='bbox 545 1576 761 1602; x_wconf 96'>concentration</span>
|
||||
<span class='ocrx_word' id='word_1_157' title='bbox 774 1584 842 1602; x_wconf 96'>over</span>
|
||||
<span class='ocrx_word' id='word_1_158' title='bbox 853 1576 958 1609; x_wconf 96'>degree</span>
|
||||
<span class='ocrx_word' id='word_1_159' title='bbox 970 1576 1003 1602; x_wconf 96'>of</span>
|
||||
<span class='ocrx_word' id='word_1_160' title='bbox 1011 1576 1121 1602; x_wconf 96'>surface</span>
|
||||
<span class='ocrx_word' id='word_1_161' title='bbox 1134 1584 1271 1609; x_wconf 96'>coverage</span>
|
||||
<span class='ocrx_word' id='word_1_162' title='bbox 1282 1584 1382 1602; x_wconf 96'>versus</span>
|
||||
<span class='ocrx_word' id='word_1_163' title='bbox 1395 1576 1531 1602; x_wconf 96'>inhibitor</span>
|
||||
<span class='ocrx_word' id='word_1_164' title='bbox 1542 1576 1758 1602; x_wconf 96'>concentration</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_25' title="bbox 166 1623 1758 1656; baseline 0 -7; x_size 33; x_descenders 7; x_ascenders 8">
|
||||
<span class='ocrx_word' id='word_1_165' title='bbox 166 1623 244 1656; x_wconf 96'>gives</span>
|
||||
<span class='ocrx_word' id='word_1_166' title='bbox 259 1631 275 1649; x_wconf 96'>a</span>
|
||||
<span class='ocrx_word' id='word_1_167' title='bbox 290 1623 410 1656; x_wconf 96'>straight</span>
|
||||
<span class='ocrx_word' id='word_1_168' title='bbox 425 1623 481 1649; x_wconf 96'>line</span>
|
||||
<span class='ocrx_word' id='word_1_169' title='bbox 496 1631 526 1649; x_wconf 96'>as</span>
|
||||
<span class='ocrx_word' id='word_1_170' title='bbox 542 1623 646 1649; x_wconf 96'>shown</span>
|
||||
<span class='ocrx_word' id='word_1_171' title='bbox 661 1623 689 1649; x_wconf 96'>in</span>
|
||||
<span class='ocrx_word' id='word_1_172' title='bbox 705 1623 756 1656; x_wconf 96'>Fig.</span>
|
||||
<span class='ocrx_word' id='word_1_173' title='bbox 772 1624 796 1650; x_wconf 96'>5.</span>
|
||||
<span class='ocrx_word' id='word_1_174' title='bbox 811 1623 868 1649; x_wconf 95'>The</span>
|
||||
<span class='ocrx_word' id='word_1_175' title='bbox 884 1627 982 1656; x_wconf 96'>strong</span>
|
||||
<span class='ocrx_word' id='word_1_176' title='bbox 997 1623 1167 1649; x_wconf 96'>correlation</span>
|
||||
<span class='ocrx_word' id='word_1_177' title='bbox 1182 1623 1290 1649; x_wconf 96'>reveals</span>
|
||||
<span class='ocrx_word' id='word_1_178' title='bbox 1305 1623 1368 1649; x_wconf 96'>that</span>
|
||||
<span class='ocrx_word' id='word_1_179' title='bbox 1382 1631 1436 1656; x_wconf 96'>egg</span>
|
||||
<span class='ocrx_word' id='word_1_180' title='bbox 1451 1623 1522 1649; x_wconf 96'>shell</span>
|
||||
<span class='ocrx_word' id='word_1_181' title='bbox 1537 1623 1705 1656; x_wconf 96'>adsorption</span>
|
||||
<span class='ocrx_word' id='word_1_182' title='bbox 1720 1631 1758 1649; x_wconf 96'>on</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_26' title="bbox 167 1670 1759 1703; baseline 0 -7; x_size 33; x_descenders 7; x_ascenders 8">
|
||||
<span class='ocrx_word' id='word_1_183' title='bbox 167 1670 298 1696; x_wconf 96'>stainless</span>
|
||||
<span class='ocrx_word' id='word_1_184' title='bbox 313 1670 423 1696; x_wconf 96'>surface</span>
|
||||
<span class='ocrx_word' id='word_1_185' title='bbox 437 1670 466 1696; x_wconf 96'>in</span>
|
||||
<span class='ocrx_word' id='word_1_186' title='bbox 480 1671 524 1697; x_wconf 87'>0.5</span>
|
||||
<span class='ocrx_word' id='word_1_187' title='bbox 534 1671 563 1696; x_wconf 87'>M</span>
|
||||
<span class='ocrx_word' id='word_1_188' title='bbox 578 1671 674 1702; x_wconf 85'>H2SO,</span>
|
||||
<span class='ocrx_word' id='word_1_189' title='bbox 688 1670 785 1696; x_wconf 96'>follow</span>
|
||||
<span class='ocrx_word' id='word_1_190' title='bbox 799 1670 947 1703; x_wconf 95'>Langmuir</span>
|
||||
<span class='ocrx_word' id='word_1_191' title='bbox 960 1670 1127 1703; x_wconf 95'>adsorption</span>
|
||||
<span class='ocrx_word' id='word_1_192' title='bbox 1142 1670 1288 1697; x_wconf 96'>isotherm.</span>
|
||||
<span class='ocrx_word' id='word_1_193' title='bbox 1303 1670 1369 1703; x_wconf 96'>Figs.</span>
|
||||
<span class='ocrx_word' id='word_1_194' title='bbox 1384 1671 1440 1696; x_wconf 80'>6-8</span>
|
||||
<span class='ocrx_word' id='word_1_195' title='bbox 1455 1670 1538 1696; x_wconf 96'>show</span>
|
||||
<span class='ocrx_word' id='word_1_196' title='bbox 1550 1670 1600 1696; x_wconf 93'>the</span>
|
||||
<span class='ocrx_word' id='word_1_197' title='bbox 1614 1670 1759 1703; x_wconf 91'>SEM/EDX</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_27' title="bbox 167 1718 1758 1751; baseline 0 -7; x_size 33; x_descenders 7; x_ascenders 8">
|
||||
<span class='ocrx_word' id='word_1_198' title='bbox 167 1718 277 1744; x_wconf 96'>surface</span>
|
||||
<span class='ocrx_word' id='word_1_199' title='bbox 290 1718 481 1751; x_wconf 96'>morphology</span>
|
||||
<span class='ocrx_word' id='word_1_200' title='bbox 493 1718 615 1751; x_wconf 94'>analysis</span>
|
||||
<span class='ocrx_word' id='word_1_201' title='bbox 629 1718 661 1744; x_wconf 96'>of</span>
|
||||
<span class='ocrx_word' id='word_1_202' title='bbox 671 1718 802 1744; x_wconf 96'>stainless</span>
|
||||
<span class='ocrx_word' id='word_1_203' title='bbox 816 1718 894 1745; x_wconf 96'>steel.</span>
|
||||
<span class='ocrx_word' id='word_1_204' title='bbox 908 1718 974 1751; x_wconf 96'>Figs.</span>
|
||||
<span class='ocrx_word' id='word_1_205' title='bbox 999 1719 1005 1744; x_wconf 96'>7</span>
|
||||
<span class='ocrx_word' id='word_1_206' title='bbox 1019 1718 1075 1744; x_wconf 96'>and</span>
|
||||
<span class='ocrx_word' id='word_1_207' title='bbox 1089 1719 1105 1744; x_wconf 95'>8</span>
|
||||
<span class='ocrx_word' id='word_1_208' title='bbox 1120 1726 1167 1744; x_wconf 95'>are</span>
|
||||
<span class='ocrx_word' id='word_1_209' title='bbox 1179 1718 1229 1744; x_wconf 93'>the</span>
|
||||
<span class='ocrx_word' id='word_1_210' title='bbox 1242 1718 1388 1751; x_wconf 93'>SEM/EDX</span>
|
||||
<span class='ocrx_word' id='word_1_211' title='bbox 1400 1718 1508 1751; x_wconf 96'>images</span>
|
||||
<span class='ocrx_word' id='word_1_212' title='bbox 1522 1718 1554 1744; x_wconf 96'>of</span>
|
||||
<span class='ocrx_word' id='word_1_213' title='bbox 1563 1718 1613 1744; x_wconf 96'>the</span>
|
||||
<span class='ocrx_word' id='word_1_214' title='bbox 1626 1718 1758 1744; x_wconf 96'>stainless</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_28' title="bbox 167 1765 1758 1798; baseline 0 -7; x_size 33; x_descenders 7; x_ascenders 8">
|
||||
<span class='ocrx_word' id='word_1_215' title='bbox 167 1765 238 1791; x_wconf 96'>steel</span>
|
||||
<span class='ocrx_word' id='word_1_216' title='bbox 252 1765 414 1798; x_wconf 96'>specimens</span>
|
||||
<span class='ocrx_word' id='word_1_217' title='bbox 428 1765 553 1791; x_wconf 96'>without</span>
|
||||
<span class='ocrx_word' id='word_1_218' title='bbox 566 1765 622 1791; x_wconf 96'>and</span>
|
||||
<span class='ocrx_word' id='word_1_219' title='bbox 636 1765 708 1791; x_wconf 96'>with</span>
|
||||
<span class='ocrx_word' id='word_1_220' title='bbox 722 1765 858 1791; x_wconf 96'>inhibitor</span>
|
||||
<span class='ocrx_word' id='word_1_221' title='bbox 871 1765 943 1791; x_wconf 95'>after</span>
|
||||
<span class='ocrx_word' id='word_1_222' title='bbox 955 1765 1064 1798; x_wconf 96'>weight</span>
|
||||
<span class='ocrx_word' id='word_1_223' title='bbox 1077 1765 1134 1791; x_wconf 96'>loss</span>
|
||||
<span class='ocrx_word' id='word_1_224' title='bbox 1148 1765 1328 1798; x_wconf 96'>experiment</span>
|
||||
<span class='ocrx_word' id='word_1_225' title='bbox 1342 1765 1371 1791; x_wconf 96'>in</span>
|
||||
<span class='ocrx_word' id='word_1_226' title='bbox 1385 1765 1530 1798; x_wconf 96'>sulphuric</span>
|
||||
<span class='ocrx_word' id='word_1_227' title='bbox 1544 1765 1606 1791; x_wconf 96'>acid</span>
|
||||
<span class='ocrx_word' id='word_1_228' title='bbox 1621 1765 1758 1792; x_wconf 96'>medium.</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_29' title="bbox 165 1812 1758 1845; baseline 0 -7; x_size 33; x_descenders 7; x_ascenders 8">
|
||||
<span class='ocrx_word' id='word_1_229' title='bbox 165 1812 222 1838; x_wconf 96'>The</span>
|
||||
<span class='ocrx_word' id='word_1_230' title='bbox 238 1812 369 1838; x_wconf 96'>stainless</span>
|
||||
<span class='ocrx_word' id='word_1_231' title='bbox 386 1812 457 1838; x_wconf 96'>steel</span>
|
||||
<span class='ocrx_word' id='word_1_232' title='bbox 473 1812 582 1838; x_wconf 95'>surface</span>
|
||||
<span class='ocrx_word' id='word_1_233' title='bbox 598 1812 744 1838; x_wconf 96'>corrosion</span>
|
||||
<span class='ocrx_word' id='word_1_234' title='bbox 759 1812 880 1845; x_wconf 96'>product</span>
|
||||
<span class='ocrx_word' id='word_1_235' title='bbox 896 1812 971 1845; x_wconf 96'>layer</span>
|
||||
<span class='ocrx_word' id='word_1_236' title='bbox 986 1812 1015 1838; x_wconf 96'>in</span>
|
||||
<span class='ocrx_word' id='word_1_237' title='bbox 1030 1812 1079 1838; x_wconf 96'>the</span>
|
||||
<span class='ocrx_word' id='word_1_238' title='bbox 1095 1812 1218 1838; x_wconf 96'>absence</span>
|
||||
<span class='ocrx_word' id='word_1_239' title='bbox 1234 1812 1266 1838; x_wconf 96'>of</span>
|
||||
<span class='ocrx_word' id='word_1_240' title='bbox 1278 1812 1414 1838; x_wconf 96'>inhibitor</span>
|
||||
<span class='ocrx_word' id='word_1_241' title='bbox 1427 1820 1487 1838; x_wconf 96'>was</span>
|
||||
<span class='ocrx_word' id='word_1_242' title='bbox 1503 1820 1609 1845; x_wconf 96'>porous</span>
|
||||
<span class='ocrx_word' id='word_1_243' title='bbox 1625 1812 1681 1838; x_wconf 96'>and</span>
|
||||
<span class='ocrx_word' id='word_1_244' title='bbox 1697 1820 1727 1838; x_wconf 95'>as</span>
|
||||
<span class='ocrx_word' id='word_1_245' title='bbox 1743 1820 1758 1838; x_wconf 95'>a</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_30' title="bbox 167 1860 1758 1893; baseline 0 -7; x_size 33; x_descenders 7; x_ascenders 8">
|
||||
<span class='ocrx_word' id='word_1_246' title='bbox 167 1860 255 1886; x_wconf 96'>result</span>
|
||||
<span class='ocrx_word' id='word_1_247' title='bbox 265 1860 343 1893; x_wconf 96'>gives</span>
|
||||
<span class='ocrx_word' id='word_1_248' title='bbox 356 1868 393 1886; x_wconf 96'>no</span>
|
||||
<span class='ocrx_word' id='word_1_249' title='bbox 406 1860 552 1886; x_wconf 96'>corrosion</span>
|
||||
<span class='ocrx_word' id='word_1_250' title='bbox 563 1860 730 1893; x_wconf 96'>protection.</span>
|
||||
<span class='ocrx_word' id='word_1_251' title='bbox 742 1860 820 1886; x_wconf 96'>With</span>
|
||||
<span class='ocrx_word' id='word_1_252' title='bbox 832 1860 881 1886; x_wconf 96'>the</span>
|
||||
<span class='ocrx_word' id='word_1_253' title='bbox 893 1868 1031 1893; x_wconf 96'>presence</span>
|
||||
<span class='ocrx_word' id='word_1_254' title='bbox 1043 1860 1075 1886; x_wconf 96'>of</span>
|
||||
<span class='ocrx_word' id='word_1_255' title='bbox 1083 1861 1126 1890; x_wconf 96'>ES,</span>
|
||||
<span class='ocrx_word' id='word_1_256' title='bbox 1138 1860 1284 1886; x_wconf 96'>corrosion</span>
|
||||
<span class='ocrx_word' id='word_1_257' title='bbox 1296 1860 1418 1893; x_wconf 93'>damage</span>
|
||||
<span class='ocrx_word' id='word_1_258' title='bbox 1428 1868 1488 1886; x_wconf 95'>was</span>
|
||||
<span class='ocrx_word' id='word_1_259' title='bbox 1501 1860 1675 1890; x_wconf 96'>minimized,</span>
|
||||
<span class='ocrx_word' id='word_1_260' title='bbox 1687 1860 1758 1886; x_wconf 96'>with</span>
|
||||
</span>
|
||||
<span class='ocr_line' id='line_1_31' title="bbox 167 1907 1225 1940; baseline 0 -7; x_size 33; x_descenders 7; x_ascenders 8">
|
||||
<span class='ocrx_word' id='word_1_261' title='bbox 167 1915 203 1933; x_wconf 96'>an</span>
|
||||
<span class='ocrx_word' id='word_1_262' title='bbox 217 1907 354 1933; x_wconf 96'>evidence</span>
|
||||
<span class='ocrx_word' id='word_1_263' title='bbox 369 1907 401 1933; x_wconf 96'>of</span>
|
||||
<span class='ocrx_word' id='word_1_264' title='bbox 411 1908 446 1933; x_wconf 96'>ES</span>
|
||||
<span class='ocrx_word' id='word_1_265' title='bbox 460 1911 577 1940; x_wconf 96'>present</span>
|
||||
<span class='ocrx_word' id='word_1_266' title='bbox 590 1915 628 1933; x_wconf 96'>on</span>
|
||||
<span class='ocrx_word' id='word_1_267' title='bbox 642 1907 691 1933; x_wconf 96'>the</span>
|
||||
<span class='ocrx_word' id='word_1_268' title='bbox 705 1907 792 1933; x_wconf 96'>metal</span>
|
||||
<span class='ocrx_word' id='word_1_269' title='bbox 807 1907 916 1933; x_wconf 96'>surface</span>
|
||||
<span class='ocrx_word' id='word_1_270' title='bbox 931 1915 960 1933; x_wconf 96'>as</span>
|
||||
<span class='ocrx_word' id='word_1_271' title='bbox 975 1907 1078 1933; x_wconf 95'>shown</span>
|
||||
<span class='ocrx_word' id='word_1_272' title='bbox 1092 1907 1121 1933; x_wconf 95'>in</span>
|
||||
<span class='ocrx_word' id='word_1_273' title='bbox 1135 1907 1186 1940; x_wconf 94'>Fig.</span>
|
||||
<span class='ocrx_word' id='word_1_274' title='bbox 1200 1908 1225 1934; x_wconf 96'>8.</span>
|
||||
</span>
|
||||
</p>
|
||||
</div>
|
||||
<div class='ocr_photo' id='block_1_9' title="bbox 465 2101 1329 2448"></div>
|
||||
<div class='ocr_separator' id='block_1_10' title="bbox 560 2047 562 2481"></div>
|
||||
<div class='ocr_carea' id='block_1_11' title="bbox 523 2461 1337 2513">
|
||||
<p class='ocr_par' id='par_1_7' lang='eng' title="bbox 523 2461 1337 2513">
|
||||
<span class='ocr_caption' id='line_1_32' title="bbox 523 2461 1325 2481; baseline 0.001 -1; x_size 25.6; x_descenders 6.4000001; x_ascenders 6.4000001">
|
||||
<span class='ocrx_word' id='word_1_275' title='bbox 523 2461 536 2480; x_wconf 97'>2</span>
|
||||
<span class='ocrx_word' id='word_1_276' title='bbox 750 2473 752 2481; x_wconf 68'>7</span>
|
||||
<span class='ocrx_word' id='word_1_277' title='bbox 941 2473 943 2481; x_wconf 47'>T</span>
|
||||
<span class='ocrx_word' id='word_1_278' title='bbox 1132 2473 1134 2481; x_wconf 69'>T</span>
|
||||
<span class='ocrx_word' id='word_1_279' title='bbox 1227 2471 1230 2477; x_wconf 27'>7</span>
|
||||
<span class='ocrx_word' id='word_1_280' title='bbox 1323 2473 1325 2481; x_wconf 90'>1</span>
|
||||
</span>
|
||||
<span class='ocr_caption' id='line_1_33' title="bbox 554 2494 1337 2513; baseline 0 0; x_size 25.6; x_descenders 6.4000001; x_ascenders 6.4000001">
|
||||
<span class='ocrx_word' id='word_1_281' title='bbox 554 2494 566 2513; x_wconf 97'>2</span>
|
||||
<span class='ocrx_word' id='word_1_282' title='bbox 745 2495 757 2513; x_wconf 96'>4</span>
|
||||
<span class='ocrx_word' id='word_1_283' title='bbox 936 2494 948 2513; x_wconf 96'>6</span>
|
||||
<span class='ocrx_word' id='word_1_284' title='bbox 1127 2494 1139 2513; x_wconf 93'>8</span>
|
||||
<span class='ocrx_word' id='word_1_285' title='bbox 1312 2494 1337 2513; x_wconf 96'>10</span>
|
||||
</span>
|
||||
</p>
|
||||
</div>
|
||||
<div class='ocr_carea' id='block_1_12' title="bbox 844 2535 1041 2559">
|
||||
<p class='ocr_par' id='par_1_8' lang='eng' title="bbox 844 2535 1041 2559">
|
||||
<span class='ocr_caption' id='line_1_34' title="bbox 844 2535 1041 2559; baseline 0.005 -5; x_size 25.310345; x_descenders 5.3103447; x_ascenders 6">
|
||||
<span class='ocrx_word' id='word_1_286' title='bbox 844 2535 1001 2554; x_wconf 93'>Concentration</span>
|
||||
<span class='ocrx_word' id='word_1_287' title='bbox 1011 2535 1041 2559; x_wconf 92'>(g)</span>
|
||||
</span>
|
||||
</p>
|
||||
</div>
|
||||
<div class='ocr_separator' id='block_1_13' title="bbox 548 2470 1325 2474"></div>
|
||||
<div class='ocr_carea' id='block_1_14' title="bbox 683 2594 1241 2621">
|
||||
<p class='ocr_par' id='par_1_9' lang='eng' title="bbox 683 2594 1241 2621">
|
||||
<span class='ocr_line' id='line_1_35' title="bbox 683 2594 1241 2621; baseline 0 -6; x_size 27; x_descenders 6; x_ascenders 6">
|
||||
<span class='ocrx_word' id='word_1_288' title='bbox 683 2594 729 2621; x_wconf 96'>Fig.</span>
|
||||
<span class='ocrx_word' id='word_1_289' title='bbox 739 2595 760 2615; x_wconf 96'>5.</span>
|
||||
<span class='ocrx_word' id='word_1_290' title='bbox 776 2594 895 2621; x_wconf 96'>Langmuir</span>
|
||||
<span class='ocrx_word' id='word_1_291' title='bbox 905 2594 1039 2621; x_wconf 96'>adsorption</span>
|
||||
<span class='ocrx_word' id='word_1_292' title='bbox 1051 2594 1162 2615; x_wconf 95'>isotherm</span>
|
||||
<span class='ocrx_word' id='word_1_293' title='bbox 1173 2594 1199 2615; x_wconf 95'>of</span>
|
||||
<span class='ocrx_word' id='word_1_294' title='bbox 1207 2595 1241 2616; x_wconf 96'>ES.</span>
|
||||
</span>
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
21
crates/kreuzberg/test_data/hocr/word_confidence.hocr
Normal file
21
crates/kreuzberg/test_data/hocr/word_confidence.hocr
Normal file
@@ -0,0 +1,21 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
||||
<head>
|
||||
<title></title>
|
||||
<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
|
||||
<meta name='ocr-system' content='tesseract 4.0.0-beta.1' />
|
||||
<meta name='ocr-capabilities' content='ocr_page ocr_carea ocr_par ocr_line ocrx_word'/>
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<p id="no_confidence">Foo</p>
|
||||
<p id="x_wconf_given" title="x_wconf 80">Foo</p>
|
||||
<p id="malformed_x_wconf" title="x_wconf eighty">Foo</p>
|
||||
<p id="x_confs_given" title="x_confs 20 7 90">Foo</p>
|
||||
<p id="malformed_x_confs" title="x_confs a b c">Foo</p>
|
||||
<p id="x_wconf_and_x_confs" title="x_wconf 80; x_confs 20 5 90">Foo</p>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
Reference in New Issue
Block a user