20 lines
456 B
Markdown
20 lines
456 B
Markdown
```elixir title="Elixir"
# Preprocessing options, nested under the Tesseract configuration below.
preprocessing = %{
  "target_dpi" => 300,
  "denoise" => true,
  "deskew" => true,
  "contrast_enhance" => true,
  "binarization_method" => "otsu"
}

# OCR section: selects the "tesseract" backend and attaches the preprocessing map.
ocr = %{
  "backend" => "tesseract",
  "tesseract_config" => %{"preprocessing" => preprocessing}
}

# Serialize the whole configuration to a JSON string.
config_json = Jason.encode!(%{"ocr" => ocr})

# Matching on {:ok, result} raises a MatchError if the call returns anything else.
{:ok, result} = Kreuzberg.extract_file_sync("scanned.pdf", "application/pdf", config_json)

# Print the extracted content.
result.content |> IO.puts()
```