Nomad changes
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s

This commit is contained in:
Henrik Jess Nielsen
2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions

View File

@@ -0,0 +1,13 @@
{
"extends": ["@commitlint/config-conventional"],
"rules": {
"body-max-line-length": [2, "always", 100],
"header-max-length": [2, "always", 100],
"subject-case": [2, "never", ["sentence-case", "start-case", "pascal-case", "upper-case"]],
"type-enum": [
2,
"always",
["feat", "fix", "docs", "style", "refactor", "perf", "test", "build", "ci", "chore", "revert"]
]
}
}

View File

@@ -0,0 +1,2 @@
/third_party/
/tessdata/

2933
crates/kreuzberg-tesseract/Cargo.lock generated Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,64 @@
[package]
name = "kreuzberg-tesseract"
version.workspace = true
edition.workspace = true
rust-version.workspace = true
authors.workspace = true
description = "Rust bindings for Tesseract OCR with cross-compilation, C++17, and caching improvements"
license = "MIT"
repository.workspace = true
homepage = "https://kreuzberg.dev"
documentation = "https://docs.kreuzberg.dev"
readme = "README.md"
keywords = ["tesseract", "ocr", "bindings", "vision", "recognition"]
categories = ["external-ffi-bindings", "computer-vision", "text-processing"]
build = "build.rs"
links = "kreuzberg_tesseract"
exclude = ["tessdata/*", "third_party/*"]
[package.metadata.docs.rs]
features = ["docs-only"]
rustdoc-args = ["--cfg", "docsrs"]
[package.metadata.cargo-machete]
ignored = ["cc", "cmake", "reqwest", "zip"]
[lib]
name = "kreuzberg_tesseract"
crate-type = ["lib"]
[features]
# With default features enabled, `static-linking` (and through it `build-tesseract`) is active.
default = ["static-linking"]
# Turns on the optional build dependencies `cc`, `cmake`, `reqwest` and `zip`.
build-tesseract = ["cc", "cmake", "reqwest", "zip"]
# Same optional build dependencies as `build-tesseract`, minus `cc`.
build-tesseract-wasm = ["cmake", "reqwest", "zip"]
# Bundle eng.traineddata into the compiled crate so WASM builds can run OCR
# without runtime tessdata loading. Uses ~4 MB of binary size (tessdata_fast).
bundle-tessdata-eng = []
# Implies `build-tesseract`.
static-linking = ["build-tesseract"]
# Enables no optional build dependencies; pair with `default-features = false`
# to avoid pulling in `static-linking` as well.
dynamic-linking = []
[dependencies]
thiserror = { workspace = true }
[build-dependencies]
# All entries are optional; they are only activated through the
# `build-tesseract` / `build-tesseract-wasm` features.
cc = { version = "^1.2.63", optional = true }
cmake = { version = "0.1.58", optional = true }
# NOTE(review): `>=7.0.0` has no upper bound, so any future major release of
# `zip` satisfies it. Consider a caret requirement once the supported range is
# confirmed against the workspace lockfile.
zip = { version = ">=7.0.0", optional = true, default-features = false, features = [
"deflate-flate2-zlib-rs",
] }
[target.'cfg(not(target_os = "windows"))'.build-dependencies]
reqwest = { workspace = true, default-features = false, features = [
"blocking",
"rustls",
], optional = true }
# Use native-tls on Windows to avoid aws-lc-sys CMake build issues with MinGW
[target.'cfg(target_os = "windows")'.build-dependencies]
reqwest = { workspace = true, default-features = false, features = [
"blocking",
"native-tls",
], optional = true }
[dev-dependencies]
image = { workspace = true, features = ["png"] }

View File

@@ -0,0 +1,22 @@
MIT License
Copyright (c) 2024 Cafer Can Gündoğdu
Copyright (c) 2025 Na'aman Hirschfeld
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@@ -0,0 +1,405 @@
# kreuzberg-tesseract
[![Bindings](https://img.shields.io/badge/Bindings-alef%20%D7%90-007ec6)](https://github.com/kreuzberg-dev/alef)
Rust bindings for Tesseract OCR with built-in compilation of Tesseract and Leptonica libraries. Provides a safe and idiomatic Rust interface to Tesseract's functionality while handling the complexity of compiling the underlying C++ libraries.
Based on the original [tesseract-rs](https://github.com/cafercangundogdu/tesseract-rs) by Cafer Can Gündoğdu, this maintained version adds critical improvements for production use:
- **C++17 Support**: Upgraded for Tesseract 5.5.1 which requires C++17 filesystem
- **Cross-Compilation**: Fixed CXX compiler detection for cross-platform builds
- **Architecture Validation**: Validates target architecture before using cached libraries
- **Windows Static Linking**: Fixed MSVC static linking issues
- **Build Caching**: Improved caching with OUT_DIR-based cache directory
- **MinGW Support**: Added support for MinGW toolchains
## Features
- Safe Rust bindings for Tesseract OCR
- **Multiple linking options:**
- **Static linking** (default): Built-in compilation with no runtime dependencies
- **Dynamic linking**: Link to system-installed libraries for faster builds
- Uses existing Tesseract training data (expects English data for tests)
- High-level Rust API for common OCR tasks
- Caching of compiled libraries for faster subsequent builds
- Support for multiple operating systems (Linux, macOS, Windows)
## Installation
### Static Linking (Default)
Static linking builds Tesseract and Leptonica from source and embeds them in your binary. No runtime dependencies required:
```toml
[dependencies]
kreuzberg-tesseract = "1.0.0-rc.1"
# or explicitly:
kreuzberg-tesseract = { version = "1.0.0-rc.1", features = ["static-linking"] }
```
### Dynamic Linking
Dynamic linking uses system-installed Tesseract and Leptonica libraries. Faster builds, but requires libraries installed on the system:
```toml
[dependencies]
kreuzberg-tesseract = { version = "1.0.0-rc.1", features = ["dynamic-linking"], default-features = false }
```
**System requirements for dynamic linking:**
- Tesseract 5.x libraries installed (`libtesseract`, `libleptonica`)
- macOS: `brew install tesseract leptonica`
- Ubuntu/Debian: `sudo apt-get install libtesseract-dev libleptonica-dev`
- RHEL/CentOS/Fedora: `sudo dnf install tesseract-devel leptonica-devel`
- Windows: Install from [Tesseract releases](https://github.com/tesseract-ocr/tesseract/releases) or vcpkg
### Development Dependencies
For development and testing, you'll also need these dependencies:
```toml
[dev-dependencies]
image = "0.25.5"
```
## System Requirements
### For Static Linking (Default)
When building with static linking, the crate will compile Tesseract and Leptonica from source. You need:
- Rust 1.85.0 or later
- A C++ compiler (e.g., gcc, clang, MSVC on Windows)
- CMake 3.x or later
- Internet connection (for downloading Tesseract source code)
### For Dynamic Linking
When using dynamic linking with system-installed libraries, you need:
- Rust 1.85.0 or later
- Tesseract 5.x and Leptonica libraries installed on your system (see Installation section)
No internet connection, C++ compiler, or CMake is required for dynamic linking builds, because Tesseract and Leptonica are not downloaded or compiled from source.
For a full development environment checklist (including optional tooling suggestions), see [CONTRIBUTING.md](../../CONTRIBUTING.md).
## Environment Variables
The following environment variables affect the build and test process:
### Build Variables
- `CARGO_CLEAN`: If set, cleans the cache directory before building
- `RUSTC_WRAPPER`: If set to "sccache", enables compiler caching with sccache
- `CC`: Compiler selection for C code (affects Linux builds)
- `HOME` (Unix) or `APPDATA` (Windows): Used to determine cache directory location
- `TESSERACT_RS_CACHE_DIR`: Optional override for the cache root. When unset or not writable, the build falls back to the default OS-specific directory, and if that still fails, a temporary directory under the system temp folder is used automatically.
### Test Variables
- `TESSDATA_PREFIX` (Optional): Path to override the default tessdata directory. If not set, the crate will use its default cache directory.
## Cache and Data Directories
The crate uses the following directory structure based on your operating system:
- macOS: `~/Library/Application Support/tesseract-rs`
- Linux: `~/.tesseract-rs`
- Windows: `%APPDATA%/tesseract-rs`
The cache includes:
- Compiled Tesseract and Leptonica libraries
- Third-party source code
Training data is not downloaded during the build. Provide `eng.traineddata` (and any other languages you need) via `TESSDATA_PREFIX` or your system Tesseract installation.
## Testing
The project includes several integration tests that verify OCR functionality. To run the tests:
1. Ensure you have the required test dependencies:
```toml
[dev-dependencies]
image = "0.25.9"
```
2. Run the tests:
```bash
cargo test
```
Note: Make sure `eng.traineddata` is available in your tessdata directory before running tests. If `TESSDATA_PREFIX` is not set, the tests look in the default cache location. You can point the tests at a custom tessdata directory by setting:
```bash
# Linux/macOS
export TESSDATA_PREFIX=/path/to/custom/tessdata
# Windows (PowerShell)
$env:TESSDATA_PREFIX="C:\path\to\custom\tessdata"
```
Available test cases:
- OCR on English sample images
- Error handling and invalid input coverage
Test images are sourced from the shared `test_documents/` directory in the repository:
- `images/test_hello_world.png`: Simple English text
- `tables/simple_table.png`: Basic table with English headers
## Usage
Here's a basic example of how to use `kreuzberg-tesseract`:
```rust
use std::path::PathBuf;
use std::error::Error;
use kreuzberg_tesseract::TesseractAPI;
fn get_default_tessdata_dir() -> PathBuf {
if cfg!(target_os = "macos") {
let home_dir = std::env::var("HOME").expect("HOME environment variable not set");
PathBuf::from(home_dir)
.join("Library")
.join("Application Support")
.join("tesseract-rs")
.join("tessdata")
} else if cfg!(target_os = "linux") {
let home_dir = std::env::var("HOME").expect("HOME environment variable not set");
PathBuf::from(home_dir)
.join(".tesseract-rs")
.join("tessdata")
} else if cfg!(target_os = "windows") {
PathBuf::from(std::env::var("APPDATA").expect("APPDATA environment variable not set"))
.join("tesseract-rs")
.join("tessdata")
} else {
panic!("Unsupported operating system");
}
}
fn get_tessdata_dir() -> PathBuf {
match std::env::var("TESSDATA_PREFIX") {
Ok(dir) => {
let path = PathBuf::from(dir);
println!("Using TESSDATA_PREFIX directory: {:?}", path);
path
}
Err(_) => {
let default_dir = get_default_tessdata_dir();
println!(
"TESSDATA_PREFIX not set, using default directory: {:?}",
default_dir
);
default_dir
}
}
}
fn main() -> Result<(), Box<dyn Error>> {
let api = TesseractAPI::new()?;
// Get tessdata directory (uses default location or TESSDATA_PREFIX if set)
let tessdata_dir = get_tessdata_dir();
api.init(tessdata_dir.to_str().unwrap(), "eng")?;
let width = 24;
let height = 24;
let bytes_per_pixel = 1;
let bytes_per_line = width * bytes_per_pixel;
// Initialize image data with all white pixels
let mut image_data = vec![255u8; width * height];
// Draw number 9 with clearer distinction
for y in 4..19 {
for x in 7..17 {
// Top bar
if y == 4 && x >= 8 && x <= 15 {
image_data[y * width + x] = 0;
}
// Top curve left side
if y >= 4 && y <= 10 && x == 7 {
image_data[y * width + x] = 0;
}
// Top curve right side
if y >= 4 && y <= 11 && x == 16 {
image_data[y * width + x] = 0;
}
// Middle bar
if y == 11 && x >= 8 && x <= 15 {
image_data[y * width + x] = 0;
}
// Bottom right vertical line
if y >= 11 && y <= 18 && x == 16 {
image_data[y * width + x] = 0;
}
// Bottom bar
if y == 18 && x >= 8 && x <= 15 {
image_data[y * width + x] = 0;
}
}
}
// Set the image data
api.set_image(
&image_data,
width.try_into().unwrap(),
height.try_into().unwrap(),
bytes_per_pixel.try_into().unwrap(),
bytes_per_line.try_into().unwrap(),
)?;
// Set whitelist for digits only
api.set_variable("tessedit_char_whitelist", "0123456789")?;
// Set PSM mode to single character
api.set_variable("tessedit_pageseg_mode", "10")?;
// Get the recognized text
let text = api.get_utf8_text()?;
println!("Recognized text: {}", text.trim());
Ok(())
}
```
## Advanced Usage
The API provides additional functionality for more complex OCR tasks, including thread-safe operations:
```rust
use kreuzberg_tesseract::TesseractAPI;
use std::sync::Arc;
use std::thread;
use std::error::Error;
fn main() -> Result<(), Box<dyn Error>> {
let tessdata_dir = get_tessdata_dir();
let api = TesseractAPI::new()?;
// Initialize the main API
api.init(tessdata_dir.to_str().unwrap(), "eng")?;
api.set_variable("tessedit_pageseg_mode", "1")?;
// Load and prepare image data
let (image_data, width, height) = load_test_image("sample_text.png")?;
// Share image data across threads
let image_data = Arc::new(image_data);
let mut handles = vec![];
// Spawn multiple threads for parallel OCR processing
for _ in 0..3 {
let api_clone = api.clone(); // Clones the API with all configurations
let image_data = Arc::clone(&image_data);
let handle = thread::spawn(move || {
// Set image in each thread
let res = api_clone.set_image(
&image_data,
width as i32,
height as i32,
3,
3 * width as i32,
);
assert!(res.is_ok());
// Perform OCR in parallel
let text = api_clone.get_utf8_text()
.expect("Failed to get text");
println!("Thread result: {}", text);
});
handles.push(handle);
}
// Wait for all threads to complete
for handle in handles {
handle.join().unwrap();
}
Ok(())
}
// Helper function to get tessdata directory
fn get_tessdata_dir() -> PathBuf {
// ... (implementation as shown in basic example)
}
// Helper function to load test image
fn load_test_image(filename: &str) -> Result<(Vec<u8>, u32, u32), Box<dyn Error>> {
let img = image::open(filename)?
.to_rgb8();
let (width, height) = img.dimensions();
Ok((img.into_raw(), width, height))
}
```
## Building
### Static Linking (Default)
With static linking, the crate will automatically download and compile Tesseract and Leptonica during the build process. This may take some time on the first build (5-10 minutes), but subsequent builds will use the cached libraries.
To clean the cache and force a rebuild:
```bash
CARGO_CLEAN=1 cargo build
```
### Dynamic Linking
With dynamic linking, the build is much faster (seconds instead of minutes) since it only links against system-installed libraries:
```bash
cargo build --no-default-features --features dynamic-linking
```
**Note**: Dynamic linking requires Tesseract and Leptonica to be installed on your system (see Installation section).
## Documentation
For more detailed information, please check the [API documentation](https://docs.rs/kreuzberg-tesseract).
## License
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
## Acknowledgements
This project is based on the original [tesseract-rs](https://github.com/cafercangundogdu/tesseract-rs) by [Cafer Can Gündoğdu](https://github.com/cafercangundogdu). We are grateful for the foundational work that made this project possible.
## Contributing
We welcome contributions! Please see our [Contributing Guide](../../CONTRIBUTING.md) for details.
### Quick Start for Contributors
1. Fork and clone the repository
2. Install uv and set up git hooks:
```bash
curl -LsSf https://astral.sh/uv/install.sh | sh
uvx prek install
```
3. Make your changes following our commit message format
4. Run tests: `cargo test`
5. Submit a Pull Request
Our commit messages follow the [Conventional Commits](https://www.conventionalcommits.org/) specification.
## Acknowledgements
This project uses [Tesseract OCR](https://github.com/tesseract-ocr/tesseract) and [Leptonica](http://leptonica.org/). We are grateful to the maintainers and contributors of these projects.
```text
```

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,74 @@
# Tesseract WASM Patches
This directory contains patches needed to compile Tesseract for WebAssembly (WASM) targets using WASI SDK.
These patches are vendored from the [tesseract-wasm](https://github.com/robertknight/tesseract-wasm) project and have been proven to work with WASM compilation.
## Patches
### tesseract.diff
A comprehensive patch that makes Tesseract compatible with WASM compilation. The patch includes the following changes:
#### 1. CMakeLists.txt Modifications
- **New CMake option**: `BUILD_TESSERACT_BINARY` (default: ON)
- Allows disabling the Tesseract CLI binary build, which is not needed for WASM
- Wraps all executable and installation targets for the tesseract binary
- **Disabled components for WASM**:
- Removes OpenCL support (`src/opencl/*.cpp`) - not applicable to WASM
- Removes viewer support (`src/viewer/*.cpp`) - UI components not needed for WASM
- Removes C API bindings (`src/api/capi.cpp`); of the `src/api` sources, only `baseapi.cpp` and `hocrrenderer.cpp` remain in the build
- Removes PDF and rendering support files:
- `src/api/renderer.cpp`
- `src/api/altorenderer.cpp`
- `src/api/lstmboxrenderer.cpp`
- `src/api/pdfrenderer.cpp`
- `src/api/wordstrboxrenderer.cpp`
#### 2. SIMD Detection Fixes (src/arch/simddetect.cpp)
- Guards CPUID detection with `#if !defined(__wasm__)`
- Prevents attempts to use CPUID-based CPU feature detection, which does not exist in WASM
- The HAS_CPUID macro is only defined for non-WASM builds
- This allows the code to gracefully handle WASM's SIMD limitations
#### 3. Pointer Type Fixes (src/ccmain/pageiterator.cpp, src/ccmain/pagesegmain.cpp, src/ccmain/tesseractclass.cpp)
**Changed from stack allocation to heap allocation** in `tesseractclass.h`:
- `pixa_debug_` changed from `DebugPixa` to `std::unique_ptr<DebugPixa>`
- This prevents large allocations on the stack, which is limited in WASM
**Updated all references** throughout the codebase:
- `.get()` calls added where raw pointers are needed
- Arrow operator `->` replaces dot operator `.` for member access
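- Null checks added before most dereferences to prevent crashes (the `"NoImages"` `AddPix` call in `SetupPageSegAndDetectOrientation()` is still guarded only by `tessedit_dump_pageseg_images`)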
**Affected functions**:
- `PageIterator::Orientation()` - added null vector check
- `Tesseract::AutoPageSeg()` - updated pointer passing
- `Tesseract::SetupPageSegAndDetectOrientation()` - multiple pointer updates
- `Tesseract::Clear()` - added null check before WritePDF
- `Tesseract::PrepareForPageseg()` - updated Split() calls
- `Tesseract::PrepareForTessOCR()` - updated Split() calls
#### 4. Additional Fixes
- **Orientation detection**: Changed comparison from `> 0.0F` to `>= 0.0F` in `pageiterator.cpp` to handle null vectors gracefully when orientation info is not available
## How to Apply
These patches are applied during the WASM build process. They modify the Tesseract source code to:
1. Disable WASM-incompatible features (OpenCL, viewers, renderers)
2. Prevent CPUID detection in WASM environment
3. Use heap allocation instead of stack allocation for large objects
4. Handle missing pointer initialization gracefully
## Source
These patches are based on the proven WASM compilation approach used by the tesseract.js project, which successfully compiles Tesseract to WebAssembly and deploys it in production environments.

View File

@@ -0,0 +1,199 @@
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8c6845cb..fdcfc4a8 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -90,6 +90,7 @@ option(ENABLE_LTO "Enable link-time optimization" OFF)
option(FAST_FLOAT "Enable float for LSTM" ON)
option(ENABLE_OPENCL "Enable unsupported experimental OpenCL support" OFF)
option(BUILD_TRAINING_TOOLS "Build training tools" ON)
+option(BUILD_TESSERACT_BINARY "Build Tesseract binary" ON)
option(BUILD_TESTS "Build tests" OFF)
option(USE_SYSTEM_ICU "Use system ICU" OFF)
option(DISABLE_ARCHIVE "Disable build with libarchive (if available)" OFF)
@@ -565,9 +566,7 @@ file(
src/cutil/*.cpp
src/dict/*.cpp
src/lstm/*.cpp
- src/opencl/*.cpp
src/textord/*.cpp
- src/viewer/*.cpp
src/wordrec/*.cpp)
if(DISABLED_LEGACY_ENGINE)
@@ -714,13 +713,7 @@ file(
set(TESSERACT_SRC
${TESSERACT_SRC}
src/api/baseapi.cpp
- src/api/capi.cpp
- src/api/renderer.cpp
- src/api/altorenderer.cpp
- src/api/hocrrenderer.cpp
- src/api/lstmboxrenderer.cpp
- src/api/pdfrenderer.cpp
- src/api/wordstrboxrenderer.cpp)
+ src/api/hocrrenderer.cpp)
set(TESSERACT_CONFIGS
tessdata/configs/alto
@@ -858,14 +851,16 @@ endif()
# EXECUTABLE tesseract
# ##############################################################################
-add_executable(tesseract src/tesseract.cpp)
-target_link_libraries(tesseract libtesseract)
-if(HAVE_TIFFIO_H AND WIN32)
- target_link_libraries(tesseract ${TIFF_LIBRARIES})
-endif()
+if(BUILD_TESSERACT_BINARY)
+ add_executable(tesseract src/tesseract.cpp)
+ target_link_libraries(tesseract libtesseract)
+ if(HAVE_TIFFIO_H AND WIN32)
+ target_link_libraries(tesseract ${TIFF_LIBRARIES})
+ endif()
-if(OPENMP_BUILD AND UNIX)
- target_link_libraries(tesseract pthread)
+ if(OPENMP_BUILD AND UNIX)
+ target_link_libraries(tesseract pthread)
+ endif()
endif()
# ##############################################################################
@@ -899,7 +894,11 @@ write_basic_package_version_file(
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/tesseract.pc
DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig)
-install(TARGETS tesseract DESTINATION bin)
+
+if(BUILD_TESSERACT_BINARY)
+ install(TARGETS tesseract DESTINATION bin)
+endif()
+
install(
TARGETS libtesseract
EXPORT TesseractTargets
diff --git a/src/arch/simddetect.cpp b/src/arch/simddetect.cpp
index 1afe5a5d..cb8c6d4c 100644
--- a/src/arch/simddetect.cpp
+++ b/src/arch/simddetect.cpp
@@ -40,10 +40,12 @@
#endif
+#if !defined(__wasm__)
#if defined(HAVE_AVX) || defined(HAVE_AVX2) || defined(HAVE_FMA) || defined(HAVE_SSE4_1)
// See https://en.wikipedia.org/wiki/CPUID.
# define HAS_CPUID
#endif
+#endif
#if defined(HAS_CPUID)
# if defined(__GNUC__)
diff --git a/src/ccmain/pageiterator.cpp b/src/ccmain/pageiterator.cpp
index 64ff7f66..c0f80e5f 100644
--- a/src/ccmain/pageiterator.cpp
+++ b/src/ccmain/pageiterator.cpp
@@ -582,7 +582,9 @@ void PageIterator::Orientation(tesseract::Orientation *orientation,
up_in_image.rotate(block->re_rotation());
if (up_in_image.x() == 0.0F) {
- if (up_in_image.y() > 0.0F) {
+ // tesseract-wasm note: `up_in_image` will be a null vector if orientation
+ // info is not available. In that case, assume page up.
+ if (up_in_image.y() >= 0.0F) {
*orientation = ORIENTATION_PAGE_UP;
} else {
*orientation = ORIENTATION_PAGE_DOWN;
diff --git a/src/ccmain/pagesegmain.cpp b/src/ccmain/pagesegmain.cpp
index 0af44607..718e73ef 100644
--- a/src/ccmain/pagesegmain.cpp
+++ b/src/ccmain/pagesegmain.cpp
@@ -222,7 +222,7 @@ int Tesseract::AutoPageSeg(PageSegMode pageseg_mode, BLOCK_LIST *blocks, TO_BLOC
}
#endif // ndef DISABLED_LEGACY_ENGINE
result = finder->FindBlocks(pageseg_mode, scaled_color_, scaled_factor_, to_block,
- photomask_pix, pix_thresholds_, pix_grey_, &pixa_debug_,
+ photomask_pix, pix_thresholds_, pix_grey_, pixa_debug_.get(),
&found_blocks, diacritic_blobs, to_blocks);
if (result >= 0) {
finder->GetDeskewVectors(&deskew_, &reskew_);
@@ -279,17 +279,17 @@ ColumnFinder *Tesseract::SetupPageSegAndDetectOrientation(PageSegMode pageseg_mo
ICOORD bleft(0, 0);
ASSERT_HOST(pix_binary_ != nullptr);
- if (tessedit_dump_pageseg_images) {
- pixa_debug_.AddPix(pix_binary_, "PageSegInput");
+ if (tessedit_dump_pageseg_images && pixa_debug_) {
+ pixa_debug_->AddPix(pix_binary_, "PageSegInput");
}
// Leptonica is used to find the rule/separator lines in the input.
LineFinder::FindAndRemoveLines(source_resolution_, textord_tabfind_show_vlines, pix_binary_,
&vertical_x, &vertical_y, music_mask_pix, &v_lines, &h_lines);
- if (tessedit_dump_pageseg_images) {
- pixa_debug_.AddPix(pix_binary_, "NoLines");
+ if (tessedit_dump_pageseg_images && pixa_debug_) {
+ pixa_debug_->AddPix(pix_binary_, "NoLines");
}
// Leptonica is used to find a mask of the photo regions in the input.
- *photo_mask_pix = ImageFind::FindImages(pix_binary_, &pixa_debug_);
+ *photo_mask_pix = ImageFind::FindImages(pix_binary_, pixa_debug_.get());
if (tessedit_dump_pageseg_images) {
Image pix_no_image_ = nullptr;
if (*photo_mask_pix != nullptr) {
@@ -297,7 +297,7 @@ ColumnFinder *Tesseract::SetupPageSegAndDetectOrientation(PageSegMode pageseg_mo
} else {
pix_no_image_ = pix_binary_.clone();
}
- pixa_debug_.AddPix(pix_no_image_, "NoImages");
+ pixa_debug_->AddPix(pix_no_image_, "NoImages");
pix_no_image_.destroy();
}
if (!PSM_COL_FIND_ENABLED(pageseg_mode)) {
diff --git a/src/ccmain/tesseractclass.cpp b/src/ccmain/tesseractclass.cpp
index fd58ac87..517f925e 100644
--- a/src/ccmain/tesseractclass.cpp
+++ b/src/ccmain/tesseractclass.cpp
@@ -487,8 +487,10 @@ Dict &Tesseract::getDict() {
}
void Tesseract::Clear() {
- std::string debug_name = imagebasename + "_debug.pdf";
- pixa_debug_.WritePDF(debug_name.c_str());
+ if (pixa_debug_) {
+ std::string debug_name = imagebasename + "_debug.pdf";
+ pixa_debug_->WritePDF(debug_name.c_str());
+ }
pix_binary_.destroy();
pix_grey_.destroy();
pix_thresholds_.destroy();
@@ -572,7 +574,7 @@ void Tesseract::PrepareForPageseg() {
// the newly split image.
splitter_.set_orig_pix(pix_binary());
splitter_.set_pageseg_split_strategy(max_pageseg_strategy);
- if (splitter_.Split(true, &pixa_debug_)) {
+ if (splitter_.Split(true, pixa_debug_.get())) {
ASSERT_HOST(splitter_.splitted_image());
pix_binary_.destroy();
pix_binary_ = splitter_.splitted_image().clone();
@@ -599,7 +601,7 @@ void Tesseract::PrepareForTessOCR(BLOCK_LIST *block_list, Tesseract *osd_tess, O
splitter_.set_segmentation_block_list(block_list);
splitter_.set_ocr_split_strategy(max_ocr_strategy);
// Run the splitter for OCR
- bool split_for_ocr = splitter_.Split(false, &pixa_debug_);
+ bool split_for_ocr = splitter_.Split(false, pixa_debug_.get());
// Restore pix_binary to the binarized original pix for future reference.
ASSERT_HOST(splitter_.orig_pix());
pix_binary_.destroy();
diff --git a/src/ccmain/tesseractclass.h b/src/ccmain/tesseractclass.h
index 732bb9e6..030aa5bc 100644
--- a/src/ccmain/tesseractclass.h
+++ b/src/ccmain/tesseractclass.h
@@ -986,7 +986,7 @@ private:
// Thresholds that were used to generate the thresholded image from grey.
Image pix_thresholds_;
// Debug images. If non-empty, will be written on destruction.
- DebugPixa pixa_debug_;
+ std::unique_ptr<DebugPixa> pixa_debug_;
// Input image resolution after any scaling. The resolution is not well
// transmitted by operations on Pix, so we keep an independent record here.
int source_resolution_;

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,77 @@
use crate::api::TessDeleteText;
use crate::error::{Result, TesseractError};
use std::ffi::CStr;
use std::os::raw::{c_char, c_float, c_int, c_void};
use std::sync::{Arc, Mutex};
/// Wrapper around a raw Tesseract choice-iterator handle.
///
/// The raw pointer is stored behind an `Arc<Mutex<..>>`. Every method in this
/// file locks the mutex before handing the pointer to the C API, and the
/// `Drop` impl passes the pointer to `TessChoiceIteratorDelete`.
pub struct ChoiceIterator {
// Opaque pointer passed to the `TessChoiceIterator*` functions.
handle: Arc<Mutex<*mut c_void>>,
}
// SAFETY: NOTE(review) - raw pointers are neither `Send` nor `Sync`, so these
// impls are manual assertions. Within this file all access to the pointer is
// serialized by the mutex; whether the underlying Tesseract iterator may be
// used from a thread other than the one that created it is not visible here
// and should be confirmed.
unsafe impl Send for ChoiceIterator {}
unsafe impl Sync for ChoiceIterator {}
impl ChoiceIterator {
    /// Creates a new instance of the ChoiceIterator.
    ///
    /// The wrapper takes over the handle: it is passed to
    /// `TessChoiceIteratorDelete` when the `ChoiceIterator` is dropped.
    ///
    /// # Arguments
    ///
    /// * `handle` - Pointer to the ChoiceIterator.
    pub fn new(handle: *mut c_void) -> Self {
        ChoiceIterator {
            handle: Arc::new(Mutex::new(handle)),
        }
    }

    /// Gets the next choice.
    ///
    /// # Returns
    ///
    /// Returns `Ok(true)` if `TessChoiceIteratorNext` reports a non-zero
    /// result, otherwise `Ok(false)`.
    ///
    /// # Errors
    ///
    /// Returns `TesseractError::MutexLockError` if the handle mutex is poisoned.
    pub fn next(&self) -> Result<bool> {
        let handle = self.handle.lock().map_err(|_| TesseractError::MutexLockError)?;
        // SAFETY: the pointer is the one supplied to `new`; the lock keeps
        // other methods of this wrapper from using it concurrently.
        Ok(unsafe { TessChoiceIteratorNext(*handle) != 0 })
    }

    /// Gets the UTF-8 text for the current choice.
    ///
    /// # Returns
    ///
    /// Returns an owned copy of the text as a `String`.
    ///
    /// # Errors
    ///
    /// * `TesseractError::MutexLockError` if the handle mutex is poisoned.
    /// * `TesseractError::NullPointerError` if Tesseract returns a null pointer.
    /// * The error converted from `Utf8Error` if the text is not valid UTF-8.
    pub fn get_utf8_text(&self) -> Result<String> {
        let handle = self.handle.lock().map_err(|_| TesseractError::MutexLockError)?;
        // SAFETY: the pointer is the one supplied to `new`; access is serialized by the lock.
        let text_ptr = unsafe { TessChoiceIteratorGetUTF8Text(*handle) };
        if text_ptr.is_null() {
            return Err(TesseractError::NullPointerError);
        }
        // Unlike the result-iterator text getters, the string returned for a
        // choice points into the iterator's own storage and must NOT be
        // released with `TessDeleteText`; doing so frees memory the iterator
        // still owns. Copy it into an owned `String` while the lock is held
        // and leave the original buffer untouched.
        // NOTE(review): this method was the only visible user of the
        // `TessDeleteText` import in this file; drop the import if nothing
        // else needs it.
        // SAFETY: `text_ptr` is non-null and, per the Tesseract API, points to
        // a NUL-terminated string that stays valid while the iterator is not
        // advanced, which the held lock guarantees for this wrapper.
        let text = unsafe { CStr::from_ptr(text_ptr) }.to_str()?.to_owned();
        Ok(text)
    }

    /// Gets the confidence of the current choice.
    ///
    /// # Returns
    ///
    /// Returns the value reported by `TessChoiceIteratorConfidence` as a `f32`.
    ///
    /// # Errors
    ///
    /// Returns `TesseractError::MutexLockError` if the handle mutex is poisoned.
    pub fn confidence(&self) -> Result<f32> {
        let handle = self.handle.lock().map_err(|_| TesseractError::MutexLockError)?;
        // SAFETY: the pointer is the one supplied to `new`; access is serialized by the lock.
        Ok(unsafe { TessChoiceIteratorConfidence(*handle) })
    }
}
impl Drop for ChoiceIterator {
    /// Releases the wrapped iterator through `TessChoiceIteratorDelete`.
    ///
    /// If the handle mutex is poisoned, nothing is released.
    fn drop(&mut self) {
        let Ok(raw) = self.handle.lock() else {
            return;
        };
        // SAFETY: the pointer is the one supplied to `new`, and it is not
        // used again after this call because the wrapper is being dropped.
        unsafe { TessChoiceIteratorDelete(*raw) };
    }
}
// Foreign declarations of the Tesseract choice-iterator functions called by
// `ChoiceIterator` in this file.
// NOTE(review): `ffi_extern!` is defined outside this view; presumably it
// expands to an `extern "C"` block - confirm.
ffi_extern! {
// Called from `Drop` with the wrapped handle.
fn TessChoiceIteratorDelete(handle: *mut c_void);
// A non-zero return is reported as `true` by `ChoiceIterator::next`.
fn TessChoiceIteratorNext(handle: *mut c_void) -> c_int;
// NOTE(review): upstream `capi.h` appears to declare the return type as
// `const char *` (text owned by the iterator) - confirm the signature and
// that callers never free the returned pointer.
fn TessChoiceIteratorGetUTF8Text(handle: *mut c_void) -> *mut c_char;
// The result is returned unchanged by `ChoiceIterator::confidence`.
fn TessChoiceIteratorConfidence(handle: *mut c_void) -> c_float;
}

View File

@@ -0,0 +1,373 @@
/// Page segmentation modes, with explicit integer discriminants `0..=14`.
///
/// NOTE(review): names and values presumably mirror Tesseract's
/// `PageSegMode` enum - confirm against the upstream headers.
#[repr(C)]
#[derive(Debug, Clone, Copy, PartialEq)]
#[allow(non_camel_case_types)]
pub enum TessPageSegMode {
    PSM_OSD_ONLY = 0,
    PSM_AUTO_OSD = 1,
    PSM_AUTO_ONLY = 2,
    PSM_AUTO = 3,
    PSM_SINGLE_COLUMN = 4,
    PSM_SINGLE_BLOCK_VERT_TEXT = 5,
    PSM_SINGLE_BLOCK = 6,
    PSM_SINGLE_LINE = 7,
    PSM_SINGLE_WORD = 8,
    PSM_CIRCLE_WORD = 9,
    PSM_SINGLE_CHAR = 10,
    PSM_SPARSE_TEXT = 11,
    PSM_SPARSE_TEXT_OSD = 12,
    PSM_RAW_LINE = 13,
    PSM_COUNT = 14,
}

impl TessPageSegMode {
    /// Converts an integer to a `TessPageSegMode`.
    ///
    /// Values outside `0..=14` fall back to `PSM_AUTO`; use
    /// [`TessPageSegMode::try_from_int`] to detect them instead.
    pub fn from_int(value: i32) -> Self {
        Self::try_from_int(value).unwrap_or(TessPageSegMode::PSM_AUTO)
    }

    /// Safely convert an integer to a TessPageSegMode, returning None for invalid values.
    pub fn try_from_int(value: i32) -> Option<Self> {
        // Indexed by discriminant: entry `i` is the variant whose value is `i`.
        const BY_VALUE: [TessPageSegMode; 15] = [
            TessPageSegMode::PSM_OSD_ONLY,
            TessPageSegMode::PSM_AUTO_OSD,
            TessPageSegMode::PSM_AUTO_ONLY,
            TessPageSegMode::PSM_AUTO,
            TessPageSegMode::PSM_SINGLE_COLUMN,
            TessPageSegMode::PSM_SINGLE_BLOCK_VERT_TEXT,
            TessPageSegMode::PSM_SINGLE_BLOCK,
            TessPageSegMode::PSM_SINGLE_LINE,
            TessPageSegMode::PSM_SINGLE_WORD,
            TessPageSegMode::PSM_CIRCLE_WORD,
            TessPageSegMode::PSM_SINGLE_CHAR,
            TessPageSegMode::PSM_SPARSE_TEXT,
            TessPageSegMode::PSM_SPARSE_TEXT_OSD,
            TessPageSegMode::PSM_RAW_LINE,
            TessPageSegMode::PSM_COUNT,
        ];
        if value < 0 {
            return None;
        }
        // Non-negative `i32` always fits in `usize`; `get` rejects values past the table.
        BY_VALUE.get(value as usize).copied()
    }
}
/// Page iterator levels, with explicit integer discriminants `0..=4`.
///
/// NOTE(review): names and values presumably mirror Tesseract's
/// `PageIteratorLevel` enum - confirm against the upstream headers.
#[repr(C)]
#[derive(Debug, Clone, Copy, PartialEq)]
#[allow(non_camel_case_types)]
pub enum TessPageIteratorLevel {
    RIL_BLOCK = 0,
    RIL_PARA = 1,
    RIL_TEXTLINE = 2,
    RIL_WORD = 3,
    RIL_SYMBOL = 4,
}

impl TessPageIteratorLevel {
    /// Converts an integer to a `TessPageIteratorLevel`.
    ///
    /// Values outside `0..=4` fall back to `RIL_BLOCK`; use
    /// [`TessPageIteratorLevel::try_from_int`] to detect them instead.
    pub fn from_int(value: i32) -> Self {
        Self::try_from_int(value).unwrap_or(TessPageIteratorLevel::RIL_BLOCK)
    }

    /// Safely convert an integer to a TessPageIteratorLevel, returning None for invalid values.
    ///
    /// Provided for parity with the `try_from_int` constructors on the other
    /// enums in this file.
    pub fn try_from_int(value: i32) -> Option<Self> {
        match value {
            0 => Some(TessPageIteratorLevel::RIL_BLOCK),
            1 => Some(TessPageIteratorLevel::RIL_PARA),
            2 => Some(TessPageIteratorLevel::RIL_TEXTLINE),
            3 => Some(TessPageIteratorLevel::RIL_WORD),
            4 => Some(TessPageIteratorLevel::RIL_SYMBOL),
            _ => None,
        }
    }
}
/// Block types, with explicit integer discriminants `0..=15`.
///
/// NOTE(review): names and values presumably mirror Tesseract's
/// `PolyBlockType` enum - confirm against the upstream headers.
#[repr(C)]
#[derive(Debug, Clone, Copy, PartialEq)]
#[allow(non_camel_case_types)]
pub enum TessPolyBlockType {
    PT_UNKNOWN = 0,
    PT_FLOWING_TEXT = 1,
    PT_HEADING_TEXT = 2,
    PT_PULLOUT_TEXT = 3,
    PT_EQUATION = 4,
    PT_INLINE_EQUATION = 5,
    PT_TABLE = 6,
    PT_VERTICAL_TEXT = 7,
    PT_CAPTION_TEXT = 8,
    PT_FLOWING_IMAGE = 9,
    PT_HEADING_IMAGE = 10,
    PT_PULLOUT_IMAGE = 11,
    PT_HORZ_LINE = 12,
    PT_VERT_LINE = 13,
    PT_NOISE = 14,
    PT_COUNT = 15,
}

impl TessPolyBlockType {
    /// Converts an integer to a `TessPolyBlockType`.
    ///
    /// Values outside `0..=15` fall back to `PT_UNKNOWN`; use
    /// [`TessPolyBlockType::try_from_int`] to detect them instead.
    pub fn from_int(value: i32) -> Self {
        Self::try_from_int(value).unwrap_or(TessPolyBlockType::PT_UNKNOWN)
    }

    /// Safely convert an integer to a TessPolyBlockType, returning None for invalid values.
    pub fn try_from_int(value: i32) -> Option<Self> {
        // Indexed by discriminant: entry `i` is the variant whose value is `i`.
        const BY_VALUE: [TessPolyBlockType; 16] = [
            TessPolyBlockType::PT_UNKNOWN,
            TessPolyBlockType::PT_FLOWING_TEXT,
            TessPolyBlockType::PT_HEADING_TEXT,
            TessPolyBlockType::PT_PULLOUT_TEXT,
            TessPolyBlockType::PT_EQUATION,
            TessPolyBlockType::PT_INLINE_EQUATION,
            TessPolyBlockType::PT_TABLE,
            TessPolyBlockType::PT_VERTICAL_TEXT,
            TessPolyBlockType::PT_CAPTION_TEXT,
            TessPolyBlockType::PT_FLOWING_IMAGE,
            TessPolyBlockType::PT_HEADING_IMAGE,
            TessPolyBlockType::PT_PULLOUT_IMAGE,
            TessPolyBlockType::PT_HORZ_LINE,
            TessPolyBlockType::PT_VERT_LINE,
            TessPolyBlockType::PT_NOISE,
            TessPolyBlockType::PT_COUNT,
        ];
        if value < 0 {
            return None;
        }
        // Non-negative `i32` always fits in `usize`; `get` rejects values past the table.
        BY_VALUE.get(value as usize).copied()
    }
}
/// Page orientation codes, numbered `0..=3` to match the integer values
/// exchanged with the C API.
#[repr(C)]
#[derive(Debug, Clone, Copy, PartialEq)]
#[allow(non_camel_case_types)]
pub enum TessOrientation {
    ORIENTATION_PAGE_UP = 0,
    ORIENTATION_PAGE_RIGHT = 1,
    ORIENTATION_PAGE_DOWN = 2,
    ORIENTATION_PAGE_LEFT = 3,
}
impl TessOrientation {
    /// Lossy conversion: codes outside `0..=3` collapse to `ORIENTATION_PAGE_UP`.
    pub fn from_int(value: i32) -> Self {
        Self::try_from_int(value).unwrap_or(Self::ORIENTATION_PAGE_UP)
    }
    /// Strict conversion: yields `None` for any code outside `0..=3`.
    pub fn try_from_int(value: i32) -> Option<Self> {
        let orientation = match value {
            0 => Self::ORIENTATION_PAGE_UP,
            1 => Self::ORIENTATION_PAGE_RIGHT,
            2 => Self::ORIENTATION_PAGE_DOWN,
            3 => Self::ORIENTATION_PAGE_LEFT,
            _ => return None,
        };
        Some(orientation)
    }
}
/// Paragraph justification codes, numbered `0..=3` to match the integer
/// values exchanged with the C API.
#[repr(C)]
#[derive(Debug, Clone, Copy, PartialEq)]
#[allow(non_camel_case_types)]
pub enum TessParagraphJustification {
    JUSTIFICATION_UNKNOWN = 0,
    JUSTIFICATION_LEFT = 1,
    JUSTIFICATION_CENTER = 2,
    JUSTIFICATION_RIGHT = 3,
}
impl TessParagraphJustification {
    /// Converts an integer code to a justification, falling back to
    /// `JUSTIFICATION_UNKNOWN` for any value outside `0..=3`.
    ///
    /// Use [`Self::try_from_int`] when unknown codes must be detected.
    pub fn from_int(value: i32) -> Self {
        Self::try_from_int(value).unwrap_or(Self::JUSTIFICATION_UNKNOWN)
    }
    /// Safely convert an integer to a TessParagraphJustification, returning None for invalid values.
    pub fn try_from_int(value: i32) -> Option<Self> {
        match value {
            0 => Some(Self::JUSTIFICATION_UNKNOWN),
            1 => Some(Self::JUSTIFICATION_LEFT),
            2 => Some(Self::JUSTIFICATION_CENTER),
            3 => Some(Self::JUSTIFICATION_RIGHT),
            _ => None,
        }
    }
}
/// Writing direction codes, numbered `0..=2` to match the integer values
/// exchanged with the C API.
#[repr(C)]
#[derive(Debug, Clone, Copy, PartialEq)]
#[allow(non_camel_case_types)]
pub enum TessWritingDirection {
    WRITING_DIRECTION_LEFT_TO_RIGHT = 0,
    WRITING_DIRECTION_RIGHT_TO_LEFT = 1,
    WRITING_DIRECTION_TOP_TO_BOTTOM = 2,
}
impl TessWritingDirection {
    /// Lossy conversion: codes outside `0..=2` collapse to
    /// `WRITING_DIRECTION_LEFT_TO_RIGHT`.
    pub fn from_int(value: i32) -> Self {
        Self::try_from_int(value).unwrap_or(Self::WRITING_DIRECTION_LEFT_TO_RIGHT)
    }
    /// Strict conversion: yields `None` for any code outside `0..=2`.
    pub fn try_from_int(value: i32) -> Option<Self> {
        let direction = match value {
            0 => Self::WRITING_DIRECTION_LEFT_TO_RIGHT,
            1 => Self::WRITING_DIRECTION_RIGHT_TO_LEFT,
            2 => Self::WRITING_DIRECTION_TOP_TO_BOTTOM,
            _ => return None,
        };
        Some(direction)
    }
}
/// Text-line ordering codes, numbered `0..=2` to match the integer values
/// exchanged with the C API.
#[repr(C)]
#[derive(Debug, Clone, Copy, PartialEq)]
#[allow(non_camel_case_types)]
pub enum TessTextlineOrder {
    TEXTLINE_ORDER_LEFT_TO_RIGHT = 0,
    TEXTLINE_ORDER_RIGHT_TO_LEFT = 1,
    TEXTLINE_ORDER_TOP_TO_BOTTOM = 2,
}
impl TessTextlineOrder {
    /// Lossy conversion: codes outside `0..=2` collapse to
    /// `TEXTLINE_ORDER_LEFT_TO_RIGHT`.
    pub fn from_int(value: i32) -> Self {
        Self::try_from_int(value).unwrap_or(Self::TEXTLINE_ORDER_LEFT_TO_RIGHT)
    }
    /// Strict conversion: yields `None` for any code outside `0..=2`.
    pub fn try_from_int(value: i32) -> Option<Self> {
        let order = match value {
            0 => Self::TEXTLINE_ORDER_LEFT_TO_RIGHT,
            1 => Self::TEXTLINE_ORDER_RIGHT_TO_LEFT,
            2 => Self::TEXTLINE_ORDER_TOP_TO_BOTTOM,
            _ => return None,
        };
        Some(order)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Known codes map to their variants; unknown codes fall back to `PSM_AUTO`.
    #[test]
    fn test_page_seg_mode_from_int() {
        let cases = [
            (0, TessPageSegMode::PSM_OSD_ONLY),
            (3, TessPageSegMode::PSM_AUTO),
            (10, TessPageSegMode::PSM_SINGLE_CHAR),
            (999, TessPageSegMode::PSM_AUTO),
        ];
        for (code, expected) in cases {
            assert_eq!(TessPageSegMode::from_int(code), expected);
        }
    }

    /// The enum discriminant is recoverable with a plain `as` cast.
    #[test]
    fn test_page_seg_mode_conversion() {
        assert_eq!(TessPageSegMode::PSM_SINGLE_LINE as i32, 7);
    }

    /// Unknown iterator levels fall back to `RIL_BLOCK`.
    #[test]
    fn test_page_iterator_level_from_int() {
        let cases = [
            (0, TessPageIteratorLevel::RIL_BLOCK),
            (3, TessPageIteratorLevel::RIL_WORD),
            (-1, TessPageIteratorLevel::RIL_BLOCK),
        ];
        for (code, expected) in cases {
            assert_eq!(TessPageIteratorLevel::from_int(code), expected);
        }
    }

    /// Unknown block types fall back to `PT_UNKNOWN`.
    #[test]
    fn test_poly_block_type_from_int() {
        let cases = [
            (1, TessPolyBlockType::PT_FLOWING_TEXT),
            (6, TessPolyBlockType::PT_TABLE),
            (100, TessPolyBlockType::PT_UNKNOWN),
        ];
        for (code, expected) in cases {
            assert_eq!(TessPolyBlockType::from_int(code), expected);
        }
    }

    /// Unknown orientations fall back to `ORIENTATION_PAGE_UP`.
    #[test]
    fn test_orientation_from_int() {
        let cases = [
            (0, TessOrientation::ORIENTATION_PAGE_UP),
            (2, TessOrientation::ORIENTATION_PAGE_DOWN),
            (5, TessOrientation::ORIENTATION_PAGE_UP),
        ];
        for (code, expected) in cases {
            assert_eq!(TessOrientation::from_int(code), expected);
        }
    }

    /// Unknown justifications fall back to `JUSTIFICATION_UNKNOWN`.
    #[test]
    fn test_paragraph_justification_from_int() {
        let cases = [
            (1, TessParagraphJustification::JUSTIFICATION_LEFT),
            (3, TessParagraphJustification::JUSTIFICATION_RIGHT),
            (-1, TessParagraphJustification::JUSTIFICATION_UNKNOWN),
        ];
        for (code, expected) in cases {
            assert_eq!(TessParagraphJustification::from_int(code), expected);
        }
    }

    /// Unknown writing directions fall back to left-to-right.
    #[test]
    fn test_writing_direction_from_int() {
        let cases = [
            (0, TessWritingDirection::WRITING_DIRECTION_LEFT_TO_RIGHT),
            (1, TessWritingDirection::WRITING_DIRECTION_RIGHT_TO_LEFT),
            (10, TessWritingDirection::WRITING_DIRECTION_LEFT_TO_RIGHT),
        ];
        for (code, expected) in cases {
            assert_eq!(TessWritingDirection::from_int(code), expected);
        }
    }

    /// Unknown text-line orders fall back to left-to-right.
    #[test]
    fn test_textline_order_from_int() {
        let cases = [
            (0, TessTextlineOrder::TEXTLINE_ORDER_LEFT_TO_RIGHT),
            (2, TessTextlineOrder::TEXTLINE_ORDER_TOP_TO_BOTTOM),
            (99, TessTextlineOrder::TEXTLINE_ORDER_LEFT_TO_RIGHT),
        ];
        for (code, expected) in cases {
            assert_eq!(TessTextlineOrder::from_int(code), expected);
        }
    }

    /// Compile-time check that every enum in this module is `Copy`.
    #[test]
    fn test_enums_are_copy() {
        fn require_copy<T>()
        where
            T: Copy,
        {
        }
        require_copy::<TessPageSegMode>();
        require_copy::<TessPageIteratorLevel>();
        require_copy::<TessPolyBlockType>();
        require_copy::<TessOrientation>();
        require_copy::<TessParagraphJustification>();
        require_copy::<TessWritingDirection>();
        require_copy::<TessTextlineOrder>();
    }
}

View File

@@ -0,0 +1,85 @@
use std::str::Utf8Error;
use thiserror::Error;
/// Errors that can occur when using the Tesseract API.
///
/// All variants except [`TesseractError::Utf8Error`] and
/// [`TesseractError::InvalidEnumValue`] carry no payload; their `Display`
/// text is the fixed message given in the `#[error(...)]` attribute.
#[derive(Error, Debug)]
pub enum TesseractError {
/// Tesseract could not be initialized.
#[error("Failed to initialize Tesseract")]
InitError,
/// An image could not be set on the engine.
#[error("Failed to set image")]
SetImageError,
/// An OCR operation failed. The Leptonica `Pix` wrapper in this crate also
/// returns this when a Leptonica call reports a non-zero status.
#[error("OCR operation failed")]
OcrError,
/// Text was not valid UTF-8. Wraps the underlying [`Utf8Error`]; the
/// `#[from]` attribute provides the `From<Utf8Error>` conversion used by `?`.
#[error("Invalid UTF-8 in Tesseract output")]
Utf8Error(#[from] Utf8Error),
/// A mutex could not be locked.
// NOTE(review): overlaps with `MutexError` below — confirm whether both are needed.
#[error("Failed to lock mutex")]
MutexLockError,
/// A variable could not be set.
#[error("Failed to set variable")]
SetVariableError,
/// A variable could not be read.
#[error("Failed to get variable")]
GetVariableError,
/// A pointer was unexpectedly null. The Leptonica `Pix` wrapper in this
/// crate returns this when a Leptonica call hands back a null pointer.
#[error("Null pointer error")]
NullPointerError,
/// A parameter was rejected as invalid.
#[error("Invalid parameter")]
InvalidParameterError,
/// Layout analysis failed.
#[error("Layout analysis failed")]
AnalyseLayoutError,
/// Page processing failed.
#[error("Page processing failed")]
ProcessPagesError,
/// An I/O operation failed. No underlying error is attached.
#[error("I/O error")]
IoError,
/// A mutex-related failure.
// NOTE(review): overlaps with `MutexLockError` above — confirm the intended distinction.
#[error("Mutex error")]
MutexError,
/// Image dimensions were invalid.
#[error("Invalid dimensions")]
InvalidDimensions,
/// The bytes-per-pixel value was invalid.
#[error("Invalid bytes per pixel")]
InvalidBytesPerPixel,
/// The bytes-per-line value was invalid.
#[error("Invalid bytes per line")]
InvalidBytesPerLine,
/// Image data was invalid. `Pix::from_raw_rgb` returns this when the buffer
/// length does not match the dimensions or a dimension is zero.
#[error("Invalid image data")]
InvalidImageData,
/// Something was used before being initialized.
// NOTE(review): presumably the Tesseract API handle — confirm against callers.
#[error("Uninitialized error")]
UninitializedError,
/// An integer did not correspond to any known enum value; the payload is
/// the offending integer and is included in the message.
#[error("Invalid enum value: {0}")]
InvalidEnumValue(i32),
/// A string contained a null byte.
// NOTE(review): presumably raised when building a C string for FFI — confirm.
#[error("String contains null byte")]
NullByteInString,
}
/// Result type for Tesseract operations.
///
/// Shorthand for `std::result::Result` with the error type fixed to
/// [`TesseractError`].
pub type Result<T> = std::result::Result<T, TesseractError>;
#[cfg(test)]
mod tests {
    use super::*;

    /// `Display` output matches the fixed message declared on each variant.
    #[test]
    fn test_error_display() {
        let expectations = [
            (TesseractError::InitError, "Failed to initialize Tesseract"),
            (TesseractError::SetImageError, "Failed to set image"),
            (TesseractError::OcrError, "OCR operation failed"),
        ];
        for (error, message) in expectations {
            assert_eq!(error.to_string(), message);
        }
    }

    /// A `std::str::Utf8Error` converts into the `Utf8Error` variant.
    #[test]
    fn test_utf8_error_conversion() {
        let invalid_utf8 = [0xFF_u8, 0xFE];
        let source = std::str::from_utf8(&invalid_utf8).unwrap_err();
        let converted = TesseractError::from(source);
        let TesseractError::Utf8Error(_) = converted else {
            panic!("Expected Utf8Error variant")
        };
    }

    /// Compile-time check that the error type can cross thread boundaries.
    #[test]
    fn test_error_is_send_sync() {
        fn require_send_sync<T>()
        where
            T: Send + Sync,
        {
        }
        require_send_sync::<TesseractError>();
    }
}

View File

@@ -0,0 +1,807 @@
//! Safe Leptonica Pix wrapper for image preprocessing before OCR.
//!
//! Provides a safe Rust wrapper around the Leptonica image-processing library.
//! `Pix` is the core Leptonica image type. Image-producing methods return
//! `Result<Pix>`, and the wrapper takes care of proper memory management via `Drop`.
//!
//! ## Pixel format
//!
//! Leptonica's 32 bpp format stores each pixel as a native 32-bit integer
//! with the logical layout (MSB→LSB): `R G B A`, i.e.
//! `(r << 24) | (g << 16) | (b << 8) | alpha`. Leptonica accesses
//! individual channels via bit-shift on the integer value, not via
//! byte-addressed pointer arithmetic, so the packing is identical on both
//! big- and little-endian hosts. Do **not** call `pixEndianByteSwap` after
//! writing pixels this way — doing so inverts the channel order.
//!
//! ## `pixDeskew` requires a binary (1 bpp) image
//!
//! Call `to_grayscale()` followed by `adaptive_threshold()` before `deskew()`.
//! `pixDeskew` internally calls `pixFindSkewSweepAndSearchScorePivot` which
//! operates on 1-bit images only; passing a colour image will return a null
//! pointer.
use crate::error::{Result, TesseractError};
use std::ffi::c_void;
// ---------------------------------------------------------------------------
// Raw Leptonica FFI declarations
// ---------------------------------------------------------------------------
// Leptonica C functions used by the safe `Pix` wrapper in this file.
// `ffi_extern!` is a crate macro defined outside this view — presumably it
// expands to an `extern "C"` block; confirm against its definition.
#[cfg(any(feature = "build-tesseract", feature = "build-tesseract-wasm"))]
ffi_extern! {
/// Allocates a new Pix with the given dimensions and bit depth.
fn pixCreate(width: i32, height: i32, depth: i32) -> *mut c_void;
/// Frees a Pix and sets the caller's pointer to null.
///
/// Leptonica uses a double-pointer convention: `*ppix` is set to null
/// after the call so that accidental double-frees are a no-op.
fn pixDestroy(ppix: *mut *mut c_void);
/// Sets the horizontal and vertical resolution (DPI) on a Pix.
///
/// Returns 0 on success, non-zero on error.
fn pixSetResolution(pix: *mut c_void, xres: i32, yres: i32) -> i32;
/// Returns the width of the Pix in pixels.
fn pixGetWidth(pix: *const c_void) -> i32;
/// Returns the height of the Pix in pixels.
fn pixGetHeight(pix: *const c_void) -> i32;
/// Returns the bit depth of the Pix (1, 2, 4, 8, 16, or 32).
fn pixGetDepth(pix: *const c_void) -> i32;
/// Returns the number of 32-bit words per row (words-per-line).
fn pixGetWpl(pix: *const c_void) -> i32;
/// Returns a mutable pointer to the start of the pixel data array.
///
/// The data is stored as rows of 32-bit words; each word covers 32/depth pixels.
fn pixGetData(pix: *mut c_void) -> *mut u32;
/// Deskews a 1 bpp image using a sweep-and-search algorithm.
///
/// `redsearch` is the reduction factor used during the search; pass 0 for
/// the Leptonica default (2x reduction). Returns a new deskewed Pix on
/// success, or null on failure. The input Pix is **not** consumed.
fn pixDeskew(pixs: *mut c_void, redsearch: i32) -> *mut c_void;
/// Estimates the skew angle and confidence for a 1 bpp image.
///
/// Writes the angle (degrees, positive = counter-clockwise) into `*pangle`
/// and a confidence score (0 to 1) into `*pconf`. Returns 0 on success.
fn pixFindSkew(pixs: *mut c_void, pangle: *mut f32, pconf: *mut f32) -> i32;
/// Applies Otsu adaptive thresholding to produce a binarised Pix.
///
/// `sx`/`sy` are the tile dimensions; `smoothx`/`smoothy` are half-widths
/// for smoothing the threshold map; `scorefract` controls threshold acceptance
/// (typical value: 0.1). `ppixth` (optional) receives the threshold image;
/// `ppixd` receives the binarised output.
fn pixOtsuAdaptiveThreshold(
pixs: *mut c_void,
sx: i32,
sy: i32,
smoothx: i32,
smoothy: i32,
scorefract: f32,
ppixth: *mut *mut c_void,
ppixd: *mut *mut c_void,
) -> i32;
/// Normalises the background of a grayscale image using morphological operations.
///
/// `reduction` is the subsampling factor (e.g. 4), `size` is the morphological
/// structuring-element half-size (e.g. 15), and `bgval` is the target background
/// value (e.g. 200). Returns a new normalised Pix, or null on failure.
fn pixBackgroundNormMorph(
pixs: *mut c_void,
pixim: *mut c_void,
reduction: i32,
size: i32,
bgval: i32,
) -> *mut c_void;
/// Applies unsharp masking to sharpen a grayscale or colour Pix.
///
/// `halfwidth` is the half-size of the blur kernel; `fract` controls the
/// sharpening strength (0.0 to 1.0 typical). Returns a new Pix, or null on failure.
fn pixUnsharpMasking(pixs: *mut c_void, halfwidth: i32, fract: f32) -> *mut c_void;
/// Scales a Pix by independent x and y factors using the best available method.
///
/// Returns a new scaled Pix, or null on failure. The input Pix is **not** consumed.
fn pixScale(pixs: *mut c_void, scalex: f32, scaley: f32) -> *mut c_void;
/// Converts an RGB (32 bpp) Pix to 8 bpp grayscale.
///
/// `rwt`, `gwt`, `bwt` are the red, green, and blue channel weights; pass
/// 0.0 for all three to use Leptonica's built-in default weights. Returns a
/// new 8 bpp Pix, or null on failure.
fn pixConvertRGBToGray(pixs: *mut c_void, rwt: f32, gwt: f32, bwt: f32) -> *mut c_void;
/// Creates a Leptonica BOX with the given coordinates.
fn boxCreate(x: i32, y: i32, w: i32, h: i32) -> *mut c_void;
/// Frees a Leptonica BOX.
fn boxDestroy(pbox: *mut *mut c_void);
/// Clips a rectangular region from a Pix.
///
/// Returns a new Pix containing the clipped region, or null on failure.
/// `pboxc` (optional) receives the actual clipped box; pass null to ignore.
fn pixClipRectangle(pixs: *mut c_void, box_: *mut c_void, pboxc: *mut *mut c_void) -> *mut c_void;
/// Counts connected components in a 1 bpp image.
///
/// `connectivity` is 4 or 8. Writes the count to `*pcount`.
/// Returns 0 on success.
fn pixCountConnComp(pix: *mut c_void, connectivity: i32, pcount: *mut i32) -> i32;
/// Retrieves the horizontal and vertical resolution (DPI) from a Pix.
///
/// Writes the x-resolution into `*pxres` and y-resolution into `*pyres`.
/// Returns 0 on success, non-zero on error.
fn pixGetResolution(pix: *const c_void, pxres: *mut i32, pyres: *mut i32) -> i32;
}
// ---------------------------------------------------------------------------
// Safe Pix wrapper
// ---------------------------------------------------------------------------
/// Safe wrapper around a Leptonica `PIX *` image object.
///
/// Owns the underlying allocation and frees it in `Drop`. All methods that
/// return a new image allocate a fresh `Pix`; the receiver is never consumed.
///
/// # Thread safety
///
/// `Pix` is `Send` because Leptonica image objects are independent heap
/// allocations with no shared mutable state. Concurrent mutation from multiple
/// threads is **not** safe (no `Sync`).
#[cfg(any(feature = "build-tesseract", feature = "build-tesseract-wasm"))]
pub struct Pix {
/// Raw Leptonica `PIX *` owned by this wrapper. Every construction site
/// visible in this file null-checks the pointer before wrapping it.
ptr: *mut c_void,
}
#[cfg(any(feature = "build-tesseract", feature = "build-tesseract-wasm"))]
impl std::fmt::Debug for Pix {
    /// Formats the wrapper as `Pix { ptr: <address> }`; pixel data is not printed.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut builder = f.debug_struct("Pix");
        builder.field("ptr", &self.ptr);
        builder.finish()
    }
}
// SAFETY: A Pix owns a uniquely heap-allocated Leptonica PIX. There is no
// interior mutability shared across thread boundaries, so transferring
// ownership to another thread is safe.
// NOTE(review): `as_ptr` hands out the raw pointer, so this relies on callers
// not using a previously obtained pointer on the original thread after the
// `Pix` has been moved — confirm against call sites.
#[cfg(any(feature = "build-tesseract", feature = "build-tesseract-wasm"))]
unsafe impl Send for Pix {}
#[cfg(any(feature = "build-tesseract", feature = "build-tesseract-wasm"))]
impl Pix {
// -----------------------------------------------------------------------
// Construction
// -----------------------------------------------------------------------
/// Creates a 32 bpp Leptonica Pix from a packed RGB byte slice.
///
/// `data` must contain exactly `width * height * 3` bytes in left-to-right,
/// top-to-bottom, `R G B` interleaved order.
///
/// The DPI is set to 300 × 300 which is a sensible default for OCR input.
///
/// # Errors
///
/// * `TesseractError::InvalidImageData` if either dimension is zero or does
///   not fit in an `i32`, if `width * height * 3` overflows `usize`, or if
///   `data.len()` differs from that product.
/// * `TesseractError::NullPointerError` if `pixCreate` or `pixGetData`
///   returns null, or if the Pix reports fewer words per line than `width`
///   (the pixel buffer would be too small to write into).
///
/// # Examples
///
/// ```rust,no_run
/// # use kreuzberg_tesseract::Pix;
/// let rgb = vec![255u8; 4 * 4 * 3]; // 4×4 white image
/// let pix = Pix::from_raw_rgb(&rgb, 4, 4).unwrap();
/// assert_eq!(pix.width(), 4);
/// assert_eq!(pix.height(), 4);
/// assert_eq!(pix.depth(), 32);
/// ```
pub fn from_raw_rgb(data: &[u8], width: u32, height: u32) -> Result<Pix> {
    if width == 0 || height == 0 {
        return Err(TesseractError::InvalidImageData);
    }
    // Leptonica takes dimensions as C ints. A plain `as i32` cast would wrap
    // values above `i32::MAX` to negative numbers, so convert with a check.
    let (Ok(pix_width), Ok(pix_height)) = (i32::try_from(width), i32::try_from(height)) else {
        return Err(TesseractError::InvalidImageData);
    };
    // Both values are known to be in 1..=i32::MAX here, so these casts are lossless.
    let cols = width as usize;
    let rows = height as usize;
    let row_bytes = cols.checked_mul(3).ok_or(TesseractError::InvalidImageData)?;
    let expected = row_bytes
        .checked_mul(rows)
        .ok_or(TesseractError::InvalidImageData)?;
    if data.len() != expected {
        return Err(TesseractError::InvalidImageData);
    }
    // SAFETY: pixCreate() allocates a new PIX; width, height and depth (32)
    // are positive C ints. A null return is handled immediately below.
    let pix_ptr = unsafe { pixCreate(pix_width, pix_height, 32) };
    if pix_ptr.is_null() {
        return Err(TesseractError::NullPointerError);
    }
    // SAFETY: pix_ptr is a valid, non-null Pix that we own exclusively and
    // have not exposed to any other code. Both calls only read its header.
    let data_ptr = unsafe { pixGetData(pix_ptr) };
    let wpl = usize::try_from(unsafe { pixGetWpl(pix_ptr) }).unwrap_or(0);
    // For 32 bpp each pixel is one word, so a usable buffer has wpl >= width.
    // Refuse to write if the buffer is missing or the stride is too small.
    if data_ptr.is_null() || wpl < cols {
        // SAFETY: pix_ptr is a valid allocation from pixCreate(). pixDestroy
        // takes a pointer-to-pointer and nulls the local copy afterwards.
        let mut doomed = pix_ptr;
        unsafe { pixDestroy(&mut doomed) };
        return Err(TesseractError::NullPointerError);
    }
    // Leptonica's 32 bpp format stores each pixel as a native 32-bit integer
    // laid out (MSB→LSB) as R G B A, i.e. `(r << 24) | (g << 16) | (b << 8) | alpha`.
    // Channels are accessed by bit-shift, so the packing is the same on any
    // host endianness and no byte swap is applied. Alpha is set to 0xFF.
    for (row, src_row) in data.chunks_exact(row_bytes).enumerate() {
        for (col, rgb) in src_row.chunks_exact(3).enumerate() {
            let word = (u32::from(rgb[0]) << 24)
                | (u32::from(rgb[1]) << 16)
                | (u32::from(rgb[2]) << 8)
                | 0xFF;
            // SAFETY: data_ptr points at the Pix's pixel buffer. The offset
            // is in bounds because row < height, col < width and
            // width <= wpl (checked above).
            unsafe { data_ptr.add(row * wpl + col).write(word) };
        }
    }
    // Set a sensible default DPI for OCR processing; the status is ignored
    // on purpose because a missing resolution is not fatal.
    // SAFETY: pix_ptr is valid and non-null.
    unsafe { pixSetResolution(pix_ptr, 300, 300) };
    Ok(Pix { ptr: pix_ptr })
}
// -----------------------------------------------------------------------
// Image processing operations
// -----------------------------------------------------------------------
/// Returns a deskewed copy of this image; the receiver is left untouched.
///
/// **Note:** this wrapper is intended for 1 bpp (binary) input. When
/// starting from a colour or grayscale Pix, run `to_grayscale()` and then
/// `adaptive_threshold()` first.
///
/// # Errors
///
/// Returns `TesseractError::NullPointerError` when `pixDeskew` hands back a
/// null pointer.
///
/// # Examples
///
/// ```rust,no_run
/// # use kreuzberg_tesseract::Pix;
/// # let rgb = vec![0u8; 100 * 100 * 3];
/// # let pix = Pix::from_raw_rgb(&rgb, 100, 100).unwrap();
/// let gray = pix.to_grayscale().unwrap();
/// let binary = gray.adaptive_threshold(32, 32).unwrap();
/// let deskewed = binary.deskew().unwrap();
/// ```
pub fn deskew(&self) -> Result<Pix> {
    // SAFETY: self.ptr is a valid non-null Pix owned by `self`. pixDeskew()
    // does not take ownership of its input; the returned pointer is a
    // separate allocation that is null-checked before being wrapped.
    let deskewed = unsafe { pixDeskew(self.ptr, 0) };
    if deskewed.is_null() {
        return Err(TesseractError::NullPointerError);
    }
    Ok(Pix { ptr: deskewed })
}
/// Measures the skew of this image.
///
/// Returns `(angle, confidence)`: the angle is in degrees, positive meaning
/// counter-clockwise skew, and the confidence lies between 0 and 1, with
/// values near 1.0 indicating a clear dominant skew direction.
///
/// **Note:** Like `deskew`, this operates on 1 bpp images.
///
/// # Errors
///
/// Returns `TesseractError::OcrError` when `pixFindSkew` reports a non-zero
/// status (e.g. insufficient contrast or wrong bit depth).
///
/// # Examples
///
/// ```rust,no_run
/// # use kreuzberg_tesseract::Pix;
/// # let rgb = vec![0u8; 100 * 100 * 3];
/// # let pix = Pix::from_raw_rgb(&rgb, 100, 100).unwrap();
/// let gray = pix.to_grayscale().unwrap();
/// let binary = gray.adaptive_threshold(32, 32).unwrap();
/// let (angle, confidence) = binary.find_skew().unwrap();
/// println!("Skew: {angle:.2}° (confidence {confidence:.2})");
/// ```
pub fn find_skew(&self) -> Result<(f32, f32)> {
    let (mut angle, mut conf) = (0.0_f32, 0.0_f32);
    // SAFETY: self.ptr is valid and non-null. The two out-pointers refer to
    // locals that stay alive for the whole call.
    let status = unsafe { pixFindSkew(self.ptr, &mut angle, &mut conf) };
    match status {
        0 => Ok((angle, conf)),
        _ => Err(TesseractError::OcrError),
    }
}
/// Produces a binarised copy of this image via Otsu adaptive thresholding.
///
/// `tile_width` and `tile_height` give the size of the local regions over
/// which a threshold is computed. Values of roughly 16 to 64 work well for
/// typical document images; smaller tiles follow local contrast more closely.
/// Threshold-map smoothing is disabled and the score fraction is fixed at 0.1.
///
/// # Errors
///
/// Returns `TesseractError::OcrError` if `pixOtsuAdaptiveThreshold` reports
/// a non-zero status, or `TesseractError::NullPointerError` if it reports
/// success but leaves the output pointer null.
///
/// # Examples
///
/// ```rust,no_run
/// # use kreuzberg_tesseract::Pix;
/// # let rgb = vec![128u8; 64 * 64 * 3];
/// # let pix = Pix::from_raw_rgb(&rgb, 64, 64).unwrap();
/// let gray = pix.to_grayscale().unwrap();
/// let binary = gray.adaptive_threshold(32, 32).unwrap();
/// assert_eq!(binary.depth(), 1);
/// ```
pub fn adaptive_threshold(&self, tile_width: i32, tile_height: i32) -> Result<Pix> {
    // smoothx / smoothy: no smoothing of the threshold map.
    const NO_SMOOTHING: i32 = 0;
    // scorefract: Leptonica-recommended default.
    const SCORE_FRACT: f32 = 0.1;
    let mut binarised: *mut c_void = std::ptr::null_mut();
    // SAFETY: self.ptr is a valid non-null Pix. ppixth is null because the
    // intermediate threshold image is not needed. `binarised` is a local
    // out-pointer that is inspected before being wrapped.
    let status = unsafe {
        pixOtsuAdaptiveThreshold(
            self.ptr,
            tile_width,
            tile_height,
            NO_SMOOTHING,
            NO_SMOOTHING,
            SCORE_FRACT,
            std::ptr::null_mut(),
            &mut binarised,
        )
    };
    match (status, binarised.is_null()) {
        (0, false) => Ok(Pix { ptr: binarised }),
        (0, true) => Err(TesseractError::NullPointerError),
        _ => Err(TesseractError::OcrError),
    }
}
/// Reads the image resolution as `(x_dpi, y_dpi)`.
///
/// # Errors
///
/// Returns `TesseractError::OcrError` when `pixGetResolution` reports a
/// non-zero status.
pub fn get_resolution(&self) -> Result<(i32, i32)> {
    let (mut xres, mut yres) = (0_i32, 0_i32);
    // SAFETY: self.ptr is a valid non-null Pix, and both out-pointers refer
    // to locals that stay alive for the whole call.
    let status = unsafe { pixGetResolution(self.ptr, &mut xres, &mut yres) };
    if status == 0 {
        Ok((xres, yres))
    } else {
        Err(TesseractError::OcrError)
    }
}
/// Stores a new horizontal (`xres`) and vertical (`yres`) resolution, in
/// DPI, on this image.
///
/// # Errors
///
/// Returns `TesseractError::OcrError` when `pixSetResolution` reports a
/// non-zero status.
pub fn set_resolution(&mut self, xres: i32, yres: i32) -> Result<()> {
    // SAFETY: self.ptr is a valid non-null Pix; the call only updates the
    // resolution fields of the Pix header.
    match unsafe { pixSetResolution(self.ptr, xres, yres) } {
        0 => Ok(()),
        _ => Err(TesseractError::OcrError),
    }
}
/// Ensures the image has a valid (non-zero) DPI resolution.
///
/// If both x and y resolution are zero, sets them to 72 DPI as a
/// safe fallback. This prevents Leptonica operations that depend on
/// resolution metadata from producing incorrect results.
fn ensure_valid_resolution(&self) {
if let Ok((xres, yres)) = self.get_resolution()
&& (xres == 0 || yres == 0)
{
// SAFETY: self.ptr is valid. We set a safe default DPI.
unsafe { pixSetResolution(self.ptr, 72, 72) };
}
}
/// Returns a copy of this image with its background normalised by
/// morphological operations.
///
/// Helpful as a preprocessing step for documents with uneven illumination
/// or a non-white background. The call uses fixed settings: 4x subsampling,
/// a structuring-element size of 15, a target background value of 200, and
/// no mask image.
///
/// # Errors
///
/// Returns `TesseractError::NullPointerError` when `pixBackgroundNormMorph`
/// hands back a null pointer.
///
/// # Examples
///
/// ```rust,no_run
/// # use kreuzberg_tesseract::Pix;
/// # let rgb = vec![200u8; 100 * 100 * 3];
/// # let pix = Pix::from_raw_rgb(&rgb, 100, 100).unwrap();
/// let gray = pix.to_grayscale().unwrap();
/// let normalised = gray.background_normalize().unwrap();
/// ```
pub fn background_normalize(&self) -> Result<Pix> {
    const REDUCTION: i32 = 4;
    const SE_SIZE: i32 = 15;
    const BG_VALUE: i32 = 200;
    self.ensure_valid_resolution();
    // SAFETY: self.ptr is a valid non-null Pix. A null `pixim` means no
    // mask. The returned pointer is a new allocation or null on failure.
    let normalised = unsafe {
        pixBackgroundNormMorph(self.ptr, std::ptr::null_mut(), REDUCTION, SE_SIZE, BG_VALUE)
    };
    if normalised.is_null() {
        return Err(TesseractError::NullPointerError);
    }
    Ok(Pix { ptr: normalised })
}
/// Returns a sharpened copy of this image produced by unsharp masking.
///
/// `halfwidth` is the half-size of the blur kernel (the example below uses 2).
/// `fract` is the sharpening fraction in the range 0.0 to 1.0; values
/// around 0.3 to 0.5 produce visible sharpening without artefacts.
///
/// # Errors
///
/// Returns `TesseractError::NullPointerError` when `pixUnsharpMasking`
/// hands back a null pointer.
///
/// # Examples
///
/// ```rust,no_run
/// # use kreuzberg_tesseract::Pix;
/// # let rgb = vec![128u8; 64 * 64 * 3];
/// # let pix = Pix::from_raw_rgb(&rgb, 64, 64).unwrap();
/// let sharpened = pix.unsharp_mask(2, 0.4).unwrap();
/// ```
pub fn unsharp_mask(&self, halfwidth: i32, fract: f32) -> Result<Pix> {
    self.ensure_valid_resolution();
    // SAFETY: self.ptr is valid and non-null. pixUnsharpMasking() neither
    // modifies nor takes ownership of the source; its result is
    // null-checked before being wrapped.
    let sharpened = unsafe { pixUnsharpMasking(self.ptr, halfwidth, fract) };
    match sharpened.is_null() {
        true => Err(TesseractError::NullPointerError),
        false => Ok(Pix { ptr: sharpened }),
    }
}
/// Returns a copy of this image scaled by `sx` horizontally and `sy`
/// vertically.
///
/// Leptonica picks the scaling algorithm itself, based on the scale factors
/// and bit depth (area mapping for downscaling, linear interpolation for
/// upscaling).
///
/// # Errors
///
/// Returns `TesseractError::NullPointerError` when `pixScale` hands back a
/// null pointer.
///
/// # Examples
///
/// ```rust,no_run
/// # use kreuzberg_tesseract::Pix;
/// # let rgb = vec![255u8; 40 * 40 * 3];
/// # let pix = Pix::from_raw_rgb(&rgb, 40, 40).unwrap();
/// let upscaled = pix.scale(2.0, 2.0).unwrap();
/// assert_eq!(upscaled.width(), 80);
/// assert_eq!(upscaled.height(), 80);
/// ```
pub fn scale(&self, sx: f32, sy: f32) -> Result<Pix> {
    // SAFETY: self.ptr is valid and non-null. pixScale() leaves the source
    // untouched and returns a new allocation, or null on failure.
    let scaled = unsafe { pixScale(self.ptr, sx, sy) };
    std::ptr::NonNull::new(scaled)
        .map(|non_null| Pix { ptr: non_null.as_ptr() })
        .ok_or(TesseractError::NullPointerError)
}
/// Returns a new Pix holding only the pixels inside the given rectangle.
///
/// Coordinates are in pixel space; `(x, y)` is the top-left corner and
/// `w` × `h` the size of the region.
///
/// # Errors
///
/// Returns `TesseractError::NullPointerError` if the temporary box cannot
/// be created or if `pixClipRectangle` hands back a null pointer.
pub fn clip_rectangle(&self, x: i32, y: i32, w: i32, h: i32) -> Result<Pix> {
    // SAFETY: boxCreate allocates a new BOX on the heap, or returns null.
    let mut clip_box = unsafe { boxCreate(x, y, w, h) };
    if clip_box.is_null() {
        return Err(TesseractError::NullPointerError);
    }
    // SAFETY: self.ptr and clip_box are valid. A null `pboxc` means the
    // actual clipped box coordinates are not reported back.
    let clipped = unsafe { pixClipRectangle(self.ptr, clip_box, std::ptr::null_mut()) };
    // SAFETY: clip_box was allocated above and is freed exactly once here,
    // before either return path.
    unsafe { boxDestroy(&mut clip_box) };
    if clipped.is_null() {
        return Err(TesseractError::NullPointerError);
    }
    Ok(Pix { ptr: clipped })
}
/// Returns the number of connected components in a 1 bpp (binary) image.
///
/// `connectivity` should be 4 or 8.
///
/// # Errors
///
/// Returns `TesseractError::OcrError` when `pixCountConnComp` reports a
/// non-zero status (e.g., wrong bit depth — image must be 1 bpp).
pub fn count_connected_components(&self, connectivity: i32) -> Result<i32> {
    let mut components = 0_i32;
    // SAFETY: self.ptr is a valid Pix, and the out-pointer refers to a
    // local that stays alive for the whole call.
    match unsafe { pixCountConnComp(self.ptr, connectivity, &mut components) } {
        0 => Ok(components),
        _ => Err(TesseractError::OcrError),
    }
}
/// Returns an 8 bpp grayscale copy of this 32 bpp RGB image.
///
/// All three channel weights are passed as 0.0, which tells Leptonica to
/// apply its own built-in default weights.
///
/// # Errors
///
/// Returns `TesseractError::NullPointerError` when `pixConvertRGBToGray`
/// hands back a null pointer (e.g. the source is not 32 bpp).
///
/// # Examples
///
/// ```rust,no_run
/// # use kreuzberg_tesseract::Pix;
/// # let rgb = vec![100u8, 150u8, 200u8].repeat(10 * 10);
/// # let pix = Pix::from_raw_rgb(&rgb, 10, 10).unwrap();
/// let gray = pix.to_grayscale().unwrap();
/// assert_eq!(gray.depth(), 8);
/// ```
pub fn to_grayscale(&self) -> Result<Pix> {
    self.ensure_valid_resolution();
    // SAFETY: self.ptr is valid and non-null. pixConvertRGBToGray() leaves
    // the source unmodified and returns a new Pix, or null on failure.
    let gray = unsafe { pixConvertRGBToGray(self.ptr, 0.0, 0.0, 0.0) };
    if gray.is_null() {
        return Err(TesseractError::NullPointerError);
    }
    Ok(Pix { ptr: gray })
}
// -----------------------------------------------------------------------
// Accessors
// -----------------------------------------------------------------------
/// Exposes the underlying Leptonica `PIX *` without transferring ownership.
///
/// The main use is handing the image to `TesseractAPI::set_image_2`.
///
/// # Safety
///
/// Calling this method is safe, but *using* the pointer carries obligations:
///
/// * The `Pix` must outlive every use of the returned pointer.
///   `TessBaseAPISetImage2` only **borrows** it, so the `Pix` has to stay
///   alive until `TessBaseAPIRecognize` (or any other Tesseract call that
///   consumes the image data) has finished. Dropping the `Pix` earlier
///   results in a use-after-free.
/// * The caller must **not** free the pointer; `Pix::drop` is solely
///   responsible for deallocation via `pixDestroy`.
pub fn as_ptr(&self) -> *mut c_void {
    self.ptr
}
/// Image width, in pixels.
///
/// # Examples
///
/// ```rust,no_run
/// # use kreuzberg_tesseract::Pix;
/// # let pix = Pix::from_raw_rgb(&vec![0u8; 8 * 6 * 3], 8, 6).unwrap();
/// assert_eq!(pix.width(), 8);
/// ```
pub fn width(&self) -> i32 {
    // SAFETY: `self.ptr` is this wrapper's PIX; pixGetWidth() only reads the
    // PIX header and mutates nothing.
    unsafe { pixGetWidth(self.ptr) }
}
/// Image height, in pixels.
///
/// # Examples
///
/// ```rust,no_run
/// # use kreuzberg_tesseract::Pix;
/// # let pix = Pix::from_raw_rgb(&vec![0u8; 8 * 6 * 3], 8, 6).unwrap();
/// assert_eq!(pix.height(), 6);
/// ```
pub fn height(&self) -> i32 {
    // SAFETY: `self.ptr` is this wrapper's PIX; pixGetHeight() only reads the
    // PIX header.
    unsafe { pixGetHeight(self.ptr) }
}
/// Bits per pixel of the image (1, 8, or 32 for this module's usage).
///
/// # Examples
///
/// ```rust,no_run
/// # use kreuzberg_tesseract::Pix;
/// # let pix = Pix::from_raw_rgb(&vec![0u8; 4 * 4 * 3], 4, 4).unwrap();
/// assert_eq!(pix.depth(), 32);
/// ```
pub fn depth(&self) -> i32 {
    // SAFETY: `self.ptr` is this wrapper's PIX; pixGetDepth() only reads the
    // PIX header.
    unsafe { pixGetDepth(self.ptr) }
}
}
// ---------------------------------------------------------------------------
// Drop implementation
// ---------------------------------------------------------------------------
#[cfg(any(feature = "build-tesseract", feature = "build-tesseract-wasm"))]
impl Drop for Pix {
    /// Releases the owned Leptonica image, if there is one.
    fn drop(&mut self) {
        // Nothing to free for a null wrapper.
        if self.ptr.is_null() {
            return;
        }
        // SAFETY: `self.ptr` is a non-null PIX that this wrapper allocated and
        // owns exclusively. pixDestroy() takes a pointer-to-pointer, frees the
        // image and writes null back into `self.ptr`, so a hypothetical second
        // call would see null and be skipped by the guard above.
        unsafe { pixDestroy(&mut self.ptr) };
    }
}
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------
#[cfg(test)]
#[cfg(any(feature = "build-tesseract", feature = "build-tesseract-wasm"))]
mod tests {
    use super::*;

    /// Builds a `width` x `height` RGB `Pix` whose every channel byte is `fill`.
    fn make_rgb_pix(width: u32, height: u32, fill: u8) -> Pix {
        let byte_len = (width * height * 3) as usize;
        let pixels = vec![fill; byte_len];
        Pix::from_raw_rgb(&pixels, width, height).expect("from_raw_rgb failed")
    }

    #[test]
    fn test_from_raw_rgb_dimensions() {
        let image = make_rgb_pix(16, 8, 200);
        assert_eq!(image.width(), 16);
        assert_eq!(image.height(), 8);
        assert_eq!(image.depth(), 32);
    }

    #[test]
    fn test_from_raw_rgb_wrong_length() {
        // 10 bytes cannot describe a 4x4 RGB image (needs 48).
        let too_short = vec![0u8; 10];
        let failure = Pix::from_raw_rgb(&too_short, 4, 4).unwrap_err();
        assert!(matches!(failure, TesseractError::InvalidImageData));
    }

    #[test]
    fn test_from_raw_rgb_zero_dimensions() {
        // Zero width first, then zero height.
        let zero_width = Pix::from_raw_rgb(&[], 0, 4).unwrap_err();
        assert!(matches!(zero_width, TesseractError::InvalidImageData));

        let zero_height = Pix::from_raw_rgb(&[], 4, 0).unwrap_err();
        assert!(matches!(zero_height, TesseractError::InvalidImageData));
    }

    #[test]
    fn test_as_ptr_is_non_null() {
        let image = make_rgb_pix(8, 8, 128);
        let raw = image.as_ptr();
        assert!(!raw.is_null());
    }

    #[test]
    fn test_to_grayscale() {
        let colour = make_rgb_pix(32, 32, 150);
        let gray = colour.to_grayscale().expect("to_grayscale failed");
        assert_eq!(gray.width(), 32);
        assert_eq!(gray.height(), 32);
        assert_eq!(gray.depth(), 8);
    }

    #[test]
    fn test_scale_up() {
        let source = make_rgb_pix(20, 10, 100);
        let doubled = source.scale(2.0, 2.0).expect("scale failed");
        assert_eq!(doubled.width(), 40);
        assert_eq!(doubled.height(), 20);
    }

    #[test]
    fn test_unsharp_mask_returns_same_dimensions() {
        let source = make_rgb_pix(32, 32, 200);
        let sharpened = source.unsharp_mask(2, 0.4).expect("unsharp_mask failed");
        assert_eq!(sharpened.width(), 32);
        assert_eq!(sharpened.height(), 32);
    }

    #[test]
    fn test_adaptive_threshold_produces_1bpp() {
        let colour = make_rgb_pix(64, 64, 180);
        let gray = colour.to_grayscale().expect("to_grayscale failed");
        let binary = gray.adaptive_threshold(32, 32).expect("adaptive_threshold failed");
        assert_eq!(binary.depth(), 1);
    }
}

View File

@@ -0,0 +1,218 @@
#![cfg_attr(
not(any(feature = "build-tesseract", feature = "build-tesseract-wasm")),
allow(unused_variables, dead_code)
)]
#![allow(clippy::arc_with_non_send_sync)]
#![allow(clippy::missing_transmute_annotations)]
#![allow(clippy::type_complexity)]
#![allow(clippy::new_without_default)]
#![allow(clippy::not_unsafe_ptr_arg_deref)]
#![allow(clippy::cmp_null)]
//! # kreuzberg-tesseract
//!
//! `kreuzberg-tesseract` provides safe Rust bindings for Tesseract OCR with built-in compilation
//! of Tesseract and Leptonica libraries. This crate aims to make OCR functionality
//! easily accessible in Rust projects while handling the complexity of interfacing
//! with the underlying C++ libraries.
//!
//! ## Usage
//!
//! Here's a basic example of how to use `kreuzberg-tesseract`:
//!
//! ```rust,no_run
//! use std::path::PathBuf;
//! use std::error::Error;
//! use kreuzberg_tesseract::TesseractAPI;
//!
//! fn get_default_tessdata_dir() -> PathBuf {
//! if cfg!(target_os = "macos") {
//! let home_dir = std::env::var("HOME").expect("HOME environment variable not set");
//! PathBuf::from(home_dir)
//! .join("Library")
//! .join("Application Support")
//! .join("kreuzberg-tesseract")
//! .join("tessdata")
//! } else if cfg!(target_os = "linux") {
//! let home_dir = std::env::var("HOME").expect("HOME environment variable not set");
//! PathBuf::from(home_dir)
//! .join(".kreuzberg-tesseract")
//! .join("tessdata")
//! } else if cfg!(target_os = "windows") {
//! PathBuf::from(std::env::var("APPDATA").expect("APPDATA environment variable not set"))
//! .join("kreuzberg-tesseract")
//! .join("tessdata")
//! } else {
//! panic!("Unsupported operating system");
//! }
//! }
//!
//! fn get_tessdata_dir() -> PathBuf {
//! match std::env::var("TESSDATA_PREFIX") {
//! Ok(dir) => {
//! let path = PathBuf::from(dir);
//! let path = if path.ends_with("tessdata") { path } else { path.join("tessdata") };
//! println!("Using TESSDATA_PREFIX directory: {:?}", path);
//! path
//! }
//! Err(_) => {
//! let default_dir = get_default_tessdata_dir();
//! println!(
//! "TESSDATA_PREFIX not set, using default directory: {:?}",
//! default_dir
//! );
//! default_dir
//! }
//! }
//! }
//!
//! fn main() -> Result<(), Box<dyn Error>> {
//! let api = TesseractAPI::new()?;
//!
//! // Get tessdata directory (uses default location or TESSDATA_PREFIX if set)
//! let tessdata_dir = get_tessdata_dir();
//! api.init(tessdata_dir.to_str().unwrap(), "eng")?;
//!
//! let width = 24;
//! let height = 24;
//! let bytes_per_pixel = 1;
//! let bytes_per_line = width * bytes_per_pixel;
//!
//! // Initialize image data with all white pixels
//! let mut image_data = vec![255u8; width * height];
//!
//! // Draw number 9 with clearer distinction
//! for y in 4..19 {
//! for x in 7..17 {
//! // Top bar
//! if y == 4 && x >= 8 && x <= 15 {
//! image_data[y * width + x] = 0;
//! }
//! // Top curve left side
//! if y >= 4 && y <= 10 && x == 7 {
//! image_data[y * width + x] = 0;
//! }
//! // Top curve right side
//! if y >= 4 && y <= 11 && x == 16 {
//! image_data[y * width + x] = 0;
//! }
//! // Middle bar
//! if y == 11 && x >= 8 && x <= 15 {
//! image_data[y * width + x] = 0;
//! }
//! // Bottom right vertical line
//! if y >= 11 && y <= 18 && x == 16 {
//! image_data[y * width + x] = 0;
//! }
//! // Bottom bar
//! if y == 18 && x >= 8 && x <= 15 {
//! image_data[y * width + x] = 0;
//! }
//! }
//! }
//!
//! // Set the image data
//! api.set_image(&image_data, width.try_into().unwrap(), height.try_into().unwrap(), bytes_per_pixel.try_into().unwrap(), bytes_per_line.try_into().unwrap())?;
//!
//! // Set whitelist for digits only
//! api.set_variable("tessedit_char_whitelist", "0123456789")?;
//!
//! // Set PSM mode to single character
//! api.set_variable("tessedit_pageseg_mode", "10")?;
//!
//! // Get the recognized text
//! let text = api.get_utf8_text()?;
//! println!("Recognized text: {}", text.trim());
//!
//! Ok(())
//! }
//! ```
/// Declares a list of foreign functions twice, once per target family, so the
/// same declarations work on native and WASM builds.
///
/// * On non-`wasm32` targets the functions are declared `extern "C-unwind"`,
///   which lets C++ exceptions raised inside Tesseract/Leptonica unwind
///   across the FFI boundary instead of being undefined behaviour.
/// * On `wasm32` they are declared plain `extern "C"`, because the LLVM
///   backend there does not support `cleanupret` / C++ unwinding.
///
/// Each item has the form `[attrs] [vis] fn name(arg: Ty, ...) [-> Ret];`.
/// A trailing comma in the argument list is accepted.
macro_rules! ffi_extern {
(
$(
// Optional attributes (including doc comments) followed by one signature.
$(#[$meta:meta])*
$vis:vis fn $name:ident($($arg:ident : $ty:ty),* $(,)?) $(-> $ret:ty)?;
)*
) => {
// Native targets: unwinding-capable ABI.
#[cfg(not(target_arch = "wasm32"))]
unsafe extern "C-unwind" {
$(
$(#[$meta])*
$vis fn $name($($arg : $ty),*) $(-> $ret)?;
)*
}
// WASM targets: plain C ABI, same signatures.
#[cfg(target_arch = "wasm32")]
unsafe extern "C" {
$(
$(#[$meta])*
$vis fn $name($($arg : $ty),*) $(-> $ret)?;
)*
}
};
}
pub use error::{Result, TesseractError};
mod error;
// WASM: Override __cxa_atexit to be a no-op. WASI SDK's __cxa_atexit calls calloc during
// C++ static initialization, which crashes because dlmalloc's heap isn't properly set up
// for wasm32-unknown-unknown. Since WASM modules never exit normally, atexit handlers
// are unnecessary.
//
// Only compiled for wasm32 targets other than Emscripten (see the cfg below).
#[cfg(all(target_arch = "wasm32", not(target_os = "emscripten")))]
mod wasm_compat {
// Exported under its unmangled C name so the linker resolves the C++ runtime's
// `__cxa_atexit` references to this stub.
#[unsafe(no_mangle)]
pub unsafe extern "C" fn __cxa_atexit(
// Destructor callback that would normally be registered; ignored.
_func: Option<unsafe extern "C" fn(*mut core::ffi::c_void)>,
// Argument that would be passed to `_func`; ignored.
_arg: *mut core::ffi::c_void,
// Handle identifying the registering shared object; ignored.
_dso_handle: *mut core::ffi::c_void,
) -> i32 {
0 // Success, but don't actually register anything
}
}
mod page_iterator;
pub use page_iterator::{BlockInfo, PageIterator, ParaInfo};
mod result_iterator;
pub use result_iterator::{FontAttributes, ResultIterator, WordData};
mod choice_iterator;
pub use choice_iterator::ChoiceIterator;
mod monitor;
pub use monitor::TessMonitor;
mod result_renderer;
pub use result_renderer::TessResultRenderer;
mod mutable_iterator;
pub use mutable_iterator::MutableIterator;
mod enums;
pub use enums::{
TessOrientation, TessPageIteratorLevel, TessPageSegMode, TessParagraphJustification, TessPolyBlockType,
TessTextlineOrder, TessWritingDirection,
};
mod api;
pub use api::{BoundingBoxArray, TesseractAPI};
pub mod leptonica;
#[cfg(any(feature = "build-tesseract", feature = "build-tesseract-wasm"))]
pub use leptonica::Pix;
/// Returns the English `eng.traineddata` blob embedded at compile time.
///
/// This variant exists only when the `bundle-tessdata-eng` feature is enabled
/// and always returns `Some`; without the feature the function returns `None`.
///
/// The bundled data is the `tessdata_fast` variant (~4 MB) downloaded by
/// `build.rs` to `TESSDATA_PREFIX_BUNDLED/tessdata/eng.traineddata`. Embedding
/// it lets WASM builds drive Tesseract OCR without filesystem access or
/// runtime fetches.
#[cfg(feature = "bundle-tessdata-eng")]
pub fn bundled_eng_traineddata() -> Option<&'static [u8]> {
    // The path is assembled from the build-script-provided environment
    // variable, so the bytes are baked into the binary at compile time.
    const ENG_TRAINEDDATA: &[u8] = include_bytes!(concat!(
        env!("TESSDATA_PREFIX_BUNDLED"),
        "/tessdata/eng.traineddata"
    ));
    Some(ENG_TRAINEDDATA)
}
/// Fallback used when the `bundle-tessdata-eng` feature is disabled: no
/// trained data is embedded, so this always returns `None`.
#[cfg(not(feature = "bundle-tessdata-eng"))]
pub fn bundled_eng_traineddata() -> Option<&'static [u8]> {
    // Nothing was bundled at compile time.
    None
}

View File

@@ -0,0 +1,68 @@
use crate::error::{Result, TesseractError};
use std::os::raw::{c_int, c_void};
use std::sync::{Arc, Mutex};
/// Owning wrapper around a native Tesseract monitor object.
///
/// The raw pointer is obtained from `TessMonitorCreate` and released with
/// `TessMonitorDelete` when the wrapper is dropped.
pub struct TessMonitor {
// Raw monitor pointer; every access goes through the mutex.
handle: Arc<Mutex<*mut c_void>>,
}
// SAFETY: the raw pointer is only dereferenced by FFI calls made while the
// mutex is held. NOTE(review): this also assumes the native monitor may be
// used from any thread — confirm against the Tesseract API.
unsafe impl Send for TessMonitor {}
unsafe impl Sync for TessMonitor {}
impl TessMonitor {
    /// Allocates a fresh native monitor and wraps it.
    ///
    /// # Returns
    ///
    /// A `TessMonitor` owning the pointer returned by `TessMonitorCreate`.
    pub fn new() -> Self {
        // SAFETY: TessMonitorCreate() takes no arguments and returns a new
        // monitor pointer that this wrapper takes ownership of.
        let raw = unsafe { TessMonitorCreate() };
        Self {
            handle: Arc::new(Mutex::new(raw)),
        }
    }

    /// Sets the deadline for the monitor.
    ///
    /// # Arguments
    ///
    /// * `deadline` - Deadline in milliseconds.
    ///
    /// # Errors
    ///
    /// Returns `TesseractError::MutexLockError` if the handle mutex cannot be
    /// locked.
    pub fn set_deadline(&self, deadline: i32) -> Result<()> {
        let Ok(monitor) = self.handle.lock() else {
            return Err(TesseractError::MutexLockError);
        };
        // SAFETY: `*monitor` is the pointer stored at construction and the
        // mutex is held for the duration of the call.
        unsafe { TessMonitorSetDeadlineMSecs(*monitor, deadline) };
        Ok(())
    }

    /// Reads the monitor's current progress value.
    ///
    /// # Returns
    ///
    /// The value reported by `TessMonitorGetProgress`.
    ///
    /// # Errors
    ///
    /// Returns `TesseractError::MutexLockError` if the handle mutex cannot be
    /// locked.
    pub fn get_progress(&self) -> Result<i32> {
        match self.handle.lock() {
            // SAFETY: `*monitor` is the pointer stored at construction and
            // the mutex is held for the duration of the call.
            Ok(monitor) => Ok(unsafe { TessMonitorGetProgress(*monitor) }),
            Err(_) => Err(TesseractError::MutexLockError),
        }
    }
}
impl Drop for TessMonitor {
    /// Frees the native monitor exactly once.
    ///
    /// Unlike a plain `if let Ok(..) = lock()`, a poisoned mutex does not
    /// cause the native object to be leaked: the guard is recovered from the
    /// poison error and the pointer is still released.
    fn drop(&mut self) {
        // A poisoned lock still protects a live pointer, so recover the guard.
        let mut handle = self.handle.lock().unwrap_or_else(|poisoned| poisoned.into_inner());
        if !(*handle).is_null() {
            // SAFETY: the pointer came from TessMonitorCreate() and has not
            // been deleted yet (it is nulled right after deletion).
            unsafe { TessMonitorDelete(*handle) };
            // Never leave a dangling pointer behind in the shared slot.
            *handle = std::ptr::null_mut();
        }
    }
}
// Raw Tesseract C API bindings for the progress monitor, declared through the
// crate's `ffi_extern!` macro. The monitor is passed around as an opaque
// `*mut c_void`.
ffi_extern! {
// Allocates a new monitor; paired with `TessMonitorDelete`.
pub fn TessMonitorCreate() -> *mut c_void;
// Releases a monitor.
pub fn TessMonitorDelete(monitor: *mut c_void);
// Sets the monitor's deadline, in milliseconds.
pub fn TessMonitorSetDeadlineMSecs(monitor: *mut c_void, deadline: c_int);
// Reads the monitor's progress value.
pub fn TessMonitorGetProgress(monitor: *mut c_void) -> c_int;
}

View File

@@ -0,0 +1,197 @@
use crate::error::{Result, TesseractError};
use std::ffi::CStr;
use std::os::raw::{c_char, c_void};
use std::sync::Arc;
use std::sync::Mutex;
use crate::result_iterator::{
TessResultIteratorConfidence, TessResultIteratorGetUTF8Text, TessResultIteratorNext,
TessResultIteratorSymbolIsDropcap, TessResultIteratorSymbolIsSubscript, TessResultIteratorSymbolIsSuperscript,
TessResultIteratorWordFontAttributes, TessResultIteratorWordIsFromDictionary, TessResultIteratorWordIsNumeric,
TessResultIteratorWordRecognitionLanguage,
};
/// Owning wrapper around a native Tesseract mutable-iterator pointer.
///
/// The pointer is supplied by the caller of `MutableIterator::new` and is
/// released with `TessResultIteratorDelete` when the wrapper is dropped.
pub struct MutableIterator {
// Raw iterator pointer; every access goes through the mutex.
handle: Arc<Mutex<*mut c_void>>,
}
// SAFETY: the raw pointer is only dereferenced by FFI calls made while the
// mutex is held. NOTE(review): this also assumes the native iterator may be
// used from any thread — confirm against the Tesseract API.
unsafe impl Send for MutableIterator {}
unsafe impl Sync for MutableIterator {}
impl MutableIterator {
    /// Creates a new instance of the MutableIterator.
    ///
    /// The wrapper takes ownership of `handle`; it is released with
    /// `TessResultIteratorDelete` when the `MutableIterator` is dropped.
    ///
    /// # Arguments
    ///
    /// * `handle` - Pointer to the native mutable iterator.
    pub fn new(handle: *mut c_void) -> Self {
        MutableIterator {
            handle: Arc::new(Mutex::new(handle)),
        }
    }

    /// Gets the UTF-8 text at the iterator's current position.
    ///
    /// # Arguments
    ///
    /// * `level` - Page-iterator level of the text, as a raw integer.
    ///
    /// # Errors
    ///
    /// * `TesseractError::MutexError` if the handle mutex cannot be locked.
    /// * `TesseractError::NullPointerError` if Tesseract returns no text.
    /// * A UTF-8 conversion error if the returned bytes are not valid UTF-8.
    ///   The native buffer is released in that case as well.
    pub fn get_utf8_text(&self, level: i32) -> Result<String> {
        let handle = self.handle.lock().map_err(|_| TesseractError::MutexError)?;
        // SAFETY: `*handle` is the iterator pointer stored at construction
        // and the mutex is held for the duration of the call.
        let text_ptr = unsafe { TessResultIteratorGetUTF8Text(*handle, level) };
        if text_ptr.is_null() {
            return Err(TesseractError::NullPointerError);
        }
        // Copy the text out first and only propagate a UTF-8 error *after*
        // the buffer has been handed back to Tesseract; returning early with
        // `?` here would leak the allocation.
        // SAFETY: `text_ptr` is non-null and points to a NUL-terminated
        // string that stays alive until TessDeleteText() below.
        let text = unsafe { CStr::from_ptr(text_ptr) }.to_str().map(str::to_owned);
        // SAFETY: `text_ptr` was allocated by Tesseract and is freed exactly
        // once; no borrow of it survives past this point.
        unsafe { TessDeleteText(text_ptr as *mut c_char) };
        Ok(text?)
    }

    /// Gets the confidence at the iterator's current position.
    ///
    /// # Arguments
    ///
    /// * `level` - Page-iterator level of the confidence, as a raw integer.
    ///
    /// # Errors
    ///
    /// Returns `TesseractError::MutexError` if the handle mutex cannot be
    /// locked.
    pub fn confidence(&self, level: i32) -> Result<f32> {
        let handle = self.handle.lock().map_err(|_| TesseractError::MutexError)?;
        // SAFETY: `*handle` is the stored iterator pointer; mutex is held.
        Ok(unsafe { TessResultIteratorConfidence(*handle, level) })
    }

    /// Gets the recognition language of the current word.
    ///
    /// # Returns
    ///
    /// Returns the recognition language as a `String` if successful, otherwise returns an error.
    ///
    /// # Errors
    ///
    /// * `TesseractError::MutexError` if the handle mutex cannot be locked.
    /// * `TesseractError::NullPointerError` if Tesseract returns no language.
    /// * A UTF-8 conversion error if the name is not valid UTF-8.
    pub fn word_recognition_language(&self) -> Result<String> {
        let handle = self.handle.lock().map_err(|_| TesseractError::MutexError)?;
        // SAFETY: `*handle` is the stored iterator pointer; mutex is held.
        let lang_ptr = unsafe { TessResultIteratorWordRecognitionLanguage(*handle) };
        if lang_ptr.is_null() {
            return Err(TesseractError::NullPointerError);
        }
        // The string is copied and deliberately not freed here.
        // NOTE(review): presumably the buffer stays owned by Tesseract — confirm.
        // SAFETY: `lang_ptr` is non-null and NUL-terminated.
        let c_str = unsafe { CStr::from_ptr(lang_ptr) };
        Ok(c_str.to_str()?.to_owned())
    }

    /// Gets the font attributes of the current word.
    ///
    /// # Returns
    ///
    /// On success, the tuple `(is_bold, is_italic, is_underlined,
    /// is_monospace, is_serif, is_smallcaps, pointsize, font_id)`.
    ///
    /// # Errors
    ///
    /// * `TesseractError::MutexError` if the handle mutex cannot be locked.
    /// * `TesseractError::InvalidParameterError` if Tesseract returns a zero
    ///   (null) result for the attributes.
    pub fn word_font_attributes(&self) -> Result<(bool, bool, bool, bool, bool, bool, i32, i32)> {
        let handle = self.handle.lock().map_err(|_| TesseractError::MutexError)?;
        let mut is_bold = 0;
        let mut is_italic = 0;
        let mut is_underlined = 0;
        let mut is_monospace = 0;
        let mut is_serif = 0;
        let mut is_smallcaps = 0;
        let mut pointsize = 0;
        let mut font_id = 0;
        // SAFETY: `*handle` is the stored iterator pointer; every output
        // pointer refers to a stack local that outlives the call.
        let result = unsafe {
            TessResultIteratorWordFontAttributes(
                *handle,
                &mut is_bold,
                &mut is_italic,
                &mut is_underlined,
                &mut is_monospace,
                &mut is_serif,
                &mut is_smallcaps,
                &mut pointsize,
                &mut font_id,
            )
        };
        if result == 0 {
            Err(TesseractError::InvalidParameterError)
        } else {
            // The flags are filled in as integers; any non-zero value is true.
            Ok((
                is_bold != 0,
                is_italic != 0,
                is_underlined != 0,
                is_monospace != 0,
                is_serif != 0,
                is_smallcaps != 0,
                pointsize,
                font_id,
            ))
        }
    }

    /// Checks if the current word is from the dictionary.
    ///
    /// # Returns
    ///
    /// Returns `Ok(true)` if the current word is from the dictionary, otherwise returns `Ok(false)`.
    ///
    /// # Errors
    ///
    /// Returns `TesseractError::MutexError` if the handle mutex cannot be
    /// locked.
    pub fn word_is_from_dictionary(&self) -> Result<bool> {
        let handle = self.handle.lock().map_err(|_| TesseractError::MutexError)?;
        // SAFETY: `*handle` is the stored iterator pointer; mutex is held.
        Ok(unsafe { TessResultIteratorWordIsFromDictionary(*handle) != 0 })
    }

    /// Checks if the current word is numeric.
    ///
    /// # Returns
    ///
    /// Returns `Ok(true)` if the current word is numeric, otherwise returns `Ok(false)`.
    ///
    /// # Errors
    ///
    /// Returns `TesseractError::MutexError` if the handle mutex cannot be
    /// locked.
    pub fn word_is_numeric(&self) -> Result<bool> {
        let handle = self.handle.lock().map_err(|_| TesseractError::MutexError)?;
        // SAFETY: `*handle` is the stored iterator pointer; mutex is held.
        Ok(unsafe { TessResultIteratorWordIsNumeric(*handle) != 0 })
    }

    /// Checks if the current symbol is superscript.
    ///
    /// # Returns
    ///
    /// Returns `Ok(true)` if the current symbol is superscript, otherwise returns `Ok(false)`.
    ///
    /// # Errors
    ///
    /// Returns `TesseractError::MutexError` if the handle mutex cannot be
    /// locked.
    pub fn symbol_is_superscript(&self) -> Result<bool> {
        let handle = self.handle.lock().map_err(|_| TesseractError::MutexError)?;
        // SAFETY: `*handle` is the stored iterator pointer; mutex is held.
        Ok(unsafe { TessResultIteratorSymbolIsSuperscript(*handle) != 0 })
    }

    /// Checks if the current symbol is subscript.
    ///
    /// # Returns
    ///
    /// Returns `Ok(true)` if the current symbol is subscript, otherwise returns `Ok(false)`.
    ///
    /// # Errors
    ///
    /// Returns `TesseractError::MutexError` if the handle mutex cannot be
    /// locked.
    pub fn symbol_is_subscript(&self) -> Result<bool> {
        let handle = self.handle.lock().map_err(|_| TesseractError::MutexError)?;
        // SAFETY: `*handle` is the stored iterator pointer; mutex is held.
        Ok(unsafe { TessResultIteratorSymbolIsSubscript(*handle) != 0 })
    }

    /// Checks if the current symbol is dropcap.
    ///
    /// # Returns
    ///
    /// Returns `Ok(true)` if the current symbol is dropcap, otherwise returns `Ok(false)`.
    ///
    /// # Errors
    ///
    /// Returns `TesseractError::MutexError` if the handle mutex cannot be
    /// locked.
    pub fn symbol_is_dropcap(&self) -> Result<bool> {
        let handle = self.handle.lock().map_err(|_| TesseractError::MutexError)?;
        // SAFETY: `*handle` is the stored iterator pointer; mutex is held.
        Ok(unsafe { TessResultIteratorSymbolIsDropcap(*handle) != 0 })
    }

    /// Advances the iterator to the next element at `level`.
    ///
    /// # Arguments
    ///
    /// * `level` - Page-iterator level to advance by, as a raw integer.
    ///
    /// # Returns
    ///
    /// Returns `Ok(true)` if the iterator advanced, `Ok(false)` if Tesseract
    /// reported no further element.
    ///
    /// # Errors
    ///
    /// Returns `TesseractError::MutexError` if the handle mutex cannot be
    /// locked.
    pub fn next(&self, level: i32) -> Result<bool> {
        let handle = self.handle.lock().map_err(|_| TesseractError::MutexError)?;
        // SAFETY: `*handle` is the stored iterator pointer; mutex is held.
        Ok(unsafe { TessResultIteratorNext(*handle, level) != 0 })
    }
}
impl Drop for MutableIterator {
    /// Frees the native iterator exactly once.
    ///
    /// A poisoned mutex does not cause the native object to be leaked: the
    /// guard is recovered from the poison error and the pointer is still
    /// released.
    fn drop(&mut self) {
        // A poisoned lock still protects a live pointer, so recover the guard.
        let mut handle = self.handle.lock().unwrap_or_else(|poisoned| poisoned.into_inner());
        if !(*handle).is_null() {
            // SAFETY: the pointer was handed to this wrapper at construction
            // and has not been deleted yet (it is nulled right after).
            unsafe { TessResultIteratorDelete(*handle) };
            // Never leave a dangling pointer behind in the shared slot.
            *handle = std::ptr::null_mut();
        }
    }
}
// Raw Tesseract C API bindings declared by this module through the crate's
// `ffi_extern!` macro; the remaining iterator functions are imported from
// `crate::result_iterator`.
ffi_extern! {
// Releases a result iterator.
pub fn TessResultIteratorDelete(handle: *mut c_void);
// Frees a string previously returned by Tesseract (e.g. by
// `TessResultIteratorGetUTF8Text`).
pub fn TessDeleteText(text: *mut c_char);
}

View File

@@ -0,0 +1,421 @@
use crate::TesseractError;
use crate::enums::{
TessOrientation, TessPageIteratorLevel, TessParagraphJustification, TessPolyBlockType, TessTextlineOrder,
TessWritingDirection,
};
use crate::error::Result;
use std::os::raw::{c_float, c_int, c_void};
use std::sync::Arc;
use std::sync::Mutex;
/// Block-level layout information from Tesseract.
///
/// One value is produced per block by `PageIterator::extract_all_blocks`.
#[derive(Debug, Clone)]
pub struct BlockInfo {
/// Block classification, converted from the value of `TessPageIteratorBlockType`.
pub block_type: TessPolyBlockType,
/// Left edge of the block's bounding box, as reported by `TessPageIteratorBoundingBox`.
pub left: i32,
/// Top edge of the block's bounding box.
pub top: i32,
/// Right edge of the block's bounding box.
pub right: i32,
/// Bottom edge of the block's bounding box.
pub bottom: i32,
}
/// Paragraph-level information from Tesseract.
///
/// One value is produced per paragraph by
/// `PageIterator::extract_all_paragraphs`; the first four fields come from
/// `TessPageIteratorParagraphInfo`, the rest from `TessPageIteratorBoundingBox`.
#[derive(Debug, Clone)]
pub struct ParaInfo {
/// Paragraph justification, converted from Tesseract's integer value.
pub justification: TessParagraphJustification,
/// `true` when Tesseract's `is_list_item` output was non-zero.
pub is_list_item: bool,
/// `true` when Tesseract's `is_crown` output was non-zero.
pub is_crown: bool,
/// Value of Tesseract's `first_line_indent` output.
pub first_line_indent: i32,
/// Left edge of the paragraph's bounding box.
pub left: i32,
/// Top edge of the paragraph's bounding box.
pub top: i32,
/// Right edge of the paragraph's bounding box.
pub right: i32,
/// Bottom edge of the paragraph's bounding box.
pub bottom: i32,
}
/// Owning wrapper around a native Tesseract page-iterator pointer.
///
/// The pointer is supplied by the caller of `PageIterator::new` and is
/// released with `TessPageIteratorDelete` when the wrapper is dropped.
pub struct PageIterator {
/// Raw iterator pointer behind a shared mutex. The field is public, so
/// clones of the `Arc` may outlive this wrapper; such holders must not use
/// the pointer after the `PageIterator` has been dropped.
pub handle: Arc<Mutex<*mut c_void>>,
}
// SAFETY: the raw pointer is only dereferenced by FFI calls made while the
// mutex is held. NOTE(review): this also assumes the native iterator may be
// used from any thread — confirm against the Tesseract API.
unsafe impl Send for PageIterator {}
unsafe impl Sync for PageIterator {}
impl PageIterator {
    /// Wraps a raw Tesseract page-iterator pointer.
    ///
    /// # Arguments
    ///
    /// * `handle` - Pointer to the native page iterator. The wrapper takes
    ///   ownership and deletes it on drop.
    ///
    /// # Returns
    ///
    /// The new `PageIterator`.
    pub fn new(handle: *mut c_void) -> Self {
        let shared = Arc::new(Mutex::new(handle));
        PageIterator { handle: shared }
    }

    /// Locks the handle mutex, reporting a poisoned lock as `MutexLockError`.
    fn lock_handle(&self) -> Result<std::sync::MutexGuard<'_, *mut c_void>> {
        self.handle.lock().map_err(|_| TesseractError::MutexLockError)
    }

    /// Resets the iterator to its first element.
    ///
    /// # Errors
    ///
    /// Returns `TesseractError::MutexLockError` if the mutex cannot be locked.
    pub fn begin(&self) -> Result<()> {
        let raw = self.lock_handle()?;
        // SAFETY: `*raw` is the stored iterator pointer; the mutex is held.
        unsafe { TessPageIteratorBegin(*raw) };
        Ok(())
    }

    /// Advances the iterator to the next element at `level`.
    ///
    /// # Arguments
    ///
    /// * `level` - Level to advance by.
    ///
    /// # Returns
    ///
    /// `Ok(true)` if the iterator advanced, `Ok(false)` otherwise.
    pub fn next(&self, level: TessPageIteratorLevel) -> Result<bool> {
        let raw = self.lock_handle()?;
        // SAFETY: `*raw` is the stored iterator pointer; the mutex is held.
        let advanced = unsafe { TessPageIteratorNext(*raw, level as c_int) };
        Ok(advanced != 0)
    }

    /// Reports whether the iterator sits at the beginning of an element of
    /// the given level.
    ///
    /// # Arguments
    ///
    /// * `level` - Level to test.
    ///
    /// # Returns
    ///
    /// `Ok(true)` if at the beginning, `Ok(false)` otherwise.
    pub fn is_at_beginning_of(&self, level: TessPageIteratorLevel) -> Result<bool> {
        let raw = self.lock_handle()?;
        // SAFETY: `*raw` is the stored iterator pointer; the mutex is held.
        let at_start = unsafe { TessPageIteratorIsAtBeginningOf(*raw, level as c_int) };
        Ok(at_start != 0)
    }

    /// Reports whether the iterator sits at the final `element` of the
    /// enclosing `level`.
    ///
    /// # Arguments
    ///
    /// * `level` - Enclosing level.
    /// * `element` - Element level inside `level`.
    ///
    /// # Returns
    ///
    /// `Ok(true)` if at the final element, `Ok(false)` otherwise.
    pub fn is_at_final_element(&self, level: TessPageIteratorLevel, element: TessPageIteratorLevel) -> Result<bool> {
        let raw = self.lock_handle()?;
        // SAFETY: `*raw` is the stored iterator pointer; the mutex is held.
        let at_end = unsafe { TessPageIteratorIsAtFinalElement(*raw, level as c_int, element as c_int) };
        Ok(at_end != 0)
    }

    /// Gets the bounding box of the current element at `level`.
    ///
    /// # Arguments
    ///
    /// * `level` - Level of the bounding box.
    ///
    /// # Returns
    ///
    /// `(left, top, right, bottom)` on success.
    ///
    /// # Errors
    ///
    /// * `TesseractError::MutexLockError` if the mutex cannot be locked.
    /// * `TesseractError::InvalidParameterError` if Tesseract reports no box.
    pub fn bounding_box(&self, level: TessPageIteratorLevel) -> Result<(i32, i32, i32, i32)> {
        let raw = self.lock_handle()?;
        let (mut left, mut top, mut right, mut bottom) = (0, 0, 0, 0);
        // SAFETY: `*raw` is the stored iterator pointer; the four outputs are
        // stack locals that outlive the call.
        let found = unsafe {
            TessPageIteratorBoundingBox(*raw, level as c_int, &mut left, &mut top, &mut right, &mut bottom)
        };
        if found == 0 {
            return Err(TesseractError::InvalidParameterError);
        }
        Ok((left, top, right, bottom))
    }

    /// Gets the block type at the iterator's current position.
    ///
    /// # Returns
    ///
    /// The block type as a `TessPolyBlockType`.
    pub fn block_type(&self) -> Result<TessPolyBlockType> {
        let raw = self.lock_handle()?;
        // SAFETY: `*raw` is the stored iterator pointer; the mutex is held.
        let kind = unsafe { TessPageIteratorBlockType(*raw) };
        Ok(TessPolyBlockType::from_int(kind))
    }

    /// Gets the baseline of the current element at `level`.
    ///
    /// # Arguments
    ///
    /// * `level` - Level of the baseline, as a raw integer.
    ///
    /// # Returns
    ///
    /// `(x1, y1, x2, y2)` on success.
    ///
    /// # Errors
    ///
    /// * `TesseractError::MutexLockError` if the mutex cannot be locked.
    /// * `TesseractError::InvalidParameterError` if Tesseract reports no
    ///   baseline.
    pub fn baseline(&self, level: i32) -> Result<(i32, i32, i32, i32)> {
        let raw = self.lock_handle()?;
        let (mut x1, mut y1, mut x2, mut y2) = (0, 0, 0, 0);
        // SAFETY: `*raw` is the stored iterator pointer; the four outputs are
        // stack locals that outlive the call.
        let found = unsafe { TessPageIteratorBaseline(*raw, level, &mut x1, &mut y1, &mut x2, &mut y2) };
        if found == 0 {
            return Err(TesseractError::InvalidParameterError);
        }
        Ok((x1, y1, x2, y2))
    }

    /// Gets the page orientation information at the current position.
    ///
    /// # Returns
    ///
    /// `(orientation, writing_direction, textline_order, deskew_angle)` on
    /// success.
    ///
    /// # Errors
    ///
    /// * `TesseractError::MutexLockError` if the mutex cannot be locked.
    /// * `TesseractError::InvalidParameterError` if the FFI call returns 0.
    pub fn orientation(&self) -> Result<(TessOrientation, TessWritingDirection, TessTextlineOrder, f32)> {
        let raw = self.lock_handle()?;
        let mut page_orientation: c_int = 0;
        let mut direction: c_int = 0;
        let mut line_order: c_int = 0;
        let mut deskew: c_float = 0.0;
        // SAFETY: `*raw` is the stored iterator pointer; every output pointer
        // refers to a stack local that outlives the call.
        let status = unsafe {
            TessPageIteratorOrientation(
                *raw,
                &mut page_orientation,
                &mut direction,
                &mut line_order,
                &mut deskew,
            )
        };
        if status == 0 {
            return Err(TesseractError::InvalidParameterError);
        }
        Ok((
            TessOrientation::from_int(page_orientation),
            TessWritingDirection::from_int(direction),
            TessTextlineOrder::from_int(line_order),
            deskew,
        ))
    }

    /// Extracts all blocks from the page in a single mutex-locked pass.
    ///
    /// The iterator is reset to the beginning and then walked at `RIL_BLOCK`
    /// level; the block type and bounding box are recorded for every block
    /// that has a bounding box.
    ///
    /// # Returns
    ///
    /// `Ok(Vec<BlockInfo>)` with one entry per block, or an error if the
    /// mutex cannot be acquired.
    pub fn extract_all_blocks(&self) -> Result<Vec<BlockInfo>> {
        let raw = self.lock_handle()?;
        let block_level = TessPageIteratorLevel::RIL_BLOCK as c_int;
        let mut found = Vec::new();

        // SAFETY: `*raw` is the stored iterator pointer. The mutex stays
        // locked for the whole walk, so nothing else touches the iterator.
        unsafe { TessPageIteratorBegin(*raw) };

        // The body always runs once for the first element, then repeats while
        // Tesseract reports another block.
        let mut more = true;
        while more {
            // SAFETY: reads the current position and returns an integer enum
            // value; no ownership is transferred.
            let kind = unsafe { TessPageIteratorBlockType(*raw) };

            let (mut left, mut top, mut right, mut bottom): (c_int, c_int, c_int, c_int) = (0, 0, 0, 0);
            // SAFETY: the four outputs are stack locals that outlive the call.
            let has_box = unsafe {
                TessPageIteratorBoundingBox(*raw, block_level, &mut left, &mut top, &mut right, &mut bottom)
            } != 0;

            if has_box {
                found.push(BlockInfo {
                    block_type: TessPolyBlockType::from_int(kind),
                    left,
                    top,
                    right,
                    bottom,
                });
            }

            // SAFETY: advances the iterator in place; 0 means no further
            // element at this level.
            more = unsafe { TessPageIteratorNext(*raw, block_level) } != 0;
        }
        Ok(found)
    }

    /// Extracts all paragraphs from the page in a single mutex-locked pass.
    ///
    /// The iterator is reset to the beginning and then walked at `RIL_PARA`
    /// level; a paragraph is recorded only when both its paragraph info and
    /// its bounding box are available.
    ///
    /// # Returns
    ///
    /// `Ok(Vec<ParaInfo>)` with one entry per paragraph, or an error if the
    /// mutex cannot be acquired.
    pub fn extract_all_paragraphs(&self) -> Result<Vec<ParaInfo>> {
        let raw = self.lock_handle()?;
        let para_level = TessPageIteratorLevel::RIL_PARA as c_int;
        let mut found = Vec::new();

        // SAFETY: `*raw` is the stored iterator pointer. The mutex stays
        // locked for the whole walk, so nothing else touches the iterator.
        unsafe { TessPageIteratorBegin(*raw) };

        let mut more = true;
        while more {
            // The C API writes the two flags through BOOL* (int*). Rust's
            // `bool` is 1 byte while C int is 4 bytes, so `c_int` temporaries
            // are used and converted afterwards to avoid stack corruption.
            let mut justification: c_int = 0;
            let mut list_item_flag: c_int = 0;
            let mut crown_flag: c_int = 0;
            let mut first_line_indent: c_int = 0;
            // SAFETY: all output pointers refer to stack locals that outlive
            // the call; the callee does not retain them.
            let has_info = unsafe {
                TessPageIteratorParagraphInfo(
                    *raw,
                    &mut justification,
                    &mut list_item_flag,
                    &mut crown_flag,
                    &mut first_line_indent,
                )
            } != 0;

            let (mut left, mut top, mut right, mut bottom): (c_int, c_int, c_int, c_int) = (0, 0, 0, 0);
            // SAFETY: the four outputs are stack locals that outlive the call.
            let has_box = unsafe {
                TessPageIteratorBoundingBox(*raw, para_level, &mut left, &mut top, &mut right, &mut bottom)
            } != 0;

            if has_info && has_box {
                found.push(ParaInfo {
                    justification: TessParagraphJustification::from_int(justification),
                    is_list_item: list_item_flag != 0,
                    is_crown: crown_flag != 0,
                    first_line_indent,
                    left,
                    top,
                    right,
                    bottom,
                });
            }

            // SAFETY: advances the iterator in place; 0 means no further
            // element at this level.
            more = unsafe { TessPageIteratorNext(*raw, para_level) } != 0;
        }
        Ok(found)
    }

    /// Gets the paragraph information at the iterator's current position.
    ///
    /// # Returns
    ///
    /// `(justification, is_list_item, is_crown, first_line_indent)` on
    /// success.
    ///
    /// # Errors
    ///
    /// * `TesseractError::MutexLockError` if the mutex cannot be locked.
    /// * `TesseractError::InvalidParameterError` if the FFI call returns 0.
    pub fn paragraph_info(&self) -> Result<(TessParagraphJustification, bool, bool, i32)> {
        let raw = self.lock_handle()?;
        // The C API writes the two flags through BOOL* (int*). Rust's `bool`
        // is 1 byte while C int is 4 bytes, so `c_int` temporaries are used
        // and converted afterwards to avoid stack corruption.
        let mut justification: c_int = 0;
        let mut list_item_flag: c_int = 0;
        let mut crown_flag: c_int = 0;
        let mut first_line_indent: c_int = 0;
        // SAFETY: `*raw` is the stored iterator pointer; all output pointers
        // refer to stack locals that outlive the call.
        let status = unsafe {
            TessPageIteratorParagraphInfo(
                *raw,
                &mut justification,
                &mut list_item_flag,
                &mut crown_flag,
                &mut first_line_indent,
            )
        };
        if status == 0 {
            return Err(TesseractError::InvalidParameterError);
        }
        Ok((
            TessParagraphJustification::from_int(justification),
            list_item_flag != 0,
            crown_flag != 0,
            first_line_indent,
        ))
    }
}
impl Drop for PageIterator {
    /// Frees the native iterator exactly once.
    ///
    /// A poisoned mutex does not cause the native object to be leaked: the
    /// guard is recovered from the poison error and the pointer is still
    /// released. The stored pointer is nulled afterwards because `handle` is
    /// a public `Arc` and clones of it may outlive this wrapper.
    fn drop(&mut self) {
        // A poisoned lock still protects a live pointer, so recover the guard.
        let mut handle = self.handle.lock().unwrap_or_else(|poisoned| poisoned.into_inner());
        if !(*handle).is_null() {
            // SAFETY: the pointer was handed to this wrapper at construction
            // and has not been deleted yet (it is nulled right after).
            unsafe { TessPageIteratorDelete(*handle) };
            // Never leave a dangling pointer behind in the shared slot.
            *handle = std::ptr::null_mut();
        }
    }
}
// Raw Tesseract C API bindings for the page iterator, declared through the
// crate's `ffi_extern!` macro. Handles are opaque `*mut c_void` pointers and
// C booleans are represented as `c_int` (non-zero means true).
ffi_extern! {
// Releases a page iterator.
pub fn TessPageIteratorDelete(handle: *mut c_void);
// Resets the iterator to its first element.
pub fn TessPageIteratorBegin(handle: *mut c_void);
// Advances the iterator; returns 0 when there is no further element at `level`.
pub fn TessPageIteratorNext(handle: *mut c_void, level: c_int) -> c_int;
pub fn TessPageIteratorIsAtBeginningOf(handle: *mut c_void, level: c_int) -> c_int;
pub fn TessPageIteratorIsAtFinalElement(handle: *mut c_void, level: c_int, element: c_int) -> c_int;
// Writes the current element's bounding box through the four out-pointers;
// the wrappers treat a 0 return as "no box".
pub fn TessPageIteratorBoundingBox(
handle: *mut c_void,
level: c_int,
left: *mut c_int,
top: *mut c_int,
right: *mut c_int,
bottom: *mut c_int,
) -> c_int;
// Returns the block type as an integer enum value.
pub fn TessPageIteratorBlockType(handle: *mut c_void) -> c_int;
// Writes the baseline end points through the four out-pointers; the wrappers
// treat a 0 return as failure.
pub fn TessPageIteratorBaseline(
handle: *mut c_void,
level: c_int,
x1: *mut c_int,
y1: *mut c_int,
x2: *mut c_int,
y2: *mut c_int,
) -> c_int;
// Writes orientation, writing direction, text-line order and deskew angle
// through the out-pointers.
pub fn TessPageIteratorOrientation(
handle: *mut c_void,
orientation: *mut c_int,
writing_direction: *mut c_int,
textline_order: *mut c_int,
deskew_angle: *mut c_float,
) -> c_int;
// Takes a base-API handle and returns an iterator pointer.
pub fn TessBaseAPIGetIterator(handle: *mut c_void) -> *mut c_void;
// `is_list_item` and `is_crown` are C BOOL* (int*), hence `*mut c_int`
// rather than `*mut bool`.
pub fn TessPageIteratorParagraphInfo(
handle: *mut c_void,
justification: *mut c_int,
is_list_item: *mut c_int,
is_crown: *mut c_int,
first_line_indent: *mut c_int,
) -> c_int;
}

View File

@@ -0,0 +1,589 @@
use crate::api::TessDeleteText;
use crate::enums::TessPageIteratorLevel;
use crate::error::{Result, TesseractError};
use std::ffi::CStr;
use std::os::raw::{c_char, c_float, c_int, c_void};
use std::sync::{Arc, Mutex};
/// Font attributes detected by Tesseract for a word.
///
/// In this file the boolean fields are filled from the integer flags written by
/// `TessResultIteratorWordFontAttributes` (`flag != 0`); `pointsize` and `font_id`
/// are copied through unchanged.
#[derive(Debug, Clone)]
pub struct FontAttributes {
/// `true` when the `is_bold` output flag was non-zero.
pub is_bold: bool,
/// `true` when the `is_italic` output flag was non-zero.
pub is_italic: bool,
/// `true` when the `is_underlined` output flag was non-zero.
pub is_underlined: bool,
/// `true` when the `is_monospace` output flag was non-zero.
pub is_monospace: bool,
/// `true` when the `is_serif` output flag was non-zero.
pub is_serif: bool,
/// `true` when the `is_smallcaps` output flag was non-zero.
pub is_smallcaps: bool,
/// Point size value exactly as written by Tesseract.
pub pointsize: i32,
/// Font identifier exactly as written by Tesseract.
pub font_id: i32,
}
/// Complete word data extracted in a single mutex lock.
///
/// Produced by `extract_word_data_unlocked`, which queries everything at the
/// `RIL_WORD` level.
#[derive(Debug, Clone)]
pub struct WordData {
/// UTF-8 text of the word, copied out of the Tesseract-owned buffer.
pub text: String,
/// Left edge as written by `TessPageIteratorBoundingBox`.
pub left: i32,
/// Top edge as written by `TessPageIteratorBoundingBox`.
pub top: i32,
/// Right edge as written by `TessPageIteratorBoundingBox`.
pub right: i32,
/// Bottom edge as written by `TessPageIteratorBoundingBox`.
pub bottom: i32,
/// Value returned by `TessResultIteratorConfidence` for the word.
pub confidence: f32,
/// Font attributes, or `None` when the font-attribute call reported failure.
pub font_attrs: Option<FontAttributes>,
}
/// Wrapper around a raw Tesseract result-iterator pointer.
///
/// The methods in this file lock the mutex before passing the pointer to the C API, and
/// the `Drop` implementation passes it to `TessResultIteratorDelete`.
pub struct ResultIterator {
/// Mutex-guarded raw iterator pointer.
// NOTE(review): the field is `pub` and wrapped in an `Arc`, so the pointer can be cloned
// out and outlive this wrapper's `Drop` — confirm that callers never do this.
pub handle: Arc<Mutex<*mut c_void>>,
}
// NOTE(review): these impls assert that the raw pointer may be moved to and shared between
// threads. That presumably relies on every FFI call going through the mutex and on the
// Tesseract iterator having no thread affinity — confirm.
unsafe impl Send for ResultIterator {}
unsafe impl Sync for ResultIterator {}
impl ResultIterator {
/// Creates a new instance of the ResultIterator.
///
/// # Arguments
///
/// * `handle` - Pointer to the ResultIterator.
///
/// # Returns
///
/// Returns the new instance of the ResultIterator.
pub fn new(handle: *mut c_void) -> Self {
ResultIterator {
handle: Arc::new(Mutex::new(handle)),
}
}
/// Gets the UTF-8 text of the element at the iterator's current position.
///
/// # Arguments
///
/// * `level` - Page-iterator level whose text is requested.
///
/// # Returns
///
/// The text as an owned `String`.
///
/// # Errors
///
/// * `TesseractError::MutexLockError` if the handle mutex is poisoned.
/// * `TesseractError::NullPointerError` if Tesseract returns a null text pointer.
/// * A UTF-8 conversion error if the returned bytes are not valid UTF-8. The Tesseract
///   buffer is released before this error is returned.
pub fn get_utf8_text(&self, level: TessPageIteratorLevel) -> Result<String> {
    let handle = self.handle.lock().map_err(|_| TesseractError::MutexLockError)?;
    // SAFETY: the pointer is read from the guarded slot while the lock is held; the call
    // returns either null or a NUL-terminated buffer that must be released with
    // `TessDeleteText`.
    let text_ptr = unsafe { TessResultIteratorGetUTF8Text(*handle, level as c_int) };
    if text_ptr.is_null() {
        return Err(TesseractError::NullPointerError);
    }
    // Decode into an owned value *without* returning early: the buffer below must be
    // freed on the error path as well, otherwise invalid UTF-8 leaks the allocation.
    // SAFETY: `text_ptr` is non-null (checked above) and has not been freed yet.
    let decoded = unsafe { CStr::from_ptr(text_ptr) }.to_str().map(str::to_owned);
    // SAFETY: `text_ptr` came from `TessResultIteratorGetUTF8Text`, is freed exactly once
    // here, and is not touched afterwards (`decoded` owns its own copy of the data).
    unsafe { TessDeleteText(text_ptr as *mut c_char) };
    Ok(decoded?)
}
/// Reads the confidence Tesseract reports for the current element.
///
/// # Arguments
///
/// * `level` - Page-iterator level to query.
///
/// # Returns
///
/// The value returned by `TessResultIteratorConfidence`, or
/// `TesseractError::MutexLockError` if the handle mutex is poisoned.
pub fn confidence(&self, level: TessPageIteratorLevel) -> Result<f32> {
    let guard = self.handle.lock().map_err(|_| TesseractError::MutexLockError)?;
    let raw_level = level as c_int;
    // SAFETY: the pointer is read from the guarded slot while the lock is held; the call
    // takes no output pointers and returns a plain float.
    let score = unsafe { TessResultIteratorConfidence(*guard, raw_level) };
    Ok(score)
}
/// Returns the recognition language Tesseract reports for the current word.
///
/// # Returns
///
/// The language string copied into an owned `String`.
///
/// # Errors
///
/// * `TesseractError::MutexLockError` if the handle mutex is poisoned.
/// * `TesseractError::NullPointerError` if Tesseract returns a null pointer.
/// * A UTF-8 conversion error if the string is not valid UTF-8.
pub fn word_recognition_language(&self) -> Result<String> {
    let guard = self.handle.lock().map_err(|_| TesseractError::MutexLockError)?;
    // SAFETY: the pointer is read from the guarded slot while the lock is held.
    let raw_lang = unsafe { TessResultIteratorWordRecognitionLanguage(*guard) };
    if raw_lang.is_null() {
        return Err(TesseractError::NullPointerError);
    }
    // SAFETY: `raw_lang` is non-null (checked above); it is only read, and it is never
    // freed here because the string is not handed to `TessDeleteText` in this code.
    let language = unsafe { CStr::from_ptr(raw_lang) }.to_str()?;
    Ok(language.to_owned())
}
/// Gets the font attributes of the current iterator.
///
/// # Returns
///
/// Returns the font attributes as a tuple if successful, otherwise returns an error.
pub fn word_font_attributes(&self) -> Result<(bool, bool, bool, bool, bool, bool, i32, i32)> {
let handle = self.handle.lock().map_err(|_| TesseractError::MutexLockError)?;
let mut is_bold = 0;
let mut is_italic = 0;
let mut is_underlined = 0;
let mut is_monospace = 0;
let mut is_serif = 0;
let mut is_smallcaps = 0;
let mut pointsize = 0;
let mut font_id = 0;
// SAFETY: TessResultIteratorWordFontAttributes() takes output parameter pointers
// and fills them with font attribute values. This is safe because:
// 1. *handle is a valid pointer to an initialized ResultIterator (mutex-guarded)
// 2. All mutable references (&mut ...) are valid local stack variables
// 3. Each reference has a distinct memory location (no aliasing)
// 4. The references outlive the FFI call (defined on stack, used immediately after)
// 5. The function writes output i32 values (0/1 for bools, integers for size/id)
// 6. Each reference has exclusive mutable access (Rust borrow checker enforces this)
// 7. The output parameters are independent (function cannot cause data races)
let result = unsafe {
TessResultIteratorWordFontAttributes(
*handle,
&mut is_bold,
&mut is_italic,
&mut is_underlined,
&mut is_monospace,
&mut is_serif,
&mut is_smallcaps,
&mut pointsize,
&mut font_id,
)
};
if result == 0 {
Err(TesseractError::InvalidParameterError)
} else {
Ok((
is_bold != 0,
is_italic != 0,
is_underlined != 0,
is_monospace != 0,
is_serif != 0,
is_smallcaps != 0,
pointsize,
font_id,
))
}
}
/// Reports whether Tesseract flags the current word as coming from the dictionary.
///
/// # Returns
///
/// `true` when `TessResultIteratorWordIsFromDictionary` returns non-zero, `false`
/// otherwise; `TesseractError::MutexLockError` if the handle mutex is poisoned.
pub fn word_is_from_dictionary(&self) -> Result<bool> {
    let guard = self.handle.lock().map_err(|_| TesseractError::MutexLockError)?;
    // SAFETY: the pointer is read from the guarded slot while the lock is held; the call
    // takes no output pointers.
    let flag = unsafe { TessResultIteratorWordIsFromDictionary(*guard) };
    Ok(flag != 0)
}
/// Reports whether Tesseract flags the current word as numeric.
///
/// # Returns
///
/// `true` when `TessResultIteratorWordIsNumeric` returns non-zero, `false` otherwise;
/// `TesseractError::MutexLockError` if the handle mutex is poisoned.
pub fn word_is_numeric(&self) -> Result<bool> {
    let guard = self.handle.lock().map_err(|_| TesseractError::MutexLockError)?;
    // SAFETY: the pointer is read from the guarded slot while the lock is held; the call
    // takes no output pointers.
    let flag = unsafe { TessResultIteratorWordIsNumeric(*guard) };
    Ok(flag != 0)
}
/// Reports whether Tesseract flags the current symbol as superscript.
///
/// # Returns
///
/// `true` when `TessResultIteratorSymbolIsSuperscript` returns non-zero, `false`
/// otherwise; `TesseractError::MutexLockError` if the handle mutex is poisoned.
pub fn symbol_is_superscript(&self) -> Result<bool> {
    let guard = self.handle.lock().map_err(|_| TesseractError::MutexLockError)?;
    // SAFETY: the pointer is read from the guarded slot while the lock is held; the call
    // takes no output pointers.
    let flag = unsafe { TessResultIteratorSymbolIsSuperscript(*guard) };
    Ok(flag != 0)
}
/// Reports whether Tesseract flags the current symbol as subscript.
///
/// # Returns
///
/// `true` when `TessResultIteratorSymbolIsSubscript` returns non-zero, `false`
/// otherwise; `TesseractError::MutexLockError` if the handle mutex is poisoned.
pub fn symbol_is_subscript(&self) -> Result<bool> {
    let guard = self.handle.lock().map_err(|_| TesseractError::MutexLockError)?;
    // SAFETY: the pointer is read from the guarded slot while the lock is held; the call
    // takes no output pointers.
    let flag = unsafe { TessResultIteratorSymbolIsSubscript(*guard) };
    Ok(flag != 0)
}
/// Reports whether Tesseract flags the current symbol as a dropcap.
///
/// # Returns
///
/// `true` when `TessResultIteratorSymbolIsDropcap` returns non-zero, `false` otherwise;
/// `TesseractError::MutexLockError` if the handle mutex is poisoned.
pub fn symbol_is_dropcap(&self) -> Result<bool> {
    let guard = self.handle.lock().map_err(|_| TesseractError::MutexLockError)?;
    // SAFETY: the pointer is read from the guarded slot while the lock is held; the call
    // takes no output pointers.
    let flag = unsafe { TessResultIteratorSymbolIsDropcap(*guard) };
    Ok(flag != 0)
}
/// Advances the iterator to the next element at the given level.
///
/// # Arguments
///
/// * `level` - Page-iterator level to advance by.
///
/// # Returns
///
/// `true` when `TessResultIteratorNext` returns non-zero, `false` otherwise;
/// `TesseractError::MutexLockError` if the handle mutex is poisoned.
pub fn next(&self, level: TessPageIteratorLevel) -> Result<bool> {
    let guard = self.handle.lock().map_err(|_| TesseractError::MutexLockError)?;
    let raw_level = level as c_int;
    // SAFETY: the pointer is read from the guarded slot, and the lock is held for the
    // whole call, so the position change cannot interleave with other methods of this
    // wrapper.
    let advanced = unsafe { TessResultIteratorNext(*guard, raw_level) };
    Ok(advanced != 0)
}
/// Reads the current word together with its bounding box and confidence.
///
/// Each value is fetched through its own accessor (`get_utf8_text`, `get_bounding_box`,
/// `confidence`), so the handle mutex is taken and released once per accessor.
///
/// # Returns
///
/// `(text, left, top, right, bottom, confidence)` on success, or the error from the
/// first accessor that fails.
pub fn get_word_with_bounds(&self) -> Result<(String, i32, i32, i32, i32, f32)> {
    let word_text = self.get_utf8_text(TessPageIteratorLevel::RIL_WORD)?;
    let bounds = self.get_bounding_box(TessPageIteratorLevel::RIL_WORD)?;
    let score = self.confidence(TessPageIteratorLevel::RIL_WORD)?;
    let (left, top, right, bottom) = bounds;
    Ok((word_text, left, top, right, bottom, score))
}
/// Moves the iterator forward by one word.
///
/// # Returns
///
/// The result of `next` at the `RIL_WORD` level: `true` if the advance succeeded,
/// `false` if there are no more words.
pub fn next_word(&self) -> Result<bool> {
    let word_level = TessPageIteratorLevel::RIL_WORD;
    self.next(word_level)
}
/// Gets the word information for the current position in the iterator.
/// Should be called before next() to ensure valid data.
///
/// # Returns
/// Returns a tuple of (text, left, top, right, bottom, confidence) if successful
pub fn get_current_word(&self) -> Result<(String, i32, i32, i32, i32, f32)> {
let text = self.get_utf8_text(TessPageIteratorLevel::RIL_WORD)?;
let (left, top, right, bottom) = self.get_bounding_box(TessPageIteratorLevel::RIL_WORD)?;
let confidence = self.confidence(TessPageIteratorLevel::RIL_WORD)?;
Ok((text, left, top, right, bottom, confidence))
}
/// Queries the bounding box of the current element.
///
/// # Arguments
///
/// * `level` - Page-iterator level to query.
///
/// # Returns
///
/// `(left, top, right, bottom)` as written by `TessPageIteratorBoundingBox`.
///
/// # Errors
///
/// * `TesseractError::MutexLockError` if the handle mutex is poisoned.
/// * `TesseractError::InvalidParameterError` if the C call returns zero.
pub fn get_bounding_box(&self, level: TessPageIteratorLevel) -> Result<(i32, i32, i32, i32)> {
    let (mut edge_left, mut edge_top): (c_int, c_int) = (0, 0);
    let (mut edge_right, mut edge_bottom): (c_int, c_int) = (0, 0);
    let guard = self.handle.lock().map_err(|_| TesseractError::MutexLockError)?;
    // SAFETY: the pointer is read from the guarded slot while the lock is held, and the
    // four output arguments point at distinct locals that outlive the call.
    let status = unsafe {
        TessPageIteratorBoundingBox(
            *guard,
            level as c_int,
            &mut edge_left,
            &mut edge_top,
            &mut edge_right,
            &mut edge_bottom,
        )
    };
    if status == 0 {
        return Err(TesseractError::InvalidParameterError);
    }
    Ok((edge_left, edge_top, edge_right, edge_bottom))
}
/// Collects data for every word while holding the handle mutex once.
///
/// The iterator is first rewound with `TessPageIteratorBegin`, then walked word by word.
/// For each position the text, bounding box, confidence and font attributes are gathered
/// by `extract_word_data_unlocked`.
///
/// Positions that fail with `NullPointerError`, `InvalidParameterError` or `Utf8Error`
/// are skipped so that the remaining words are still returned.
///
/// # Returns
///
/// A `Vec<WordData>` with one entry per successfully read word.
///
/// # Errors
///
/// `TesseractError::MutexLockError` if the handle mutex is poisoned, or any error from
/// the per-word extraction other than the three skipped kinds listed above.
pub fn extract_all_words(&self) -> Result<Vec<WordData>> {
    let guard = self.handle.lock().map_err(|_| TesseractError::MutexLockError)?;
    let iter_ptr = *guard;
    let mut collected = Vec::new();

    // SAFETY: `iter_ptr` is read from the guarded slot and the lock stays held until this
    // function returns. The result-iterator handle is passed to a page-iterator function
    // here, as the original code does.
    unsafe { TessPageIteratorBegin(iter_ptr) };

    loop {
        // The lock is already held, so the helper that does not lock is used directly.
        match extract_word_data_unlocked(iter_ptr) {
            Ok(entry) => collected.push(entry),
            // Skippable per-word failures: missing text, failed bounding box, bad UTF-8.
            Err(
                TesseractError::NullPointerError
                | TesseractError::InvalidParameterError
                | TesseractError::Utf8Error(_),
            ) => {}
            Err(fatal) => return Err(fatal),
        }

        // SAFETY: same pointer, lock still held; the call only advances the position.
        let step = unsafe { TessResultIteratorNext(iter_ptr, TessPageIteratorLevel::RIL_WORD as c_int) };
        if step == 0 {
            break;
        }
    }

    Ok(collected)
}
/// Reads all data for the current word under a single mutex acquisition.
///
/// The lock is taken once and the text, bounding box, confidence and font attributes are
/// gathered by `extract_word_data_unlocked` while it is held.
///
/// # Returns
///
/// A [`WordData`] on success; `TesseractError::MutexLockError` if the handle mutex is
/// poisoned, or whatever error the extraction helper reports.
pub fn extract_word_data(&self) -> Result<WordData> {
    self.handle
        .lock()
        .map_err(|_| TesseractError::MutexLockError)
        .and_then(|guard| extract_word_data_unlocked(*guard))
}
}
/// Extracts word data from a raw iterator handle without acquiring the mutex.
///
/// The caller MUST hold the mutex lock for the `ResultIterator` this handle belongs to
/// before calling this function. Passing a handle that is not mutex-guarded, or calling
/// this function concurrently on the same handle, is undefined behaviour.
///
/// All queries are made at the `RIL_WORD` level, in this order: text, bounding box,
/// confidence, font attributes.
///
/// # Errors
///
/// * `TesseractError::NullPointerError` if Tesseract returns a null text pointer.
/// * `TesseractError::Utf8Error` if the text is not valid UTF-8. The Tesseract buffer is
///   released before the error is returned.
/// * `TesseractError::InvalidParameterError` if the bounding-box call returns zero.
///
/// A failing font-attribute call is not an error; it yields `font_attrs: None`.
fn extract_word_data_unlocked(raw: *mut c_void) -> Result<WordData> {
    // SAFETY: per this function's contract the caller holds the lock for `raw`. The call
    // returns either null or a NUL-terminated buffer that must be released with
    // `TessDeleteText`.
    let text_ptr = unsafe { TessResultIteratorGetUTF8Text(raw, TessPageIteratorLevel::RIL_WORD as c_int) };
    if text_ptr.is_null() {
        return Err(TesseractError::NullPointerError);
    }
    // Decode first and propagate the error only after the buffer has been released.
    // Using `?` directly on `to_str()` would skip `TessDeleteText` and leak the buffer
    // for every word that is not valid UTF-8.
    // SAFETY: `text_ptr` is non-null (checked above) and has not been freed yet.
    let decoded = unsafe { CStr::from_ptr(text_ptr) }.to_str().map(str::to_owned);
    // SAFETY: `text_ptr` came from `TessResultIteratorGetUTF8Text`, is freed exactly once
    // here, and is not accessed afterwards (`decoded` owns its own copy).
    unsafe { TessDeleteText(text_ptr as *mut c_char) };
    let text = decoded?;

    let mut left = 0;
    let mut top = 0;
    let mut right = 0;
    let mut bottom = 0;
    // SAFETY: caller holds the lock for `raw`; the four output arguments point at distinct
    // locals that outlive the call.
    let bbox_result = unsafe {
        TessPageIteratorBoundingBox(
            raw,
            TessPageIteratorLevel::RIL_WORD as c_int,
            &mut left,
            &mut top,
            &mut right,
            &mut bottom,
        )
    };
    if bbox_result == 0 {
        return Err(TesseractError::InvalidParameterError);
    }

    // SAFETY: caller holds the lock for `raw`; the call takes no output pointers.
    let confidence = unsafe { TessResultIteratorConfidence(raw, TessPageIteratorLevel::RIL_WORD as c_int) };

    // Collect font attributes; treat any failure as absent rather than propagating the error.
    let font_attrs = {
        let mut is_bold = 0;
        let mut is_italic = 0;
        let mut is_underlined = 0;
        let mut is_monospace = 0;
        let mut is_serif = 0;
        let mut is_smallcaps = 0;
        let mut pointsize = 0;
        let mut font_id = 0;
        // SAFETY: caller holds the lock for `raw`; every output argument points at its own
        // local, all of which outlive the call.
        let result = unsafe {
            TessResultIteratorWordFontAttributes(
                raw,
                &mut is_bold,
                &mut is_italic,
                &mut is_underlined,
                &mut is_monospace,
                &mut is_serif,
                &mut is_smallcaps,
                &mut pointsize,
                &mut font_id,
            )
        };
        if result != 0 {
            Some(FontAttributes {
                is_bold: is_bold != 0,
                is_italic: is_italic != 0,
                is_underlined: is_underlined != 0,
                is_monospace: is_monospace != 0,
                is_serif: is_serif != 0,
                is_smallcaps: is_smallcaps != 0,
                pointsize,
                font_id,
            })
        } else {
            None
        }
    };

    Ok(WordData {
        text,
        left,
        top,
        right,
        bottom,
        confidence,
        font_attrs,
    })
}
impl Drop for ResultIterator {
    /// Hands the wrapped iterator pointer to `TessResultIteratorDelete`.
    ///
    /// If the mutex cannot be locked (it is poisoned) the delete call is skipped and the
    /// iterator is not freed; `drop` never panics on that path.
    fn drop(&mut self) {
        if let Ok(guard) = self.handle.lock() {
            let raw = *guard;
            // SAFETY: the pointer is read from the guarded slot while the lock is held, and
            // this is the only place in this file that deletes it.
            // NOTE(review): relies on the stored pointer being a live iterator that no
            // other holder of the `Arc` uses after this point — confirm.
            unsafe { TessResultIteratorDelete(raw) };
        }
    }
}
// Raw Tesseract C-API declarations used by `ResultIterator`, expanded through the crate's
// `ffi_extern!` macro. Every `handle` is an opaque iterator pointer.
// NOTE(review): this block is compiled only with the `build-tesseract` or
// `build-tesseract-wasm` feature, while the code above that calls these functions carries
// no such gate in the visible source — confirm the enclosing module is gated to match.
// NOTE(review): upstream Tesseract's `capi.h` appears to declare
// `TessResultIteratorWordFontAttributes` as returning `const char*` (the font name), not
// an integer. If that holds for the linked version, the `c_int` return below reads only
// part of a pointer and the callers' `== 0` / `!= 0` checks should become null checks —
// verify against the bundled headers.
#[cfg(any(feature = "build-tesseract", feature = "build-tesseract-wasm"))]
ffi_extern! {
pub fn TessResultIteratorDelete(handle: *mut c_void);
pub fn TessPageIteratorBegin(handle: *mut c_void);
// The returned buffer is released by the callers in this file via `TessDeleteText`.
pub fn TessResultIteratorGetUTF8Text(handle: *mut c_void, level: c_int) -> *mut c_char;
pub fn TessResultIteratorConfidence(handle: *mut c_void, level: c_int) -> c_float;
// The returned string is only read by the caller in this file, never freed.
pub fn TessResultIteratorWordRecognitionLanguage(handle: *mut c_void) -> *const c_char;
// The boolean outputs are declared as `c_int`, not Rust `bool`.
pub fn TessResultIteratorWordFontAttributes(
handle: *mut c_void,
is_bold: *mut c_int,
is_italic: *mut c_int,
is_underlined: *mut c_int,
is_monospace: *mut c_int,
is_serif: *mut c_int,
is_smallcaps: *mut c_int,
pointsize: *mut c_int,
font_id: *mut c_int,
) -> c_int;
// Predicates: callers map a non-zero return to `true`.
pub fn TessResultIteratorWordIsFromDictionary(handle: *mut c_void) -> c_int;
pub fn TessResultIteratorWordIsNumeric(handle: *mut c_void) -> c_int;
pub fn TessResultIteratorSymbolIsSuperscript(handle: *mut c_void) -> c_int;
pub fn TessResultIteratorSymbolIsSubscript(handle: *mut c_void) -> c_int;
pub fn TessResultIteratorSymbolIsDropcap(handle: *mut c_void) -> c_int;
pub fn TessResultIteratorNext(handle: *mut c_void, level: c_int) -> c_int;
// Callers treat a zero return as failure.
pub fn TessPageIteratorBoundingBox(
handle: *mut c_void,
level: c_int,
left: *mut c_int,
top: *mut c_int,
right: *mut c_int,
bottom: *mut c_int,
) -> c_int;
}

View File

@@ -0,0 +1,212 @@
use crate::TesseractAPI;
use crate::error::{Result, TesseractError};
use std::ffi::{CStr, CString};
use std::os::raw::{c_char, c_int, c_void};
use std::sync::Arc;
use std::sync::Mutex;
/// Wrapper around a raw Tesseract result-renderer pointer.
///
/// The constructors in this file reject null pointers, every method locks the mutex before
/// calling into the C API, and `Drop` passes the pointer to `TessDeleteResultRenderer`.
pub struct TessResultRenderer {
/// Mutex-guarded raw renderer pointer.
handle: Arc<Mutex<*mut c_void>>,
}
// NOTE(review): these impls assert that the raw pointer may be moved to and shared between
// threads. That presumably relies on all access going through the mutex and on the
// renderer having no thread affinity — confirm.
unsafe impl Send for TessResultRenderer {}
unsafe impl Sync for TessResultRenderer {}
impl TessResultRenderer {
    /// Wraps a pointer returned by one of the `Tess*RendererCreate` functions.
    ///
    /// Returns `TesseractError::NullPointerError` when the pointer is null.
    fn from_raw(raw: *mut c_void) -> Result<Self> {
        if raw.is_null() {
            return Err(TesseractError::NullPointerError);
        }
        Ok(Self {
            handle: Arc::new(Mutex::new(raw)),
        })
    }

    /// Creates a plain-text renderer.
    ///
    /// # Arguments
    ///
    /// * `outputbase` - Output base path passed to `TessTextRendererCreate`.
    ///
    /// # Errors
    ///
    /// `TesseractError::NullByteInString` if `outputbase` contains an interior NUL byte,
    /// `TesseractError::NullPointerError` if Tesseract returns a null renderer.
    pub fn new_text_renderer(outputbase: &str) -> Result<Self> {
        let base = CString::new(outputbase).map_err(|_| TesseractError::NullByteInString)?;
        // SAFETY: `base` is a valid NUL-terminated string that outlives the call.
        let raw = unsafe { TessTextRendererCreate(base.as_ptr()) };
        Self::from_raw(raw)
    }

    /// Creates an hOCR renderer.
    ///
    /// # Arguments
    ///
    /// * `outputbase` - Output base path passed to `TessHOcrRendererCreate`.
    ///
    /// # Errors
    ///
    /// `TesseractError::NullByteInString` if `outputbase` contains an interior NUL byte,
    /// `TesseractError::NullPointerError` if Tesseract returns a null renderer.
    pub fn new_hocr_renderer(outputbase: &str) -> Result<Self> {
        let base = CString::new(outputbase).map_err(|_| TesseractError::NullByteInString)?;
        // SAFETY: `base` is a valid NUL-terminated string that outlives the call.
        let raw = unsafe { TessHOcrRendererCreate(base.as_ptr()) };
        Self::from_raw(raw)
    }

    /// Creates a PDF renderer.
    ///
    /// # Arguments
    ///
    /// * `outputbase` - Output base path.
    /// * `datadir` - Data directory path.
    /// * `textonly` - Forwarded to Tesseract as `1` for `true` and `0` for `false`.
    ///
    /// # Errors
    ///
    /// `TesseractError::NullByteInString` if either string contains an interior NUL byte,
    /// `TesseractError::NullPointerError` if Tesseract returns a null renderer.
    pub fn new_pdf_renderer(outputbase: &str, datadir: &str, textonly: bool) -> Result<Self> {
        let base = CString::new(outputbase).map_err(|_| TesseractError::NullByteInString)?;
        let data = CString::new(datadir).map_err(|_| TesseractError::NullByteInString)?;
        let text_only_flag = c_int::from(textonly);
        // SAFETY: both strings are valid NUL-terminated buffers that outlive the call.
        let raw = unsafe { TessPDFRendererCreate(base.as_ptr(), data.as_ptr(), text_only_flag) };
        Self::from_raw(raw)
    }

    /// Begins a new document with the given title.
    ///
    /// # Returns
    ///
    /// `true` when `TessResultRendererBeginDocument` returns non-zero, `false` otherwise.
    ///
    /// # Errors
    ///
    /// `TesseractError::NullByteInString` if `title` contains an interior NUL byte,
    /// `TesseractError::MutexLockError` if the handle mutex is poisoned.
    pub fn begin_document(&self, title: &str) -> Result<bool> {
        let c_title = CString::new(title).map_err(|_| TesseractError::NullByteInString)?;
        let guard = self.handle.lock().map_err(|_| TesseractError::MutexLockError)?;
        // SAFETY: the renderer pointer is read under the lock; `c_title` outlives the call.
        let status = unsafe { TessResultRendererBeginDocument(*guard, c_title.as_ptr()) };
        Ok(status != 0)
    }

    /// Adds the image currently held by `api` to the document.
    ///
    /// The API handle is locked first and the renderer handle second.
    ///
    /// # Returns
    ///
    /// `true` when `TessResultRendererAddImage` returns non-zero, `false` otherwise.
    ///
    /// # Errors
    ///
    /// `TesseractError::MutexLockError` if either mutex is poisoned.
    pub fn add_image(&self, api: &TesseractAPI) -> Result<bool> {
        let api_guard = api.handle.lock().map_err(|_| TesseractError::MutexLockError)?;
        let renderer_guard = self.handle.lock().map_err(|_| TesseractError::MutexLockError)?;
        // SAFETY: both pointers are read from their guarded slots while both locks are held.
        let status = unsafe { TessResultRendererAddImage(*renderer_guard, *api_guard) };
        Ok(status != 0)
    }

    /// Ends the current document.
    ///
    /// # Returns
    ///
    /// `true` when `TessResultRendererEndDocument` returns non-zero, `false` otherwise.
    ///
    /// # Errors
    ///
    /// `TesseractError::MutexLockError` if the handle mutex is poisoned.
    pub fn end_document(&self) -> Result<bool> {
        let guard = self.handle.lock().map_err(|_| TesseractError::MutexLockError)?;
        // SAFETY: the renderer pointer is read from the guarded slot while the lock is held.
        let status = unsafe { TessResultRendererEndDocument(*guard) };
        Ok(status != 0)
    }

    /// Returns the renderer's file extension as an owned string.
    ///
    /// # Errors
    ///
    /// `TesseractError::MutexLockError` if the handle mutex is poisoned,
    /// `TesseractError::NullPointerError` if the extension pointer is null,
    /// or `TesseractError::Utf8Error` if the extension contains invalid UTF-8.
    pub fn get_extension(&self) -> Result<String> {
        let guard = self.handle.lock().map_err(|_| TesseractError::MutexLockError)?;
        // SAFETY: the renderer pointer is read from the guarded slot while the lock is held.
        let raw_ext = unsafe { TessResultRendererExtention(*guard) };
        if raw_ext.is_null() {
            return Err(TesseractError::NullPointerError);
        }
        // SAFETY: `raw_ext` is non-null (checked above) and only read; the lock is still held.
        let extension = unsafe { CStr::from_ptr(raw_ext) }.to_str()?;
        Ok(extension.to_owned())
    }

    /// Returns the document title as an owned string.
    ///
    /// # Errors
    ///
    /// `TesseractError::MutexLockError` if the handle mutex is poisoned,
    /// `TesseractError::NullPointerError` if the title pointer is null,
    /// or `TesseractError::Utf8Error` if the title contains invalid UTF-8.
    pub fn get_title(&self) -> Result<String> {
        let guard = self.handle.lock().map_err(|_| TesseractError::MutexLockError)?;
        // SAFETY: the renderer pointer is read from the guarded slot while the lock is held.
        let raw_title = unsafe { TessResultRendererTitle(*guard) };
        if raw_title.is_null() {
            return Err(TesseractError::NullPointerError);
        }
        // SAFETY: `raw_title` is non-null (checked above) and only read; the lock is still held.
        let title = unsafe { CStr::from_ptr(raw_title) }.to_str()?;
        Ok(title.to_owned())
    }

    /// Returns the value of `TessResultRendererImageNum` for this renderer.
    ///
    /// # Errors
    ///
    /// `TesseractError::MutexLockError` if the handle mutex is poisoned.
    pub fn get_image_num(&self) -> Result<i32> {
        let guard = self.handle.lock().map_err(|_| TesseractError::MutexLockError)?;
        // SAFETY: the renderer pointer is read from the guarded slot while the lock is held.
        let count = unsafe { TessResultRendererImageNum(*guard) };
        Ok(count)
    }
}
impl Drop for TessResultRenderer {
    /// Hands the wrapped renderer pointer to `TessDeleteResultRenderer`.
    ///
    /// If the mutex cannot be locked (it is poisoned) the delete call is skipped, so
    /// `drop` never panics on that path.
    fn drop(&mut self) {
        if let Ok(guard) = self.handle.lock() {
            let raw = *guard;
            // SAFETY: the pointer is read from the guarded slot while the lock is held; the
            // constructors in this file only store non-null pointers.
            unsafe { TessDeleteResultRenderer(raw) };
        }
    }
}
// Raw Tesseract C-API declarations used by `TessResultRenderer`, expanded through the
// crate's `ffi_extern!` macro. `renderer` is an opaque renderer pointer.
ffi_extern! {
// Constructors: the wrappers in this file treat a null return as failure.
pub fn TessTextRendererCreate(outputbase: *const c_char) -> *mut c_void;
pub fn TessHOcrRendererCreate(outputbase: *const c_char) -> *mut c_void;
pub fn TessPDFRendererCreate(outputbase: *const c_char, datadir: *const c_char, textonly: c_int) -> *mut c_void;
pub fn TessDeleteResultRenderer(renderer: *mut c_void);
// Document lifecycle: the wrappers map a non-zero return to `true`.
pub fn TessResultRendererBeginDocument(renderer: *mut c_void, title: *const c_char) -> c_int;
pub fn TessResultRendererAddImage(renderer: *mut c_void, api: *mut c_void) -> c_int;
pub fn TessResultRendererEndDocument(renderer: *mut c_void) -> c_int;
// NOTE(review): the spelling "Extention" presumably mirrors the upstream C symbol; do not
// "correct" it without checking the linked library's exported name.
pub fn TessResultRendererExtention(renderer: *mut c_void) -> *const c_char;
pub fn TessResultRendererTitle(renderer: *mut c_void) -> *const c_char;
pub fn TessResultRendererImageNum(renderer: *mut c_void) -> c_int;
}

View File

@@ -0,0 +1,211 @@
use kreuzberg_tesseract::TesseractAPI;
use std::path::{Path, PathBuf};
/// Picks the tessdata directory used when `TESSDATA_PREFIX` is not set.
///
/// * macOS: `$HOME/Library/Application Support/kreuzberg-tesseract/tessdata`.
/// * Linux: the first existing directory among the two system locations listed below,
///   otherwise `$HOME/.kreuzberg-tesseract/tessdata`.
/// * Windows: `%APPDATA%\kreuzberg-tesseract\tessdata`.
///
/// # Panics
///
/// Panics if the required environment variable (`HOME` or `APPDATA`) is not set, or on
/// any other operating system.
fn get_default_tessdata_dir() -> PathBuf {
    if cfg!(target_os = "macos") {
        let home = std::env::var("HOME").expect("HOME environment variable not set");
        let mut dir = PathBuf::from(home);
        for part in ["Library", "Application Support", "kreuzberg-tesseract", "tessdata"].iter() {
            dir.push(part);
        }
        return dir;
    }

    if cfg!(target_os = "linux") {
        // Prefer a system-wide installation when one is present.
        let installed = ["/usr/share/tesseract-ocr/5/tessdata", "/usr/share/tesseract-ocr/tessdata"]
            .iter()
            .map(|candidate| PathBuf::from(*candidate))
            .find(|candidate| candidate.exists());
        if let Some(dir) = installed {
            return dir;
        }
        let home = std::env::var("HOME").expect("HOME environment variable not set");
        let mut dir = PathBuf::from(home);
        dir.push(".kreuzberg-tesseract");
        dir.push("tessdata");
        return dir;
    }

    if cfg!(target_os = "windows") {
        let app_data = std::env::var("APPDATA").expect("APPDATA environment variable not set");
        let mut dir = PathBuf::from(app_data);
        dir.push("kreuzberg-tesseract");
        dir.push("tessdata");
        return dir;
    }

    panic!("Unsupported operating system");
}
/// Resolves the tessdata directory for the tests.
///
/// When `TESSDATA_PREFIX` is readable, its value is used; `tessdata` is appended unless
/// the path already ends with that component. Otherwise the platform default from
/// `get_default_tessdata_dir` is used. The chosen directory is printed either way.
fn get_tessdata_dir() -> PathBuf {
    if let Ok(prefix) = std::env::var("TESSDATA_PREFIX") {
        let mut resolved = PathBuf::from(prefix);
        if !resolved.ends_with("tessdata") {
            resolved.push("tessdata");
        }
        println!("Using TESSDATA_PREFIX directory: {:?}", resolved);
        return resolved;
    }

    let fallback = get_default_tessdata_dir();
    println!("TESSDATA_PREFIX not set, using default directory: {:?}", fallback);
    fallback
}
/// Fails the calling test early when English tessdata is missing.
///
/// # Panics
///
/// Panics with an explanatory message if `eng.traineddata` does not exist inside
/// `tessdata_dir`.
fn ensure_eng_traineddata_exists(tessdata_dir: &Path) {
    let model_file = tessdata_dir.join("eng.traineddata");
    if !model_file.exists() {
        panic!(
            "eng.traineddata not found in {}. Set TESSDATA_PREFIX or install English tessdata.",
            tessdata_dir.display()
        );
    }
}
/// Returns the path two directory levels above this crate's manifest directory.
///
/// The `..` components are appended literally; the path is not canonicalized.
fn repo_root() -> PathBuf {
    let mut root = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    root.push("..");
    root.push("..");
    root
}
/// Loads an image from `<repo root>/test_documents/<relative>` as raw RGB8 bytes.
///
/// # Returns
///
/// `(pixels, width, height)` where `pixels` is the raw buffer of the RGB8-converted image.
///
/// # Errors
///
/// Returns a boxed error with the offending path when the image cannot be opened.
fn load_test_image(relative: &str) -> Result<(Vec<u8>, u32, u32), Box<dyn std::error::Error>> {
    let image_path = {
        let mut p = repo_root();
        p.push("test_documents");
        p.push(relative);
        p
    };
    let decoded = match image::open(&image_path) {
        Ok(img) => img,
        Err(e) => return Err(format!("Failed to open test image {}: {}", image_path.display(), e).into()),
    };
    let rgb = decoded.to_rgb8();
    let (width, height) = rgb.dimensions();
    Ok((rgb.into_raw(), width, height))
}
/// OCR smoke test: the hello-world sample image must yield text containing
/// "hello" (compared case-insensitively).
#[test]
fn test_ocr_on_hello_world_image() {
    let data_dir = get_tessdata_dir();
    ensure_eng_traineddata_exists(&data_dir);

    let engine = TesseractAPI::new().expect("Failed to create TesseractAPI");
    let data_dir_str = data_dir.to_str().unwrap();
    engine
        .init(data_dir_str, "eng")
        .expect("Failed to initialize Tesseract");

    let (pixels, width, height) =
        load_test_image("images/test_hello_world.png").expect("Failed to load test image");

    // The helper returns RGB data, hence 3 bytes per pixel and a row stride of 3 * width.
    let (w, h) = (width as i32, height as i32);
    let stride = 3 * w;
    engine
        .set_image(&pixels, w, h, 3, stride)
        .expect("Failed to set image");

    let text = engine.get_utf8_text().expect("Failed to perform OCR");
    let mentions_hello = text.to_lowercase().contains("hello");
    assert!(
        mentions_hello,
        "Text does not contain expected word. Found: {}",
        text
    );
}
/// OCR on the simple table sample: with `tessedit_pageseg_mode` set to `1`, the
/// recognized text must contain both "product" and "price" (case-insensitive).
#[test]
fn test_ocr_on_table_image() {
    let data_dir = get_tessdata_dir();
    ensure_eng_traineddata_exists(&data_dir);

    let engine = TesseractAPI::new().expect("Failed to create TesseractAPI");
    let data_dir_str = data_dir.to_str().unwrap();
    engine
        .init(data_dir_str, "eng")
        .expect("Failed to initialize Tesseract");
    engine
        .set_variable("tessedit_pageseg_mode", "1")
        .expect("Failed to set PSM");

    let (pixels, width, height) =
        load_test_image("images/simple_table.png").expect("Failed to load test image");

    // The helper returns RGB data, hence 3 bytes per pixel and a row stride of 3 * width.
    let (w, h) = (width as i32, height as i32);
    let stride = 3 * w;
    engine
        .set_image(&pixels, w, h, 3, stride)
        .expect("Failed to set image");

    let text = engine.get_utf8_text().expect("Failed to perform OCR");
    let normalized = text.to_lowercase();
    let has_expected_words = ["product", "price"]
        .iter()
        .all(|word| normalized.contains(word));
    assert!(
        has_expected_words,
        "Table text missing expected words. Found: {}",
        text
    );
}
/// Initializing with a language code that has no trained data must fail.
#[test]
fn test_invalid_language_code() {
    let data_dir = get_tessdata_dir();
    // Make sure a failure below is caused by the language code, not by a missing tessdata directory.
    ensure_eng_traineddata_exists(&data_dir);

    let api = TesseractAPI::new().expect("Failed to create TesseractAPI");
    let data_dir_str = data_dir.to_str().unwrap();
    let result = api.init(data_dir_str, "invalid_lang");
    assert!(result.is_err());
}
/// `set_image` must reject an empty pixel buffer even when the declared
/// dimensions are non-zero.
#[test]
fn test_empty_image_data() {
    let data_dir = get_tessdata_dir();
    ensure_eng_traineddata_exists(&data_dir);

    let api = TesseractAPI::new().expect("Failed to create TesseractAPI");
    let data_dir_str = data_dir.to_str().unwrap();
    api.init(data_dir_str, "eng")
        .expect("Failed to initialize Tesseract");

    // Claim a 100x100 image (3 and 300 as the trailing arguments) while supplying no bytes at all.
    let empty_data = Vec::<u8>::new();
    let (width, height) = (100, 100);
    let res = api.set_image(&empty_data, width, height, 3, 300);
    assert!(res.is_err());
}
/// `set_image` must reject inconsistent geometry for an otherwise valid buffer.
///
/// Each case replaces exactly one argument of a valid call with an invalid
/// value: a negative width, a zero height, a zero fourth argument, and a final
/// argument of `width` instead of `3 * width`.
#[test]
fn test_invalid_image_parameters() {
    let data_dir = get_tessdata_dir();
    ensure_eng_traineddata_exists(&data_dir);

    let api = TesseractAPI::new().expect("Failed to create TesseractAPI");
    let data_dir_str = data_dir.to_str().unwrap();
    api.init(data_dir_str, "eng")
        .expect("Failed to initialize Tesseract");

    let (image_data, width, height) =
        load_test_image("images/test_hello_world.png").expect("Failed to load test image");

    let (w, h) = (width as i32, height as i32);
    let valid_stride = 3 * w;

    // Same order as the arguments following the buffer in `set_image`.
    let invalid_calls = [
        (-1, h, 3, valid_stride),
        (w, 0, 3, valid_stride),
        (w, h, 0, valid_stride),
        (w, h, 3, w),
    ];

    for &(bad_w, bad_h, bad_bpp, bad_stride) in &invalid_calls {
        let res = api.set_image(&image_data, bad_w, bad_h, bad_bpp, bad_stride);
        assert!(res.is_err());
    }
}
/// `set_variable` must fail for an unknown variable name and succeed for known
/// ones, including an empty value for `tessedit_char_whitelist`.
#[test]
fn test_variable_setting() {
    let data_dir = get_tessdata_dir();
    ensure_eng_traineddata_exists(&data_dir);

    let api = TesseractAPI::new().expect("Failed to create TesseractAPI");
    let data_dir_str = data_dir.to_str().unwrap();
    api.init(data_dir_str, "eng")
        .expect("Failed to initialize Tesseract");

    // Unknown variable name: expected to be rejected.
    let res = api.set_variable("invalid_variable_name", "1");
    assert!(res.is_err());

    // Known variable with an empty value: expected to be accepted.
    let res = api.set_variable("tessedit_char_whitelist", "");
    assert!(res.is_ok());

    // Known variables with ordinary values.
    assert!(api.set_variable("tessedit_pageseg_mode", "1").is_ok());
    assert!(api.set_variable("tessedit_ocr_engine_mode", "1").is_ok());
}
/// A single initialized API instance must handle repeated `set_image` /
/// `get_utf8_text` cycles: three rounds on the same image, each producing
/// non-empty text.
#[test]
fn test_multiple_operations() {
    let data_dir = get_tessdata_dir();
    ensure_eng_traineddata_exists(&data_dir);

    let api = TesseractAPI::new().expect("Failed to create TesseractAPI");
    let data_dir_str = data_dir.to_str().unwrap();
    api.init(data_dir_str, "eng")
        .expect("Failed to initialize Tesseract");

    let (image_data, width, height) =
        load_test_image("images/test_hello_world.png").expect("Failed to load test image");

    // The helper returns RGB data, hence 3 bytes per pixel and a row stride of 3 * width.
    let (w, h) = (width as i32, height as i32);
    let stride = 3 * w;

    (0..3).for_each(|_| {
        let res = api.set_image(&image_data, w, h, 3, stride);
        assert!(res.is_ok());

        let text = api.get_utf8_text().expect("Failed to perform OCR");
        assert!(!text.is_empty());
    });
}