Nomad changes
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s

This commit is contained in:
Henrik Jess Nielsen
2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,77 @@
use crate::api::TessDeleteText;
use crate::error::{Result, TesseractError};
use std::ffi::CStr;
use std::os::raw::{c_char, c_float, c_int, c_void};
use std::sync::{Arc, Mutex};
/// Wrapper around a raw Tesseract choice-iterator handle.
///
/// The raw pointer is stored behind a `Mutex`, so every FFI call made through
/// this type first acquires the lock. The handle is passed to
/// `TessChoiceIteratorDelete` when the value is dropped.
pub struct ChoiceIterator {
// Raw handle handed to the `TessChoiceIterator*` FFI functions.
handle: Arc<Mutex<*mut c_void>>,
}
// SAFETY (review): raw pointers are neither `Send` nor `Sync` by default, so
// these impls are manual promises. They rely on all access going through the
// mutex above; they presumably also require that the native iterator may be
// used from a thread other than the one that created it — TODO confirm.
unsafe impl Send for ChoiceIterator {}
unsafe impl Sync for ChoiceIterator {}
impl ChoiceIterator {
    /// Wraps a raw choice-iterator handle.
    ///
    /// # Arguments
    ///
    /// * `handle` - Pointer to the native choice iterator. The wrapper deletes
    ///   the handle when it is dropped, so the caller must not free it again.
    pub fn new(handle: *mut c_void) -> Self {
        ChoiceIterator {
            handle: Arc::new(Mutex::new(handle)),
        }
    }

    /// Advances the iterator to the next choice.
    ///
    /// # Returns
    ///
    /// `Ok(true)` when the native call returns a non-zero value, `Ok(false)`
    /// when it returns zero.
    ///
    /// # Errors
    ///
    /// Returns `TesseractError::MutexLockError` if the handle mutex is poisoned.
    pub fn next(&self) -> Result<bool> {
        let handle = self.handle.lock().map_err(|_| TesseractError::MutexLockError)?;
        Ok(unsafe { TessChoiceIteratorNext(*handle) != 0 })
    }

    /// Gets the UTF-8 text for the current choice.
    ///
    /// # Returns
    ///
    /// An owned copy of the text of the current choice.
    ///
    /// # Errors
    ///
    /// * `TesseractError::MutexLockError` if the handle mutex is poisoned.
    /// * `TesseractError::NullPointerError` if the native call returns null.
    /// * `TesseractError::Utf8Error` if the returned bytes are not valid UTF-8.
    pub fn get_utf8_text(&self) -> Result<String> {
        let handle = self.handle.lock().map_err(|_| TesseractError::MutexLockError)?;
        let text_ptr = unsafe { TessChoiceIteratorGetUTF8Text(*handle) };
        if text_ptr.is_null() {
            return Err(TesseractError::NullPointerError);
        }
        // Unlike the result-iterator text getters, the string returned by
        // `ChoiceIterator::GetUTF8Text` points into a structure owned by the
        // iterator and must NOT be released by the caller. Freeing it with
        // `TessDeleteText` (as this method previously did) corrupts the heap.
        // We therefore only copy the bytes out while the lock is still held,
        // which also means no cleanup is skipped when UTF-8 validation fails.
        let c_str = unsafe { CStr::from_ptr(text_ptr) };
        Ok(c_str.to_str()?.to_owned())
    }

    /// Gets the confidence of the current choice.
    ///
    /// # Returns
    ///
    /// The confidence value reported by the native iterator, as an `f32`.
    ///
    /// # Errors
    ///
    /// Returns `TesseractError::MutexLockError` if the handle mutex is poisoned.
    pub fn confidence(&self) -> Result<f32> {
        let handle = self.handle.lock().map_err(|_| TesseractError::MutexLockError)?;
        Ok(unsafe { TessChoiceIteratorConfidence(*handle) })
    }
}
impl Drop for ChoiceIterator {
    /// Releases the native iterator.
    ///
    /// If the mutex is poisoned the handle is left untouched, exactly as a
    /// best-effort cleanup: nothing is deleted and no panic is raised.
    fn drop(&mut self) {
        let Ok(raw) = self.handle.lock() else {
            return;
        };
        unsafe { TessChoiceIteratorDelete(*raw) };
    }
}
// Raw bindings for the Tesseract choice-iterator C functions used by
// `ChoiceIterator`.
ffi_extern! {
// Releases the iterator; called from `ChoiceIterator`'s `Drop` impl.
fn TessChoiceIteratorDelete(handle: *mut c_void);
// Advances the iterator; `ChoiceIterator::next` maps any non-zero result to `true`.
fn TessChoiceIteratorNext(handle: *mut c_void) -> c_int;
// Returns the text of the current choice as a C string (may be null).
// NOTE(review): upstream Tesseract documents this string as owned by the
// iterator (the caller must not free it) — confirm against the bundled
// headers, because the handling in `get_utf8_text` depends on it.
fn TessChoiceIteratorGetUTF8Text(handle: *mut c_void) -> *mut c_char;
// Returns the confidence value of the current choice.
fn TessChoiceIteratorConfidence(handle: *mut c_void) -> c_float;
}

View File

@@ -0,0 +1,373 @@
/// Page segmentation modes with explicit integer discriminants `0..=14`.
#[repr(C)]
#[derive(Debug, Clone, Copy, PartialEq)]
#[allow(non_camel_case_types)]
pub enum TessPageSegMode {
    PSM_OSD_ONLY = 0,
    PSM_AUTO_OSD = 1,
    PSM_AUTO_ONLY = 2,
    PSM_AUTO = 3,
    PSM_SINGLE_COLUMN = 4,
    PSM_SINGLE_BLOCK_VERT_TEXT = 5,
    PSM_SINGLE_BLOCK = 6,
    PSM_SINGLE_LINE = 7,
    PSM_SINGLE_WORD = 8,
    PSM_CIRCLE_WORD = 9,
    PSM_SINGLE_CHAR = 10,
    PSM_SPARSE_TEXT = 11,
    PSM_SPARSE_TEXT_OSD = 12,
    PSM_RAW_LINE = 13,
    PSM_COUNT = 14,
}

impl TessPageSegMode {
    /// Converts an integer to a mode, falling back to `PSM_AUTO` for any
    /// value outside `0..=14`.
    pub fn from_int(value: i32) -> Self {
        Self::try_from_int(value).unwrap_or(TessPageSegMode::PSM_AUTO)
    }

    /// Safely convert an integer to a TessPageSegMode, returning None for invalid values.
    pub fn try_from_int(value: i32) -> Option<Self> {
        // Index in this table == discriminant of the variant stored there.
        const BY_DISCRIMINANT: [TessPageSegMode; 15] = [
            TessPageSegMode::PSM_OSD_ONLY,
            TessPageSegMode::PSM_AUTO_OSD,
            TessPageSegMode::PSM_AUTO_ONLY,
            TessPageSegMode::PSM_AUTO,
            TessPageSegMode::PSM_SINGLE_COLUMN,
            TessPageSegMode::PSM_SINGLE_BLOCK_VERT_TEXT,
            TessPageSegMode::PSM_SINGLE_BLOCK,
            TessPageSegMode::PSM_SINGLE_LINE,
            TessPageSegMode::PSM_SINGLE_WORD,
            TessPageSegMode::PSM_CIRCLE_WORD,
            TessPageSegMode::PSM_SINGLE_CHAR,
            TessPageSegMode::PSM_SPARSE_TEXT,
            TessPageSegMode::PSM_SPARSE_TEXT_OSD,
            TessPageSegMode::PSM_RAW_LINE,
            TessPageSegMode::PSM_COUNT,
        ];
        if value < 0 {
            return None;
        }
        BY_DISCRIMINANT.get(value as usize).copied()
    }
}
/// Page iterator levels with explicit integer discriminants `0..=4`.
#[repr(C)]
#[derive(Debug, Clone, Copy, PartialEq)]
#[allow(non_camel_case_types)]
pub enum TessPageIteratorLevel {
    RIL_BLOCK = 0,
    RIL_PARA = 1,
    RIL_TEXTLINE = 2,
    RIL_WORD = 3,
    RIL_SYMBOL = 4,
}

impl TessPageIteratorLevel {
    /// Converts an integer to a level, falling back to `RIL_BLOCK` for any
    /// value outside `0..=4`.
    pub fn from_int(value: i32) -> Self {
        Self::try_from_int(value).unwrap_or(TessPageIteratorLevel::RIL_BLOCK)
    }

    /// Safely convert an integer to a TessPageIteratorLevel, returning None for invalid values.
    ///
    /// Added for parity with the other enums in this module, which all offer
    /// a non-lossy counterpart to `from_int`.
    pub fn try_from_int(value: i32) -> Option<Self> {
        match value {
            0 => Some(TessPageIteratorLevel::RIL_BLOCK),
            1 => Some(TessPageIteratorLevel::RIL_PARA),
            2 => Some(TessPageIteratorLevel::RIL_TEXTLINE),
            3 => Some(TessPageIteratorLevel::RIL_WORD),
            4 => Some(TessPageIteratorLevel::RIL_SYMBOL),
            _ => None,
        }
    }
}
/// Block types with explicit integer discriminants `0..=15`.
#[repr(C)]
#[derive(Debug, Clone, Copy, PartialEq)]
#[allow(non_camel_case_types)]
pub enum TessPolyBlockType {
    PT_UNKNOWN = 0,
    PT_FLOWING_TEXT = 1,
    PT_HEADING_TEXT = 2,
    PT_PULLOUT_TEXT = 3,
    PT_EQUATION = 4,
    PT_INLINE_EQUATION = 5,
    PT_TABLE = 6,
    PT_VERTICAL_TEXT = 7,
    PT_CAPTION_TEXT = 8,
    PT_FLOWING_IMAGE = 9,
    PT_HEADING_IMAGE = 10,
    PT_PULLOUT_IMAGE = 11,
    PT_HORZ_LINE = 12,
    PT_VERT_LINE = 13,
    PT_NOISE = 14,
    PT_COUNT = 15,
}

impl TessPolyBlockType {
    /// Converts an integer to a block type, falling back to `PT_UNKNOWN` for
    /// any value outside `0..=15`.
    pub fn from_int(value: i32) -> Self {
        Self::try_from_int(value).unwrap_or(TessPolyBlockType::PT_UNKNOWN)
    }

    /// Safely convert an integer to a TessPolyBlockType, returning None for invalid values.
    pub fn try_from_int(value: i32) -> Option<Self> {
        // Index in this table == discriminant of the variant stored there.
        const BY_DISCRIMINANT: [TessPolyBlockType; 16] = [
            TessPolyBlockType::PT_UNKNOWN,
            TessPolyBlockType::PT_FLOWING_TEXT,
            TessPolyBlockType::PT_HEADING_TEXT,
            TessPolyBlockType::PT_PULLOUT_TEXT,
            TessPolyBlockType::PT_EQUATION,
            TessPolyBlockType::PT_INLINE_EQUATION,
            TessPolyBlockType::PT_TABLE,
            TessPolyBlockType::PT_VERTICAL_TEXT,
            TessPolyBlockType::PT_CAPTION_TEXT,
            TessPolyBlockType::PT_FLOWING_IMAGE,
            TessPolyBlockType::PT_HEADING_IMAGE,
            TessPolyBlockType::PT_PULLOUT_IMAGE,
            TessPolyBlockType::PT_HORZ_LINE,
            TessPolyBlockType::PT_VERT_LINE,
            TessPolyBlockType::PT_NOISE,
            TessPolyBlockType::PT_COUNT,
        ];
        if value < 0 {
            return None;
        }
        BY_DISCRIMINANT.get(value as usize).copied()
    }
}
/// Page orientations with explicit integer discriminants `0..=3`.
#[repr(C)]
#[derive(Debug, Clone, Copy, PartialEq)]
#[allow(non_camel_case_types)]
pub enum TessOrientation {
    ORIENTATION_PAGE_UP = 0,
    ORIENTATION_PAGE_RIGHT = 1,
    ORIENTATION_PAGE_DOWN = 2,
    ORIENTATION_PAGE_LEFT = 3,
}

impl TessOrientation {
    /// Converts an integer to an orientation, falling back to
    /// `ORIENTATION_PAGE_UP` for any value outside `0..=3`.
    pub fn from_int(value: i32) -> Self {
        Self::try_from_int(value).unwrap_or(TessOrientation::ORIENTATION_PAGE_UP)
    }

    /// Safely convert an integer to a TessOrientation, returning None for invalid values.
    pub fn try_from_int(value: i32) -> Option<Self> {
        let orientation = match value {
            0 => TessOrientation::ORIENTATION_PAGE_UP,
            1 => TessOrientation::ORIENTATION_PAGE_RIGHT,
            2 => TessOrientation::ORIENTATION_PAGE_DOWN,
            3 => TessOrientation::ORIENTATION_PAGE_LEFT,
            _ => return None,
        };
        Some(orientation)
    }
}
/// Paragraph justifications with explicit integer discriminants `0..=3`.
#[repr(C)]
#[derive(Debug, Clone, Copy, PartialEq)]
#[allow(non_camel_case_types)]
pub enum TessParagraphJustification {
    JUSTIFICATION_UNKNOWN = 0,
    JUSTIFICATION_LEFT = 1,
    JUSTIFICATION_CENTER = 2,
    JUSTIFICATION_RIGHT = 3,
}

impl TessParagraphJustification {
    /// Converts an integer to a justification, falling back to
    /// `JUSTIFICATION_UNKNOWN` for any value outside `0..=3`.
    pub fn from_int(value: i32) -> Self {
        Self::try_from_int(value).unwrap_or(TessParagraphJustification::JUSTIFICATION_UNKNOWN)
    }

    /// Safely convert an integer to a TessParagraphJustification, returning None for invalid values.
    ///
    /// Added for parity with the other enums in this module. It lets callers
    /// tell a genuine `JUSTIFICATION_UNKNOWN` (0) apart from an out-of-range
    /// value, which `from_int` cannot do.
    pub fn try_from_int(value: i32) -> Option<Self> {
        match value {
            0 => Some(TessParagraphJustification::JUSTIFICATION_UNKNOWN),
            1 => Some(TessParagraphJustification::JUSTIFICATION_LEFT),
            2 => Some(TessParagraphJustification::JUSTIFICATION_CENTER),
            3 => Some(TessParagraphJustification::JUSTIFICATION_RIGHT),
            _ => None,
        }
    }
}
/// Writing directions with explicit integer discriminants `0..=2`.
#[repr(C)]
#[derive(Debug, Clone, Copy, PartialEq)]
#[allow(non_camel_case_types)]
pub enum TessWritingDirection {
    WRITING_DIRECTION_LEFT_TO_RIGHT = 0,
    WRITING_DIRECTION_RIGHT_TO_LEFT = 1,
    WRITING_DIRECTION_TOP_TO_BOTTOM = 2,
}

impl TessWritingDirection {
    /// Converts an integer to a writing direction, falling back to
    /// `WRITING_DIRECTION_LEFT_TO_RIGHT` for any value outside `0..=2`.
    pub fn from_int(value: i32) -> Self {
        Self::try_from_int(value).unwrap_or(TessWritingDirection::WRITING_DIRECTION_LEFT_TO_RIGHT)
    }

    /// Safely convert an integer to a TessWritingDirection, returning None for invalid values.
    pub fn try_from_int(value: i32) -> Option<Self> {
        let direction = match value {
            0 => TessWritingDirection::WRITING_DIRECTION_LEFT_TO_RIGHT,
            1 => TessWritingDirection::WRITING_DIRECTION_RIGHT_TO_LEFT,
            2 => TessWritingDirection::WRITING_DIRECTION_TOP_TO_BOTTOM,
            _ => return None,
        };
        Some(direction)
    }
}
/// Text-line orders with explicit integer discriminants `0..=2`.
#[repr(C)]
#[derive(Debug, Clone, Copy, PartialEq)]
#[allow(non_camel_case_types)]
pub enum TessTextlineOrder {
    TEXTLINE_ORDER_LEFT_TO_RIGHT = 0,
    TEXTLINE_ORDER_RIGHT_TO_LEFT = 1,
    TEXTLINE_ORDER_TOP_TO_BOTTOM = 2,
}

impl TessTextlineOrder {
    /// Converts an integer to a text-line order, falling back to
    /// `TEXTLINE_ORDER_LEFT_TO_RIGHT` for any value outside `0..=2`.
    pub fn from_int(value: i32) -> Self {
        Self::try_from_int(value).unwrap_or(TessTextlineOrder::TEXTLINE_ORDER_LEFT_TO_RIGHT)
    }

    /// Safely convert an integer to a TessTextlineOrder, returning None for invalid values.
    pub fn try_from_int(value: i32) -> Option<Self> {
        let order = match value {
            0 => TessTextlineOrder::TEXTLINE_ORDER_LEFT_TO_RIGHT,
            1 => TessTextlineOrder::TEXTLINE_ORDER_RIGHT_TO_LEFT,
            2 => TessTextlineOrder::TEXTLINE_ORDER_TOP_TO_BOTTOM,
            _ => return None,
        };
        Some(order)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Known values map to their variant; unknown values fall back to `PSM_AUTO`.
    #[test]
    fn test_page_seg_mode_from_int() {
        let cases = [
            (0, TessPageSegMode::PSM_OSD_ONLY),
            (3, TessPageSegMode::PSM_AUTO),
            (10, TessPageSegMode::PSM_SINGLE_CHAR),
            (999, TessPageSegMode::PSM_AUTO),
        ];
        for (raw, expected) in cases {
            assert_eq!(TessPageSegMode::from_int(raw), expected);
        }
    }

    /// A variant casts back to its declared discriminant.
    #[test]
    fn test_page_seg_mode_conversion() {
        assert_eq!(TessPageSegMode::PSM_SINGLE_LINE as i32, 7);
    }

    /// Unknown (here negative) values fall back to `RIL_BLOCK`.
    #[test]
    fn test_page_iterator_level_from_int() {
        let cases = [
            (0, TessPageIteratorLevel::RIL_BLOCK),
            (3, TessPageIteratorLevel::RIL_WORD),
            (-1, TessPageIteratorLevel::RIL_BLOCK),
        ];
        for (raw, expected) in cases {
            assert_eq!(TessPageIteratorLevel::from_int(raw), expected);
        }
    }

    /// Unknown values fall back to `PT_UNKNOWN`.
    #[test]
    fn test_poly_block_type_from_int() {
        let cases = [
            (1, TessPolyBlockType::PT_FLOWING_TEXT),
            (6, TessPolyBlockType::PT_TABLE),
            (100, TessPolyBlockType::PT_UNKNOWN),
        ];
        for (raw, expected) in cases {
            assert_eq!(TessPolyBlockType::from_int(raw), expected);
        }
    }

    /// Unknown values fall back to `ORIENTATION_PAGE_UP`.
    #[test]
    fn test_orientation_from_int() {
        let cases = [
            (0, TessOrientation::ORIENTATION_PAGE_UP),
            (2, TessOrientation::ORIENTATION_PAGE_DOWN),
            (5, TessOrientation::ORIENTATION_PAGE_UP),
        ];
        for (raw, expected) in cases {
            assert_eq!(TessOrientation::from_int(raw), expected);
        }
    }

    /// Unknown (here negative) values fall back to `JUSTIFICATION_UNKNOWN`.
    #[test]
    fn test_paragraph_justification_from_int() {
        let cases = [
            (1, TessParagraphJustification::JUSTIFICATION_LEFT),
            (3, TessParagraphJustification::JUSTIFICATION_RIGHT),
            (-1, TessParagraphJustification::JUSTIFICATION_UNKNOWN),
        ];
        for (raw, expected) in cases {
            assert_eq!(TessParagraphJustification::from_int(raw), expected);
        }
    }

    /// Unknown values fall back to `WRITING_DIRECTION_LEFT_TO_RIGHT`.
    #[test]
    fn test_writing_direction_from_int() {
        let cases = [
            (0, TessWritingDirection::WRITING_DIRECTION_LEFT_TO_RIGHT),
            (1, TessWritingDirection::WRITING_DIRECTION_RIGHT_TO_LEFT),
            (10, TessWritingDirection::WRITING_DIRECTION_LEFT_TO_RIGHT),
        ];
        for (raw, expected) in cases {
            assert_eq!(TessWritingDirection::from_int(raw), expected);
        }
    }

    /// Unknown values fall back to `TEXTLINE_ORDER_LEFT_TO_RIGHT`.
    #[test]
    fn test_textline_order_from_int() {
        let cases = [
            (0, TessTextlineOrder::TEXTLINE_ORDER_LEFT_TO_RIGHT),
            (2, TessTextlineOrder::TEXTLINE_ORDER_TOP_TO_BOTTOM),
            (99, TessTextlineOrder::TEXTLINE_ORDER_LEFT_TO_RIGHT),
        ];
        for (raw, expected) in cases {
            assert_eq!(TessTextlineOrder::from_int(raw), expected);
        }
    }

    /// Compile-time check that every enum in this module implements `Copy`.
    #[test]
    fn test_enums_are_copy() {
        fn assert_copy<T>()
        where
            T: Copy,
        {
        }
        assert_copy::<TessPageSegMode>();
        assert_copy::<TessPageIteratorLevel>();
        assert_copy::<TessPolyBlockType>();
        assert_copy::<TessOrientation>();
        assert_copy::<TessParagraphJustification>();
        assert_copy::<TessWritingDirection>();
        assert_copy::<TessTextlineOrder>();
    }
}

View File

@@ -0,0 +1,85 @@
use std::str::Utf8Error;
use thiserror::Error;
/// Errors that can occur when using the Tesseract API.
///
/// Every variant carries a fixed display message via `thiserror`'s
/// `#[error(...)]` attribute. Only `Utf8Error` and `InvalidEnumValue` carry a
/// payload.
#[derive(Error, Debug)]
pub enum TesseractError {
#[error("Failed to initialize Tesseract")]
InitError,
#[error("Failed to set image")]
SetImageError,
/// Also returned by the `Pix` wrapper when a Leptonica call reports a
/// non-zero status.
#[error("OCR operation failed")]
OcrError,
/// Wraps a `std::str::Utf8Error`; the `#[from]` attribute lets `?` convert
/// it automatically.
#[error("Invalid UTF-8 in Tesseract output")]
Utf8Error(#[from] Utf8Error),
/// Returned when a handle mutex cannot be locked (e.g. by `ChoiceIterator`).
#[error("Failed to lock mutex")]
MutexLockError,
#[error("Failed to set variable")]
SetVariableError,
#[error("Failed to get variable")]
GetVariableError,
/// Returned when a native call hands back a null pointer.
#[error("Null pointer error")]
NullPointerError,
#[error("Invalid parameter")]
InvalidParameterError,
#[error("Layout analysis failed")]
AnalyseLayoutError,
#[error("Page processing failed")]
ProcessPagesError,
#[error("I/O error")]
IoError,
// NOTE(review): appears to overlap with `MutexLockError`; confirm whether
// both variants are still needed.
#[error("Mutex error")]
MutexError,
#[error("Invalid dimensions")]
InvalidDimensions,
#[error("Invalid bytes per pixel")]
InvalidBytesPerPixel,
#[error("Invalid bytes per line")]
InvalidBytesPerLine,
/// Returned by `Pix::from_raw_rgb` when the buffer length does not match
/// the requested dimensions or a dimension is zero.
#[error("Invalid image data")]
InvalidImageData,
#[error("Uninitialized error")]
UninitializedError,
/// Carries the offending integer, which is included in the message.
#[error("Invalid enum value: {0}")]
InvalidEnumValue(i32),
#[error("String contains null byte")]
NullByteInString,
}
/// Result type for Tesseract operations.
///
/// Shorthand for `std::result::Result` with the error fixed to
/// `TesseractError`.
pub type Result<T> = std::result::Result<T, TesseractError>;
#[cfg(test)]
mod tests {
    use super::*;

    /// Each checked variant renders the message from its `#[error]` attribute.
    #[test]
    fn test_error_display() {
        let expectations = [
            (TesseractError::InitError, "Failed to initialize Tesseract"),
            (TesseractError::SetImageError, "Failed to set image"),
            (TesseractError::OcrError, "OCR operation failed"),
        ];
        for (error, message) in expectations {
            assert_eq!(error.to_string(), message);
        }
    }

    /// A `std::str::Utf8Error` converts into the `Utf8Error` variant via `From`.
    #[test]
    fn test_utf8_error_conversion() {
        let invalid_utf8 = vec![0xFF, 0xFE];
        let utf8_error = std::str::from_utf8(&invalid_utf8).unwrap_err();
        let tess_error = TesseractError::from(utf8_error);
        assert!(
            matches!(tess_error, TesseractError::Utf8Error(_)),
            "Expected Utf8Error variant"
        );
    }

    /// Compile-time check that the error type can cross thread boundaries.
    #[test]
    fn test_error_is_send_sync() {
        fn assert_send_sync<T>()
        where
            T: Send + Sync,
        {
        }
        assert_send_sync::<TesseractError>();
    }
}

View File

@@ -0,0 +1,807 @@
//! Safe Leptonica Pix wrapper for image preprocessing before OCR.
//!
//! Provides a safe Rust wrapper around the Leptonica image-processing library.
//! `Pix` is the core Leptonica image type. All methods return `Result<Pix>`,
//! and the wrapper takes care of proper memory management via `Drop`.
//!
//! ## Pixel format
//!
//! Leptonica's 32 bpp format stores each pixel as a native 32-bit integer
//! with the logical layout (MSB→LSB): `R G B A`, i.e.
//! `(r << 24) | (g << 16) | (b << 8) | alpha`. Leptonica accesses
//! individual channels via bit-shift on the integer value, not via
//! byte-addressed pointer arithmetic, so the packing is identical on both
//! big- and little-endian hosts. Do **not** call `pixEndianByteSwap` after
//! writing pixels this way — doing so inverts the channel order.
//!
//! ## `pixDeskew` requires a binary (1 bpp) image
//!
//! Call `to_grayscale()` followed by `adaptive_threshold()` before `deskew()`.
//! `pixDeskew` internally calls `pixFindSkewSweepAndSearchScorePivot` which
//! operates on 1-bit images only; passing a colour image will return a null
//! pointer.
use crate::error::{Result, TesseractError};
use std::ffi::c_void;
// ---------------------------------------------------------------------------
// Raw Leptonica FFI declarations
// ---------------------------------------------------------------------------
#[cfg(any(feature = "build-tesseract", feature = "build-tesseract-wasm"))]
ffi_extern! {
/// Allocates a new Pix with the given dimensions and bit depth.
fn pixCreate(width: i32, height: i32, depth: i32) -> *mut c_void;
/// Frees a Pix and sets the caller's pointer to null.
///
/// Leptonica uses a double-pointer convention: `*ppix` is set to null
/// after the call so that accidental double-frees are a no-op.
fn pixDestroy(ppix: *mut *mut c_void);
/// Sets the horizontal and vertical resolution (DPI) on a Pix.
///
/// Returns 0 on success, non-zero on error.
fn pixSetResolution(pix: *mut c_void, xres: i32, yres: i32) -> i32;
/// Returns the width of the Pix in pixels.
fn pixGetWidth(pix: *const c_void) -> i32;
/// Returns the height of the Pix in pixels.
fn pixGetHeight(pix: *const c_void) -> i32;
/// Returns the bit depth of the Pix (1, 2, 4, 8, 16, or 32).
fn pixGetDepth(pix: *const c_void) -> i32;
/// Returns the number of 32-bit words per row (words-per-line).
fn pixGetWpl(pix: *const c_void) -> i32;
/// Returns a mutable pointer to the start of the pixel data array.
///
/// The data is stored as rows of 32-bit words; each word covers 32/depth pixels.
fn pixGetData(pix: *mut c_void) -> *mut u32;
/// Deskews a 1 bpp image using a sweep-and-search algorithm.
///
/// `redsearch` is the reduction factor used during the search; pass 0 for
/// the Leptonica default (2x reduction). Returns a new deskewed Pix on
/// success, or null on failure. The input Pix is **not** consumed.
fn pixDeskew(pixs: *mut c_void, redsearch: i32) -> *mut c_void;
/// Estimates the skew angle and confidence for a 1 bpp image.
///
/// Writes the angle (degrees, positive = counter-clockwise) into `*pangle`
/// and a confidence score (0–1) into `*pconf`. Returns 0 on success.
///
/// NOTE(review): Leptonica describes the confidence as a max/min score
/// ratio, which may exceed 1 — confirm the stated range.
fn pixFindSkew(pixs: *mut c_void, pangle: *mut f32, pconf: *mut f32) -> i32;
/// Applies Otsu adaptive thresholding to produce a binarised Pix.
///
/// `sx`/`sy` are the tile dimensions; `smoothx`/`smoothy` are half-widths
/// for smoothing the threshold map; `scorefract` controls threshold acceptance
/// (typical value: 0.1). `ppixth` (optional) receives the threshold image;
/// `ppixd` receives the binarised output.
fn pixOtsuAdaptiveThreshold(
pixs: *mut c_void,
sx: i32,
sy: i32,
smoothx: i32,
smoothy: i32,
scorefract: f32,
ppixth: *mut *mut c_void,
ppixd: *mut *mut c_void,
) -> i32;
/// Normalises the background of a grayscale image using morphological operations.
///
/// `reduction` is the subsampling factor (e.g. 4), `size` is the morphological
/// structuring-element half-size (e.g. 15), and `bgval` is the target background
/// value (e.g. 200). Returns a new normalised Pix, or null on failure.
fn pixBackgroundNormMorph(
pixs: *mut c_void,
pixim: *mut c_void,
reduction: i32,
size: i32,
bgval: i32,
) -> *mut c_void;
/// Applies unsharp masking to sharpen a grayscale or colour Pix.
///
/// `halfwidth` is the half-size of the blur kernel; `fract` controls the
/// sharpening strength (0.0–1.0 typical). Returns a new Pix, or null on failure.
fn pixUnsharpMasking(pixs: *mut c_void, halfwidth: i32, fract: f32) -> *mut c_void;
/// Scales a Pix by independent x and y factors using the best available method.
///
/// Returns a new scaled Pix, or null on failure. The input Pix is **not** consumed.
fn pixScale(pixs: *mut c_void, scalex: f32, scaley: f32) -> *mut c_void;
/// Converts an RGB (32 bpp) Pix to 8 bpp grayscale.
///
/// `rwt`, `gwt`, `bwt` are the red, green, and blue channel weights; pass
/// 0.0 for all three to use Leptonica's built-in default weights. Returns a
/// new 8 bpp Pix, or null on failure.
fn pixConvertRGBToGray(pixs: *mut c_void, rwt: f32, gwt: f32, bwt: f32) -> *mut c_void;
/// Creates a Leptonica BOX with the given coordinates.
fn boxCreate(x: i32, y: i32, w: i32, h: i32) -> *mut c_void;
/// Frees a Leptonica BOX.
fn boxDestroy(pbox: *mut *mut c_void);
/// Clips a rectangular region from a Pix.
///
/// Returns a new Pix containing the clipped region, or null on failure.
/// `pboxc` (optional) receives the actual clipped box; pass null to ignore.
fn pixClipRectangle(pixs: *mut c_void, box_: *mut c_void, pboxc: *mut *mut c_void) -> *mut c_void;
/// Counts connected components in a 1 bpp image.
///
/// `connectivity` is 4 or 8. Writes the count to `*pcount`.
/// Returns 0 on success.
fn pixCountConnComp(pix: *mut c_void, connectivity: i32, pcount: *mut i32) -> i32;
/// Retrieves the horizontal and vertical resolution (DPI) from a Pix.
///
/// Writes the x-resolution into `*pxres` and y-resolution into `*pyres`.
/// Returns 0 on success, non-zero on error.
fn pixGetResolution(pix: *const c_void, pxres: *mut i32, pyres: *mut i32) -> i32;
}
// ---------------------------------------------------------------------------
// Safe Pix wrapper
// ---------------------------------------------------------------------------
/// Safe wrapper around a Leptonica `PIX *` image object.
///
/// Owns the underlying allocation and frees it in `Drop`. All methods that
/// return a new image allocate a fresh `Pix`; the receiver is never consumed.
/// Note that some `&self` methods may still update header metadata on the
/// receiver: `background_normalize` first calls `ensure_valid_resolution`,
/// which can write a default DPI.
///
/// # Thread safety
///
/// `Pix` is `Send` because Leptonica image objects are independent heap
/// allocations with no shared mutable state. Concurrent mutation from multiple
/// threads is **not** safe (no `Sync`).
#[cfg(any(feature = "build-tesseract", feature = "build-tesseract-wasm"))]
pub struct Pix {
// Raw `PIX *` handle passed to every Leptonica call made by this type.
ptr: *mut c_void,
}
#[cfg(any(feature = "build-tesseract", feature = "build-tesseract-wasm"))]
impl std::fmt::Debug for Pix {
    /// Formats the wrapper as `Pix { ptr: <address> }`; pixel data is not printed.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut builder = f.debug_struct("Pix");
        builder.field("ptr", &self.ptr);
        builder.finish()
    }
}
// SAFETY: A Pix owns a uniquely heap-allocated Leptonica PIX. There is no
// interior mutability shared across thread boundaries, so transferring
// ownership to another thread is safe.
//
// `Sync` is deliberately NOT implemented: `ensure_valid_resolution` writes
// to the PIX header through `&self`, so shared references must stay on one
// thread.
#[cfg(any(feature = "build-tesseract", feature = "build-tesseract-wasm"))]
unsafe impl Send for Pix {}
#[cfg(any(feature = "build-tesseract", feature = "build-tesseract-wasm"))]
impl Pix {
// -----------------------------------------------------------------------
// Construction
// -----------------------------------------------------------------------
/// Creates a 32 bpp Leptonica Pix from a packed RGB byte slice.
///
/// `data` must contain exactly `width * height * 3` bytes in left-to-right,
/// top-to-bottom, `R G B` interleaved order.
///
/// The DPI is set to 300 × 300 which is a sensible default for OCR input.
///
/// # Errors
///
/// Returns `TesseractError::InvalidImageData` if `data` length does not
/// match `width * height * 3`, if either dimension is zero, or if
/// Leptonica's `pixCreate` returns null.
///
/// # Examples
///
/// ```rust,no_run
/// # use kreuzberg_tesseract::Pix;
/// let rgb = vec![255u8; 4 * 4 * 3]; // 4×4 white image
/// let pix = Pix::from_raw_rgb(&rgb, 4, 4).unwrap();
/// assert_eq!(pix.width(), 4);
/// assert_eq!(pix.height(), 4);
/// assert_eq!(pix.depth(), 32);
/// ```
pub fn from_raw_rgb(data: &[u8], width: u32, height: u32) -> Result<Pix> {
let expected = (width as usize)
.checked_mul(height as usize)
.and_then(|n| n.checked_mul(3))
.ok_or(TesseractError::InvalidImageData)?;
if data.len() != expected || width == 0 || height == 0 {
return Err(TesseractError::InvalidImageData);
}
// SAFETY: pixCreate() allocates a new PIX with the requested dimensions.
// It is safe because:
// 1. width, height, and depth (32) are valid positive integers.
// 2. pixCreate() documents that it returns null only on allocation
// failure, which we check immediately below.
let pix_ptr = unsafe { pixCreate(width as i32, height as i32, 32) };
if pix_ptr.is_null() {
return Err(TesseractError::NullPointerError);
}
// SAFETY: pixGetData() returns a mutable pointer into the allocated pixel
// buffer that is valid for the lifetime of the Pix. We own pix_ptr
// exclusively at this point and have not exposed it to any other code.
let data_ptr = unsafe { pixGetData(pix_ptr) };
if data_ptr.is_null() {
// Clean up before returning the error.
// SAFETY: pix_ptr is a valid non-null allocation from pixCreate().
// Passing &mut pix_ptr satisfies the double-pointer convention; after
// this call pix_ptr is set to null by Leptonica.
let mut ptr = pix_ptr;
unsafe { pixDestroy(&mut ptr) };
return Err(TesseractError::NullPointerError);
}
// SAFETY: pixGetWpl() is a pure read of the Pix header that is always
// valid for a correctly-allocated Pix.
// For a 32 bpp image, each pixel occupies exactly one 32-bit word, so
// wpl == width (no padding bytes). The loop below uses `row * wpl + col`
// to index into the pixel data, which is within bounds because col < width <= wpl.
let wpl = unsafe { pixGetWpl(pix_ptr) } as usize;
// Write RGB pixels into the Leptonica data buffer.
//
// Leptonica's 32 bpp pixel format stores each pixel as a native
// 32-bit integer word with the logical layout (MSB→LSB): R G B A,
// i.e. `(r << 24) | (g << 16) | (b << 8) | alpha`. This is the
// same bit pattern regardless of host endianness — Leptonica treats
// the data as an array of 32-bit integers and accesses individual
// bytes via bit-shift, not via byte-addressed pointer arithmetic.
//
// Therefore we pack directly as `(r << 24) | (g << 16) | (b << 8) | 0xFF`
// and write the resulting u32 without any byte-swapping. Calling
// `pixEndianByteSwap` would invert the channel order, producing
// A B G R instead of R G B A.
for row in 0..(height as usize) {
for col in 0..(width as usize) {
let src = (row * width as usize + col) * 3;
let r = data[src] as u32;
let g = data[src + 1] as u32;
let b = data[src + 2] as u32;
// Pack channels as (MSB) R G B A (LSB) in the 32-bit integer.
let word: u32 = (r << 24) | (g << 16) | (b << 8) | 0xFF;
// SAFETY: data_ptr is a valid writable pointer into the Leptonica
// pixel buffer. The offset `row * wpl + col` is within bounds because:
// 1. wpl >= width (Leptonica pads rows to 32-bit word boundaries).
// 2. row < height and col < width by loop invariants.
unsafe {
*data_ptr.add(row * wpl + col) = word;
}
}
}
// Set a sensible default DPI for OCR processing.
// SAFETY: pix_ptr is valid and non-null. pixSetResolution only writes
// two integer fields in the Pix header.
unsafe { pixSetResolution(pix_ptr, 300, 300) };
Ok(Pix { ptr: pix_ptr })
}
// -----------------------------------------------------------------------
// Image processing operations
// -----------------------------------------------------------------------
/// Returns a deskewed copy of this image.
///
/// **Note:** `pixDeskew` requires a 1 bpp (binary) image. On a colour or
/// grayscale Pix, run `to_grayscale()` and then `adaptive_threshold()`
/// first.
///
/// # Errors
///
/// Returns `TesseractError::NullPointerError` if Leptonica returns null
/// (typically because the input is not 1 bpp or the image is too small).
///
/// # Examples
///
/// ```rust,no_run
/// # use kreuzberg_tesseract::Pix;
/// # let rgb = vec![0u8; 100 * 100 * 3];
/// # let pix = Pix::from_raw_rgb(&rgb, 100, 100).unwrap();
/// let gray = pix.to_grayscale().unwrap();
/// let binary = gray.adaptive_threshold(32, 32).unwrap();
/// let deskewed = binary.deskew().unwrap();
/// ```
pub fn deskew(&self) -> Result<Pix> {
    // SAFETY: self.ptr is a valid non-null Pix owned by `self`. pixDeskew()
    // borrows it and hands back a separate allocation (or null on failure),
    // which is checked before being wrapped.
    let deskewed = unsafe { pixDeskew(self.ptr, 0) };
    if deskewed.is_null() {
        return Err(TesseractError::NullPointerError);
    }
    Ok(Pix { ptr: deskewed })
}
/// Estimates the skew angle (degrees) and confidence (0–1) for this image.
///
/// A positive angle indicates counter-clockwise skew. Confidence near 1.0
/// means a clear dominant skew direction was found.
/// NOTE(review): Leptonica describes the confidence as a max/min score
/// ratio, which may exceed 1 — confirm the stated range.
///
/// **Note:** Like `deskew`, this operates on 1 bpp images.
///
/// # Errors
///
/// Returns `TesseractError::OcrError` if `pixFindSkew` returns a non-zero
/// status (e.g. insufficient contrast or wrong bit depth).
///
/// # Examples
///
/// ```rust,no_run
/// # use kreuzberg_tesseract::Pix;
/// # let rgb = vec![0u8; 100 * 100 * 3];
/// # let pix = Pix::from_raw_rgb(&rgb, 100, 100).unwrap();
/// let gray = pix.to_grayscale().unwrap();
/// let binary = gray.adaptive_threshold(32, 32).unwrap();
/// let (angle, confidence) = binary.find_skew().unwrap();
/// println!("Skew: {angle:.2}° (confidence {confidence:.2})");
/// ```
pub fn find_skew(&self) -> Result<(f32, f32)> {
    let (mut skew_angle, mut confidence) = (0.0_f32, 0.0_f32);
    // SAFETY: self.ptr is valid and non-null. Both out-parameters point at
    // live stack locals that outlast the call; pixFindSkew() fills them in
    // and reports success through its integer return value.
    let rc = unsafe { pixFindSkew(self.ptr, &mut skew_angle, &mut confidence) };
    match rc {
        0 => Ok((skew_angle, confidence)),
        _ => Err(TesseractError::OcrError),
    }
}
/// Produces a binarised copy of this image via Otsu adaptive thresholding.
///
/// `tile_width` and `tile_height` set the size of the local regions used to
/// compute each threshold. Values around 16–64 work well for typical
/// document images; smaller tiles follow local contrast more closely.
///
/// # Errors
///
/// Returns `TesseractError::OcrError` if `pixOtsuAdaptiveThreshold` returns
/// a non-zero status, or `TesseractError::NullPointerError` if it reports
/// success but yields no output image.
///
/// # Examples
///
/// ```rust,no_run
/// # use kreuzberg_tesseract::Pix;
/// # let rgb = vec![128u8; 64 * 64 * 3];
/// # let pix = Pix::from_raw_rgb(&rgb, 64, 64).unwrap();
/// let gray = pix.to_grayscale().unwrap();
/// let binary = gray.adaptive_threshold(32, 32).unwrap();
/// assert_eq!(binary.depth(), 1);
/// ```
pub fn adaptive_threshold(&self, tile_width: i32, tile_height: i32) -> Result<Pix> {
    let mut binarised: *mut c_void = std::ptr::null_mut();
    // The intermediate threshold map is not needed, so no slot is supplied.
    let no_threshold_map: *mut *mut c_void = std::ptr::null_mut();
    // SAFETY: self.ptr is a valid non-null Pix. `binarised` is a live local
    // that pixOtsuAdaptiveThreshold() writes the output pointer into; it is
    // null-checked below before being wrapped in a Pix.
    let rc = unsafe {
        pixOtsuAdaptiveThreshold(
            self.ptr,
            tile_width,
            tile_height,
            0,   // smoothx: no smoothing
            0,   // smoothy: no smoothing
            0.1, // scorefract: Leptonica-recommended default
            no_threshold_map,
            &mut binarised,
        )
    };
    // The status is examined first, so a failing call always maps to OcrError.
    match (rc, binarised.is_null()) {
        (0, false) => Ok(Pix { ptr: binarised }),
        (0, true) => Err(TesseractError::NullPointerError),
        _ => Err(TesseractError::OcrError),
    }
}
/// Reads the horizontal and vertical resolution (DPI) stored on this image.
///
/// The result is `(x_resolution, y_resolution)`.
///
/// # Errors
///
/// Returns `TesseractError::OcrError` if `pixGetResolution` fails.
pub fn get_resolution(&self) -> Result<(i32, i32)> {
    let (mut dpi_x, mut dpi_y) = (0_i32, 0_i32);
    // SAFETY: self.ptr is a valid non-null Pix and both out-parameters point
    // at live stack locals. pixGetResolution only reads the Pix header.
    let rc = unsafe { pixGetResolution(self.ptr, &mut dpi_x, &mut dpi_y) };
    match rc {
        0 => Ok((dpi_x, dpi_y)),
        _ => Err(TesseractError::OcrError),
    }
}
/// Stores the given horizontal and vertical resolution (DPI) on this image.
///
/// # Errors
///
/// Returns `TesseractError::OcrError` if `pixSetResolution` fails.
pub fn set_resolution(&mut self, xres: i32, yres: i32) -> Result<()> {
    // SAFETY: self.ptr is a valid non-null Pix. pixSetResolution only
    // writes two integer fields in the Pix header.
    let rc = unsafe { pixSetResolution(self.ptr, xres, yres) };
    if rc == 0 {
        return Ok(());
    }
    Err(TesseractError::OcrError)
}
/// Ensures the image has a valid (non-zero) DPI resolution.
///
/// If the resolution can be read and *either* the x or the y resolution is
/// zero, both are set to 72 DPI as a safe fallback. This prevents Leptonica
/// operations that depend on resolution metadata from producing incorrect
/// results.
///
/// This is best-effort: if `get_resolution` fails nothing is changed, and
/// the status returned by `pixSetResolution` is discarded.
fn ensure_valid_resolution(&self) {
// NOTE(review): when only one axis is zero, the other (valid) axis is also
// overwritten with 72 — confirm this is intended.
if let Ok((xres, yres)) = self.get_resolution()
&& (xres == 0 || yres == 0)
{
// SAFETY: self.ptr is valid. We set a safe default DPI.
unsafe { pixSetResolution(self.ptr, 72, 72) };
}
}
/// Produces a background-normalised copy of this image via morphological
/// operations (`pixBackgroundNormMorph`).
///
/// Handy as a preprocessing step for documents with uneven illumination or
/// a non-white background. The source image's pixel data is left as is; a
/// missing resolution is patched first via `ensure_valid_resolution`.
///
/// # Errors
///
/// Returns `TesseractError::NullPointerError` if `pixBackgroundNormMorph`
/// returns null.
///
/// # Examples
///
/// ```rust,no_run
/// # use kreuzberg_tesseract::Pix;
/// # let rgb = vec![200u8; 100 * 100 * 3];
/// # let pix = Pix::from_raw_rgb(&rgb, 100, 100).unwrap();
/// let gray = pix.to_grayscale().unwrap();
/// let normalised = gray.background_normalize().unwrap();
/// ```
pub fn background_normalize(&self) -> Result<Pix> {
    self.ensure_valid_resolution();
    // SAFETY: self.ptr is a valid non-null Pix. Null is passed for pixim
    // (no mask image). pixBackgroundNormMorph() hands back a newly allocated
    // Pix, or null on failure.
    let normalised = unsafe {
        pixBackgroundNormMorph(
            self.ptr,
            std::ptr::null_mut(), // pixim: no mask
            4,                    // reduction: 4x subsampling
            15,                   // size: morphological SE half-size
            200,                  // bgval: target background value
        )
    };
    if normalised.is_null() {
        return Err(TesseractError::NullPointerError);
    }
    Ok(Pix { ptr: normalised })
}
/// Sharpens this image with unsharp masking and returns the result as a
/// new Pix.
///
/// `halfwidth` is the half-size of the blur kernel (e.g. 1-5).
/// `fract` is the sharpening fraction in the range 0.0-1.0; values
/// around 0.3-0.5 produce visible sharpening without artefacts.
///
/// # Errors
///
/// Returns `TesseractError::NullPointerError` if `pixUnsharpMasking`
/// returns null.
///
/// # Examples
///
/// ```rust,no_run
/// # use kreuzberg_tesseract::Pix;
/// # let rgb = vec![128u8; 64 * 64 * 3];
/// # let pix = Pix::from_raw_rgb(&rgb, 64, 64).unwrap();
/// let sharpened = pix.unsharp_mask(2, 0.4).unwrap();
/// ```
pub fn unsharp_mask(&self, halfwidth: i32, fract: f32) -> Result<Pix> {
    self.ensure_valid_resolution();
    // SAFETY: self.ptr is valid and non-null. pixUnsharpMasking() returns a
    // new Pix without modifying or taking ownership of the source.
    let sharpened = unsafe { pixUnsharpMasking(self.ptr, halfwidth, fract) };
    if sharpened.is_null() {
        return Err(TesseractError::NullPointerError);
    }
    Ok(Pix { ptr: sharpened })
}
/// Returns a copy of this image scaled by independent x and y factors.
///
/// The choice of scaling algorithm is left to Leptonica's `pixScale`, which
/// picks one based on the scale factors and bit depth (area mapping for
/// downscaling, linear interpolation for upscaling).
///
/// # Arguments
///
/// * `sx` - Horizontal scale factor.
/// * `sy` - Vertical scale factor.
///
/// # Errors
///
/// Returns `TesseractError::NullPointerError` if `pixScale` returns null.
///
/// # Examples
///
/// ```rust,no_run
/// # use kreuzberg_tesseract::Pix;
/// # let rgb = vec![255u8; 40 * 40 * 3];
/// # let pix = Pix::from_raw_rgb(&rgb, 40, 40).unwrap();
/// let upscaled = pix.scale(2.0, 2.0).unwrap();
/// assert_eq!(upscaled.width(), 80);
/// assert_eq!(upscaled.height(), 80);
/// ```
pub fn scale(&self, sx: f32, sy: f32) -> Result<Pix> {
    // SAFETY: self.ptr is valid and non-null. pixScale() creates a new Pix
    // and does not modify the source.
    let scaled = unsafe { pixScale(self.ptr, sx, sy) };
    if scaled.is_null() {
        return Err(TesseractError::NullPointerError);
    }
    Ok(Pix { ptr: scaled })
}
/// Copies a rectangular sub-region of this image into a new Pix.
///
/// Coordinates are in pixel space: `(x, y)` is the top-left corner of the
/// rectangle and `w` / `h` are its width and height.
///
/// # Errors
///
/// Returns `TesseractError::NullPointerError` if either the temporary BOX
/// cannot be created or `pixClipRectangle` returns null.
pub fn clip_rectangle(&self, x: i32, y: i32, w: i32, h: i32) -> Result<Pix> {
    // SAFETY: boxCreate allocates a new BOX on the heap.
    let mut region = unsafe { boxCreate(x, y, w, h) };
    if region.is_null() {
        return Err(TesseractError::NullPointerError);
    }
    // SAFETY: pixClipRectangle returns a new Pix clipped to the BOX region.
    // Null is passed for pboxc because the clipped box coordinates are not
    // needed back.
    let clipped = unsafe { pixClipRectangle(self.ptr, region, std::ptr::null_mut()) };
    // SAFETY: `region` is the BOX allocated above; it is released here
    // whether or not the clip succeeded.
    unsafe { boxDestroy(&mut region) };
    if clipped.is_null() {
        return Err(TesseractError::NullPointerError);
    }
    Ok(Pix { ptr: clipped })
}
/// Returns the number of connected components in a 1 bpp (binary) image.
///
/// `connectivity` should be 4 or 8.
///
/// # Errors
///
/// Returns `TesseractError::OcrError` if `pixCountConnComp` reports a
/// non-zero status (e.g., wrong bit depth — image must be 1 bpp).
pub fn count_connected_components(&self, connectivity: i32) -> Result<i32> {
    let mut components: i32 = 0;
    // SAFETY: self.ptr is a valid Pix. `components` is a live stack local
    // that pixCountConnComp writes the count into.
    match unsafe { pixCountConnComp(self.ptr, connectivity, &mut components) } {
        0 => Ok(components),
        _ => Err(TesseractError::OcrError),
    }
}
/// Builds an 8 bpp grayscale Pix from this 32 bpp RGB image.
///
/// All three channel weights are passed as 0.0, which asks Leptonica's
/// `pixConvertRGBToGray` to fall back to its built-in default weights.
/// NOTE(review): earlier docs quoted approx. 0.299 R, 0.587 G, 0.114 B for
/// those defaults; Leptonica's `pix.h` should be checked, as its
/// `L_RED_WEIGHT` / `L_GREEN_WEIGHT` / `L_BLUE_WEIGHT` constants may differ.
///
/// # Errors
///
/// Returns `TesseractError::NullPointerError` if `pixConvertRGBToGray`
/// returns null (e.g. the source is not 32 bpp).
///
/// # Examples
///
/// ```rust,no_run
/// # use kreuzberg_tesseract::Pix;
/// # let rgb = vec![100u8, 150u8, 200u8].repeat(10 * 10);
/// # let pix = Pix::from_raw_rgb(&rgb, 10, 10).unwrap();
/// let gray = pix.to_grayscale().unwrap();
/// assert_eq!(gray.depth(), 8);
/// ```
pub fn to_grayscale(&self) -> Result<Pix> {
    self.ensure_valid_resolution();
    // SAFETY: self.ptr is valid and non-null. pixConvertRGBToGray() returns
    // a new 8 bpp Pix; the source is not modified.
    let gray = unsafe { pixConvertRGBToGray(self.ptr, 0.0, 0.0, 0.0) };
    if gray.is_null() {
        return Err(TesseractError::NullPointerError);
    }
    Ok(Pix { ptr: gray })
}
// -----------------------------------------------------------------------
// Accessors
// -----------------------------------------------------------------------
/// Returns the raw Leptonica `PIX *` pointer held by this wrapper.
///
/// Intended for passing this image to `TesseractAPI::set_image_2`.
///
/// Calling this method is itself safe and does not transfer ownership; the
/// requirements below concern what the caller does with the pointer
/// afterwards.
///
/// # Safety
///
/// The caller must ensure the `Pix` outlives any use of the returned
/// pointer. `TessBaseAPISetImage2` **borrows** the pointer — it does not
/// take ownership — so the `Pix` must remain alive until after
/// `TessBaseAPIRecognize` (or any other Tesseract call that consumes the
/// image data) has completed. Dropping the `Pix` while Tesseract holds
/// the pointer will result in a use-after-free.
///
/// The caller must **not** free the returned pointer; `Pix::drop` is
/// solely responsible for deallocation via `pixDestroy`.
pub fn as_ptr(&self) -> *mut c_void {
self.ptr
}
/// Width of the image in pixels, as reported by `pixGetWidth`.
///
/// # Examples
///
/// ```rust,no_run
/// # use kreuzberg_tesseract::Pix;
/// # let pix = Pix::from_raw_rgb(&vec![0u8; 8 * 6 * 3], 8, 6).unwrap();
/// assert_eq!(pix.width(), 8);
/// ```
pub fn width(&self) -> i32 {
    let pix = self.ptr;
    // SAFETY: `pix` is this wrapper's valid non-null Pix. pixGetWidth() is a
    // pure read of the Pix header struct; it does not mutate any state.
    unsafe { pixGetWidth(pix) }
}
/// Height of the image in pixels, as reported by `pixGetHeight`.
///
/// # Examples
///
/// ```rust,no_run
/// # use kreuzberg_tesseract::Pix;
/// # let pix = Pix::from_raw_rgb(&vec![0u8; 8 * 6 * 3], 8, 6).unwrap();
/// assert_eq!(pix.height(), 6);
/// ```
pub fn height(&self) -> i32 {
    let pix = self.ptr;
    // SAFETY: `pix` is this wrapper's valid non-null Pix. pixGetHeight() is a
    // pure read of the Pix header struct.
    unsafe { pixGetHeight(pix) }
}
/// Bit depth of the image as reported by `pixGetDepth` (1, 8, or 32 for
/// this module's usage).
///
/// # Examples
///
/// ```rust,no_run
/// # use kreuzberg_tesseract::Pix;
/// # let pix = Pix::from_raw_rgb(&vec![0u8; 4 * 4 * 3], 4, 4).unwrap();
/// assert_eq!(pix.depth(), 32);
/// ```
pub fn depth(&self) -> i32 {
    let pix = self.ptr;
    // SAFETY: `pix` is this wrapper's valid non-null Pix. pixGetDepth() is a
    // pure read of the Pix header struct.
    unsafe { pixGetDepth(pix) }
}
}
// ---------------------------------------------------------------------------
// Drop implementation
// ---------------------------------------------------------------------------
#[cfg(any(feature = "build-tesseract", feature = "build-tesseract-wasm"))]
impl Drop for Pix {
    /// Releases the underlying Leptonica PIX, if there is one.
    fn drop(&mut self) {
        // Nothing to free for a null pointer.
        if self.ptr.is_null() {
            return;
        }
        // SAFETY: self.ptr is a non-null Leptonica PIX that we allocated and
        // own exclusively. pixDestroy() takes a double pointer, sets *ppix to
        // null after freeing, and is safe to call exactly once per allocation.
        // After this call self.ptr is null (Leptonica sets it), preventing
        // any double-free if drop() were somehow called again.
        unsafe { pixDestroy(&mut self.ptr) };
    }
}
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------
#[cfg(test)]
#[cfg(any(feature = "build-tesseract", feature = "build-tesseract-wasm"))]
mod tests {
    use super::*;

    /// Builds a `width` x `height` RGB Pix whose every byte equals `fill`.
    fn make_rgb_pix(width: u32, height: u32, fill: u8) -> Pix {
        let byte_len = (width * height * 3) as usize;
        let pixels = vec![fill; byte_len];
        Pix::from_raw_rgb(&pixels, width, height).expect("from_raw_rgb failed")
    }

    /// A freshly created RGB Pix reports the requested size at 32 bpp.
    #[test]
    fn test_from_raw_rgb_dimensions() {
        let image = make_rgb_pix(16, 8, 200);
        assert_eq!(image.width(), 16);
        assert_eq!(image.height(), 8);
        assert_eq!(image.depth(), 32);
    }

    /// A buffer whose length does not match the dimensions is rejected.
    #[test]
    fn test_from_raw_rgb_wrong_length() {
        let short_buf = vec![0u8; 10]; // too short for 4×4
        let failure = Pix::from_raw_rgb(&short_buf, 4, 4).unwrap_err();
        assert!(matches!(failure, TesseractError::InvalidImageData));
    }

    /// A zero width or a zero height is rejected.
    #[test]
    fn test_from_raw_rgb_zero_dimensions() {
        for (w, h) in [(0, 4), (4, 0)] {
            let failure = Pix::from_raw_rgb(&[], w, h).unwrap_err();
            assert!(matches!(failure, TesseractError::InvalidImageData));
        }
    }

    /// The raw pointer of a live Pix is never null.
    #[test]
    fn test_as_ptr_is_non_null() {
        let image = make_rgb_pix(8, 8, 128);
        let raw = image.as_ptr();
        assert!(!raw.is_null());
    }

    /// Grayscale conversion keeps the size and yields 8 bpp.
    #[test]
    fn test_to_grayscale() {
        let colour = make_rgb_pix(32, 32, 150);
        let gray = colour.to_grayscale().expect("to_grayscale failed");
        assert_eq!(gray.width(), 32);
        assert_eq!(gray.height(), 32);
        assert_eq!(gray.depth(), 8);
    }

    /// Scaling by 2 in both directions doubles both dimensions.
    #[test]
    fn test_scale_up() {
        let small = make_rgb_pix(20, 10, 100);
        let doubled = small.scale(2.0, 2.0).expect("scale failed");
        assert_eq!(doubled.width(), 40);
        assert_eq!(doubled.height(), 20);
    }

    /// Unsharp masking does not change the image size.
    #[test]
    fn test_unsharp_mask_returns_same_dimensions() {
        let soft = make_rgb_pix(32, 32, 200);
        let sharp = soft.unsharp_mask(2, 0.4).expect("unsharp_mask failed");
        assert_eq!(sharp.width(), 32);
        assert_eq!(sharp.height(), 32);
    }

    /// Adaptive thresholding of a grayscale image yields a 1 bpp image.
    #[test]
    fn test_adaptive_threshold_produces_1bpp() {
        let colour = make_rgb_pix(64, 64, 180);
        let gray = colour.to_grayscale().expect("to_grayscale failed");
        let bilevel = gray.adaptive_threshold(32, 32).expect("adaptive_threshold failed");
        assert_eq!(bilevel.depth(), 1);
    }
}

View File

@@ -0,0 +1,218 @@
#![cfg_attr(
not(any(feature = "build-tesseract", feature = "build-tesseract-wasm")),
allow(unused_variables, dead_code)
)]
#![allow(clippy::arc_with_non_send_sync)]
#![allow(clippy::missing_transmute_annotations)]
#![allow(clippy::type_complexity)]
#![allow(clippy::new_without_default)]
#![allow(clippy::not_unsafe_ptr_arg_deref)]
#![allow(clippy::cmp_null)]
//! # kreuzberg-tesseract
//!
//! `kreuzberg-tesseract` provides safe Rust bindings for Tesseract OCR with built-in compilation
//! of Tesseract and Leptonica libraries. This crate aims to make OCR functionality
//! easily accessible in Rust projects while handling the complexity of interfacing
//! with the underlying C++ libraries.
//!
//! ## Usage
//!
//! Here's a basic example of how to use `kreuzberg-tesseract`:
//!
//! ```rust
//! use std::path::PathBuf;
//! use std::error::Error;
//! use kreuzberg_tesseract::TesseractAPI;
//!
//! fn get_default_tessdata_dir() -> PathBuf {
//! if cfg!(target_os = "macos") {
//! let home_dir = std::env::var("HOME").expect("HOME environment variable not set");
//! PathBuf::from(home_dir)
//! .join("Library")
//! .join("Application Support")
//! .join("kreuzberg-tesseract")
//! .join("tessdata")
//! } else if cfg!(target_os = "linux") {
//! let home_dir = std::env::var("HOME").expect("HOME environment variable not set");
//! PathBuf::from(home_dir)
//! .join(".kreuzberg-tesseract")
//! .join("tessdata")
//! } else if cfg!(target_os = "windows") {
//! PathBuf::from(std::env::var("APPDATA").expect("APPDATA environment variable not set"))
//! .join("kreuzberg-tesseract")
//! .join("tessdata")
//! } else {
//! panic!("Unsupported operating system");
//! }
//! }
//!
//! fn get_tessdata_dir() -> PathBuf {
//! match std::env::var("TESSDATA_PREFIX") {
//! Ok(dir) => {
//! let path = PathBuf::from(dir);
//! let path = if path.ends_with("tessdata") { path } else { path.join("tessdata") };
//! println!("Using TESSDATA_PREFIX directory: {:?}", path);
//! path
//! }
//! Err(_) => {
//! let default_dir = get_default_tessdata_dir();
//! println!(
//! "TESSDATA_PREFIX not set, using default directory: {:?}",
//! default_dir
//! );
//! default_dir
//! }
//! }
//! }
//!
//! fn main() -> Result<(), Box<dyn Error>> {
//! let api = TesseractAPI::new()?;
//!
//! // Get tessdata directory (uses default location or TESSDATA_PREFIX if set)
//! let tessdata_dir = get_tessdata_dir();
//! api.init(tessdata_dir.to_str().unwrap(), "eng")?;
//!
//! let width = 24;
//! let height = 24;
//! let bytes_per_pixel = 1;
//! let bytes_per_line = width * bytes_per_pixel;
//!
//! // Initialize image data with all white pixels
//! let mut image_data = vec![255u8; width * height];
//!
//! // Draw number 9 with clearer distinction
//! for y in 4..19 {
//! for x in 7..17 {
//! // Top bar
//! if y == 4 && x >= 8 && x <= 15 {
//! image_data[y * width + x] = 0;
//! }
//! // Top curve left side
//! if y >= 4 && y <= 10 && x == 7 {
//! image_data[y * width + x] = 0;
//! }
//! // Top curve right side
//! if y >= 4 && y <= 11 && x == 16 {
//! image_data[y * width + x] = 0;
//! }
//! // Middle bar
//! if y == 11 && x >= 8 && x <= 15 {
//! image_data[y * width + x] = 0;
//! }
//! // Bottom right vertical line
//! if y >= 11 && y <= 18 && x == 16 {
//! image_data[y * width + x] = 0;
//! }
//! // Bottom bar
//! if y == 18 && x >= 8 && x <= 15 {
//! image_data[y * width + x] = 0;
//! }
//! }
//! }
//!
//! // Set the image data
//! api.set_image(&image_data, width.try_into().unwrap(), height.try_into().unwrap(), bytes_per_pixel.try_into().unwrap(), bytes_per_line.try_into().unwrap())?;
//!
//! // Set whitelist for digits only
//! api.set_variable("tessedit_char_whitelist", "0123456789")?;
//!
//! // Set PSM mode to single character
//! api.set_variable("tessedit_pageseg_mode", "10")?;
//!
//! // Get the recognized text
//! let text = api.get_utf8_text()?;
//! println!("Recognized text: {}", text.trim());
//!
//! Ok(())
//! }
//! ```
/// Declares FFI functions with the ABI appropriate for the compilation target.
///
/// The list of signatures given to the macro is emitted twice, each copy
/// gated on `target_arch`:
///
/// * non-`wasm32` targets use `unsafe extern "C-unwind"`, so that a C++
///   exception escaping Tesseract/Leptonica is permitted to unwind through
///   the Rust caller;
/// * `wasm32` targets use plain `unsafe extern "C"` (where the LLVM backend
///   does not support `cleanupret` / C++ unwinding).
///
/// Each declaration may carry attributes and a visibility, takes
/// `name: Type` arguments (a trailing comma is accepted) and an optional
/// return type, and must end with `;`.
macro_rules! ffi_extern {
(
$(
// Zero or more attributes (doc comments included) on one declaration.
$(#[$meta:meta])*
$vis:vis fn $name:ident($($arg:ident : $ty:ty),* $(,)?) $(-> $ret:ty)?;
)*
) => {
// Native targets: ABI that allows foreign unwinding.
#[cfg(not(target_arch = "wasm32"))]
unsafe extern "C-unwind" {
$(
$(#[$meta])*
$vis fn $name($($arg : $ty),*) $(-> $ret)?;
)*
}
// WASM targets: same declarations with the plain C ABI.
#[cfg(target_arch = "wasm32")]
unsafe extern "C" {
$(
$(#[$meta])*
$vis fn $name($($arg : $ty),*) $(-> $ret)?;
)*
}
};
}
pub use error::{Result, TesseractError};
mod error;
// WASM: Override __cxa_atexit to be a no-op. WASI SDK's __cxa_atexit calls calloc during
// C++ static initialization, which crashes because dlmalloc's heap isn't properly set up
// for wasm32-unknown-unknown. Since WASM modules never exit normally, atexit handlers
// are unnecessary.
#[cfg(all(target_arch = "wasm32", not(target_os = "emscripten")))]
mod wasm_compat {
/// No-op replacement for the C++ runtime's `__cxa_atexit`.
///
/// All three arguments are ignored and `0` is always returned, so nothing is
/// ever registered to run at exit.
///
/// # Safety
///
/// The body touches none of its arguments; the function is `unsafe` only
/// because of its exported C signature.
#[unsafe(no_mangle)]
pub unsafe extern "C" fn __cxa_atexit(
_func: Option<unsafe extern "C" fn(*mut core::ffi::c_void)>,
_arg: *mut core::ffi::c_void,
_dso_handle: *mut core::ffi::c_void,
) -> i32 {
0 // Success, but don't actually register anything
}
}
mod page_iterator;
pub use page_iterator::{BlockInfo, PageIterator, ParaInfo};
mod result_iterator;
pub use result_iterator::{FontAttributes, ResultIterator, WordData};
mod choice_iterator;
pub use choice_iterator::ChoiceIterator;
mod monitor;
pub use monitor::TessMonitor;
mod result_renderer;
pub use result_renderer::TessResultRenderer;
mod mutable_iterator;
pub use mutable_iterator::MutableIterator;
mod enums;
pub use enums::{
TessOrientation, TessPageIteratorLevel, TessPageSegMode, TessParagraphJustification, TessPolyBlockType,
TessTextlineOrder, TessWritingDirection,
};
mod api;
pub use api::{BoundingBoxArray, TesseractAPI};
pub mod leptonica;
#[cfg(any(feature = "build-tesseract", feature = "build-tesseract-wasm"))]
pub use leptonica::Pix;
/// Returns the English `eng.traineddata` blob that was embedded at compile
/// time; with the `bundle-tessdata-eng` feature enabled this is always
/// `Some`.
///
/// The bundled data is the `tessdata_fast` variant (~4 MB) downloaded by
/// `build.rs` to `TESSDATA_PREFIX_BUNDLED/tessdata/eng.traineddata`. Embedding
/// it lets WASM builds drive Tesseract OCR without filesystem access or
/// runtime fetches.
#[cfg(feature = "bundle-tessdata-eng")]
pub fn bundled_eng_traineddata() -> Option<&'static [u8]> {
    // The path is resolved at build time from TESSDATA_PREFIX_BUNDLED.
    static ENG_TRAINEDDATA: &[u8] = include_bytes!(concat!(
        env!("TESSDATA_PREFIX_BUNDLED"),
        "/tessdata/eng.traineddata"
    ));
    Some(ENG_TRAINEDDATA)
}
/// Fallback used when the `bundle-tessdata-eng` feature is disabled: no
/// traineddata is embedded, so this always returns `None`.
#[cfg(not(feature = "bundle-tessdata-eng"))]
pub fn bundled_eng_traineddata() -> Option<&'static [u8]> {
    Option::<&'static [u8]>::None
}

View File

@@ -0,0 +1,68 @@
use crate::error::{Result, TesseractError};
use std::os::raw::{c_int, c_void};
use std::sync::{Arc, Mutex};
/// Owning wrapper around a Tesseract monitor created with `TessMonitorCreate`.
///
/// The raw pointer lives behind a mutex; every method locks it before
/// calling into the C API, and `Drop` releases it with `TessMonitorDelete`.
pub struct TessMonitor {
// Raw monitor pointer returned by TessMonitorCreate.
handle: Arc<Mutex<*mut c_void>>,
}
// NOTE(review): these impls assert that the monitor may be moved to and shared
// between threads. All access visible in this file goes through the mutex;
// whether the underlying C object tolerates use from arbitrary threads is not
// shown here — confirm.
unsafe impl Send for TessMonitor {}
unsafe impl Sync for TessMonitor {}
impl TessMonitor {
    /// Allocates a monitor through `TessMonitorCreate` and wraps it.
    ///
    /// # Returns
    ///
    /// A `TessMonitor` owning the freshly created handle.
    pub fn new() -> Self {
        let raw = unsafe { TessMonitorCreate() };
        Self {
            handle: Arc::new(Mutex::new(raw)),
        }
    }

    /// Configures the monitor's deadline.
    ///
    /// # Arguments
    ///
    /// * `deadline` - Deadline in milliseconds.
    ///
    /// # Errors
    ///
    /// Returns a `TesseractError::MutexLockError` if the mutex lock fails.
    pub fn set_deadline(&self, deadline: i32) -> Result<()> {
        let guard = self.handle.lock().map_err(|_| TesseractError::MutexLockError)?;
        unsafe { TessMonitorSetDeadlineMSecs(*guard, deadline) };
        Ok(())
    }

    /// Reads the progress value currently reported by the monitor.
    ///
    /// # Returns
    ///
    /// The value returned by `TessMonitorGetProgress`.
    ///
    /// # Errors
    ///
    /// Returns a `TesseractError::MutexLockError` if the mutex lock fails.
    pub fn get_progress(&self) -> Result<i32> {
        let guard = self.handle.lock().map_err(|_| TesseractError::MutexLockError)?;
        let progress = unsafe { TessMonitorGetProgress(*guard) };
        Ok(progress)
    }
}
impl Drop for TessMonitor {
    /// Deletes the underlying monitor via `TessMonitorDelete`.
    ///
    /// If the mutex is poisoned the handle cannot be obtained and the
    /// monitor is left undeleted.
    fn drop(&mut self) {
        let Ok(guard) = self.handle.lock() else {
            return;
        };
        unsafe { TessMonitorDelete(*guard) };
    }
}
// Raw Tesseract C API entry points used by `TessMonitor`.
ffi_extern! {
// Creates a monitor; the result is stored as the wrapper's handle in `new`.
pub fn TessMonitorCreate() -> *mut c_void;
// Destroys a monitor; called from `Drop`.
pub fn TessMonitorDelete(monitor: *mut c_void);
// Sets the monitor deadline, in milliseconds.
pub fn TessMonitorSetDeadlineMSecs(monitor: *mut c_void, deadline: c_int);
// Returns the monitor's current progress value.
pub fn TessMonitorGetProgress(monitor: *mut c_void) -> c_int;
}

View File

@@ -0,0 +1,197 @@
use crate::error::{Result, TesseractError};
use std::ffi::CStr;
use std::os::raw::{c_char, c_void};
use std::sync::Arc;
use std::sync::Mutex;
use crate::result_iterator::{
TessResultIteratorConfidence, TessResultIteratorGetUTF8Text, TessResultIteratorNext,
TessResultIteratorSymbolIsDropcap, TessResultIteratorSymbolIsSubscript, TessResultIteratorSymbolIsSuperscript,
TessResultIteratorWordFontAttributes, TessResultIteratorWordIsFromDictionary, TessResultIteratorWordIsNumeric,
TessResultIteratorWordRecognitionLanguage,
};
/// Owning wrapper around a raw Tesseract iterator pointer.
///
/// Methods lock the mutex and forward the pointer to the
/// `TessResultIterator*` C functions; `Drop` releases it with
/// `TessResultIteratorDelete`.
pub struct MutableIterator {
// Raw iterator pointer supplied to `new`.
handle: Arc<Mutex<*mut c_void>>,
}
// NOTE(review): these impls assert cross-thread safety. All access visible in
// this file goes through the mutex; whether the underlying C++ iterator may be
// used from arbitrary threads is not shown here — confirm.
unsafe impl Send for MutableIterator {}
unsafe impl Sync for MutableIterator {}
impl MutableIterator {
    /// Creates a new instance of the MutableIterator.
    ///
    /// # Arguments
    ///
    /// * `handle` - Pointer to the underlying iterator. It is passed to
    ///   `TessResultIteratorDelete` when this value is dropped.
    pub fn new(handle: *mut c_void) -> Self {
        MutableIterator {
            handle: Arc::new(Mutex::new(handle)),
        }
    }

    /// Gets the UTF-8 text for the current iterator position.
    ///
    /// # Arguments
    ///
    /// * `level` - Level of the text.
    ///
    /// # Errors
    ///
    /// * `TesseractError::MutexError` if the mutex lock fails.
    /// * `TesseractError::NullPointerError` if Tesseract returns no text.
    /// * A UTF-8 conversion error if the returned bytes are not valid UTF-8.
    pub fn get_utf8_text(&self, level: i32) -> Result<String> {
        let handle = self.handle.lock().map_err(|_| TesseractError::MutexError)?;
        let text_ptr = unsafe { TessResultIteratorGetUTF8Text(*handle, level) };
        if text_ptr.is_null() {
            return Err(TesseractError::NullPointerError);
        }
        // Copy the text into an owned value first and keep the conversion
        // result unpropagated, so the C buffer is released on the error path
        // too (an early `?` here would leak it).
        let converted = unsafe { CStr::from_ptr(text_ptr) }.to_str().map(str::to_owned);
        unsafe { TessDeleteText(text_ptr as *mut c_char) };
        Ok(converted?)
    }

    /// Gets the confidence of the current iterator position.
    ///
    /// # Arguments
    ///
    /// * `level` - Level of the confidence.
    ///
    /// # Errors
    ///
    /// Returns `TesseractError::MutexError` if the mutex lock fails.
    pub fn confidence(&self, level: i32) -> Result<f32> {
        let handle = self.handle.lock().map_err(|_| TesseractError::MutexError)?;
        Ok(unsafe { TessResultIteratorConfidence(*handle, level) })
    }

    /// Gets the recognition language of the current word.
    ///
    /// # Returns
    ///
    /// Returns the recognition language as a `String` if successful, otherwise returns an error.
    ///
    /// # Errors
    ///
    /// * `TesseractError::MutexError` if the mutex lock fails.
    /// * `TesseractError::NullPointerError` if Tesseract returns null.
    /// * A UTF-8 conversion error if the name is not valid UTF-8.
    pub fn word_recognition_language(&self) -> Result<String> {
        let handle = self.handle.lock().map_err(|_| TesseractError::MutexError)?;
        let lang_ptr = unsafe { TessResultIteratorWordRecognitionLanguage(*handle) };
        if lang_ptr.is_null() {
            return Err(TesseractError::NullPointerError);
        }
        // The string is copied and the pointer is not freed here.
        let c_str = unsafe { CStr::from_ptr(lang_ptr) };
        Ok(c_str.to_str()?.to_owned())
    }

    /// Gets the font attributes of the current word.
    ///
    /// # Returns
    ///
    /// On success, the tuple `(is_bold, is_italic, is_underlined,
    /// is_monospace, is_serif, is_smallcaps, pointsize, font_id)`.
    ///
    /// # Errors
    ///
    /// * `TesseractError::MutexError` if the mutex lock fails.
    /// * `TesseractError::InvalidParameterError` if the C call returns 0.
    pub fn word_font_attributes(&self) -> Result<(bool, bool, bool, bool, bool, bool, i32, i32)> {
        let handle = self.handle.lock().map_err(|_| TesseractError::MutexError)?;
        // Integer out-parameters; the flags are converted to `bool` below.
        let mut is_bold = 0;
        let mut is_italic = 0;
        let mut is_underlined = 0;
        let mut is_monospace = 0;
        let mut is_serif = 0;
        let mut is_smallcaps = 0;
        let mut pointsize = 0;
        let mut font_id = 0;
        let result = unsafe {
            TessResultIteratorWordFontAttributes(
                *handle,
                &mut is_bold,
                &mut is_italic,
                &mut is_underlined,
                &mut is_monospace,
                &mut is_serif,
                &mut is_smallcaps,
                &mut pointsize,
                &mut font_id,
            )
        };
        if result == 0 {
            Err(TesseractError::InvalidParameterError)
        } else {
            Ok((
                is_bold != 0,
                is_italic != 0,
                is_underlined != 0,
                is_monospace != 0,
                is_serif != 0,
                is_smallcaps != 0,
                pointsize,
                font_id,
            ))
        }
    }

    /// Checks if the current word is from the dictionary.
    ///
    /// # Returns
    ///
    /// Returns `Ok(true)` if the current word is from the dictionary, otherwise returns `Ok(false)`.
    ///
    /// # Errors
    ///
    /// Returns `TesseractError::MutexError` if the mutex lock fails.
    pub fn word_is_from_dictionary(&self) -> Result<bool> {
        let handle = self.handle.lock().map_err(|_| TesseractError::MutexError)?;
        Ok(unsafe { TessResultIteratorWordIsFromDictionary(*handle) != 0 })
    }

    /// Checks if the current word is numeric.
    ///
    /// # Returns
    ///
    /// Returns `Ok(true)` if the current word is numeric, otherwise returns `Ok(false)`.
    ///
    /// # Errors
    ///
    /// Returns `TesseractError::MutexError` if the mutex lock fails.
    pub fn word_is_numeric(&self) -> Result<bool> {
        let handle = self.handle.lock().map_err(|_| TesseractError::MutexError)?;
        Ok(unsafe { TessResultIteratorWordIsNumeric(*handle) != 0 })
    }

    /// Checks if the current symbol is superscript.
    ///
    /// # Returns
    ///
    /// Returns `Ok(true)` if the current symbol is superscript, otherwise returns `Ok(false)`.
    ///
    /// # Errors
    ///
    /// Returns `TesseractError::MutexError` if the mutex lock fails.
    pub fn symbol_is_superscript(&self) -> Result<bool> {
        let handle = self.handle.lock().map_err(|_| TesseractError::MutexError)?;
        Ok(unsafe { TessResultIteratorSymbolIsSuperscript(*handle) != 0 })
    }

    /// Checks if the current symbol is subscript.
    ///
    /// # Returns
    ///
    /// Returns `Ok(true)` if the current symbol is subscript, otherwise returns `Ok(false)`.
    ///
    /// # Errors
    ///
    /// Returns `TesseractError::MutexError` if the mutex lock fails.
    pub fn symbol_is_subscript(&self) -> Result<bool> {
        let handle = self.handle.lock().map_err(|_| TesseractError::MutexError)?;
        Ok(unsafe { TessResultIteratorSymbolIsSubscript(*handle) != 0 })
    }

    /// Checks if the current symbol is dropcap.
    ///
    /// # Returns
    ///
    /// Returns `Ok(true)` if the current symbol is dropcap, otherwise returns `Ok(false)`.
    ///
    /// # Errors
    ///
    /// Returns `TesseractError::MutexError` if the mutex lock fails.
    pub fn symbol_is_dropcap(&self) -> Result<bool> {
        let handle = self.handle.lock().map_err(|_| TesseractError::MutexError)?;
        Ok(unsafe { TessResultIteratorSymbolIsDropcap(*handle) != 0 })
    }

    /// Advances the iterator to the next element at the given level.
    ///
    /// # Arguments
    ///
    /// * `level` - Level of the iterator.
    ///
    /// # Returns
    ///
    /// Returns `Ok(true)` if the C call reports a non-zero result, otherwise `Ok(false)`.
    ///
    /// # Errors
    ///
    /// Returns `TesseractError::MutexError` if the mutex lock fails.
    pub fn next(&self, level: i32) -> Result<bool> {
        let handle = self.handle.lock().map_err(|_| TesseractError::MutexError)?;
        Ok(unsafe { TessResultIteratorNext(*handle, level) != 0 })
    }
}
impl Drop for MutableIterator {
    /// Deletes the underlying iterator via `TessResultIteratorDelete`.
    ///
    /// If the mutex is poisoned the handle cannot be obtained and the
    /// iterator is left undeleted.
    fn drop(&mut self) {
        let Ok(guard) = self.handle.lock() else {
            return;
        };
        unsafe { TessResultIteratorDelete(*guard) };
    }
}
// Raw Tesseract C API entry points declared by this module.
ffi_extern! {
// Destroys an iterator; called from `MutableIterator`'s `Drop`.
pub fn TessResultIteratorDelete(handle: *mut c_void);
// Frees a text buffer; `get_utf8_text` calls this on the pointer it received.
pub fn TessDeleteText(text: *mut c_char);
}

View File

@@ -0,0 +1,421 @@
use crate::TesseractError;
use crate::enums::{
TessOrientation, TessPageIteratorLevel, TessParagraphJustification, TessPolyBlockType, TessTextlineOrder,
TessWritingDirection,
};
use crate::error::Result;
use std::os::raw::{c_float, c_int, c_void};
use std::sync::Arc;
use std::sync::Mutex;
/// Block-level layout information from Tesseract.
///
/// One value is produced per block by `PageIterator::extract_all_blocks`.
#[derive(Debug, Clone)]
pub struct BlockInfo {
// Block type converted from the integer returned by TessPageIteratorBlockType.
pub block_type: TessPolyBlockType,
// Bounding box edges as written by TessPageIteratorBoundingBox
// (presumably image pixel coordinates — TODO confirm).
pub left: i32,
pub top: i32,
pub right: i32,
pub bottom: i32,
}
/// Paragraph-level information from Tesseract.
///
/// One value is produced per paragraph by
/// `PageIterator::extract_all_paragraphs`.
#[derive(Debug, Clone)]
pub struct ParaInfo {
// Paragraph justification reported by Tesseract.
pub justification: TessParagraphJustification,
// Flags reported by Tesseract for this paragraph.
pub is_list_item: bool,
pub is_crown: bool,
// First-line indent reported by Tesseract (units not shown here — TODO confirm).
pub first_line_indent: i32,
// Bounding box edges of the paragraph (presumably image pixel
// coordinates — TODO confirm).
pub left: i32,
pub top: i32,
pub right: i32,
pub bottom: i32,
}
/// Wrapper around a raw Tesseract page-iterator pointer.
///
/// Methods lock the mutex and forward the pointer to the
/// `TessPageIterator*` C functions.
pub struct PageIterator {
// Raw iterator pointer supplied to `new`. The field is public, so code
// outside this type can lock and use the pointer directly.
pub handle: Arc<Mutex<*mut c_void>>,
}
// NOTE(review): these impls assert cross-thread safety. The methods shown go
// through the mutex; whether the underlying C++ iterator may be used from
// arbitrary threads is not shown here — confirm.
unsafe impl Send for PageIterator {}
unsafe impl Sync for PageIterator {}
impl PageIterator {
/// Creates a new instance of the PageIterator.
///
/// # Arguments
///
/// * `handle` - Pointer to the PageIterator.
///
/// # Returns
///
/// Returns the new instance of the PageIterator.
pub fn new(handle: *mut c_void) -> Self {
PageIterator {
handle: Arc::new(Mutex::new(handle)),
}
}
/// Resets the iterator by calling `TessPageIteratorBegin`.
///
/// # Errors
///
/// Returns `TesseractError::MutexLockError` if the mutex lock fails.
pub fn begin(&self) -> Result<()> {
    let guard = self.handle.lock().map_err(|_| TesseractError::MutexLockError)?;
    unsafe {
        TessPageIteratorBegin(*guard);
    }
    Ok(())
}
/// Advances the iterator to the next element at the given level.
///
/// # Arguments
///
/// * `level` - Level of the iterator.
///
/// # Returns
///
/// `Ok(true)` if `TessPageIteratorNext` reports a non-zero result,
/// `Ok(false)` otherwise.
///
/// # Errors
///
/// Returns `TesseractError::MutexLockError` if the mutex lock fails.
pub fn next(&self, level: TessPageIteratorLevel) -> Result<bool> {
    let guard = self.handle.lock().map_err(|_| TesseractError::MutexLockError)?;
    let advanced = unsafe { TessPageIteratorNext(*guard, level as c_int) };
    Ok(advanced != 0)
}
/// Reports whether the iterator is at the beginning of an element of the
/// specified level.
///
/// # Arguments
///
/// * `level` - Level of the iterator.
///
/// # Returns
///
/// `Ok(true)` if at the beginning, `Ok(false)` otherwise.
///
/// # Errors
///
/// Returns `TesseractError::MutexLockError` if the mutex lock fails.
pub fn is_at_beginning_of(&self, level: TessPageIteratorLevel) -> Result<bool> {
    let guard = self.handle.lock().map_err(|_| TesseractError::MutexLockError)?;
    let at_start = unsafe { TessPageIteratorIsAtBeginningOf(*guard, level as c_int) };
    Ok(at_start != 0)
}
/// Reports whether the iterator is at the final `element` within the
/// enclosing `level`.
///
/// # Arguments
///
/// * `level` - Level of the iterator.
/// * `element` - Element of the iterator.
///
/// # Returns
///
/// `Ok(true)` if at the final element, `Ok(false)` otherwise.
///
/// # Errors
///
/// Returns `TesseractError::MutexLockError` if the mutex lock fails.
pub fn is_at_final_element(&self, level: TessPageIteratorLevel, element: TessPageIteratorLevel) -> Result<bool> {
    let guard = self.handle.lock().map_err(|_| TesseractError::MutexLockError)?;
    let at_end = unsafe { TessPageIteratorIsAtFinalElement(*guard, level as c_int, element as c_int) };
    Ok(at_end != 0)
}
/// Fetches the bounding box of the current element at the given level.
///
/// # Arguments
///
/// * `level` - Level of the bounding box.
///
/// # Returns
///
/// `(left, top, right, bottom)` as written by `TessPageIteratorBoundingBox`.
///
/// # Errors
///
/// * `TesseractError::MutexLockError` if the mutex lock fails.
/// * `TesseractError::InvalidParameterError` if the C call returns 0.
pub fn bounding_box(&self, level: TessPageIteratorLevel) -> Result<(i32, i32, i32, i32)> {
    let (mut left, mut top, mut right, mut bottom): (i32, i32, i32, i32) = (0, 0, 0, 0);
    let guard = self.handle.lock().map_err(|_| TesseractError::MutexLockError)?;
    let found = unsafe {
        TessPageIteratorBoundingBox(*guard, level as c_int, &mut left, &mut top, &mut right, &mut bottom)
    };
    if found != 0 {
        Ok((left, top, right, bottom))
    } else {
        Err(TesseractError::InvalidParameterError)
    }
}
/// Fetches the block type at the current iterator position.
///
/// # Returns
///
/// The integer returned by `TessPageIteratorBlockType`, converted with
/// `TessPolyBlockType::from_int`.
///
/// # Errors
///
/// Returns `TesseractError::MutexLockError` if the mutex lock fails.
pub fn block_type(&self) -> Result<TessPolyBlockType> {
    let guard = self.handle.lock().map_err(|_| TesseractError::MutexLockError)?;
    let raw_type = unsafe { TessPageIteratorBlockType(*guard) };
    let converted = TessPolyBlockType::from_int(raw_type);
    Ok(converted)
}
/// Fetches the baseline of the current element at the given level.
///
/// # Arguments
///
/// * `level` - Level of the baseline.
///
/// # Returns
///
/// `(x1, y1, x2, y2)` as written by `TessPageIteratorBaseline`.
///
/// # Errors
///
/// * `TesseractError::MutexLockError` if the mutex lock fails.
/// * `TesseractError::InvalidParameterError` if the C call returns 0.
pub fn baseline(&self, level: i32) -> Result<(i32, i32, i32, i32)> {
    let (mut x1, mut y1, mut x2, mut y2): (i32, i32, i32, i32) = (0, 0, 0, 0);
    let guard = self.handle.lock().map_err(|_| TesseractError::MutexLockError)?;
    let found = unsafe { TessPageIteratorBaseline(*guard, level, &mut x1, &mut y1, &mut x2, &mut y2) };
    if found != 0 {
        Ok((x1, y1, x2, y2))
    } else {
        Err(TesseractError::InvalidParameterError)
    }
}
/// Fetches orientation information for the current iterator position.
///
/// # Returns
///
/// `(orientation, writing_direction, textline_order, deskew_angle)`, with
/// the three integer values converted through each enum's `from_int`.
///
/// # Errors
///
/// * `TesseractError::MutexLockError` if the mutex lock fails.
/// * `TesseractError::InvalidParameterError` if the C call returns 0.
pub fn orientation(&self) -> Result<(TessOrientation, TessWritingDirection, TessTextlineOrder, f32)> {
    // Out-parameters filled in by TessPageIteratorOrientation.
    let mut raw_orientation = 0;
    let mut raw_direction = 0;
    let mut raw_order = 0;
    let mut deskew = 0.0;
    let guard = self.handle.lock().map_err(|_| TesseractError::MutexLockError)?;
    let status = unsafe {
        TessPageIteratorOrientation(
            *guard,
            &mut raw_orientation,
            &mut raw_direction,
            &mut raw_order,
            &mut deskew,
        )
    };
    if status == 0 {
        return Err(TesseractError::InvalidParameterError);
    }
    Ok((
        TessOrientation::from_int(raw_orientation),
        TessWritingDirection::from_int(raw_direction),
        TessTextlineOrder::from_int(raw_order),
        deskew,
    ))
}
/// Collects every block on the page while holding the mutex once.
///
/// The iterator is reset to the beginning and then walked at `RIL_BLOCK`
/// level. For each position the block type and bounding box are read; a
/// position whose bounding-box call returns 0 contributes no entry.
///
/// # Returns
///
/// `Ok(Vec<BlockInfo>)` with one entry per block whose bounding box could
/// be read.
///
/// # Errors
///
/// Returns `TesseractError::MutexLockError` if the mutex cannot be acquired.
pub fn extract_all_blocks(&self) -> Result<Vec<BlockInfo>> {
    let guard = self.handle.lock().map_err(|_| TesseractError::MutexLockError)?;
    let block_level = TessPageIteratorLevel::RIL_BLOCK as c_int;
    let mut collected = Vec::new();
    // SAFETY: `*guard` is a valid non-null TessPageIterator pointer owned by this struct.
    // `TessPageIteratorBegin` resets the iterator to the first element and takes only
    // the pointer — no aliasing occurs because we hold the mutex for the duration.
    unsafe { TessPageIteratorBegin(*guard) };
    // The first position is always inspected, then we continue while
    // TessPageIteratorNext keeps reporting another element.
    let mut more = true;
    while more {
        // SAFETY: `*guard` is valid; TessPageIteratorBlockType reads the current
        // iterator position and returns an integer enum value without taking ownership.
        let raw_type = unsafe { TessPageIteratorBlockType(*guard) };
        let (mut left, mut top, mut right, mut bottom): (c_int, c_int, c_int, c_int) = (0, 0, 0, 0);
        // SAFETY: `*guard` is valid; the four `*mut c_int` pointers point to local
        // stack variables whose lifetimes exceed this call.
        let have_box = unsafe {
            TessPageIteratorBoundingBox(*guard, block_level, &mut left, &mut top, &mut right, &mut bottom)
        } != 0;
        if have_box {
            collected.push(BlockInfo {
                block_type: TessPolyBlockType::from_int(raw_type),
                left,
                top,
                right,
                bottom,
            });
        }
        // SAFETY: `*guard` is valid; TessPageIteratorNext advances the iterator
        // in-place and returns 0 when there are no more elements at this level.
        more = unsafe { TessPageIteratorNext(*guard, block_level) } != 0;
    }
    Ok(collected)
}
/// Extracts all paragraphs from the page in a single mutex-locked pass.
///
/// Resets the iterator to the beginning, then iterates at `RIL_PARA` level, collecting
/// paragraph metadata and bounding box for each paragraph found. A position whose
/// bounding-box query returns 0 is skipped.
///
/// # Returns
///
/// Returns `Ok(Vec<ParaInfo>)` with one entry per paragraph that yielded a bounding box.
///
/// # Errors
///
/// Returns `TesseractError::MutexLockError` if the handle mutex cannot be acquired.
pub fn extract_all_paragraphs(&self) -> Result<Vec<ParaInfo>> {
    let handle = self.handle.lock().map_err(|_| TesseractError::MutexLockError)?;
    let level = TessPageIteratorLevel::RIL_PARA as c_int;
    let mut paragraphs = Vec::new();
    // SAFETY: `*handle` is the pointer stored in this wrapper, assumed to be a live
    // Tesseract page iterator; the mutex guard is held for the entire loop.
    unsafe { TessPageIteratorBegin(*handle) };
    loop {
        let mut left: c_int = 0;
        let mut top: c_int = 0;
        let mut right: c_int = 0;
        let mut bottom: c_int = 0;
        // The bounding box is queried first: its return value is the only status the
        // C API gives for this position, so paragraph details are requested only for
        // positions that passed it.
        // SAFETY: the four out-pointers reference distinct locals that outlive the call.
        let bbox_ok = unsafe {
            TessPageIteratorBoundingBox(*handle, level, &mut left, &mut top, &mut right, &mut bottom)
        } != 0;
        if bbox_ok {
            let mut justification: c_int = 0;
            // `is_list_item` / `is_crown` are written as C `int`s, so `c_int` temporaries
            // are used and converted to `bool` afterwards.
            let mut is_list_item_raw: c_int = 0;
            let mut is_crown_raw: c_int = 0;
            let mut first_line_indent: c_int = 0;
            // Tesseract's C API (capi.h) declares `TessPageIteratorParagraphInfo` as
            // returning `void`; the value seen through the `c_int` binding is not a
            // status code and is deliberately ignored.
            // SAFETY: all out-pointers reference distinct locals that outlive the call.
            unsafe {
                TessPageIteratorParagraphInfo(
                    *handle,
                    &mut justification,
                    &mut is_list_item_raw,
                    &mut is_crown_raw,
                    &mut first_line_indent,
                );
            }
            paragraphs.push(ParaInfo {
                justification: TessParagraphJustification::from_int(justification),
                is_list_item: is_list_item_raw != 0,
                is_crown: is_crown_raw != 0,
                first_line_indent,
                left,
                top,
                right,
                bottom,
            });
        }
        // SAFETY: same guarded pointer; advances the iterator in place and returns 0
        // when there are no more elements at this level.
        let has_next = unsafe { TessPageIteratorNext(*handle, level) };
        if has_next == 0 {
            break;
        }
    }
    Ok(paragraphs)
}
/// Gets the paragraph information at the iterator's current position.
///
/// # Returns
///
/// Returns `(justification, is_list_item, is_crown, first_line_indent)`. The justification
/// is built with `TessParagraphJustification::from_int`; the two flags are `true` when
/// Tesseract wrote a non-zero value.
///
/// # Errors
///
/// Returns `TesseractError::MutexLockError` if the handle mutex cannot be acquired.
pub fn paragraph_info(&self) -> Result<(TessParagraphJustification, bool, bool, i32)> {
    let mut justification: c_int = 0;
    // `is_list_item` / `is_crown` are written as C `int`s (4 bytes) while a Rust `bool`
    // is 1 byte, so `c_int` temporaries are used and converted afterwards.
    let mut is_list_item_raw: c_int = 0;
    let mut is_crown_raw: c_int = 0;
    let mut first_line_indent: c_int = 0;
    let handle = self.handle.lock().map_err(|_| TesseractError::MutexLockError)?;
    // Tesseract's C API (capi.h) declares `TessPageIteratorParagraphInfo` as returning
    // `void`. The value seen through the `c_int` binding is therefore not a status code
    // and is deliberately discarded instead of being compared against 0.
    //
    // SAFETY: `*handle` is the pointer stored in this wrapper, assumed to be a live
    // Tesseract page iterator; the guard is held for the call. All out-pointers reference
    // distinct locals that outlive the call.
    unsafe {
        TessPageIteratorParagraphInfo(
            *handle,
            &mut justification,
            &mut is_list_item_raw,
            &mut is_crown_raw,
            &mut first_line_indent,
        );
    }
    Ok((
        TessParagraphJustification::from_int(justification),
        is_list_item_raw != 0,
        is_crown_raw != 0,
        first_line_indent,
    ))
}
}
/// Releases the native page iterator when the wrapper goes away.
impl Drop for PageIterator {
    fn drop(&mut self) {
        // A poisoned mutex still holds the pointer, so recover the guard rather than
        // skipping the delete and leaking the native iterator. This never panics.
        let handle = self.handle.lock().unwrap_or_else(|poisoned| poisoned.into_inner());
        // SAFETY: `*handle` is the pointer stored in this wrapper and is handed to
        // Tesseract's delete function once, here; the guard is held for the call.
        unsafe { TessPageIteratorDelete(*handle) };
    }
}
// Raw declarations of the Tesseract C API functions used by the page iterator wrapper.
// Every `handle` is an opaque pointer that is passed to Tesseract unchanged.
ffi_extern! {
// Frees the iterator; called from `Drop for PageIterator`.
pub fn TessPageIteratorDelete(handle: *mut c_void);
// Rewinds the iterator to its first element (used before the bulk extraction loops).
pub fn TessPageIteratorBegin(handle: *mut c_void);
// Advances the iterator at `level`; callers treat 0 as "no further element".
pub fn TessPageIteratorNext(handle: *mut c_void, level: c_int) -> c_int;
pub fn TessPageIteratorIsAtBeginningOf(handle: *mut c_void, level: c_int) -> c_int;
pub fn TessPageIteratorIsAtFinalElement(handle: *mut c_void, level: c_int, element: c_int) -> c_int;
// Writes the bounding box of the current element at `level`; callers treat 0 as failure.
pub fn TessPageIteratorBoundingBox(
handle: *mut c_void,
level: c_int,
left: *mut c_int,
top: *mut c_int,
right: *mut c_int,
bottom: *mut c_int,
) -> c_int;
// Returns the raw block type, which callers convert with `TessPolyBlockType::from_int`.
pub fn TessPageIteratorBlockType(handle: *mut c_void) -> c_int;
// Writes the two baseline end points of the current element at `level`.
pub fn TessPageIteratorBaseline(
handle: *mut c_void,
level: c_int,
x1: *mut c_int,
y1: *mut c_int,
x2: *mut c_int,
y2: *mut c_int,
) -> c_int;
// NOTE(review): Tesseract's capi.h appears to declare this function as returning `void`;
// if so, the `c_int` result is meaningless — confirm, then fix declaration and callers together.
pub fn TessPageIteratorOrientation(
handle: *mut c_void,
orientation: *mut c_int,
writing_direction: *mut c_int,
textline_order: *mut c_int,
deskew_angle: *mut c_float,
) -> c_int;
// NOTE(review): presumably takes the base-API handle and returns a new iterator pointer —
// confirm against the caller.
pub fn TessBaseAPIGetIterator(handle: *mut c_void) -> *mut c_void;
// `is_list_item` and `is_crown` are C `int`s, hence `*mut c_int` rather than `*mut bool`.
// NOTE(review): Tesseract's capi.h appears to declare this function as returning `void`;
// if so, the `c_int` result is meaningless — confirm, then fix declaration and callers together.
pub fn TessPageIteratorParagraphInfo(
handle: *mut c_void,
justification: *mut c_int,
is_list_item: *mut c_int,
is_crown: *mut c_int,
first_line_indent: *mut c_int,
) -> c_int;
}

View File

@@ -0,0 +1,589 @@
use crate::api::TessDeleteText;
use crate::enums::TessPageIteratorLevel;
use crate::error::{Result, TesseractError};
use std::ffi::CStr;
use std::os::raw::{c_char, c_float, c_int, c_void};
use std::sync::{Arc, Mutex};
/// Font attributes detected by Tesseract for a word.
///
/// Filled from the out-parameters of `TessResultIteratorWordFontAttributes`; each
/// boolean is `true` when Tesseract wrote a non-zero value into the matching parameter.
#[derive(Debug, Clone)]
pub struct FontAttributes {
/// `true` when the `is_bold` output was non-zero.
pub is_bold: bool,
/// `true` when the `is_italic` output was non-zero.
pub is_italic: bool,
/// `true` when the `is_underlined` output was non-zero.
pub is_underlined: bool,
/// `true` when the `is_monospace` output was non-zero.
pub is_monospace: bool,
/// `true` when the `is_serif` output was non-zero.
pub is_serif: bool,
/// `true` when the `is_smallcaps` output was non-zero.
pub is_smallcaps: bool,
/// Raw `pointsize` output. NOTE(review): unit presumably printer's points — confirm.
pub pointsize: i32,
/// Raw `font_id` output, passed through unchanged.
pub font_id: i32,
}
/// Complete word data extracted in a single mutex lock.
///
/// Produced by `extract_word_data_unlocked`, which queries everything at `RIL_WORD` level.
#[derive(Debug, Clone)]
pub struct WordData {
/// UTF-8 text of the word, copied out of the buffer returned by Tesseract.
pub text: String,
/// Left edge of the bounding box as written by `TessPageIteratorBoundingBox`.
pub left: i32,
/// Top edge of the bounding box.
pub top: i32,
/// Right edge of the bounding box.
pub right: i32,
/// Bottom edge of the bounding box.
pub bottom: i32,
/// Value returned by `TessResultIteratorConfidence` for the word.
pub confidence: f32,
/// Font attributes, or `None` when the font-attribute call returned 0.
pub font_attrs: Option<FontAttributes>,
}
/// Wrapper around a raw Tesseract result-iterator pointer.
///
/// Every method locks the mutex before handing the pointer to Tesseract, and the `Drop`
/// implementation passes the pointer to `TessResultIteratorDelete`.
pub struct ResultIterator {
/// Mutex-guarded raw iterator pointer. The field is public, so the `Arc` can be cloned
/// by other code.
pub handle: Arc<Mutex<*mut c_void>>,
}
// NOTE(review): these impls assert that the raw pointer may be sent to and shared between
// threads. The wrapper serialises its own FFI calls through the mutex; whether the
// underlying Tesseract object is otherwise safe to use from several threads is not
// shown here — confirm.
unsafe impl Send for ResultIterator {}
unsafe impl Sync for ResultIterator {}
impl ResultIterator {
/// Creates a new instance of the ResultIterator.
///
/// # Arguments
///
/// * `handle` - Pointer to the ResultIterator.
///
/// # Returns
///
/// Returns the new instance of the ResultIterator.
pub fn new(handle: *mut c_void) -> Self {
ResultIterator {
handle: Arc::new(Mutex::new(handle)),
}
}
/// Gets the UTF-8 text of the current element at the given level.
///
/// # Arguments
///
/// * `level` - Iterator level whose text is requested.
///
/// # Returns
///
/// Returns the text as an owned `String`.
///
/// # Errors
///
/// Returns `TesseractError::MutexLockError` if the handle mutex cannot be acquired,
/// `TesseractError::NullPointerError` if Tesseract returns a null pointer, or the
/// UTF-8 conversion error if the returned bytes are not valid UTF-8. The buffer returned
/// by Tesseract is released on every one of these paths.
pub fn get_utf8_text(&self, level: TessPageIteratorLevel) -> Result<String> {
    let handle = self.handle.lock().map_err(|_| TesseractError::MutexLockError)?;
    // SAFETY: `*handle` is the pointer stored in this wrapper, assumed to be a live
    // Tesseract result iterator; the guard is held for the call. The returned pointer is
    // either null or a C string that must be released with `TessDeleteText`.
    let text_ptr = unsafe { TessResultIteratorGetUTF8Text(*handle, level as c_int) };
    if text_ptr.is_null() {
        return Err(TesseractError::NullPointerError);
    }
    // Decode into an owned `Result` first and only propagate the error after the buffer
    // has been freed; using `?` directly on `to_str()` would return early and leak it.
    // SAFETY: `text_ptr` is non-null and assumed to point at a NUL-terminated string
    // that stays valid until `TessDeleteText` is called below.
    let decoded = unsafe { CStr::from_ptr(text_ptr) }.to_str().map(str::to_owned);
    // SAFETY: `text_ptr` came from `TessResultIteratorGetUTF8Text`, is freed exactly once
    // here, and is not accessed afterwards (`decoded` owns its data or holds an error).
    unsafe { TessDeleteText(text_ptr as *mut c_char) };
    Ok(decoded?)
}
/// Gets the confidence Tesseract reports for the current element.
///
/// # Arguments
///
/// * `level` - Iterator level whose confidence is requested.
///
/// # Returns
///
/// Returns the value of `TessResultIteratorConfidence` as an `f32`.
///
/// # Errors
///
/// Returns `TesseractError::MutexLockError` if the handle mutex cannot be acquired.
pub fn confidence(&self, level: TessPageIteratorLevel) -> Result<f32> {
    let guard = self.handle.lock().map_err(|_| TesseractError::MutexLockError)?;
    let raw_level = level as c_int;
    // SAFETY: `*guard` is the pointer stored in this wrapper, assumed to be a live
    // Tesseract result iterator; the guard is held for the call, which only returns a float.
    let value = unsafe { TessResultIteratorConfidence(*guard, raw_level) };
    Ok(value)
}
/// Gets the recognition language reported for the current word.
///
/// # Returns
///
/// Returns the language string copied into an owned `String`.
///
/// # Errors
///
/// Returns `TesseractError::MutexLockError` if the handle mutex cannot be acquired,
/// `TesseractError::NullPointerError` if Tesseract returns a null pointer, or the
/// UTF-8 conversion error if the string is not valid UTF-8.
pub fn word_recognition_language(&self) -> Result<String> {
    let guard = self.handle.lock().map_err(|_| TesseractError::MutexLockError)?;
    // SAFETY: `*guard` is the pointer stored in this wrapper, assumed to be a live
    // Tesseract result iterator; the guard is held for the call.
    let raw_lang = unsafe { TessResultIteratorWordRecognitionLanguage(*guard) };
    if raw_lang.is_null() {
        return Err(TesseractError::NullPointerError);
    }
    // The string is only borrowed and copied; this function does not free it.
    // SAFETY: `raw_lang` is non-null and assumed to point at a NUL-terminated string
    // that remains valid while the guard is held.
    let lang = unsafe { CStr::from_ptr(raw_lang) }.to_str()?;
    Ok(lang.to_owned())
}
/// Gets the font attributes reported for the current word.
///
/// # Returns
///
/// Returns `(is_bold, is_italic, is_underlined, is_monospace, is_serif, is_smallcaps,
/// pointsize, font_id)`. Each flag is `true` when Tesseract wrote a non-zero value.
///
/// # Errors
///
/// Returns `TesseractError::MutexLockError` if the handle mutex cannot be acquired, or
/// `TesseractError::InvalidParameterError` if the font-attribute call returns 0.
pub fn word_font_attributes(&self) -> Result<(bool, bool, bool, bool, bool, bool, i32, i32)> {
    let guard = self.handle.lock().map_err(|_| TesseractError::MutexLockError)?;
    let (mut bold, mut italic, mut underlined): (c_int, c_int, c_int) = (0, 0, 0);
    let (mut monospace, mut serif, mut smallcaps): (c_int, c_int, c_int) = (0, 0, 0);
    let (mut size, mut id): (c_int, c_int) = (0, 0);
    // SAFETY: `*guard` is the pointer stored in this wrapper, assumed to be a live
    // Tesseract result iterator; the guard is held for the call. The eight out-pointers
    // reference distinct locals that outlive the call.
    let status = unsafe {
        TessResultIteratorWordFontAttributes(
            *guard,
            &mut bold,
            &mut italic,
            &mut underlined,
            &mut monospace,
            &mut serif,
            &mut smallcaps,
            &mut size,
            &mut id,
        )
    };
    if status == 0 {
        return Err(TesseractError::InvalidParameterError);
    }
    Ok((
        bold != 0,
        italic != 0,
        underlined != 0,
        monospace != 0,
        serif != 0,
        smallcaps != 0,
        size,
        id,
    ))
}
/// Reports whether Tesseract flags the current word as coming from the dictionary.
///
/// # Returns
///
/// Returns `true` when `TessResultIteratorWordIsFromDictionary` returns a non-zero value.
///
/// # Errors
///
/// Returns `TesseractError::MutexLockError` if the handle mutex cannot be acquired.
pub fn word_is_from_dictionary(&self) -> Result<bool> {
    let guard = self.handle.lock().map_err(|_| TesseractError::MutexLockError)?;
    // SAFETY: `*guard` is the pointer stored in this wrapper, assumed to be a live
    // Tesseract result iterator; the guard is held for the call.
    let flag = unsafe { TessResultIteratorWordIsFromDictionary(*guard) };
    Ok(flag != 0)
}
/// Reports whether Tesseract flags the current word as numeric.
///
/// # Returns
///
/// Returns `true` when `TessResultIteratorWordIsNumeric` returns a non-zero value.
///
/// # Errors
///
/// Returns `TesseractError::MutexLockError` if the handle mutex cannot be acquired.
pub fn word_is_numeric(&self) -> Result<bool> {
    let guard = self.handle.lock().map_err(|_| TesseractError::MutexLockError)?;
    // SAFETY: `*guard` is the pointer stored in this wrapper, assumed to be a live
    // Tesseract result iterator; the guard is held for the call.
    let flag = unsafe { TessResultIteratorWordIsNumeric(*guard) };
    Ok(flag != 0)
}
/// Reports whether Tesseract flags the current symbol as superscript.
///
/// # Returns
///
/// Returns `true` when `TessResultIteratorSymbolIsSuperscript` returns a non-zero value.
///
/// # Errors
///
/// Returns `TesseractError::MutexLockError` if the handle mutex cannot be acquired.
pub fn symbol_is_superscript(&self) -> Result<bool> {
    let guard = self.handle.lock().map_err(|_| TesseractError::MutexLockError)?;
    // SAFETY: `*guard` is the pointer stored in this wrapper, assumed to be a live
    // Tesseract result iterator; the guard is held for the call.
    let flag = unsafe { TessResultIteratorSymbolIsSuperscript(*guard) };
    Ok(flag != 0)
}
/// Reports whether Tesseract flags the current symbol as subscript.
///
/// # Returns
///
/// Returns `true` when `TessResultIteratorSymbolIsSubscript` returns a non-zero value.
///
/// # Errors
///
/// Returns `TesseractError::MutexLockError` if the handle mutex cannot be acquired.
pub fn symbol_is_subscript(&self) -> Result<bool> {
    let guard = self.handle.lock().map_err(|_| TesseractError::MutexLockError)?;
    // SAFETY: `*guard` is the pointer stored in this wrapper, assumed to be a live
    // Tesseract result iterator; the guard is held for the call.
    let flag = unsafe { TessResultIteratorSymbolIsSubscript(*guard) };
    Ok(flag != 0)
}
/// Reports whether Tesseract flags the current symbol as a drop cap.
///
/// # Returns
///
/// Returns `true` when `TessResultIteratorSymbolIsDropcap` returns a non-zero value.
///
/// # Errors
///
/// Returns `TesseractError::MutexLockError` if the handle mutex cannot be acquired.
pub fn symbol_is_dropcap(&self) -> Result<bool> {
    let guard = self.handle.lock().map_err(|_| TesseractError::MutexLockError)?;
    // SAFETY: `*guard` is the pointer stored in this wrapper, assumed to be a live
    // Tesseract result iterator; the guard is held for the call.
    let flag = unsafe { TessResultIteratorSymbolIsDropcap(*guard) };
    Ok(flag != 0)
}
/// Advances the iterator to the next element at the given level.
///
/// # Arguments
///
/// * `level` - Iterator level at which to advance.
///
/// # Returns
///
/// Returns `true` when `TessResultIteratorNext` returns a non-zero value, `false` otherwise.
///
/// # Errors
///
/// Returns `TesseractError::MutexLockError` if the handle mutex cannot be acquired.
pub fn next(&self, level: TessPageIteratorLevel) -> Result<bool> {
    let guard = self.handle.lock().map_err(|_| TesseractError::MutexLockError)?;
    let raw_level = level as c_int;
    // SAFETY: `*guard` is the pointer stored in this wrapper, assumed to be a live
    // Tesseract result iterator; the guard is held while the iterator position changes.
    let advanced = unsafe { TessResultIteratorNext(*guard, raw_level) };
    Ok(advanced != 0)
}
/// Gets the current word from the iterator with its bounding box and confidence.
///
/// # Returns
///
/// Returns a tuple of (text, left, top, right, bottom, confidence) if successful
pub fn get_word_with_bounds(&self) -> Result<(String, i32, i32, i32, i32, f32)> {
let text = self.get_utf8_text(TessPageIteratorLevel::RIL_WORD)?;
let (left, top, right, bottom) = self.get_bounding_box(TessPageIteratorLevel::RIL_WORD)?;
let confidence = self.confidence(TessPageIteratorLevel::RIL_WORD)?;
Ok((text, left, top, right, bottom, confidence))
}
/// Moves the iterator forward by one word.
///
/// # Returns
///
/// Returns whatever `next` returns for `RIL_WORD`: `Ok(true)` if another word exists,
/// `Ok(false)` if there are no more words.
pub fn next_word(&self) -> Result<bool> {
    let word_level = TessPageIteratorLevel::RIL_WORD;
    self.next(word_level)
}
/// Gets the word information for the current position in the iterator.
/// Should be called before next() to ensure valid data.
///
/// # Returns
/// Returns a tuple of (text, left, top, right, bottom, confidence) if successful
pub fn get_current_word(&self) -> Result<(String, i32, i32, i32, i32, f32)> {
let text = self.get_utf8_text(TessPageIteratorLevel::RIL_WORD)?;
let (left, top, right, bottom) = self.get_bounding_box(TessPageIteratorLevel::RIL_WORD)?;
let confidence = self.confidence(TessPageIteratorLevel::RIL_WORD)?;
Ok((text, left, top, right, bottom, confidence))
}
/// Gets the bounding box of the current element at the given level.
///
/// # Arguments
///
/// * `level` - Iterator level whose bounding box is requested.
///
/// # Returns
///
/// Returns `(left, top, right, bottom)` as written by `TessPageIteratorBoundingBox`.
///
/// # Errors
///
/// Returns `TesseractError::MutexLockError` if the handle mutex cannot be acquired, or
/// `TesseractError::InvalidParameterError` if the bounding-box call returns 0.
pub fn get_bounding_box(&self, level: TessPageIteratorLevel) -> Result<(i32, i32, i32, i32)> {
    let (mut x_min, mut y_min, mut x_max, mut y_max): (c_int, c_int, c_int, c_int) = (0, 0, 0, 0);
    let guard = self.handle.lock().map_err(|_| TesseractError::MutexLockError)?;
    // SAFETY: `*guard` is the pointer stored in this wrapper, assumed to be a live
    // Tesseract iterator; the guard is held for the call. The four out-pointers reference
    // distinct locals that outlive the call.
    let status = unsafe {
        TessPageIteratorBoundingBox(*guard, level as c_int, &mut x_min, &mut y_min, &mut x_max, &mut y_max)
    };
    if status == 0 {
        return Err(TesseractError::InvalidParameterError);
    }
    Ok((x_min, y_min, x_max, y_max))
}
/// Collects the data of every word reachable from the iterator under one lock.
///
/// The mutex is taken once, the iterator is rewound with `TessPageIteratorBegin`, and
/// `extract_word_data_unlocked` is called at each position until
/// `TessResultIteratorNext` returns 0 for `RIL_WORD`. Rewinding first means earlier
/// partial consumption of the iterator does not cause words to be missed.
///
/// Positions that fail with `NullPointerError`, `InvalidParameterError` or `Utf8Error`
/// are skipped so that the remaining words are still returned.
///
/// # Returns
///
/// Returns a `Vec<WordData>` with one entry per word that could be extracted.
///
/// # Errors
///
/// Returns `TesseractError::MutexLockError` if the handle mutex cannot be acquired, or
/// any other error produced while extracting a word.
pub fn extract_all_words(&self) -> Result<Vec<WordData>> {
    let guard = self.handle.lock().map_err(|_| TesseractError::MutexLockError)?;
    let iter_ptr = *guard;
    let word_level = TessPageIteratorLevel::RIL_WORD as c_int;
    let mut collected = Vec::new();
    // SAFETY: `iter_ptr` is the pointer stored in this wrapper, assumed to be a live
    // Tesseract result iterator. `guard` stays alive until the function returns, so the
    // whole traversal runs with the mutex held.
    unsafe { TessPageIteratorBegin(iter_ptr) };
    let mut more = true;
    while more {
        // The helper expects the caller to hold the lock, which `guard` provides.
        match extract_word_data_unlocked(iter_ptr) {
            Ok(data) => collected.push(data),
            // Missing text, a failed bounding box or invalid UTF-8 only affect this
            // position; move on instead of aborting the traversal.
            Err(TesseractError::NullPointerError)
            | Err(TesseractError::InvalidParameterError)
            | Err(TesseractError::Utf8Error(_)) => {}
            Err(other) => return Err(other),
        }
        // SAFETY: same guarded pointer; advances the iterator in place.
        more = unsafe { TessResultIteratorNext(iter_ptr, word_level) } != 0;
    }
    Ok(collected)
}
/// Reads all data of the current word under a single mutex acquisition.
///
/// Locks the handle once and delegates to `extract_word_data_unlocked`, which gathers
/// text, bounding box, confidence and font attributes while the lock is held.
///
/// # Returns
///
/// Returns a [`WordData`] for the current word.
///
/// # Errors
///
/// Returns `TesseractError::MutexLockError` if the handle mutex cannot be acquired, or
/// any error reported by `extract_word_data_unlocked`.
pub fn extract_word_data(&self) -> Result<WordData> {
    self.handle
        .lock()
        .map_err(|_| TesseractError::MutexLockError)
        .and_then(|guard| extract_word_data_unlocked(*guard))
}
}
/// Extracts word data from a raw iterator handle without acquiring the mutex.
///
/// Reads, at `RIL_WORD` level and in this order: the UTF-8 text, the bounding box, the
/// confidence and the font attributes. A failing font-attribute call is not an error; it
/// yields `font_attrs: None`.
///
/// The caller MUST hold the mutex lock for the `ResultIterator` this handle belongs to
/// before calling this function. Passing a handle that is not mutex-guarded, or calling
/// this function concurrently on the same handle, is undefined behaviour.
///
/// # Errors
///
/// Returns `TesseractError::NullPointerError` if Tesseract returns no text, the UTF-8
/// conversion error if the text is not valid UTF-8, or
/// `TesseractError::InvalidParameterError` if the bounding-box query returns 0. The text
/// buffer returned by Tesseract is released on every path.
fn extract_word_data_unlocked(raw: *mut c_void) -> Result<WordData> {
    let word_level = TessPageIteratorLevel::RIL_WORD as c_int;
    // SAFETY: `raw` is assumed to be a live Tesseract result iterator whose mutex the
    // caller holds. The returned pointer is either null or a C string that must be
    // released with `TessDeleteText`.
    let text_ptr = unsafe { TessResultIteratorGetUTF8Text(raw, word_level) };
    if text_ptr.is_null() {
        return Err(TesseractError::NullPointerError);
    }
    // Decode into an owned `Result` first and propagate the error only after the buffer
    // has been freed; using `?` directly on `to_str()` would return early and leak it,
    // once per invalid word when called from `extract_all_words`.
    // SAFETY: `text_ptr` is non-null and assumed to point at a NUL-terminated string
    // that stays valid until `TessDeleteText` is called below.
    let decoded = unsafe { CStr::from_ptr(text_ptr) }.to_str().map(str::to_owned);
    // SAFETY: `text_ptr` came from `TessResultIteratorGetUTF8Text`, is freed exactly once
    // here, and is not accessed afterwards.
    unsafe { TessDeleteText(text_ptr as *mut c_char) };
    let text = decoded?;

    let mut left: c_int = 0;
    let mut top: c_int = 0;
    let mut right: c_int = 0;
    let mut bottom: c_int = 0;
    // SAFETY: same handle, caller holds the lock; the four out-pointers reference
    // distinct locals that outlive the call.
    let bbox_result =
        unsafe { TessPageIteratorBoundingBox(raw, word_level, &mut left, &mut top, &mut right, &mut bottom) };
    if bbox_result == 0 {
        return Err(TesseractError::InvalidParameterError);
    }

    // SAFETY: same handle, caller holds the lock; the call only returns a float.
    let confidence = unsafe { TessResultIteratorConfidence(raw, word_level) };

    // Collect font attributes; treat any failure as absent rather than propagating it.
    let font_attrs = {
        let mut is_bold: c_int = 0;
        let mut is_italic: c_int = 0;
        let mut is_underlined: c_int = 0;
        let mut is_monospace: c_int = 0;
        let mut is_serif: c_int = 0;
        let mut is_smallcaps: c_int = 0;
        let mut pointsize: c_int = 0;
        let mut font_id: c_int = 0;
        // SAFETY: same handle, caller holds the lock; the eight out-pointers reference
        // distinct locals that outlive the call.
        let result = unsafe {
            TessResultIteratorWordFontAttributes(
                raw,
                &mut is_bold,
                &mut is_italic,
                &mut is_underlined,
                &mut is_monospace,
                &mut is_serif,
                &mut is_smallcaps,
                &mut pointsize,
                &mut font_id,
            )
        };
        if result != 0 {
            Some(FontAttributes {
                is_bold: is_bold != 0,
                is_italic: is_italic != 0,
                is_underlined: is_underlined != 0,
                is_monospace: is_monospace != 0,
                is_serif: is_serif != 0,
                is_smallcaps: is_smallcaps != 0,
                pointsize,
                font_id,
            })
        } else {
            None
        }
    };

    Ok(WordData {
        text,
        left,
        top,
        right,
        bottom,
        confidence,
        font_attrs,
    })
}
/// Releases the native result iterator when the wrapper goes away.
impl Drop for ResultIterator {
    fn drop(&mut self) {
        // A poisoned mutex still holds the pointer, so recover the guard rather than
        // skipping the delete and leaking the native iterator. This never panics.
        let handle = self.handle.lock().unwrap_or_else(|poisoned| poisoned.into_inner());
        // SAFETY: `*handle` is the pointer stored in this wrapper and is handed to
        // `TessResultIteratorDelete` once, here; the guard is held for the call. The
        // pointer must not be used after this point.
        unsafe { TessResultIteratorDelete(*handle) };
    }
}
// Raw declarations of the Tesseract C API functions used by the result iterator wrapper.
// Every `handle` is an opaque pointer that is passed to Tesseract unchanged.
// The declarations are only compiled when one of the two build features is enabled.
#[cfg(any(feature = "build-tesseract", feature = "build-tesseract-wasm"))]
ffi_extern! {
// Frees the iterator; called from `Drop for ResultIterator`.
pub fn TessResultIteratorDelete(handle: *mut c_void);
// Rewinds the iterator to its first element; also used on result-iterator handles.
pub fn TessPageIteratorBegin(handle: *mut c_void);
// Returns a text buffer (or null) that callers release with `TessDeleteText`.
pub fn TessResultIteratorGetUTF8Text(handle: *mut c_void, level: c_int) -> *mut c_char;
pub fn TessResultIteratorConfidence(handle: *mut c_void, level: c_int) -> c_float;
// Returns a string (or null) that callers copy and do not free.
pub fn TessResultIteratorWordRecognitionLanguage(handle: *mut c_void) -> *const c_char;
// Flag outputs are C `int`s; callers treat a 0 return value as failure.
// NOTE(review): Tesseract's capi.h appears to declare the return type as `const char *`
// (the font name) rather than an int — confirm, then fix declaration and callers together.
pub fn TessResultIteratorWordFontAttributes(
handle: *mut c_void,
is_bold: *mut c_int,
is_italic: *mut c_int,
is_underlined: *mut c_int,
is_monospace: *mut c_int,
is_serif: *mut c_int,
is_smallcaps: *mut c_int,
pointsize: *mut c_int,
font_id: *mut c_int,
) -> c_int;
// The predicates below return an int that callers compare against 0.
pub fn TessResultIteratorWordIsFromDictionary(handle: *mut c_void) -> c_int;
pub fn TessResultIteratorWordIsNumeric(handle: *mut c_void) -> c_int;
pub fn TessResultIteratorSymbolIsSuperscript(handle: *mut c_void) -> c_int;
pub fn TessResultIteratorSymbolIsSubscript(handle: *mut c_void) -> c_int;
pub fn TessResultIteratorSymbolIsDropcap(handle: *mut c_void) -> c_int;
// Advances the iterator at `level`; callers treat 0 as "no further element".
pub fn TessResultIteratorNext(handle: *mut c_void, level: c_int) -> c_int;
// Writes the bounding box of the current element at `level`; callers treat 0 as failure.
pub fn TessPageIteratorBoundingBox(
handle: *mut c_void,
level: c_int,
left: *mut c_int,
top: *mut c_int,
right: *mut c_int,
bottom: *mut c_int,
) -> c_int;
}

View File

@@ -0,0 +1,212 @@
use crate::TesseractAPI;
use crate::error::{Result, TesseractError};
use std::ffi::{CStr, CString};
use std::os::raw::{c_char, c_int, c_void};
use std::sync::Arc;
use std::sync::Mutex;
/// Wrapper around a raw Tesseract result-renderer pointer.
///
/// Instances are created by the `new_*_renderer` constructors, which reject a null
/// pointer; methods lock the mutex before passing the pointer to Tesseract.
pub struct TessResultRenderer {
/// Mutex-guarded raw renderer pointer.
handle: Arc<Mutex<*mut c_void>>,
}
// NOTE(review): these impls assert that the raw pointer may be sent to and shared between
// threads. The wrapper serialises its own FFI calls through the mutex; whether the
// underlying Tesseract renderer is otherwise safe to use from several threads is not
// shown here — confirm.
unsafe impl Send for TessResultRenderer {}
unsafe impl Sync for TessResultRenderer {}
impl TessResultRenderer {
/// Creates a renderer through `TessTextRendererCreate`.
///
/// # Arguments
///
/// * `outputbase` - Output base path, passed to Tesseract as a C string.
///
/// # Returns
///
/// Returns the new `TessResultRenderer`.
///
/// # Errors
///
/// Returns `TesseractError::NullByteInString` if `outputbase` contains a NUL byte, or
/// `TesseractError::NullPointerError` if Tesseract returns a null renderer.
pub fn new_text_renderer(outputbase: &str) -> Result<Self> {
    let base = CString::new(outputbase).map_err(|_| TesseractError::NullByteInString)?;
    // SAFETY: `base` is a valid NUL-terminated string that outlives the call.
    let raw = unsafe { TessTextRendererCreate(base.as_ptr()) };
    if raw.is_null() {
        return Err(TesseractError::NullPointerError);
    }
    Ok(TessResultRenderer {
        handle: Arc::new(Mutex::new(raw)),
    })
}
/// Creates a renderer through `TessHOcrRendererCreate` (hOCR output).
///
/// # Arguments
///
/// * `outputbase` - Output base path, passed to Tesseract as a C string.
///
/// # Returns
///
/// Returns the new `TessResultRenderer`.
///
/// # Errors
///
/// Returns `TesseractError::NullByteInString` if `outputbase` contains a NUL byte, or
/// `TesseractError::NullPointerError` if Tesseract returns a null renderer.
pub fn new_hocr_renderer(outputbase: &str) -> Result<Self> {
    let base = CString::new(outputbase).map_err(|_| TesseractError::NullByteInString)?;
    // SAFETY: `base` is a valid NUL-terminated string that outlives the call.
    let raw = unsafe { TessHOcrRendererCreate(base.as_ptr()) };
    if raw.is_null() {
        return Err(TesseractError::NullPointerError);
    }
    Ok(TessResultRenderer {
        handle: Arc::new(Mutex::new(raw)),
    })
}
/// Creates a renderer through `TessPDFRendererCreate` (PDF output).
///
/// # Arguments
///
/// * `outputbase` - Output base path, passed to Tesseract as a C string.
/// * `datadir` - Data directory path, passed to Tesseract as a C string.
/// * `textonly` - Text-only flag, passed to Tesseract as a C `int` (`true` becomes 1).
///
/// # Returns
///
/// Returns the new `TessResultRenderer`.
///
/// # Errors
///
/// Returns `TesseractError::NullByteInString` if either path contains a NUL byte, or
/// `TesseractError::NullPointerError` if Tesseract returns a null renderer.
pub fn new_pdf_renderer(outputbase: &str, datadir: &str, textonly: bool) -> Result<Self> {
    let base = CString::new(outputbase).map_err(|_| TesseractError::NullByteInString)?;
    let data_path = CString::new(datadir).map_err(|_| TesseractError::NullByteInString)?;
    let text_only_flag = textonly as c_int;
    // SAFETY: both C strings are valid, NUL-terminated and outlive the call.
    let raw = unsafe { TessPDFRendererCreate(base.as_ptr(), data_path.as_ptr(), text_only_flag) };
    if raw.is_null() {
        return Err(TesseractError::NullPointerError);
    }
    Ok(TessResultRenderer {
        handle: Arc::new(Mutex::new(raw)),
    })
}
/// Starts a new document on the renderer.
///
/// # Arguments
///
/// * `title` - Title of the document, passed to Tesseract as a C string.
///
/// # Returns
///
/// Returns `true` when `TessResultRendererBeginDocument` returns a non-zero value,
/// `false` otherwise.
///
/// # Errors
///
/// Returns `TesseractError::NullByteInString` if `title` contains a NUL byte, or
/// `TesseractError::MutexLockError` if the handle mutex cannot be acquired.
pub fn begin_document(&self, title: &str) -> Result<bool> {
    let c_title = CString::new(title).map_err(|_| TesseractError::NullByteInString)?;
    let guard = self.handle.lock().map_err(|_| TesseractError::MutexLockError)?;
    // SAFETY: `*guard` is the non-null renderer pointer stored at construction; the guard
    // is held for the call and `c_title` outlives it.
    let status = unsafe { TessResultRendererBeginDocument(*guard, c_title.as_ptr()) };
    Ok(status != 0)
}
/// Adds the image held by the given API instance to the document.
///
/// The API handle is locked first and the renderer handle second; both guards are held
/// during the call.
///
/// # Arguments
///
/// * `api` - The `TesseractAPI` instance whose handle is passed to the renderer.
///
/// # Returns
///
/// Returns `true` when `TessResultRendererAddImage` returns a non-zero value, `false`
/// otherwise.
///
/// # Errors
///
/// Returns `TesseractError::MutexLockError` if either mutex lock fails.
pub fn add_image(&self, api: &TesseractAPI) -> Result<bool> {
    let api_guard = api.handle.lock().map_err(|_| TesseractError::MutexLockError)?;
    let renderer_guard = self.handle.lock().map_err(|_| TesseractError::MutexLockError)?;
    // SAFETY: both pointers come from their mutex-guarded wrappers and both guards are
    // held for the call; the API pointer is assumed to be a live Tesseract handle.
    let status = unsafe { TessResultRendererAddImage(*renderer_guard, *api_guard) };
    Ok(status != 0)
}
/// Finishes the document currently being written by this renderer.
///
/// # Returns
///
/// `true` when the native call returns a non-zero status, `false` when it returns zero.
///
/// # Errors
///
/// Returns `TesseractError::MutexLockError` if the handle mutex cannot be locked.
pub fn end_document(&self) -> Result<bool> {
    match self.handle.lock() {
        Ok(renderer) => {
            // SAFETY: `*renderer` is the pointer stored in this wrapper, read under its lock.
            let status = unsafe { TessResultRendererEndDocument(*renderer) };
            Ok(status != 0)
        }
        Err(_) => Err(TesseractError::MutexLockError),
    }
}
/// Reads the file extension reported by the renderer and copies it into a `String`.
///
/// # Returns
///
/// The extension as an owned `String`.
///
/// # Errors
///
/// Returns `TesseractError::MutexLockError` if the handle mutex cannot be locked,
/// `TesseractError::NullPointerError` if the native call returns a null pointer,
/// or `TesseractError::Utf8Error` if the returned bytes are not valid UTF-8.
pub fn get_extension(&self) -> Result<String> {
    let renderer = self.handle.lock().map_err(|_| TesseractError::MutexLockError)?;

    // The symbol really is spelled "Extention" in the FFI declaration.
    let raw = unsafe { TessResultRendererExtention(*renderer) };
    if raw.is_null() {
        return Err(TesseractError::NullPointerError);
    }

    // The string is copied while the lock is still held; the native pointer is not freed here.
    // SAFETY: `raw` was checked for null just above; it is assumed to point at a
    // NUL-terminated string for as long as the renderer lock is held.
    let extension = unsafe { CStr::from_ptr(raw) }.to_str()?;
    Ok(extension.to_owned())
}
/// Reads the document title reported by the renderer and copies it into a `String`.
///
/// # Returns
///
/// The title as an owned `String`.
///
/// # Errors
///
/// Returns `TesseractError::MutexLockError` if the handle mutex cannot be locked,
/// `TesseractError::NullPointerError` if the native call returns a null pointer,
/// or `TesseractError::Utf8Error` if the returned bytes are not valid UTF-8.
pub fn get_title(&self) -> Result<String> {
    let renderer = self.handle.lock().map_err(|_| TesseractError::MutexLockError)?;

    let raw = unsafe { TessResultRendererTitle(*renderer) };
    if raw.is_null() {
        return Err(TesseractError::NullPointerError);
    }

    // The string is copied while the lock is still held; the native pointer is not freed here.
    // SAFETY: `raw` was checked for null just above; it is assumed to point at a
    // NUL-terminated string for as long as the renderer lock is held.
    let title = unsafe { CStr::from_ptr(raw) }.to_str()?;
    Ok(title.to_owned())
}
/// Queries the renderer for its image number.
///
/// # Returns
///
/// The value returned by the native call, as an `i32`.
///
/// # Errors
///
/// Returns `TesseractError::MutexLockError` if the handle mutex cannot be locked.
pub fn get_image_num(&self) -> Result<i32> {
    self.handle
        .lock()
        // SAFETY: `*renderer` is the pointer stored in this wrapper, read under its lock.
        .map(|renderer| unsafe { TessResultRendererImageNum(*renderer) })
        .map_err(|_| TesseractError::MutexLockError)
}
}
impl Drop for TessResultRenderer {
    /// Deletes the native renderer owned by this wrapper.
    ///
    /// `Drop` cannot report errors, so a poisoned mutex is recovered instead of
    /// skipped: the stored pointer is still the one handed out by the constructor,
    /// and skipping the delete would leak the native renderer. A null handle is
    /// left alone, and the stored pointer is nulled after deletion so the native
    /// object cannot be deleted twice through the same slot.
    fn drop(&mut self) {
        let mut handle = match self.handle.lock() {
            Ok(guard) => guard,
            // Poisoning only means another thread panicked while holding the lock;
            // take the guard anyway so the native resource is still released.
            Err(poisoned) => poisoned.into_inner(),
        };

        if !(*handle).is_null() {
            // SAFETY: the pointer is non-null and is the value stored in this wrapper;
            // it is cleared right after so it is never passed to the deleter again.
            unsafe { TessDeleteResultRenderer(*handle) };
            *handle = std::ptr::null_mut();
        }
    }
}
// Foreign declarations for the Tesseract result-renderer functions used by
// `TessResultRenderer`. Renderer and API objects cross the boundary as opaque
// `*mut c_void` handles. Plain `//` comments are used here because this is the
// body of the project's `ffi_extern!` macro (defined elsewhere).
ffi_extern! {
// Constructors: take NUL-terminated C strings and return an opaque renderer pointer.
// The wrapper constructors visible in this file treat a null return as an error.
pub fn TessTextRendererCreate(outputbase: *const c_char) -> *mut c_void;
pub fn TessHOcrRendererCreate(outputbase: *const c_char) -> *mut c_void;
pub fn TessPDFRendererCreate(outputbase: *const c_char, datadir: *const c_char, textonly: c_int) -> *mut c_void;
// Destructor: invoked from `Drop for TessResultRenderer`.
pub fn TessDeleteResultRenderer(renderer: *mut c_void);
// Document lifecycle: the wrappers map a non-zero return to `true`.
pub fn TessResultRendererBeginDocument(renderer: *mut c_void, title: *const c_char) -> c_int;
pub fn TessResultRendererAddImage(renderer: *mut c_void, api: *mut c_void) -> c_int;
pub fn TessResultRendererEndDocument(renderer: *mut c_void) -> c_int;
// Accessors: the wrappers copy the returned strings and never free them.
// NOTE(review): presumably the strings stay owned by the renderer; confirm against capi.h.
// "Extention" (sic) is the spelling of the symbol in Tesseract's C API; do not "fix" it.
pub fn TessResultRendererExtention(renderer: *mut c_void) -> *const c_char;
pub fn TessResultRendererTitle(renderer: *mut c_void) -> *const c_char;
pub fn TessResultRendererImageNum(renderer: *mut c_void) -> c_int;
}