//! Diagnostic test to trace PaddleOCR detection pipeline. //! //! This test isolates each step to determine where empty results originate. //! Since this crate doesn't have PNG/image decoder features, we create test //! images programmatically. use std::path::PathBuf; fn get_workspace_root() -> PathBuf { let manifest_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")); manifest_dir.parent().unwrap().parent().unwrap().to_path_buf() } fn get_model_dir() -> PathBuf { get_workspace_root().join(".kreuzberg/paddle-ocr") } /// Create a simple test image with black text "HELLO" on white background. /// This avoids needing PNG decoder features. fn create_test_image() -> image::RgbImage { let width = 200u32; let height = 100u32; let mut img = image::RgbImage::from_pixel(width, height, image::Rgb([255, 255, 255])); // Draw a thick black rectangle to simulate text (a simple "block" pattern) // This ensures the detection model has SOMETHING to detect let black = image::Rgb([0, 0, 0]); // Draw "H" shape (x: 20-60, y: 20-80) for y in 20..80 { img.put_pixel(20, y, black); img.put_pixel(21, y, black); img.put_pixel(22, y, black); } for y in 20..80 { img.put_pixel(55, y, black); img.put_pixel(56, y, black); img.put_pixel(57, y, black); } for x in 20..58 { img.put_pixel(x, 48, black); img.put_pixel(x, 49, black); img.put_pixel(x, 50, black); } // Draw thick solid block to be very obvious (x: 80-180, y: 30-70) for y in 30..70 { for x in 80..180 { img.put_pixel(x, y, black); } } img } #[test] fn diagnostic_detection_pipeline() { let model_dir = get_model_dir(); if !model_dir.join("det/model.onnx").exists() { eprintln!("SKIP: Models not downloaded at {:?}", model_dir); return; } // Discover ORT library discover_ort(); eprintln!("=== PaddleOCR Diagnostic Test ==="); eprintln!("Model dir: {:?}", model_dir); // Step 1: Create test image let img = create_test_image(); eprintln!("Step 1 - Test image created: {}x{}", img.width(), img.height()); // Step 2: Initialize OcrLite let mut ocr_lite = kreuzberg_paddle_ocr::OcrLite::new(); let det_path = model_dir.join("det/model.onnx"); let cls_path = model_dir.join("cls/model.onnx"); let rec_path = model_dir.join("rec/model.onnx"); let init_result = ocr_lite.init_models( det_path.to_str().unwrap(), cls_path.to_str().unwrap(), rec_path.to_str().unwrap(), 1, ); match &init_result { Ok(()) => eprintln!("Step 2 - Models initialized successfully"), Err(e) => { eprintln!("Step 2 - FAILED to init models: {:?}", e); panic!("Model initialization failed: {:?}", e); } } // Step 3: Run detection with various parameter sets let test_cases = vec![ ("A: Default params", 50u32, 960u32, 0.3f32, 0.5f32, 1.6f32, true, false), ("B: Very low thresholds", 50, 960, 0.01, 0.01, 1.6, false, false), ("C: No padding + low", 0, 960, 0.01, 0.01, 1.6, false, false), ("D: Higher unclip ratio", 50, 960, 0.1, 0.1, 3.0, false, false), ("E: No padding + medium", 0, 960, 0.1, 0.3, 2.0, false, false), ]; let mut any_detected = false; for (name, padding, max_side, box_score, box_thresh, unclip, do_angle, most_angle) in &test_cases { eprintln!("\n--- Test {} ---", name); eprintln!( " padding={}, max_side={}, box_score={}, box_thresh={}, unclip={}", padding, max_side, box_score, box_thresh, unclip ); let result = ocr_lite.detect( &img, *padding, *max_side, *box_score, *box_thresh, *unclip, *do_angle, *most_angle, ); match &result { Ok(ocr_result) => { eprintln!(" Result: {} text blocks", ocr_result.text_blocks.len()); for (i, block) in ocr_result.text_blocks.iter().enumerate() { eprintln!( " Block {}: text='{}', text_score={:.3}, box_score={:.3}", i, block.text, block.text_score, block.box_score ); any_detected = true; } } Err(e) => { eprintln!(" FAILED: {:?}", e); } } } eprintln!("\n=== Diagnosis ==="); if !any_detected { eprintln!("RESULT: Detection model produces NO output regardless of thresholds."); eprintln!("This strongly suggests an ORT version compatibility issue."); eprintln!(" ort crate version: check Cargo.lock for current version"); eprintln!(" ORT_DYLIB_PATH: {:?}", std::env::var("ORT_DYLIB_PATH")); } else { eprintln!("RESULT: Detection works. Issue may be threshold-related or image-specific."); } } /// Also test with raw ONNX inference to check if ORT works at all. #[test] fn diagnostic_raw_ort_inference() { let model_dir = get_model_dir(); let det_model = model_dir.join("det/model.onnx"); if !det_model.exists() { eprintln!("SKIP: Detection model not found at {:?}", det_model); return; } discover_ort(); eprintln!("=== Raw ORT Inference Test ==="); // Load model directly via ort use ort::session::Session; let mut session = Session::builder().unwrap().commit_from_file(&det_model).unwrap(); eprintln!("Model loaded successfully"); eprintln!("Inputs:"); for input in session.inputs() { eprintln!(" name='{}'", input.name()); } eprintln!("Outputs:"); for output in session.outputs() { eprintln!(" name='{}'", output.name()); } // Create a small 32x32 test tensor (NCHW format: batch=1, channels=3, h=32, w=32) let input_data: Vec = vec![0.5; 3 * 32 * 32]; let tensor = ort::value::Tensor::from_array(ndarray::Array::from_shape_vec((1, 3, 32, 32), input_data).unwrap()).unwrap(); let input_name = session.inputs()[0].name().to_string(); eprintln!("\nRunning inference with 32x32 gray image..."); let outputs = session.run(ort::inputs![input_name => tensor]).unwrap(); // Check output let (output_name, output_value) = outputs.iter().next().unwrap(); eprintln!("Output name: {}", output_name); let output_tensor = output_value.try_extract_tensor::().unwrap(); let output_shape = output_tensor.0; let output_data = output_tensor.1; eprintln!("Output shape: {:?}", output_shape); eprintln!("Output len: {}", output_data.len()); if !output_data.is_empty() { let min = output_data.iter().cloned().fold(f32::INFINITY, f32::min); let max = output_data.iter().cloned().fold(f32::NEG_INFINITY, f32::max); let sum: f32 = output_data.iter().sum(); let mean = sum / output_data.len() as f32; let non_zero = output_data.iter().filter(|&&v| v > 0.001).count(); eprintln!("Output stats: min={:.6}, max={:.6}, mean={:.6}", min, max, mean); eprintln!("Non-zero values (>0.001): {} / {}", non_zero, output_data.len()); if max < 0.001 { eprintln!("\nDIAGNOSIS: Model outputs are essentially all zeros."); eprintln!("This confirms an ORT compatibility issue - model isn't executing correctly."); } else { eprintln!("\nDIAGNOSIS: Model produces non-zero output. ORT is working."); } } } /// Diagnostic: test the CRNN recognition model directly. #[test] fn diagnostic_crnn_model_output() { let model_dir = get_model_dir(); let rec_model = model_dir.join("rec/model.onnx"); if !rec_model.exists() { eprintln!("SKIP: Recognition model not found"); return; } discover_ort(); eprintln!("=== CRNN Recognition Model Diagnostic ==="); use ort::session::Session; let mut session = Session::builder().unwrap().commit_from_file(&rec_model).unwrap(); eprintln!("Model loaded successfully"); eprintln!("Inputs:"); for input in session.inputs() { eprintln!(" name='{}'", input.name()); } eprintln!("Outputs:"); for output in session.outputs() { eprintln!(" name='{}'", output.name()); } // Check metadata for character list { let metadata = session.metadata().unwrap(); // Check all metadata custom keys eprintln!("Model metadata:"); eprintln!(" description: {:?}", metadata.description()); eprintln!(" producer: {:?}", metadata.producer()); // Try to get the character key match metadata.custom("character") { Some(chars) => { let bytes = chars.as_bytes(); let char_count = chars.split('\n').count(); eprintln!( " custom('character'): len={}, bytes={}, split_count={}", chars.len(), bytes.len(), char_count ); if chars.len() < 500 { eprintln!(" value: {:?}", chars); } else { let preview: String = chars.chars().take(100).collect(); eprintln!(" preview (first 100 chars): {:?}", preview); } // Check for null bytes or other encoding issues let null_count = bytes.iter().filter(|&&b| b == 0).count(); if null_count > 0 { eprintln!(" WARNING: {} null bytes found in character string!", null_count); } } None => { eprintln!(" ERROR: No 'character' key in model metadata!"); } } // Try other possible metadata keys for key in [ "character", "characters", "dict", "dictionary", "labels", "vocab", "alphabet", ] { if let Some(val) = metadata.custom(key) { eprintln!( " custom('{}'): len={}, preview={:?}", key, val.len(), &val[..val.len().min(80)] ); } } } // metadata dropped here // Test 1: Run inference with a simple input (height=48, width=200) // CRNN expects NCHW: [1, 3, 48, width] let h = 48usize; let w = 200usize; // Create a pattern that looks like text (alternating black/white vertical stripes) let mut input_data: Vec = vec![0.0; 3 * h * w]; for c in 0..3 { for y in 10..38 { for x in (20..180).step_by(2) { input_data[c * h * w + y * w + x] = -1.0; // normalized black } } } let tensor = ort::value::Tensor::from_array(ndarray::Array::from_shape_vec((1, 3, h, w), input_data).unwrap()).unwrap(); let input_name = session.inputs()[0].name().to_string(); eprintln!("\nRunning CRNN with striped pattern (48x200)..."); let outputs = session.run(ort::inputs![input_name => tensor]).unwrap(); let (_, output_value) = outputs.iter().next().unwrap(); let (shape, data) = output_value.try_extract_tensor::().unwrap(); eprintln!("Output shape: {:?}", shape); eprintln!("Output total values: {}", data.len()); if shape.len() >= 3 { let time_steps = shape[1] as usize; let vocab_size = shape[2] as usize; eprintln!("Time steps: {}, Vocabulary size: {}", time_steps, vocab_size); // Check if outputs are meaningful let data_vec: Vec = data.to_vec(); let min = data_vec.iter().cloned().fold(f32::INFINITY, f32::min); let max = data_vec.iter().cloned().fold(f32::NEG_INFINITY, f32::max); let mean: f32 = data_vec.iter().sum::() / data_vec.len() as f32; eprintln!("Overall stats: min={:.6}, max={:.6}, mean={:.6}", min, max, mean); // Check argmax distribution let mut argmax_zero_count = 0; let mut argmax_nonzero_count = 0; for t in 0..time_steps { let start = t * vocab_size; let end = start + vocab_size; let slice = &data_vec[start..end.min(data_vec.len())]; let (max_idx, max_val) = slice.iter().enumerate().fold( (0, f32::MIN), |(mi, mv), (i, &v)| if v > mv { (i, v) } else { (mi, mv) }, ); if max_idx == 0 { argmax_zero_count += 1; } else { argmax_nonzero_count += 1; } if t < 5 || (t > time_steps - 3) { eprintln!(" Step {}: argmax={}, max_val={:.4}", t, max_idx, max_val); } else if t == 5 { eprintln!(" ... (skipping middle steps)"); } } eprintln!( "\nArgmax distribution: {} blank (idx=0), {} non-blank", argmax_zero_count, argmax_nonzero_count ); if argmax_nonzero_count == 0 { eprintln!("\nDIAGNOSIS: CRNN model outputs all blanks."); eprintln!("Possible causes:"); eprintln!(" 1. ORT version incompatibility with CRNN model"); eprintln!(" 2. Model is not executing graph correctly"); eprintln!(" 3. Input normalization mismatch"); } else { eprintln!("\nDIAGNOSIS: CRNN model produces non-blank output. Recognition works."); } } // Drop outputs before reusing session drop(outputs); // Test 2: Run with a uniform white image (should produce all blanks - valid baseline) let white_data: Vec = vec![1.0; 3 * h * w]; let white_tensor = ort::value::Tensor::from_array(ndarray::Array::from_shape_vec((1, 3, h, w), white_data).unwrap()).unwrap(); let input_name2 = session.inputs()[0].name().to_string(); eprintln!("\nRunning CRNN with uniform white (48x200)..."); let white_outputs = session.run(ort::inputs![input_name2 => white_tensor]).unwrap(); let (_, white_val) = white_outputs.iter().next().unwrap(); let (_, white_data_out) = white_val.try_extract_tensor::().unwrap(); let white_vec: Vec = white_data_out.to_vec(); let white_max = white_vec.iter().cloned().fold(f32::NEG_INFINITY, f32::max); let white_min = white_vec.iter().cloned().fold(f32::INFINITY, f32::min); eprintln!("White image output: min={:.6}, max={:.6}", white_min, white_max); } fn discover_ort() { if let Ok(path) = std::env::var("ORT_DYLIB_PATH") && std::path::Path::new(&path).exists() { eprintln!("ORT found via ORT_DYLIB_PATH: {}", path); return; } let candidates = [ "/opt/homebrew/lib/libonnxruntime.dylib", "/usr/local/lib/libonnxruntime.dylib", ]; for candidate in &candidates { if std::path::Path::new(candidate).exists() { eprintln!("Setting ORT_DYLIB_PATH={}", candidate); unsafe { std::env::set_var("ORT_DYLIB_PATH", candidate) }; return; } } eprintln!("WARNING: Could not find ORT library!"); }