use crate::error::{KreuzbergError, Result}; use crate::types::{ImageDpiConfig as ExtractionConfig, ImagePreprocessingMetadata}; use image::{DynamicImage, ImageBuffer, Rgb}; use super::dpi::calculate_smart_dpi; use super::resize::resize_image; const PDF_POINTS_PER_INCH: f64 = 72.0; /// Result of image normalization #[cfg_attr(alef, alef(skip))] pub struct NormalizeResult { /// Processed RGB image data (height * width * 3 bytes) pub rgb_data: Vec, /// Image dimensions (width, height) pub dimensions: (usize, usize), /// Preprocessing metadata pub metadata: ImagePreprocessingMetadata, } /// Normalize image DPI based on extraction configuration /// /// # Arguments /// * `rgb_data` - RGB image data as a flat `Vec` (height * width * 3 bytes, row-major) /// * `width` - Image width in pixels /// * `height` - Image height in pixels /// * `config` - Extraction configuration containing DPI settings /// * `current_dpi` - Optional current DPI of the image (defaults to 72 if None) /// /// # Returns /// * `NormalizeResult` containing processed image data and metadata pub(crate) fn normalize_image_dpi( rgb_data: &[u8], width: usize, height: usize, config: &ExtractionConfig, current_dpi: Option, ) -> Result { if width > 65536 || height > 65536 { return Err(KreuzbergError::validation(format!( "Image dimensions {}x{} exceed maximum 65536x65536", width, height ))); } let expected_size = height * width * 3; if rgb_data.len() != expected_size { return Err(KreuzbergError::validation(format!( "RGB data size {} does not match expected size {} for {}x{} image", rgb_data.len(), expected_size, width, height ))); } let current_dpi = current_dpi.unwrap_or(PDF_POINTS_PER_INCH); let original_dpi = (current_dpi, current_dpi); let max_memory_mb = 2048.0; let (target_dpi, auto_adjusted, calculated_dpi) = calculate_target_dpi(width as u32, height as u32, current_dpi, config, max_memory_mb); let scale_factor = f64::from(target_dpi) / current_dpi; if !needs_resize(width as u32, height as u32, scale_factor, config) { return Ok(create_skip_result( rgb_data.to_vec(), width, height, original_dpi, config, target_dpi, scale_factor, auto_adjusted, calculated_dpi, )); } let (new_width, new_height, final_scale, dimension_clamped) = calculate_new_dimensions(width as u32, height as u32, scale_factor, config); perform_resize( rgb_data, width as u32, height as u32, new_width, new_height, final_scale, original_dpi, target_dpi, auto_adjusted, dimension_clamped, calculated_dpi, config, ) } /// Calculate target DPI based on configuration fn calculate_target_dpi( width: u32, height: u32, current_dpi: f64, config: &ExtractionConfig, max_memory_mb: f64, ) -> (i32, bool, Option) { if config.auto_adjust_dpi { let approx_width_points = f64::from(width) * PDF_POINTS_PER_INCH / current_dpi; let approx_height_points = f64::from(height) * PDF_POINTS_PER_INCH / current_dpi; let optimal_dpi = calculate_smart_dpi( approx_width_points, approx_height_points, config.target_dpi, config.max_image_dimension, max_memory_mb, ); (optimal_dpi, optimal_dpi != config.target_dpi, Some(optimal_dpi)) } else { (config.target_dpi, false, None) } } /// Check if resize is needed fn needs_resize(width: u32, height: u32, scale_factor: f64, config: &ExtractionConfig) -> bool { let max_dimension = width.max(height); let exceeds_max = i32::try_from(max_dimension).map_or(true, |dim| dim > config.max_image_dimension); (scale_factor - 1.0).abs() >= 0.05 || exceeds_max } /// Calculate new dimensions after scaling #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)] fn calculate_new_dimensions( original_width: u32, original_height: u32, scale_factor: f64, config: &ExtractionConfig, ) -> (u32, u32, f64, bool) { let mut new_width = (f64::from(original_width) * scale_factor).round() as u32; let mut new_height = (f64::from(original_height) * scale_factor).round() as u32; let mut final_scale = scale_factor; let mut dimension_clamped = false; let max_new_dimension = new_width.max(new_height); if let Ok(max_dim_i32) = i32::try_from(max_new_dimension) && max_dim_i32 > config.max_image_dimension { let dimension_scale = f64::from(config.max_image_dimension) / f64::from(max_new_dimension); new_width = (f64::from(new_width) * dimension_scale).round() as u32; new_height = (f64::from(new_height) * dimension_scale).round() as u32; final_scale *= dimension_scale; dimension_clamped = true; } (new_width, new_height, final_scale, dimension_clamped) } /// Create result when resize is skipped #[allow(clippy::too_many_arguments)] fn create_skip_result( rgb_data: Vec, width: usize, height: usize, original_dpi: (f64, f64), config: &ExtractionConfig, target_dpi: i32, scale_factor: f64, auto_adjusted: bool, calculated_dpi: Option, ) -> NormalizeResult { NormalizeResult { rgb_data, dimensions: (width, height), metadata: ImagePreprocessingMetadata { original_dimensions: (width, height), original_dpi, target_dpi: config.target_dpi, scale_factor, auto_adjusted, final_dpi: target_dpi, new_dimensions: None, resample_method: "NONE".to_string(), dimension_clamped: false, calculated_dpi, skipped_resize: true, resize_error: None, }, } } /// Perform the actual resize operation #[allow(clippy::too_many_arguments)] fn perform_resize( rgb_data: &[u8], original_width: u32, original_height: u32, new_width: u32, new_height: u32, final_scale: f64, original_dpi: (f64, f64), target_dpi: i32, auto_adjusted: bool, dimension_clamped: bool, calculated_dpi: Option, config: &ExtractionConfig, ) -> Result { let img_buffer = ImageBuffer::, Vec>::from_raw(original_width, original_height, rgb_data.to_vec()) .ok_or_else(|| { KreuzbergError::parsing(format!( "Failed to create image buffer from {}x{} RGB data", original_width, original_height )) })?; let image = DynamicImage::ImageRgb8(img_buffer); let resized = resize_image(&image, new_width, new_height, final_scale)?; let rgb_image = resized.to_rgb8(); let result_rgb_data = rgb_image.into_raw(); let metadata = ImagePreprocessingMetadata { original_dimensions: (original_width as usize, original_height as usize), original_dpi, target_dpi: config.target_dpi, scale_factor: final_scale, auto_adjusted, final_dpi: target_dpi, new_dimensions: Some((new_width as usize, new_height as usize)), resample_method: if final_scale < 1.0 { "LANCZOS3" } else { "CATMULLROM" }.to_string(), dimension_clamped, calculated_dpi, skipped_resize: false, resize_error: None, }; Ok(NormalizeResult { rgb_data: result_rgb_data, dimensions: (new_width as usize, new_height as usize), metadata, }) } #[cfg(test)] mod tests { use super::*; fn create_test_rgb_data(width: usize, height: usize) -> Vec { let mut data = Vec::with_capacity(width * height * 3); for _ in 0..width * height { data.push(255); data.push(0); data.push(0); } data } #[test] fn test_normalize_image_dpi_skip_resize() { let config = ExtractionConfig { target_dpi: 72, max_image_dimension: 4096, auto_adjust_dpi: false, min_dpi: 72, max_dpi: 600, }; let rgb_data = create_test_rgb_data(100, 100); let result = normalize_image_dpi(&rgb_data, 100, 100, &config, Some(72.0)); assert!(result.is_ok()); let normalized = result.unwrap(); assert_eq!(normalized.dimensions, (100, 100)); assert!(normalized.metadata.skipped_resize); } #[test] fn test_normalize_image_dpi_upscale() { let config = ExtractionConfig { target_dpi: 300, max_image_dimension: 4096, auto_adjust_dpi: false, min_dpi: 72, max_dpi: 600, }; let rgb_data = create_test_rgb_data(100, 100); let result = normalize_image_dpi(&rgb_data, 100, 100, &config, Some(72.0)); assert!(result.is_ok()); let normalized = result.unwrap(); assert!(!normalized.metadata.skipped_resize); assert!(normalized.dimensions.0 > 100); assert!(normalized.dimensions.1 > 100); } #[test] fn test_normalize_image_dpi_downscale() { let config = ExtractionConfig { target_dpi: 72, max_image_dimension: 4096, auto_adjust_dpi: false, min_dpi: 72, max_dpi: 600, }; let rgb_data = create_test_rgb_data(1000, 1000); let result = normalize_image_dpi(&rgb_data, 1000, 1000, &config, Some(300.0)); assert!(result.is_ok()); let normalized = result.unwrap(); assert!(!normalized.metadata.skipped_resize); assert!(normalized.dimensions.0 < 1000); assert!(normalized.dimensions.1 < 1000); } #[test] fn test_normalize_image_dpi_dimension_clamp() { let config = ExtractionConfig { target_dpi: 300, max_image_dimension: 500, auto_adjust_dpi: false, min_dpi: 72, max_dpi: 600, }; let rgb_data = create_test_rgb_data(1000, 1000); let result = normalize_image_dpi(&rgb_data, 1000, 1000, &config, Some(300.0)); assert!(result.is_ok()); let normalized = result.unwrap(); assert!(normalized.metadata.dimension_clamped); assert!(normalized.dimensions.0 <= 500); assert!(normalized.dimensions.1 <= 500); } #[test] fn test_normalize_image_dpi_auto_adjust() { let config = ExtractionConfig { target_dpi: 300, max_image_dimension: 4096, auto_adjust_dpi: true, min_dpi: 72, max_dpi: 600, }; let rgb_data = create_test_rgb_data(100, 100); let result = normalize_image_dpi(&rgb_data, 100, 100, &config, Some(72.0)); assert!(result.is_ok()); let normalized = result.unwrap(); assert!(normalized.metadata.calculated_dpi.is_some()); } #[test] fn test_normalize_image_dpi_invalid_dimensions() { let config = ExtractionConfig::default(); let rgb_data = create_test_rgb_data(100, 100); let result = normalize_image_dpi(&rgb_data, 100000, 100000, &config, None); assert!(result.is_err()); } #[test] fn test_normalize_image_dpi_invalid_data_size() { let config = ExtractionConfig::default(); let rgb_data = vec![0u8; 100]; let result = normalize_image_dpi(&rgb_data, 100, 100, &config, None); assert!(result.is_err()); } #[test] fn test_needs_resize_threshold() { let config = ExtractionConfig { target_dpi: 300, max_image_dimension: 4096, auto_adjust_dpi: false, min_dpi: 72, max_dpi: 600, }; assert!(!needs_resize(100, 100, 1.02, &config)); assert!(needs_resize(100, 100, 1.10, &config)); } #[test] fn test_calculate_new_dimensions_no_clamp() { let config = ExtractionConfig::default(); let (new_w, new_h, scale, clamped) = calculate_new_dimensions(100, 100, 2.0, &config); assert_eq!(new_w, 200); assert_eq!(new_h, 200); assert!((scale - 2.0).abs() < 0.01); assert!(!clamped); } #[test] fn test_calculate_new_dimensions_with_clamp() { let config = ExtractionConfig { target_dpi: 300, max_image_dimension: 100, auto_adjust_dpi: false, min_dpi: 72, max_dpi: 600, }; let (new_w, new_h, _scale, clamped) = calculate_new_dimensions(100, 100, 2.0, &config); assert!(new_w <= 100); assert!(new_h <= 100); assert!(clamped); } }