Nomad changes
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s

This commit is contained in:
Henrik Jess Nielsen
2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions

View File

@@ -0,0 +1,165 @@
/// Number of PDF points in one inch; page sizes given in points are divided
/// by this value to convert them to inches.
const PDF_POINTS_PER_INCH: f64 = 72.0;
/// Pick a rendering DPI for a page, limited by the requested DPI, a memory
/// budget and a maximum pixel dimension.
///
/// # Arguments
/// * `page_width` / `page_height` - page size in PDF points
/// * `target_dpi` - requested DPI; the result is not raised above it, except
///   by the floor of 72 described below
/// * `max_dimension` - largest allowed side, in pixels, of the rendered page
/// * `max_memory_mb` - memory budget in megabytes
///
/// # Returns
/// The smallest of the requested, memory-limited and dimension-limited DPI
/// values, but never less than 72.
#[allow(clippy::cast_possible_truncation)]
pub(crate) fn calculate_smart_dpi(
    page_width: f64,
    page_height: f64,
    target_dpi: i32,
    max_dimension: i32,
    max_memory_mb: f64,
) -> i32 {
    let width_inches = page_width / PDF_POINTS_PER_INCH;
    let height_inches = page_height / PDF_POINTS_PER_INCH;

    // Side length of the largest square bitmap that fits the budget
    // (the division by 3 is presumably 3 bytes per RGB pixel).
    let side_limit = (max_memory_mb * 1024.0 * 1024.0 / 3.0).sqrt().round() as i32;

    // A side that is not positive imposes no memory limit of its own.
    let memory_cap = |inches: f64| -> i32 {
        if inches > 0.0 {
            (f64::from(side_limit) / inches).round() as i32
        } else {
            target_dpi
        }
    };

    let by_memory = memory_cap(width_inches).min(memory_cap(height_inches));
    let by_dimension =
        calculate_dimension_constrained_dpi(width_inches, height_inches, target_dpi, max_dimension);

    target_dpi.min(by_memory).min(by_dimension).max(72)
}
/// Calculate the DPI at which neither page side exceeds `max_dimension` pixels.
///
/// # Arguments
/// * `width_inches` / `height_inches` - page size in inches
/// * `target_dpi` - requested DPI
/// * `max_dimension` - largest allowed side, in pixels, of the rendered page
///
/// # Returns
/// `target_dpi` when the page rendered at that DPI already fits. Otherwise the
/// DPI limit of the tighter side, rounded down so that the rendered side stays
/// within `max_dimension`. A side that is not positive imposes no limit.
#[allow(clippy::cast_possible_truncation)]
fn calculate_dimension_constrained_dpi(
    width_inches: f64,
    height_inches: f64,
    target_dpi: i32,
    max_dimension: i32,
) -> i32 {
    let pixels_at_target = |inches: f64| (inches * f64::from(target_dpi)).round() as i32;
    let longest_side = pixels_at_target(width_inches).max(pixels_at_target(height_inches));
    if longest_side <= max_dimension {
        return target_dpi;
    }

    // Round down, not to nearest: 2000 px / 11 in = 181.8 would otherwise
    // become 182 DPI, which renders to 2002 px and breaks the limit.
    let dpi_cap = |inches: f64| -> i32 {
        if inches > 0.0 {
            (f64::from(max_dimension) / inches).floor() as i32
        } else {
            target_dpi
        }
    };

    dpi_cap(width_inches).min(dpi_cap(height_inches))
}
/// Compute the smart DPI for a page under a fixed 2048 MB memory budget and
/// bound the result by `min_dpi` and `max_dpi`.
///
/// The upper bound is applied first and the lower bound last, so `min_dpi`
/// wins if the two bounds conflict.
#[cfg(test)]
pub(crate) fn calculate_optimal_dpi(
    page_width: f64,
    page_height: f64,
    target_dpi: i32,
    max_dimension: i32,
    min_dpi: i32,
    max_dpi: i32,
) -> i32 {
    let unbounded = calculate_smart_dpi(page_width, page_height, target_dpi, max_dimension, 2048.0);
    let capped = unbounded.min(max_dpi);
    capped.max(min_dpi)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Memory budget (in MB) used by every case that is not about memory limits.
    const BUDGET_MB: f64 = 2048.0;

    #[test]
    fn test_calculate_smart_dpi_basic() {
        let result = calculate_smart_dpi(612.0, 792.0, 300, 4096, BUDGET_MB);
        assert!((72..=300).contains(&result));
    }

    #[test]
    fn test_calculate_smart_dpi_memory_constrained() {
        // A 10 MB budget forces the DPI below the requested 300.
        let result = calculate_smart_dpi(1224.0, 1584.0, 300, 8192, 10.0);
        assert!((72..300).contains(&result));
    }

    #[test]
    fn test_calculate_smart_dpi_dimension_constrained() {
        let result = calculate_smart_dpi(612.0, 792.0, 300, 1000, BUDGET_MB);
        assert!(result < 300);
    }

    #[test]
    fn test_calculate_smart_dpi_minimum_dpi() {
        // Both limits would push the DPI far below 72; the floor must hold.
        let result = calculate_smart_dpi(10000.0, 10000.0, 300, 100, 1.0);
        assert_eq!(result, 72);
    }

    #[test]
    fn test_calculate_smart_dpi_zero_dimensions() {
        for (page_width, page_height) in [(0.0, 792.0), (612.0, 0.0)] {
            let result = calculate_smart_dpi(page_width, page_height, 300, 4096, BUDGET_MB);
            assert!(result >= 72);
        }
        let result = calculate_smart_dpi(0.0, 0.0, 300, 4096, BUDGET_MB);
        assert_eq!(result, 300);
    }

    #[test]
    fn test_calculate_dimension_constrained_dpi() {
        let fits = calculate_dimension_constrained_dpi(8.5, 11.0, 300, 4096);
        assert!(fits <= 300);
        let limited = calculate_dimension_constrained_dpi(8.5, 11.0, 600, 2000);
        assert!(limited < 600);
    }

    #[test]
    fn test_calculate_optimal_dpi() {
        let within_bounds = calculate_optimal_dpi(612.0, 792.0, 300, 4096, 72, 600);
        assert!((72..=600).contains(&within_bounds));

        let raised_to_min = calculate_optimal_dpi(10000.0, 10000.0, 300, 100, 100, 600);
        assert_eq!(raised_to_min, 100);

        let lowered_to_max = calculate_optimal_dpi(72.0, 72.0, 1000, 10000, 72, 600);
        assert_eq!(lowered_to_max, 600);
    }

    #[test]
    fn test_memory_calculation() {
        let result = calculate_smart_dpi(612.0, 792.0, 10000, 100000, BUDGET_MB);
        assert!((72..10000).contains(&result));
    }

    #[test]
    fn test_aspect_ratio_preservation() {
        let wide = calculate_smart_dpi(1224.0, 396.0, 300, 4096, BUDGET_MB);
        let tall = calculate_smart_dpi(396.0, 1224.0, 300, 4096, BUDGET_MB);
        assert!(wide >= 72);
        assert!(tall >= 72);
    }
}

View File

@@ -0,0 +1,5 @@
/// DPI selection helpers such as `calculate_smart_dpi`.
pub mod dpi;
/// DPI normalization of raw RGB image buffers.
pub mod preprocessing;
/// Image resizing helper `resize_image`.
pub mod resize;
// Re-exported so the rest of the crate can call it without naming the submodule.
pub(crate) use preprocessing::normalize_image_dpi;

View File

@@ -0,0 +1,418 @@
use crate::error::{KreuzbergError, Result};
use crate::types::{ImageDpiConfig as ExtractionConfig, ImagePreprocessingMetadata};
use image::{DynamicImage, ImageBuffer, Rgb};
use super::dpi::calculate_smart_dpi;
use super::resize::resize_image;
/// Number of PDF points in one inch; also used as the assumed DPI when the
/// caller supplies none.
const PDF_POINTS_PER_INCH: f64 = 72.0;
/// Result of image normalization: the (possibly resized) pixel data together
/// with its dimensions and a record of what preprocessing did.
#[cfg_attr(alef, alef(skip))]
pub struct NormalizeResult {
/// Processed RGB image data (height * width * 3 bytes, i.e. 3 bytes per pixel)
pub rgb_data: Vec<u8>,
/// Dimensions of `rgb_data` in pixels, as (width, height)
pub dimensions: (usize, usize),
/// Preprocessing metadata (DPI values, scale factor, whether the resize was skipped, ...)
pub metadata: ImagePreprocessingMetadata,
}
/// Normalize image DPI based on extraction configuration
///
/// The image is rescaled by `target DPI / current DPI`. The resize is skipped
/// when that factor is within 5% of 1.0 and the image already fits
/// `config.max_image_dimension`.
///
/// # Arguments
/// * `rgb_data` - RGB image data as a flat buffer (height * width * 3 bytes, row-major)
/// * `width` - Image width in pixels
/// * `height` - Image height in pixels
/// * `config` - Extraction configuration containing DPI settings
/// * `current_dpi` - Optional current DPI of the image; 72 is assumed when it
///   is `None` or not a finite, positive number
///
/// # Returns
/// * `NormalizeResult` containing processed image data and metadata
///
/// # Errors
/// Returns a validation error when either dimension exceeds 65536 pixels or
/// when `rgb_data.len()` is not `width * height * 3`. Errors from the resize
/// step are propagated unchanged.
pub(crate) fn normalize_image_dpi(
    rgb_data: &[u8],
    width: usize,
    height: usize,
    config: &ExtractionConfig,
    current_dpi: Option<f64>,
) -> Result<NormalizeResult> {
    if width > 65536 || height > 65536 {
        return Err(KreuzbergError::validation(format!(
            "Image dimensions {}x{} exceed maximum 65536x65536",
            width, height
        )));
    }

    // Both sides are at most 65536 here, so the byte count always fits in u64,
    // whereas the same product in `usize` can overflow on 32-bit targets.
    let expected_size = width as u64 * height as u64 * 3;
    if rgb_data.len() as u64 != expected_size {
        return Err(KreuzbergError::validation(format!(
            "RGB data size {} does not match expected size {} for {}x{} image",
            rgb_data.len(),
            expected_size,
            width,
            height
        )));
    }

    // A zero, negative, NaN or infinite DPI would make the scale factor
    // infinite or NaN, so treat such a value like a missing one.
    let current_dpi = current_dpi
        .filter(|dpi| dpi.is_finite() && *dpi > 0.0)
        .unwrap_or(PDF_POINTS_PER_INCH);
    let original_dpi = (current_dpi, current_dpi);

    // Lossless: both sides were checked against 65536 above.
    let (width_px, height_px) = (width as u32, height as u32);

    let max_memory_mb = 2048.0;
    let (target_dpi, auto_adjusted, calculated_dpi) =
        calculate_target_dpi(width_px, height_px, current_dpi, config, max_memory_mb);
    let scale_factor = f64::from(target_dpi) / current_dpi;

    if !needs_resize(width_px, height_px, scale_factor, config) {
        return Ok(create_skip_result(
            rgb_data.to_vec(),
            width,
            height,
            original_dpi,
            config,
            target_dpi,
            scale_factor,
            auto_adjusted,
            calculated_dpi,
        ));
    }

    let (new_width, new_height, final_scale, dimension_clamped) =
        calculate_new_dimensions(width_px, height_px, scale_factor, config);
    perform_resize(
        rgb_data,
        width_px,
        height_px,
        new_width,
        new_height,
        final_scale,
        original_dpi,
        target_dpi,
        auto_adjusted,
        dimension_clamped,
        calculated_dpi,
        config,
    )
}
/// Choose the DPI the image should be normalized to.
///
/// Returns `(dpi, auto_adjusted, calculated_dpi)`:
/// * with `config.auto_adjust_dpi` off: `(config.target_dpi, false, None)`
/// * with it on: the result of `calculate_smart_dpi`, a flag telling whether
///   that result differs from `config.target_dpi`, and the result again
///   wrapped in `Some`
///
/// NOTE(review): `config.min_dpi` and `config.max_dpi` are not consulted here;
/// confirm that this is intended.
fn calculate_target_dpi(
    width: u32,
    height: u32,
    current_dpi: f64,
    config: &ExtractionConfig,
    max_memory_mb: f64,
) -> (i32, bool, Option<i32>) {
    if !config.auto_adjust_dpi {
        return (config.target_dpi, false, None);
    }

    // Convert a pixel count at `current_dpi` into the equivalent size in PDF points.
    let to_points = |pixels: u32| f64::from(pixels) * PDF_POINTS_PER_INCH / current_dpi;

    let smart_dpi = calculate_smart_dpi(
        to_points(width),
        to_points(height),
        config.target_dpi,
        config.max_image_dimension,
        max_memory_mb,
    );
    let differs_from_target = smart_dpi != config.target_dpi;
    (smart_dpi, differs_from_target, Some(smart_dpi))
}
/// Decide whether the image has to be resampled.
///
/// A resize is needed when the scale factor differs from 1.0 by at least 5%,
/// or when the longer side exceeds `config.max_image_dimension`. A side too
/// large to fit in `i32` always counts as exceeding the limit.
fn needs_resize(width: u32, height: u32, scale_factor: f64, config: &ExtractionConfig) -> bool {
    let scale_changes = (scale_factor - 1.0).abs() >= 0.05;
    let longest_side = width.max(height);
    let too_large = match i32::try_from(longest_side) {
        Ok(side) => side > config.max_image_dimension,
        Err(_) => true,
    };
    scale_changes || too_large
}
/// Calculate new dimensions after scaling
#[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
fn calculate_new_dimensions(
original_width: u32,
original_height: u32,
scale_factor: f64,
config: &ExtractionConfig,
) -> (u32, u32, f64, bool) {
let mut new_width = (f64::from(original_width) * scale_factor).round() as u32;
let mut new_height = (f64::from(original_height) * scale_factor).round() as u32;
let mut final_scale = scale_factor;
let mut dimension_clamped = false;
let max_new_dimension = new_width.max(new_height);
if let Ok(max_dim_i32) = i32::try_from(max_new_dimension)
&& max_dim_i32 > config.max_image_dimension
{
let dimension_scale = f64::from(config.max_image_dimension) / f64::from(max_new_dimension);
new_width = (f64::from(new_width) * dimension_scale).round() as u32;
new_height = (f64::from(new_height) * dimension_scale).round() as u32;
final_scale *= dimension_scale;
dimension_clamped = true;
}
(new_width, new_height, final_scale, dimension_clamped)
}
/// Build the result returned when no resampling is performed.
///
/// The pixel data and dimensions are passed through untouched. The metadata
/// records `skipped_resize: true`, no new dimensions, resample method `"NONE"`
/// and `dimension_clamped: false`. The configured DPI is stored as
/// `target_dpi` and the `target_dpi` argument as `final_dpi`.
#[allow(clippy::too_many_arguments)]
fn create_skip_result(
    rgb_data: Vec<u8>,
    width: usize,
    height: usize,
    original_dpi: (f64, f64),
    config: &ExtractionConfig,
    target_dpi: i32,
    scale_factor: f64,
    auto_adjusted: bool,
    calculated_dpi: Option<i32>,
) -> NormalizeResult {
    let size = (width, height);
    let metadata = ImagePreprocessingMetadata {
        original_dimensions: size,
        original_dpi,
        target_dpi: config.target_dpi,
        scale_factor,
        auto_adjusted,
        final_dpi: target_dpi,
        new_dimensions: None,
        resample_method: String::from("NONE"),
        dimension_clamped: false,
        calculated_dpi,
        skipped_resize: true,
        resize_error: None,
    };

    NormalizeResult {
        rgb_data,
        dimensions: size,
        metadata,
    }
}
/// Perform the actual resize operation
///
/// Wraps `rgb_data` in an RGB image, resizes it to `new_width` x `new_height`
/// with `resize_image`, and returns the resized pixels together with metadata
/// describing the operation.
///
/// # Errors
/// Returns a parsing error when `rgb_data` cannot back an image of
/// `original_width` x `original_height`, and propagates any error from
/// `resize_image`.
#[allow(clippy::too_many_arguments)]
fn perform_resize(
    rgb_data: &[u8],
    original_width: u32,
    original_height: u32,
    new_width: u32,
    new_height: u32,
    final_scale: f64,
    original_dpi: (f64, f64),
    target_dpi: i32,
    auto_adjusted: bool,
    dimension_clamped: bool,
    calculated_dpi: Option<i32>,
    config: &ExtractionConfig,
) -> Result<NormalizeResult> {
    let img_buffer = ImageBuffer::<Rgb<u8>, Vec<u8>>::from_raw(original_width, original_height, rgb_data.to_vec())
        .ok_or_else(|| {
            KreuzbergError::parsing(format!(
                "Failed to create image buffer from {}x{} RGB data",
                original_width, original_height
            ))
        })?;
    let image = DynamicImage::ImageRgb8(img_buffer);
    let resized = resize_image(&image, new_width, new_height, final_scale)?;

    // `resized` is owned, so consume it: `into_rgb8` returns the buffer of an
    // image that is already RGB8 as-is, whereas `to_rgb8` would clone it.
    let result_rgb_data = resized.into_rgb8().into_raw();

    let new_size = (new_width as usize, new_height as usize);
    // Same threshold that `resize_image` uses to choose its filter.
    let resample_method = if final_scale < 1.0 { "LANCZOS3" } else { "CATMULLROM" };

    let metadata = ImagePreprocessingMetadata {
        original_dimensions: (original_width as usize, original_height as usize),
        original_dpi,
        target_dpi: config.target_dpi,
        scale_factor: final_scale,
        auto_adjusted,
        final_dpi: target_dpi,
        new_dimensions: Some(new_size),
        resample_method: resample_method.to_string(),
        dimension_clamped,
        calculated_dpi,
        skipped_resize: false,
        resize_error: None,
    };

    Ok(NormalizeResult {
        rgb_data: result_rgb_data,
        dimensions: new_size,
        metadata,
    })
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a solid-red RGB buffer for a `width` x `height` image.
    fn create_test_rgb_data(width: usize, height: usize) -> Vec<u8> {
        [255u8, 0, 0].repeat(width * height)
    }

    /// Config with the given DPI settings and the 72/600 DPI bounds shared by
    /// every explicit config in this module.
    fn dpi_config(target_dpi: i32, max_image_dimension: i32, auto_adjust_dpi: bool) -> ExtractionConfig {
        ExtractionConfig {
            target_dpi,
            max_image_dimension,
            auto_adjust_dpi,
            min_dpi: 72,
            max_dpi: 600,
        }
    }

    /// Normalize a solid-red square image of the given side length and
    /// require the call to succeed.
    fn normalize_square(side: usize, config: &ExtractionConfig, current_dpi: f64) -> NormalizeResult {
        let pixels = create_test_rgb_data(side, side);
        let outcome = normalize_image_dpi(&pixels, side, side, config, Some(current_dpi));
        assert!(outcome.is_ok());
        outcome.unwrap()
    }

    #[test]
    fn test_normalize_image_dpi_skip_resize() {
        let normalized = normalize_square(100, &dpi_config(72, 4096, false), 72.0);
        assert_eq!(normalized.dimensions, (100, 100));
        assert!(normalized.metadata.skipped_resize);
    }

    #[test]
    fn test_normalize_image_dpi_upscale() {
        let normalized = normalize_square(100, &dpi_config(300, 4096, false), 72.0);
        assert!(!normalized.metadata.skipped_resize);
        let (out_width, out_height) = normalized.dimensions;
        assert!(out_width > 100);
        assert!(out_height > 100);
    }

    #[test]
    fn test_normalize_image_dpi_downscale() {
        let normalized = normalize_square(1000, &dpi_config(72, 4096, false), 300.0);
        assert!(!normalized.metadata.skipped_resize);
        let (out_width, out_height) = normalized.dimensions;
        assert!(out_width < 1000);
        assert!(out_height < 1000);
    }

    #[test]
    fn test_normalize_image_dpi_dimension_clamp() {
        let normalized = normalize_square(1000, &dpi_config(300, 500, false), 300.0);
        assert!(normalized.metadata.dimension_clamped);
        let (out_width, out_height) = normalized.dimensions;
        assert!(out_width <= 500);
        assert!(out_height <= 500);
    }

    #[test]
    fn test_normalize_image_dpi_auto_adjust() {
        let normalized = normalize_square(100, &dpi_config(300, 4096, true), 72.0);
        assert!(normalized.metadata.calculated_dpi.is_some());
    }

    #[test]
    fn test_normalize_image_dpi_invalid_dimensions() {
        // The buffer holds 100x100 pixels but the declared size is over the limit.
        let pixels = create_test_rgb_data(100, 100);
        let outcome = normalize_image_dpi(&pixels, 100000, 100000, &ExtractionConfig::default(), None);
        assert!(outcome.is_err());
    }

    #[test]
    fn test_normalize_image_dpi_invalid_data_size() {
        // 100 bytes cannot back a 100x100 RGB image.
        let pixels = vec![0u8; 100];
        let outcome = normalize_image_dpi(&pixels, 100, 100, &ExtractionConfig::default(), None);
        assert!(outcome.is_err());
    }

    #[test]
    fn test_needs_resize_threshold() {
        let config = dpi_config(300, 4096, false);
        assert!(!needs_resize(100, 100, 1.02, &config));
        assert!(needs_resize(100, 100, 1.10, &config));
    }

    #[test]
    fn test_calculate_new_dimensions_no_clamp() {
        let (out_width, out_height, scale, clamped) =
            calculate_new_dimensions(100, 100, 2.0, &ExtractionConfig::default());
        assert_eq!(out_width, 200);
        assert_eq!(out_height, 200);
        assert!((scale - 2.0).abs() < 0.01);
        assert!(!clamped);
    }

    #[test]
    fn test_calculate_new_dimensions_with_clamp() {
        let (out_width, out_height, _scale, clamped) =
            calculate_new_dimensions(100, 100, 2.0, &dpi_config(300, 100, false));
        assert!(out_width <= 100);
        assert!(out_height <= 100);
        assert!(clamped);
    }
}

View File

@@ -0,0 +1,94 @@
use crate::error::{KreuzbergError, Result};
use fast_image_resize::{FilterType, PixelType, ResizeAlg, ResizeOptions, Resizer, images::Image as FirImage};
use image::{DynamicImage, ImageBuffer, Rgb};
/// Resize an image to `new_width` x `new_height` with fast_image_resize.
///
/// The input is converted to 8-bit RGB first. `scale_factor` only selects the
/// convolution filter: Lanczos3 when it is below 1.0, CatmullRom otherwise.
///
/// # Errors
/// Returns a parsing error when the source image cannot be wrapped for the
/// resizer, when the resize itself fails, or when the output buffer cannot be
/// turned back into an image.
pub(crate) fn resize_image(
    image: &DynamicImage,
    new_width: u32,
    new_height: u32,
    scale_factor: f64,
) -> Result<DynamicImage> {
    let source_rgb = image.to_rgb8();
    let (src_width, src_height) = source_rgb.dimensions();
    let src_image = FirImage::from_vec_u8(src_width, src_height, source_rgb.into_raw(), PixelType::U8x3)
        .map_err(|e| KreuzbergError::parsing(format!("Failed to create source image: {e:?}")))?;
    let mut dst_image = FirImage::new(new_width, new_height, PixelType::U8x3);

    let filter = if scale_factor < 1.0 {
        FilterType::Lanczos3
    } else {
        FilterType::CatmullRom
    };
    let options = ResizeOptions::new().resize_alg(ResizeAlg::Convolution(filter));

    let mut resizer = Resizer::new();
    resizer
        .resize(&src_image, &mut dst_image, &options)
        .map_err(|e| KreuzbergError::parsing(format!("Resize failed: {e:?}")))?;

    ImageBuffer::<Rgb<u8>, Vec<u8>>::from_raw(new_width, new_height, dst_image.into_vec())
        .map(DynamicImage::ImageRgb8)
        .ok_or_else(|| KreuzbergError::parsing("Failed to create image buffer".to_string()))
}
#[cfg(test)]
mod tests {
    use super::*;
    use image::Rgb;

    /// Side length, in pixels, of the square fixture image.
    const SIDE: u32 = 100;

    /// Build a solid-red square RGB image.
    fn create_test_image() -> DynamicImage {
        let mut canvas = ImageBuffer::new(SIDE, SIDE);
        for (x, y) in (0..SIDE).flat_map(|y| (0..SIDE).map(move |x| (x, y))) {
            canvas.put_pixel(x, y, Rgb([255u8, 0u8, 0u8]));
        }
        DynamicImage::ImageRgb8(canvas)
    }

    /// Resize the fixture and check that the output has exactly the requested size.
    fn assert_resizes_to(width: u32, height: u32, scale_factor: f64) {
        let outcome = resize_image(&create_test_image(), width, height, scale_factor);
        assert!(outcome.is_ok());
        let resized = outcome.unwrap();
        assert_eq!(resized.width(), width);
        assert_eq!(resized.height(), height);
    }

    #[test]
    fn test_resize_image_downscale() {
        assert_resizes_to(50, 50, 0.5);
    }

    #[test]
    fn test_resize_image_upscale() {
        assert_resizes_to(200, 200, 2.0);
    }

    #[test]
    fn test_resize_image_no_scale() {
        assert_resizes_to(100, 100, 1.0);
    }

    #[test]
    fn test_resize_preserves_aspect_ratio() {
        let original = create_test_image();
        let outcome = resize_image(&original, 50, 50, 0.5);
        assert!(outcome.is_ok());
        let resized = outcome.unwrap();

        let aspect = |img: &DynamicImage| f64::from(img.width()) / f64::from(img.height());
        assert!((aspect(&original) - aspect(&resized)).abs() < 0.01);
    }
}