Files
fil/docs/snippets/rust/ocr/image_preprocessing.md
Henrik Jess Nielsen b4c07d3693
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s
Nomad changes
2026-06-01 23:40:55 +02:00

949 B

use kreuzberg::{
    extract_file_sync, ExtractionConfig, ImagePreprocessingConfig, OcrConfig, TesseractConfig,
};

/// Extracts `document.pdf` synchronously with OCR configured for the
/// `"tesseract"` backend and explicit image-preprocessing options, then prints
/// the length of the extracted content.
///
/// The configuration is assembled from the innermost struct outwards:
/// `ImagePreprocessingConfig` -> `TesseractConfig` -> `OcrConfig` ->
/// `ExtractionConfig`. Every field not set explicitly is filled in from the
/// type's `Default` implementation.
///
/// # Errors
///
/// Returns whatever error `extract_file_sync` reports; it is propagated
/// unchanged via `?`.
fn main() -> kreuzberg::Result<()> {
    // Image clean-up options handed to the OCR stage.
    let image_options = ImagePreprocessingConfig {
        binarization_method: String::from("otsu"),
        contrast_enhance: true,
        deskew: true,
        denoise: true,
        target_dpi: 300,
        ..Default::default()
    };

    // Tesseract-specific settings; only the preprocessing block is overridden.
    let tesseract = TesseractConfig {
        preprocessing: Some(image_options),
        ..Default::default()
    };

    // OCR settings: backend name, language code, and the Tesseract settings above.
    let ocr = OcrConfig {
        backend: String::from("tesseract"),
        language: String::from("eng"),
        tesseract_config: Some(tesseract),
        ..Default::default()
    };

    let config = ExtractionConfig {
        ocr: Some(ocr),
        ..Default::default()
    };

    // `None` is passed as the second argument, exactly as in the original call.
    let extracted = extract_file_sync("document.pdf", None, &config)?;
    println!("content length: {}", extracted.content.len());
    Ok(())
}