Files
fil/docs/snippets/rust/ocr/image_preprocessing.md

34 lines
949 B
Markdown
Raw Permalink Normal View History

2026-06-01 23:40:55 +02:00
```rust title="Rust"
use kreuzberg::{
extract_file_sync, ExtractionConfig, ImagePreprocessingConfig, OcrConfig, TesseractConfig,
};
/// Extracts `document.pdf` with an OCR configuration that carries explicit
/// image preprocessing options, then prints the length of the extracted content.
///
/// # Errors
///
/// Propagates any error returned by `extract_file_sync`.
fn main() -> kreuzberg::Result<()> {
    // Image preprocessing options passed to the Tesseract configuration below;
    // every field not listed here keeps its `Default` value.
    let image_options = ImagePreprocessingConfig {
        target_dpi: 300,
        denoise: true,
        deskew: true,
        contrast_enhance: true,
        binarization_method: String::from("otsu"),
        ..Default::default()
    };

    // Build the nested configuration from the inside out so each layer is named.
    let tesseract = TesseractConfig {
        preprocessing: Some(image_options),
        ..Default::default()
    };

    let ocr = OcrConfig {
        backend: String::from("tesseract"),
        language: String::from("eng"),
        tesseract_config: Some(tesseract),
        ..Default::default()
    };

    let extraction = ExtractionConfig {
        ocr: Some(ocr),
        ..Default::default()
    };

    // NOTE(review): the second argument is left as `None` — presumably an
    // optional MIME type hint; confirm against the kreuzberg API.
    let extracted = extract_file_sync("document.pdf", None, &extraction)?;
    println!("content length: {}", extracted.content.len());
    Ok(())
}
```