This commit is contained in:
33
docs/snippets/rust/ocr/image_preprocessing.md
Normal file
33
docs/snippets/rust/ocr/image_preprocessing.md
Normal file
@@ -0,0 +1,33 @@
|
||||
```rust title="Rust"
|
||||
use kreuzberg::{
|
||||
extract_file_sync, ExtractionConfig, ImagePreprocessingConfig, OcrConfig, TesseractConfig,
|
||||
};
|
||||
|
||||
fn main() -> kreuzberg::Result<()> {
|
||||
let preprocessing = ImagePreprocessingConfig {
|
||||
target_dpi: 300,
|
||||
denoise: true,
|
||||
deskew: true,
|
||||
contrast_enhance: true,
|
||||
binarization_method: "otsu".to_string(),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let config = ExtractionConfig {
|
||||
ocr: Some(OcrConfig {
|
||||
backend: "tesseract".to_string(),
|
||||
language: "eng".to_string(),
|
||||
tesseract_config: Some(TesseractConfig {
|
||||
preprocessing: Some(preprocessing),
|
||||
..Default::default()
|
||||
}),
|
||||
..Default::default()
|
||||
}),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let result = extract_file_sync("document.pdf", None, &config)?;
|
||||
println!("content length: {}", result.content.len());
|
||||
Ok(())
|
||||
}
|
||||
```
|
||||
Reference in New Issue
Block a user