21 lines
595 B
Markdown
21 lines
595 B
Markdown
|
|
```rust title="Rust"
|
||
|
|
use kreuzberg::{extract_file_sync, ExtractionConfig, OcrConfig};
|
||
|
|
|
||
|
|
fn main() -> kreuzberg::Result<()> {
|
||
|
|
let config = ExtractionConfig {
|
||
|
|
ocr: Some(OcrConfig {
|
||
|
|
backend: "tesseract".to_string(),
|
||
|
|
language: "eng".to_string(),
|
||
|
|
tesseract_config: None,
|
||
|
|
..Default::default()
|
||
|
|
}),
|
||
|
|
..Default::default()
|
||
|
|
};
|
||
|
|
|
||
|
|
let result = extract_file_sync("scanned.pdf", None, &config)?;
|
||
|
|
println!("Content length: {}", result.content.len());
|
||
|
|
println!("Tables detected: {}", result.tables.len());
|
||
|
|
Ok(())
|
||
|
|
}
|
||
|
|
```
|