Files
fil/docs/snippets/rust/config/config_programmatic.md

30 lines
897 B
Markdown
Raw Normal View History

2026-06-01 23:40:55 +02:00
```rust title="Rust"
use kreuzberg::{extract_file_sync, ChunkingConfig, ExtractionConfig, OcrConfig, TesseractConfig};
// Builds an `ExtractionConfig` entirely in code (OCR + chunking + quality
// processing), runs a synchronous extraction on "document.pdf", and prints the
// length of the extracted content. Any extraction error is propagated to the
// caller via `?`.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Tesseract-specific settings; every field other than `psm` keeps its default.
    // NOTE(review): `psm` is presumably Tesseract's page segmentation mode — confirm
    // against the crate documentation.
    let tesseract = TesseractConfig {
        psm: 6,
        ..Default::default()
    };

    // OCR settings: backend and language are given as strings.
    // NOTE(review): "eng+deu" looks like Tesseract's multi-language syntax — confirm.
    let ocr = OcrConfig {
        backend: "tesseract".to_string(),
        language: "eng+deu".to_string(),
        tesseract_config: Some(tesseract),
        ..Default::default()
    };

    // Chunking settings; remaining fields keep their defaults.
    let chunking = ChunkingConfig {
        max_characters: 1000,
        overlap: 200,
        ..Default::default()
    };

    // Top-level configuration assembled from the pieces above.
    let config = ExtractionConfig {
        use_cache: true,
        ocr: Some(ocr),
        chunking: Some(chunking),
        enable_quality_processing: true,
        ..Default::default()
    };

    // The second argument is passed as `None`, exactly as in the original call.
    let extraction = extract_file_sync("document.pdf", None, &config)?;
    let content_len = extraction.content.len();
    println!("Content length: {}", content_len);

    Ok(())
}
```