Nomad changes
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s

This commit is contained in:
Henrik Jess Nielsen
2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions

View File

@@ -0,0 +1,63 @@
```rust title="Rust"
use kreuzberg::{extract_file, ExtractionConfig, OcrConfig, ChunkingConfig, LanguageDetectionConfig, TokenReductionConfig, PostProcessorConfig, EmbeddingConfig, EmbeddingModelType};
use kreuzberg::keywords::{KeywordConfig, KeywordAlgorithm};
/// Extracts `document.pdf` with an explicitly populated configuration and
/// prints a short summary of the result.
///
/// The configuration fills in the `ocr`, `chunking`, `language_detection`,
/// `keywords`, `token_reduction` and `postprocessor` sections; every other
/// field of `ExtractionConfig` keeps its `Default` value. An error returned by
/// `extract_file` is propagated to the caller through `?`.
#[tokio::main]
async fn main() -> kreuzberg::Result<()> {
    // OCR section: backend and language are given by name, the remaining
    // backend-specific options are left unset.
    let ocr = OcrConfig {
        backend: String::from("tesseract"),
        language: String::from("eng"),
        tesseract_config: None,
        output_format: None,
        paddle_ocr_config: None,
        element_config: None,
    };

    // Embedding settings attached to the chunking section below.
    let embedding = EmbeddingConfig {
        model: EmbeddingModelType::Preset {
            name: String::from("balanced"),
        },
        batch_size: 32,
        normalize: true,
        show_download_progress: false,
        cache_dir: None,
    };

    // Chunking section; fields not listed here come from `Default`.
    let chunking = ChunkingConfig {
        max_characters: 1000,
        overlap: 200,
        embedding: Some(embedding),
        ..Default::default()
    };

    let language_detection = LanguageDetectionConfig {
        enabled: true,
        min_confidence: 0.8,
        detect_multiple: false,
    };

    // Keyword section; fields not listed here come from `Default`.
    let keywords = KeywordConfig {
        algorithm: KeywordAlgorithm::Yake,
        max_keywords: 10,
        min_score: 0.1,
        ngram_range: (1, 3),
        language: Some(String::from("en")),
        ..Default::default()
    };

    let token_reduction = TokenReductionConfig {
        mode: String::from("moderate"),
        preserve_important_words: true,
    };

    let postprocessor = PostProcessorConfig {
        enabled: true,
        enabled_processors: None,
        disabled_processors: None,
    };

    // Assemble the top-level configuration from the sections built above.
    let config = ExtractionConfig {
        use_cache: true,
        enable_quality_processing: true,
        ocr: Some(ocr),
        chunking: Some(chunking),
        language_detection: Some(language_detection),
        keywords: Some(keywords),
        token_reduction: Some(token_reduction),
        postprocessor: Some(postprocessor),
        ..Default::default()
    };

    // No explicit MIME type is passed (`None`).
    let result = extract_file("document.pdf", None::<&str>, &config).await?;

    println!("Content: {}", result.content);

    // Languages are only printed when the result carries them.
    if let Some(detected) = result.detected_languages.as_ref() {
        println!("Languages: {:?}", detected);
    }

    // A missing chunk list is reported as zero chunks.
    let chunk_count = result.chunks.as_ref().map_or(0, |chunks| chunks.len());
    println!("Chunks: {}", chunk_count);

    Ok(())
}
```