Nomad changes
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s

This commit is contained in:
Henrik Jess Nielsen
2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions

View File

@@ -0,0 +1,55 @@
```rust title="Rust"
use kreuzberg::{
ChunkingConfig, ChunkerType, ExtractionConfig, ImageExtractionConfig,
OcrConfig, OutputFormat, extract_file_sync,
};
fn main() -> kreuzberg::Result<()> {
let config = ExtractionConfig {
// OCR: force Tesseract on all pages with English text
force_ocr: false,
ocr: Some(OcrConfig {
backend: "tesseract".to_string(),
language: "eng".to_string(),
..Default::default()
}),
// Chunking: semantic markdown chunks of ~800 chars, 100-char overlap
chunking: Some(ChunkingConfig {
max_characters: 800,
overlap: 100,
chunker_type: ChunkerType::Markdown,
prepend_heading_context: true,
..Default::default()
}),
// Output: include document structure and tables
output_format: OutputFormat::Markdown,
include_document_structure: true,
// Images: extract embedded images
images: Some(ImageExtractionConfig {
extract_images: true,
..Default::default()
}),
// Cache extracted results on disk
use_cache: true,
enable_quality_processing: true,
..Default::default()
};
let result = extract_file_sync("report.pdf", None, &config)?;
println!("Content ({} chars):", result.content.len());
println!("{}", &result.content[..result.content.len().min(200)]);
if let Some(chunks) = &result.chunks {
println!("\nChunks: {}", chunks.len());
}
println!("Tables: {}", result.tables.len());
if let Some(langs) = &result.detected_languages {
println!("Languages: {:?}", langs);
}
if let Some(method) = result.extraction_method {
println!("Extraction method: {:?}", method);
}
Ok(())
}
```