Files

37 lines
1.0 KiB
Markdown
Raw Permalink Normal View History

2026-06-01 23:40:55 +02:00
```rust title="Rust"
use kreuzberg::{extract_file, ExtractionConfig, ChunkingConfig, EmbeddingConfig};
// Build an extraction config that turns on chunking and requests an embedding
// for each chunk. Every field not set explicitly keeps its `Default` value.
let config = ExtractionConfig {
    chunking: Some(ChunkingConfig {
        max_characters: 500,
        overlap: 50,
        embedding: Some(EmbeddingConfig {
            model: "balanced".to_string(),
            normalize: true,
            ..Default::default()
        }),
        ..Default::default()
    }),
    ..Default::default()
};

// Run the extraction; `?` hands any failure back to the enclosing function.
let result = extract_file("research_paper.pdf", None, &config).await?;

// `chunks` is an `Option`, so only iterate when the result carries chunks.
if let Some(chunks) = result.chunks {
    for chunk in chunks {
        // `+ 1` turns the index into a 1-based position for display
        // (presumably `chunk_index` is zero-based; confirm against the API docs).
        println!(
            "Chunk {}/{}",
            chunk.metadata.chunk_index + 1,
            chunk.metadata.total_chunks
        );
        println!(
            "Position: {}-{}",
            chunk.metadata.byte_start,
            chunk.metadata.byte_end
        );

        // Preview at most the first 100 bytes of the chunk. Slicing a string at
        // an offset that falls inside a multi-byte UTF-8 character panics, so
        // step the cut point back until it sits on a character boundary.
        // Offset 0 is always a boundary, which guarantees the loop terminates.
        let mut preview_end = chunk.content.len().min(100);
        while !chunk.content.is_char_boundary(preview_end) {
            preview_end -= 1;
        }
        println!("Content: {}...", &chunk.content[..preview_end]);

        // The embedding is optional; report its length only when present.
        if let Some(embedding) = chunk.embedding {
            println!("Embedding: {} dimensions", embedding.len());
        }
    }
}
```