This commit is contained in:
25
docs/snippets/rust/api/batch_extract_bytes_sync.md
Normal file
25
docs/snippets/rust/api/batch_extract_bytes_sync.md
Normal file
@@ -0,0 +1,25 @@
|
||||
```rust title="Rust"
|
||||
use kreuzberg::{batch_extract_bytes_sync, BatchBytesItem, ExtractionConfig};
|
||||
|
||||
fn main() -> kreuzberg::Result<()> {
|
||||
let config = ExtractionConfig::default();
|
||||
let items = vec![
|
||||
BatchBytesItem {
|
||||
content: b"Hello, world!".to_vec(),
|
||||
mime_type: "text/plain".to_string(),
|
||||
config: None,
|
||||
},
|
||||
BatchBytesItem {
|
||||
content: b"# Heading\n\nParagraph text.".to_vec(),
|
||||
mime_type: "text/markdown".to_string(),
|
||||
config: None,
|
||||
},
|
||||
];
|
||||
let results = batch_extract_bytes_sync(items, &config)?;
|
||||
|
||||
for (i, result) in results.iter().enumerate() {
|
||||
println!("Item {}: {} chars", i, result.content.len());
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
```
|
||||
18
docs/snippets/rust/api/batch_extract_files_sync.md
Normal file
18
docs/snippets/rust/api/batch_extract_files_sync.md
Normal file
@@ -0,0 +1,18 @@
|
||||
```rust title="Rust"
|
||||
use kreuzberg::{batch_extract_files_sync, BatchFileItem, ExtractionConfig};
|
||||
|
||||
fn main() -> kreuzberg::Result<()> {
|
||||
let config = ExtractionConfig::default();
|
||||
let items = vec![
|
||||
BatchFileItem { path: "doc1.pdf".into(), config: None },
|
||||
BatchFileItem { path: "doc2.docx".into(), config: None },
|
||||
BatchFileItem { path: "report.pdf".into(), config: None },
|
||||
];
|
||||
let results = batch_extract_files_sync(items, &config)?;
|
||||
|
||||
for (i, result) in results.iter().enumerate() {
|
||||
println!("File {}: {} chars", i, result.content.len());
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
```
|
||||
29
docs/snippets/rust/api/client_chunk_text.md
Normal file
29
docs/snippets/rust/api/client_chunk_text.md
Normal file
@@ -0,0 +1,29 @@
|
||||
```rust title="Rust"
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let client = reqwest::Client::new();
|
||||
let bytes = tokio::fs::read("document.pdf").await?;
|
||||
|
||||
let part = reqwest::multipart::Part::bytes(bytes)
|
||||
.file_name("document.pdf")
|
||||
.mime_str("application/pdf")?;
|
||||
let form = reqwest::multipart::Form::new()
|
||||
.part("file", part)
|
||||
.text("chunking", r#"{"max_characters":800,"overlap":100}"#);
|
||||
|
||||
let response = client
|
||||
.post("http://localhost:8000/extract")
|
||||
.multipart(form)
|
||||
.send()
|
||||
.await?;
|
||||
|
||||
let result: serde_json::Value = response.error_for_status()?.json().await?;
|
||||
if let Some(chunks) = result["chunks"].as_array() {
|
||||
println!("{} chunks", chunks.len());
|
||||
for chunk in chunks {
|
||||
println!(" {} chars", chunk["content"].as_str().unwrap_or("").len());
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
```
|
||||
28
docs/snippets/rust/api/client_extract_single_file.md
Normal file
28
docs/snippets/rust/api/client_extract_single_file.md
Normal file
@@ -0,0 +1,28 @@
|
||||
```rust title="Rust"
|
||||
use std::path::Path;
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let client = reqwest::Client::new();
|
||||
let bytes = tokio::fs::read("document.pdf").await?;
|
||||
let file_name = Path::new("document.pdf")
|
||||
.file_name()
|
||||
.and_then(|n| n.to_str())
|
||||
.unwrap_or("document.pdf");
|
||||
|
||||
let part = reqwest::multipart::Part::bytes(bytes)
|
||||
.file_name(file_name.to_string())
|
||||
.mime_str("application/pdf")?;
|
||||
let form = reqwest::multipart::Form::new().part("file", part);
|
||||
|
||||
let response = client
|
||||
.post("http://localhost:8000/extract")
|
||||
.multipart(form)
|
||||
.send()
|
||||
.await?;
|
||||
|
||||
let result: serde_json::Value = response.error_for_status()?.json().await?;
|
||||
println!("{}", result["content"].as_str().unwrap_or(""));
|
||||
Ok(())
|
||||
}
|
||||
```
|
||||
55
docs/snippets/rust/api/combining_all_features.md
Normal file
55
docs/snippets/rust/api/combining_all_features.md
Normal file
@@ -0,0 +1,55 @@
|
||||
```rust title="Rust"
|
||||
use kreuzberg::{
|
||||
ChunkingConfig, ChunkerType, ExtractionConfig, ImageExtractionConfig,
|
||||
OcrConfig, OutputFormat, extract_file_sync,
|
||||
};
|
||||
|
||||
fn main() -> kreuzberg::Result<()> {
|
||||
let config = ExtractionConfig {
|
||||
// OCR: force Tesseract on all pages with English text
|
||||
force_ocr: false,
|
||||
ocr: Some(OcrConfig {
|
||||
backend: "tesseract".to_string(),
|
||||
language: "eng".to_string(),
|
||||
..Default::default()
|
||||
}),
|
||||
// Chunking: semantic markdown chunks of ~800 chars, 100-char overlap
|
||||
chunking: Some(ChunkingConfig {
|
||||
max_characters: 800,
|
||||
overlap: 100,
|
||||
chunker_type: ChunkerType::Markdown,
|
||||
prepend_heading_context: true,
|
||||
..Default::default()
|
||||
}),
|
||||
// Output: include document structure and tables
|
||||
output_format: OutputFormat::Markdown,
|
||||
include_document_structure: true,
|
||||
// Images: extract embedded images
|
||||
images: Some(ImageExtractionConfig {
|
||||
extract_images: true,
|
||||
..Default::default()
|
||||
}),
|
||||
// Cache extracted results on disk
|
||||
use_cache: true,
|
||||
enable_quality_processing: true,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let result = extract_file_sync("report.pdf", None, &config)?;
|
||||
|
||||
println!("Content ({} chars):", result.content.len());
|
||||
println!("{}", &result.content[..result.content.len().min(200)]);
|
||||
|
||||
if let Some(chunks) = &result.chunks {
|
||||
println!("\nChunks: {}", chunks.len());
|
||||
}
|
||||
println!("Tables: {}", result.tables.len());
|
||||
if let Some(langs) = &result.detected_languages {
|
||||
println!("Languages: {:?}", langs);
|
||||
}
|
||||
if let Some(method) = result.extraction_method {
|
||||
println!("Extraction method: {:?}", method);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
```
|
||||
21
docs/snippets/rust/api/error_handling.md
Normal file
21
docs/snippets/rust/api/error_handling.md
Normal file
@@ -0,0 +1,21 @@
|
||||
```rust title="Rust"
|
||||
use kreuzberg::{extract_file_sync, ExtractionConfig, KreuzbergError};
|
||||
|
||||
fn main() {
|
||||
let config = ExtractionConfig::default();
|
||||
match extract_file_sync("document.pdf", None, &config) {
|
||||
Ok(result) => println!("{}", result.content),
|
||||
Err(KreuzbergError::Io(e)) => eprintln!("File error: {e}"),
|
||||
Err(KreuzbergError::UnsupportedFormat(mime)) => {
|
||||
eprintln!("Unsupported format: {mime}");
|
||||
}
|
||||
Err(KreuzbergError::Parsing { message, .. }) => {
|
||||
eprintln!("Corrupt or invalid document: {message}");
|
||||
}
|
||||
Err(KreuzbergError::MissingDependency(dep)) => {
|
||||
eprintln!("Missing dependency — install {dep}");
|
||||
}
|
||||
Err(e) => eprintln!("Extraction failed: {e}"),
|
||||
}
|
||||
}
|
||||
```
|
||||
23
docs/snippets/rust/api/error_handling_extract.md
Normal file
23
docs/snippets/rust/api/error_handling_extract.md
Normal file
@@ -0,0 +1,23 @@
|
||||
```rust title="Rust"
|
||||
use kreuzberg::{extract_bytes_sync, ExtractionConfig, KreuzbergError, Result};
|
||||
|
||||
fn extract_text(bytes: &[u8], mime_type: &str) -> Result<String> {
|
||||
let config = ExtractionConfig::default();
|
||||
let result = extract_bytes_sync(bytes, mime_type, &config)?;
|
||||
Ok(result.content)
|
||||
}
|
||||
|
||||
fn main() {
|
||||
let bytes = std::fs::read("document.pdf").unwrap_or_default();
|
||||
match extract_text(&bytes, "application/pdf") {
|
||||
Ok(text) => println!("Extracted {} chars", text.len()),
|
||||
Err(KreuzbergError::UnsupportedFormat(mime)) => {
|
||||
eprintln!("Format not supported: {mime}");
|
||||
}
|
||||
Err(KreuzbergError::Ocr { message, .. }) => {
|
||||
eprintln!("OCR failed: {message}");
|
||||
}
|
||||
Err(e) => eprintln!("Error: {e}"),
|
||||
}
|
||||
}
|
||||
```
|
||||
14
docs/snippets/rust/api/extract_bytes_async.md
Normal file
14
docs/snippets/rust/api/extract_bytes_async.md
Normal file
@@ -0,0 +1,14 @@
|
||||
```rust title="Rust"
|
||||
use kreuzberg::{extract_bytes, ExtractionConfig};
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> kreuzberg::Result<()> {
|
||||
let content = tokio::fs::read("document.pdf").await?;
|
||||
let config = ExtractionConfig::default();
|
||||
let result = extract_bytes(&content, "application/pdf", &config).await?;
|
||||
|
||||
println!("{}", result.content);
|
||||
println!("Tables: {}", result.tables.len());
|
||||
Ok(())
|
||||
}
|
||||
```
|
||||
13
docs/snippets/rust/api/extract_bytes_sync.md
Normal file
13
docs/snippets/rust/api/extract_bytes_sync.md
Normal file
@@ -0,0 +1,13 @@
|
||||
```rust title="Rust"
|
||||
use kreuzberg::{extract_bytes_sync, ExtractionConfig};
|
||||
|
||||
fn main() -> kreuzberg::Result<()> {
|
||||
let content = std::fs::read("document.pdf")?;
|
||||
let config = ExtractionConfig::default();
|
||||
let result = extract_bytes_sync(&content, "application/pdf", &config)?;
|
||||
|
||||
println!("{}", result.content);
|
||||
println!("Tables: {}", result.tables.len());
|
||||
Ok(())
|
||||
}
|
||||
```
|
||||
14
docs/snippets/rust/api/extract_file_async.md
Normal file
14
docs/snippets/rust/api/extract_file_async.md
Normal file
@@ -0,0 +1,14 @@
|
||||
```rust title="Rust"
|
||||
use kreuzberg::{extract_file, ExtractionConfig};
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> kreuzberg::Result<()> {
|
||||
let config = ExtractionConfig::default();
|
||||
let result = extract_file("document.pdf", None::<&str>, &config).await?;
|
||||
|
||||
println!("{}", result.content);
|
||||
println!("MIME type: {}", result.mime_type);
|
||||
println!("Tables: {}", result.tables.len());
|
||||
Ok(())
|
||||
}
|
||||
```
|
||||
13
docs/snippets/rust/api/extract_file_sync.md
Normal file
13
docs/snippets/rust/api/extract_file_sync.md
Normal file
@@ -0,0 +1,13 @@
|
||||
```rust title="Rust"
|
||||
use kreuzberg::{extract_file_sync, ExtractionConfig};
|
||||
|
||||
fn main() -> kreuzberg::Result<()> {
|
||||
let config = ExtractionConfig::default();
|
||||
let result = extract_file_sync("document.pdf", None, &config)?;
|
||||
|
||||
println!("{}", result.content);
|
||||
println!("MIME type: {}", result.mime_type);
|
||||
println!("Tables: {}", result.tables.len());
|
||||
Ok(())
|
||||
}
|
||||
```
|
||||
Reference in New Issue
Block a user