497 lines
17 KiB
Rust
497 lines
17 KiB
Rust
|
|
// This file is auto-generated by alef — DO NOT EDIT.
|
||
|
|
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||
|
|
// To regenerate: alef generate
|
||
|
|
// To verify freshness: alef verify --exit-code
|
||
|
|
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||
|
|
//! E2e tests for category: contract
|
||
|
|
|
||
|
|
use kreuzberg::{extract_bytes_sync, extract_file, extract_file_sync};
|
||
|
|
|
||
|
|
#[tokio::test]
|
||
|
|
async fn test_api_batch_bytes_async() {
|
||
|
|
// Tests async batch bytes extraction API (batch_extract_bytes)
|
||
|
|
let path: &str = concat!(
|
||
|
|
env!("CARGO_MANIFEST_DIR"),
|
||
|
|
"/../../test_documents/",
|
||
|
|
"pdf/fake_memo.pdf"
|
||
|
|
);
|
||
|
|
let mime_type: Option<String> = None;
|
||
|
|
let config = Default::default();
|
||
|
|
let result = extract_file(path, mime_type.as_deref(), &config)
|
||
|
|
.await
|
||
|
|
.expect("should succeed");
|
||
|
|
assert_eq!(
|
||
|
|
result.mime_type.to_string().as_str().trim(),
|
||
|
|
r#"application/pdf"#,
|
||
|
|
"equals assertion failed"
|
||
|
|
);
|
||
|
|
assert!(
|
||
|
|
result.content.len() >= 10,
|
||
|
|
"expected length >= 10, got {}",
|
||
|
|
result.content.len()
|
||
|
|
);
|
||
|
|
assert!(
|
||
|
|
result.content.contains(r#"May 5, 2023"#) || result.content.contains(r#"Mallori"#),
|
||
|
|
"expected to contain at least one of the specified values"
|
||
|
|
);
|
||
|
|
}
|
||
|
|
|
||
|
|
#[tokio::test]
|
||
|
|
async fn test_api_batch_bytes_with_configs_async() {
|
||
|
|
// Tests async batch bytes extraction with per-file configs (batch_extract_bytes with file_configs parameter)
|
||
|
|
let path: &str = concat!(
|
||
|
|
env!("CARGO_MANIFEST_DIR"),
|
||
|
|
"/../../test_documents/",
|
||
|
|
"pdf/fake_memo.pdf"
|
||
|
|
);
|
||
|
|
let mime_type: Option<String> = None;
|
||
|
|
let config_json: serde_json::Value = serde_json::from_str(r#"{"output_format":"markdown"}"#).unwrap();
|
||
|
|
let config = serde_json::from_value(config_json).unwrap();
|
||
|
|
let result = extract_file(path, mime_type.as_deref(), &config)
|
||
|
|
.await
|
||
|
|
.expect("should succeed");
|
||
|
|
let _metadata_output_format = result
|
||
|
|
.metadata
|
||
|
|
.output_format
|
||
|
|
.as_ref()
|
||
|
|
.map(|v| v.to_string())
|
||
|
|
.unwrap_or_default();
|
||
|
|
assert_eq!(
|
||
|
|
result.mime_type.to_string().as_str().trim(),
|
||
|
|
r#"application/pdf"#,
|
||
|
|
"equals assertion failed"
|
||
|
|
);
|
||
|
|
assert!(
|
||
|
|
result.content.len() >= 10,
|
||
|
|
"expected length >= 10, got {}",
|
||
|
|
result.content.len()
|
||
|
|
);
|
||
|
|
// skipped: field 'metadata.output_format' not available on result type
|
||
|
|
}
|
||
|
|
|
||
|
|
#[tokio::test]
|
||
|
|
async fn test_api_batch_file_async() {
|
||
|
|
// Tests async batch file extraction API (batch_extract_file)
|
||
|
|
let path: &str = concat!(
|
||
|
|
env!("CARGO_MANIFEST_DIR"),
|
||
|
|
"/../../test_documents/",
|
||
|
|
"pdf/fake_memo.pdf"
|
||
|
|
);
|
||
|
|
let mime_type: Option<String> = None;
|
||
|
|
let config = Default::default();
|
||
|
|
let result = extract_file(path, mime_type.as_deref(), &config)
|
||
|
|
.await
|
||
|
|
.expect("should succeed");
|
||
|
|
assert_eq!(
|
||
|
|
result.mime_type.to_string().as_str().trim(),
|
||
|
|
r#"application/pdf"#,
|
||
|
|
"equals assertion failed"
|
||
|
|
);
|
||
|
|
assert!(
|
||
|
|
result.content.len() >= 10,
|
||
|
|
"expected length >= 10, got {}",
|
||
|
|
result.content.len()
|
||
|
|
);
|
||
|
|
assert!(
|
||
|
|
result.content.contains(r#"May 5, 2023"#) || result.content.contains(r#"Mallori"#),
|
||
|
|
"expected to contain at least one of the specified values"
|
||
|
|
);
|
||
|
|
}
|
||
|
|
|
||
|
|
#[tokio::test]
|
||
|
|
async fn test_api_batch_file_with_configs_async() {
|
||
|
|
// Tests async batch file extraction with per-file configs (batch_extract_files with file_configs parameter)
|
||
|
|
let path: &str = concat!(
|
||
|
|
env!("CARGO_MANIFEST_DIR"),
|
||
|
|
"/../../test_documents/",
|
||
|
|
"pdf/fake_memo.pdf"
|
||
|
|
);
|
||
|
|
let mime_type: Option<String> = None;
|
||
|
|
let config_json: serde_json::Value = serde_json::from_str(r#"{"output_format":"markdown"}"#).unwrap();
|
||
|
|
let config = serde_json::from_value(config_json).unwrap();
|
||
|
|
let result = extract_file(path, mime_type.as_deref(), &config)
|
||
|
|
.await
|
||
|
|
.expect("should succeed");
|
||
|
|
let _metadata_output_format = result
|
||
|
|
.metadata
|
||
|
|
.output_format
|
||
|
|
.as_ref()
|
||
|
|
.map(|v| v.to_string())
|
||
|
|
.unwrap_or_default();
|
||
|
|
assert_eq!(
|
||
|
|
result.mime_type.to_string().as_str().trim(),
|
||
|
|
r#"application/pdf"#,
|
||
|
|
"equals assertion failed"
|
||
|
|
);
|
||
|
|
assert!(
|
||
|
|
result.content.len() >= 10,
|
||
|
|
"expected length >= 10, got {}",
|
||
|
|
result.content.len()
|
||
|
|
);
|
||
|
|
// skipped: field 'metadata.output_format' not available on result type
|
||
|
|
}
|
||
|
|
|
||
|
|
#[tokio::test]
|
||
|
|
async fn test_api_extract_bytes_async() {
|
||
|
|
// Tests async bytes extraction API (extract_bytes)
|
||
|
|
let path: &str = concat!(
|
||
|
|
env!("CARGO_MANIFEST_DIR"),
|
||
|
|
"/../../test_documents/",
|
||
|
|
"pdf/fake_memo.pdf"
|
||
|
|
);
|
||
|
|
let mime_type: Option<String> = None;
|
||
|
|
let config = Default::default();
|
||
|
|
let result = extract_file(path, mime_type.as_deref(), &config)
|
||
|
|
.await
|
||
|
|
.expect("should succeed");
|
||
|
|
assert_eq!(
|
||
|
|
result.mime_type.to_string().as_str().trim(),
|
||
|
|
r#"application/pdf"#,
|
||
|
|
"equals assertion failed"
|
||
|
|
);
|
||
|
|
assert!(
|
||
|
|
result.content.len() >= 10,
|
||
|
|
"expected length >= 10, got {}",
|
||
|
|
result.content.len()
|
||
|
|
);
|
||
|
|
assert!(
|
||
|
|
result.content.contains(r#"May 5, 2023"#) || result.content.contains(r#"Mallori"#),
|
||
|
|
"expected to contain at least one of the specified values"
|
||
|
|
);
|
||
|
|
}
|
||
|
|
|
||
|
|
#[tokio::test]
|
||
|
|
async fn test_api_extract_file_async() {
|
||
|
|
// Tests async file extraction API (extract_file)
|
||
|
|
let path: &str = concat!(
|
||
|
|
env!("CARGO_MANIFEST_DIR"),
|
||
|
|
"/../../test_documents/",
|
||
|
|
"pdf/fake_memo.pdf"
|
||
|
|
);
|
||
|
|
let mime_type: Option<String> = None;
|
||
|
|
let config = Default::default();
|
||
|
|
let result = extract_file(path, mime_type.as_deref(), &config)
|
||
|
|
.await
|
||
|
|
.expect("should succeed");
|
||
|
|
assert_eq!(
|
||
|
|
result.mime_type.to_string().as_str().trim(),
|
||
|
|
r#"application/pdf"#,
|
||
|
|
"equals assertion failed"
|
||
|
|
);
|
||
|
|
assert!(
|
||
|
|
result.content.len() >= 10,
|
||
|
|
"expected length >= 10, got {}",
|
||
|
|
result.content.len()
|
||
|
|
);
|
||
|
|
assert!(
|
||
|
|
result.content.contains(r#"May 5, 2023"#) || result.content.contains(r#"Mallori"#),
|
||
|
|
"expected to contain at least one of the specified values"
|
||
|
|
);
|
||
|
|
}
|
||
|
|
|
||
|
|
#[test]
fn test_config_chunking_prepend_heading_context() {
    // Fixture scenario: markdown chunker prepends heading hierarchy to chunk content.
    //
    // Extracts `markdown/extraction_test.md` synchronously with a markdown chunking config
    // (`prepend_heading_context` enabled) and checks the content length, the chunk count,
    // that every chunk is non-empty, and that the first chunk begins with a `#` heading.
    let path: &str = concat!(
        env!("CARGO_MANIFEST_DIR"),
        "/../../test_documents/",
        "markdown/extraction_test.md"
    );
    let mime_type: Option<String> = None;
    let config_json: serde_json::Value = serde_json::from_str(
        r#"{"chunking":{"chunker_type":"markdown","max_chars":300,"max_overlap":50,"prepend_heading_context":true}}"#,
    )
    .unwrap();
    let config = serde_json::from_value(config_json).unwrap();
    let result = extract_file_sync(path, mime_type.as_deref(), &config).expect("should succeed");

    // Borrow the optional chunk list once and reuse it for every chunk assertion.
    let chunks = result.chunks.as_ref();

    assert!(
        result.content.len() >= 10,
        "expected length >= 10, got {}",
        result.content.len()
    );

    // A missing chunk list counts as zero chunks.
    assert!(chunks.map_or(0, |v| v.len()) >= 2, "expected >= 2 chunks");

    let all_chunks_have_content =
        chunks.is_some_and(|chunks| !chunks.is_empty() && chunks.iter().all(|c| !c.content.is_empty()));
    assert!(all_chunks_have_content, "expected all chunks to have content");

    // NOTE(review): the generator emits the same non-empty predicate for the heading-context
    // check; only the final assertion below actually inspects heading text.
    assert!(all_chunks_have_content, "expected chunks with heading context");

    assert!(
        chunks
            .and_then(|chunks| chunks.first())
            .is_some_and(|c| c.content.trim_start().starts_with('#')),
        "expected first chunk to start with heading"
    );
}
|
||
|
|
|
||
|
|
#[test]
fn test_config_document_structure_with_headings() {
    // Fixture scenario: document structure with DOCX heading-driven nesting.
    const DOCX_MIME: &str = "application/vnd.openxmlformats-officedocument.wordprocessingml.document";
    const CONFIG_JSON: &str = r#"{"include_document_structure":true}"#;

    let fixture = concat!(env!("CARGO_MANIFEST_DIR"), "/../../test_documents/", "docx/fake.docx");
    let mime_hint: Option<&str> = None;
    let raw_config: serde_json::Value = serde_json::from_str(CONFIG_JSON).unwrap();
    let config = serde_json::from_value(raw_config).unwrap();

    let extraction = extract_file_sync(fixture, mime_hint, &config).expect("should succeed");

    let detected_mime = extraction.mime_type.to_string();
    assert_eq!(detected_mime.trim(), DOCX_MIME, "equals assertion failed");

    // The generator skipped the `document` and `document.nodes` assertions (both reported as
    // not available on the result type), so only the MIME type is verified here.
}
|
||
|
|
|
||
|
|
#[test]
fn test_config_element_types() {
    // Fixture scenario: element-based result format with element type assertions on DOCX.
    const DOCX_MIME: &str = "application/vnd.openxmlformats-officedocument.wordprocessingml.document";
    const CONFIG_JSON: &str = r#"{"result_format":"element_based"}"#;

    let fixture = concat!(
        env!("CARGO_MANIFEST_DIR"),
        "/../../test_documents/",
        "docx/unit_test_headers.docx"
    );
    let mime_hint: Option<&str> = None;
    let raw_config: serde_json::Value = serde_json::from_str(CONFIG_JSON).unwrap();
    let config = serde_json::from_value(raw_config).unwrap();

    let extraction = extract_file_sync(fixture, mime_hint, &config).expect("should succeed");

    // Substring match (not equality) on the reported MIME type.
    let is_docx = extraction.mime_type.contains(DOCX_MIME);
    assert!(is_docx, "expected to contain at least one of the specified values");

    // The generator skipped the `elements` assertion (reported as not available on the
    // result type), so no element types are checked here.
}
|
||
|
|
|
||
|
|
#[test]
fn test_config_extraction_timeout() {
    // Fixture scenario: the extraction_timeout_secs config field is accepted and does not
    // affect fast extractions.
    const CONFIG_JSON: &str = r#"{"extraction_timeout_secs":300}"#;

    let pdf_path = concat!(env!("CARGO_MANIFEST_DIR"), "/../../test_documents/", "pdf/fake_memo.pdf");
    let no_mime_override: Option<&str> = None;
    let parsed: serde_json::Value = serde_json::from_str(CONFIG_JSON).unwrap();
    let config = serde_json::from_value(parsed).unwrap();

    let extracted = extract_file_sync(pdf_path, no_mime_override, &config).expect("should succeed");

    let mime_text = extracted.mime_type.to_string();
    assert_eq!(mime_text.trim(), "application/pdf", "equals assertion failed");

    let text = &extracted.content;
    assert!(text.len() >= 10, "expected length >= 10, got {}", text.len());
}
|
||
|
|
|
||
|
|
#[test]
fn test_config_keywords() {
    // Fixture scenario: keyword extraction via YAKE algorithm.
    const CONFIG_JSON: &str = r#"{"keywords":{"algorithm":"yake","max_keywords":10}}"#;

    let fixture = concat!(env!("CARGO_MANIFEST_DIR"), "/../../test_documents/", "pdf/fake_memo.pdf");
    let mime_hint: Option<&str> = None;
    let raw_config: serde_json::Value = serde_json::from_str(CONFIG_JSON).unwrap();
    let config = serde_json::from_value(raw_config).unwrap();

    let extraction = extract_file_sync(fixture, mime_hint, &config).expect("should succeed");

    let detected_mime = extraction.mime_type.to_string();
    assert_eq!(detected_mime.trim(), "application/pdf", "equals assertion failed");

    let body = &extraction.content;
    assert!(body.len() >= 10, "expected length >= 10, got {}", body.len());

    // Both fixture assertions reduce to the same predicate: the keyword list exists and has
    // at least one entry. It is evaluated once and reported under both original messages.
    let has_keywords = extraction
        .extracted_keywords
        .as_ref()
        .is_some_and(|keywords| !keywords.is_empty());
    assert!(has_keywords, "expected keywords to be present and non-empty");
    assert!(has_keywords, "expected >= 1");
}
|
||
|
|
|
||
|
|
#[test]
fn test_config_pages() {
    // Fixture scenario: page extraction and page marker configuration.
    const CONFIG_JSON: &str = r#"{"pages":{"extract_pages":true,"insert_page_markers":true}}"#;

    let pdf_path = concat!(env!("CARGO_MANIFEST_DIR"), "/../../test_documents/", "pdf/fake_memo.pdf");
    let no_mime_override: Option<&str> = None;
    let parsed: serde_json::Value = serde_json::from_str(CONFIG_JSON).unwrap();
    let config = serde_json::from_value(parsed).unwrap();

    let extracted = extract_file_sync(pdf_path, no_mime_override, &config).expect("should succeed");

    let mime_text = extracted.mime_type.to_string();
    assert_eq!(mime_text.trim(), "application/pdf", "equals assertion failed");

    let text = &extracted.content;
    assert!(text.len() >= 10, "expected length >= 10, got {}", text.len());

    // With page markers requested, the content is expected to include the text "PAGE".
    let has_page_marker = text.contains("PAGE");
    assert!(
        has_page_marker,
        "expected to contain at least one of the specified values"
    );
}
|
||
|
|
|
||
|
|
#[test]
fn test_config_quality_enabled() {
    // Fixture scenario: quality scoring produces a score value in [0.0, 1.0].
    const CONFIG_JSON: &str = r#"{"enable_quality_processing":true}"#;

    let fixture = concat!(env!("CARGO_MANIFEST_DIR"), "/../../test_documents/", "pdf/fake_memo.pdf");
    let mime_hint: Option<&str> = None;
    let raw_config: serde_json::Value = serde_json::from_str(CONFIG_JSON).unwrap();
    let config = serde_json::from_value(raw_config).unwrap();

    let extraction = extract_file_sync(fixture, mime_hint, &config).expect("should succeed");

    let detected_mime = extraction.mime_type.to_string();
    assert_eq!(detected_mime.trim(), "application/pdf", "equals assertion failed");

    let body = &extraction.content;
    assert!(body.len() >= 10, "expected length >= 10, got {}", body.len());

    // The generator skipped all three `quality_score` assertions (the field is reported as
    // not available on the result type), so the score range itself is not verified here.
}
|
||
|
|
|
||
|
|
#[test]
fn test_config_security_limits() {
    // Fixture scenario: archive extraction with custom security limits.
    const CONFIG_JSON: &str =
        r#"{"security_limits":{"max_archive_size":104857600,"max_compression_ratio":50,"max_files_in_archive":100}}"#;

    let archive_path = concat!(
        env!("CARGO_MANIFEST_DIR"),
        "/../../test_documents/",
        "archives/documents.zip"
    );
    let no_mime_override: Option<&str> = None;
    let parsed: serde_json::Value = serde_json::from_str(CONFIG_JSON).unwrap();
    let config = serde_json::from_value(parsed).unwrap();

    let extracted = extract_file_sync(archive_path, no_mime_override, &config).expect("should succeed");

    // Either of the two ZIP MIME spellings is accepted, matched as a substring.
    let mime = &extracted.mime_type;
    let looks_like_zip = ["application/zip", "application/x-zip-compressed"]
        .iter()
        .any(|&candidate| mime.contains(candidate));
    assert!(
        looks_like_zip,
        "expected to contain at least one of the specified values"
    );

    let text = &extracted.content;
    assert!(text.len() >= 10, "expected length >= 10, got {}", text.len());
}
|
||
|
|
|
||
|
|
#[test]
fn test_config_tree_sitter() {
    // Fixture scenario: tree-sitter configuration round-trip.
    const CONFIG_JSON: &str = r#"{"tree_sitter":{"groups":["web"],"languages":["python","rust"],"process":{"comments":false,"diagnostics":false,"docstrings":false,"exports":true,"imports":true,"structure":true,"symbols":false}}}"#;

    let source_path = concat!(env!("CARGO_MANIFEST_DIR"), "/../../test_documents/", "code/hello.py");
    let mime_hint: Option<&str> = None;
    let raw_config: serde_json::Value = serde_json::from_str(CONFIG_JSON).unwrap();
    let config = serde_json::from_value(raw_config).unwrap();

    let extraction = extract_file_sync(source_path, mime_hint, &config).expect("should succeed");

    let detected_mime = extraction.mime_type.to_string();
    assert_eq!(detected_mime.trim(), "text/x-source-code", "equals assertion failed");

    // This fixture uses a lower minimum content length (5) than the PDF-based tests.
    let body = &extraction.content;
    assert!(body.len() >= 5, "expected length >= 5, got {}", body.len());
}
|
||
|
|
|
||
|
|
#[test]
fn test_output_format_bytes_markdown() {
    // Fixture scenario: markdown output format via bytes extraction API.
    const CONFIG_JSON: &str = r#"{"output_format":"markdown"}"#;

    // Load the fixture into memory; the bytes (not the path) are handed to the extractor.
    let pdf_bytes = std::fs::read(concat!(
        env!("CARGO_MANIFEST_DIR"),
        "/../../test_documents/pdf/fake_memo.pdf"
    ))
    .expect("test_documents/pdf/fake_memo.pdf must exist");
    let declared_mime = "application/pdf";

    let raw_config: serde_json::Value = serde_json::from_str(CONFIG_JSON).unwrap();
    let config = serde_json::from_value(raw_config).unwrap();

    let extraction = extract_bytes_sync(&pdf_bytes, declared_mime, &config).expect("should succeed");

    // The generator skipped the `metadata.output_format` assertion (reported as not available
    // on the result type); the field is read into an unused binding and never compared.
    let _reported_format: String = match extraction.metadata.output_format.as_ref() {
        Some(value) => value.to_string(),
        None => String::new(),
    };

    let detected_mime = extraction.mime_type.to_string();
    assert_eq!(detected_mime.trim(), "application/pdf", "equals assertion failed");

    let body = &extraction.content;
    assert!(body.len() >= 10, "expected length >= 10, got {}", body.len());
}
|
||
|
|
|
||
|
|
#[test]
fn test_output_format_markdown() {
    // Fixture scenario: Markdown output format.
    const CONFIG_JSON: &str = r#"{"output_format":"markdown"}"#;

    let pdf_path = concat!(env!("CARGO_MANIFEST_DIR"), "/../../test_documents/", "pdf/fake_memo.pdf");
    let no_mime_override: Option<&str> = None;
    let parsed: serde_json::Value = serde_json::from_str(CONFIG_JSON).unwrap();
    let config = serde_json::from_value(parsed).unwrap();

    let extracted = extract_file_sync(pdf_path, no_mime_override, &config).expect("should succeed");

    // The `metadata.output_format` assertion was skipped by the generator (reported as not
    // available on the result type); the value is read with an empty-string fallback but is
    // not checked.
    let _reported_format: String = match extracted.metadata.output_format.as_ref() {
        Some(value) => value.to_string(),
        None => String::new(),
    };

    let mime_text = extracted.mime_type.to_string();
    assert_eq!(mime_text.trim(), "application/pdf", "equals assertion failed");

    let text = &extracted.content;
    assert!(text.len() >= 10, "expected length >= 10, got {}", text.len());
}
|