// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
//! E2e tests for category: format_specific

use kreuzberg::{extract_bytes_sync, extract_file_sync};
|
||
|
|
|
||
|
|
#[test]
fn test_format_docx_standalone() {
    // Standalone DOCX extraction: the fixture is loaded into memory and handed
    // to extract_bytes_sync together with an explicit MIME type.
    const FIXTURE: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/../../test_documents/docx/fake.docx");

    let bytes = std::fs::read(FIXTURE).expect("test_documents/docx/fake.docx must exist");
    let config = Default::default();
    let result = extract_bytes_sync(
        &bytes,
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        &config,
    )
    .expect("should succeed");

    // The extracted text must be at least 20 bytes long.
    let extracted_len = result.content.len();
    assert!(extracted_len >= 20, "expected length >= 20, got {}", extracted_len);
}
|
||
|
|
|
||
|
|
#[test]
fn test_format_hwpx_standalone() {
    // Standalone HWPX extraction using extract_bytes_sync: read the fixture
    // into memory, extract with an explicit MIME type, then check both the
    // amount of text and a known phrase.
    let content = std::fs::read(concat!(
        env!("CARGO_MANIFEST_DIR"),
        "/../../test_documents/hwpx/simple.hwpx"
    ))
    .expect("test_documents/hwpx/simple.hwpx must exist");
    let mime_type = r#"application/haansofthwpx"#;
    let config = Default::default();
    let result = extract_bytes_sync(&content, mime_type, &config).expect("should succeed");

    assert!(
        result.content.len() >= 20,
        "expected length >= 20, got {}",
        result.content.len()
    );
    // Search the extracted text directly (as the PDF test in this file does)
    // rather than a Debug-formatted copy: Debug output adds quotes and escape
    // sequences and costs an extra allocation without changing the outcome
    // for this plain-ASCII needle.
    assert!(
        result.content.contains(r#"Hello from HWPX"#),
        "expected to contain: {}",
        r#"Hello from HWPX"#
    );
}
|
||
|
|
|
||
|
|
#[test]
fn test_format_pdf_text() {
    // Standalone PDF text extraction: the fixture bytes go through
    // extract_bytes_sync with the PDF MIME type.
    const FIXTURE: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/../../test_documents/pdf/fake_memo.pdf");

    let bytes = std::fs::read(FIXTURE).expect("test_documents/pdf/fake_memo.pdf must exist");
    let config = Default::default();
    let result = extract_bytes_sync(&bytes, "application/pdf", &config).expect("should succeed");

    // The extracted text must be at least 50 bytes long.
    let extracted_len = result.content.len();
    assert!(extracted_len >= 50, "expected length >= 50, got {}", extracted_len);

    // Either marker is enough; `any` stops at the first one found.
    let found_marker = ["Mallori", "May"]
        .iter()
        .any(|needle| result.content.contains(*needle));
    assert!(
        found_marker,
        "expected to contain at least one of the specified values"
    );
}
|
||
|
|
|
||
|
|
#[test]
fn test_format_pptx() {
    // PPTX presentation extraction using extract_file_sync. This is a smoke
    // test: it only requires that extraction returns Ok; the result itself is
    // deliberately discarded.
    let path: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/../../test_documents/", "pptx/simple.pptx");
    // Already an Option<&str>, so it is passed as-is; calling `as_deref()` on
    // it would be a no-op.
    let mime_type: Option<&str> =
        Some(r#"application/vnd.openxmlformats-officedocument.presentationml.presentation"#);
    let config = Default::default();
    let _ = extract_file_sync(path, mime_type, &config).expect("should succeed");
}
|
||
|
|
|
||
|
|
#[test]
fn test_format_xlsx() {
    // XLSX spreadsheet extraction using extract_file_sync. This is a smoke
    // test: it only requires that extraction returns Ok; the result itself is
    // deliberately discarded.
    let path: &str = concat!(
        env!("CARGO_MANIFEST_DIR"),
        "/../../test_documents/",
        "xlsx/stanley_cups.xlsx"
    );
    // Already an Option<&str>, so it is passed as-is; calling `as_deref()` on
    // it would be a no-op.
    let mime_type: Option<&str> = Some(r#"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"#);
    let config = Default::default();
    let _ = extract_file_sync(path, mime_type, &config).expect("should succeed");
}
|