This commit is contained in:
93
e2e/rust/tests/format_specific_test.rs
generated
Normal file
93
e2e/rust/tests/format_specific_test.rs
generated
Normal file
@@ -0,0 +1,93 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
//! E2e tests for category: format_specific
|
||||
|
||||
use kreuzberg::{extract_bytes_sync, extract_file_sync};
|
||||
|
||||
/// Extracts the DOCX fixture from an in-memory byte buffer via
/// `extract_bytes_sync` and checks that at least 20 bytes of content come back.
#[test]
fn test_format_docx_standalone() {
    // Fixture location is resolved at compile time relative to this crate's manifest.
    const FIXTURE: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/../../test_documents/docx/fake.docx");

    let bytes = std::fs::read(FIXTURE).expect("test_documents/docx/fake.docx must exist");
    let config = Default::default();

    let extraction = extract_bytes_sync(
        &bytes,
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        &config,
    )
    .expect("should succeed");

    let extracted_len = extraction.content.len();
    assert!(extracted_len >= 20, "expected length >= 20, got {}", extracted_len);
}
|
||||
|
||||
/// Extracts the HWPX fixture from an in-memory byte buffer via
/// `extract_bytes_sync`, then checks both a minimum content length (20 bytes)
/// and the presence of the marker text `Hello from HWPX`.
#[test]
fn test_format_hwpx_standalone() {
    // Standalone HWPX extraction using extract_bytes_sync
    let content = std::fs::read(concat!(
        env!("CARGO_MANIFEST_DIR"),
        "/../../test_documents/hwpx/simple.hwpx"
    ))
    .expect("test_documents/hwpx/simple.hwpx must exist");
    let mime_type = r#"application/haansofthwpx"#;
    let config = Default::default();
    let result = extract_bytes_sync(&content, mime_type, &config).expect("should succeed");

    assert!(
        result.content.len() >= 20,
        "expected length >= 20, got {}",
        result.content.len()
    );

    // Search the extracted text directly (as the PDF test in this file does)
    // rather than its `{:?}` rendering: Debug formatting allocates an escaped
    // copy of the whole content and would hide matches that span escaped
    // characters such as newlines or quotes.
    assert!(
        result.content.contains(r#"Hello from HWPX"#),
        "expected to contain: {}",
        r#"Hello from HWPX"#
    );
}
|
||||
|
||||
/// Extracts the PDF memo fixture from an in-memory byte buffer via
/// `extract_bytes_sync`, then checks a minimum content length (50 bytes) and
/// that at least one of the expected words appears in the text.
#[test]
fn test_format_pdf_text() {
    // Fixture location is resolved at compile time relative to this crate's manifest.
    const FIXTURE: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/../../test_documents/pdf/fake_memo.pdf");

    let bytes = std::fs::read(FIXTURE).expect("test_documents/pdf/fake_memo.pdf must exist");
    let config = Default::default();
    let extraction = extract_bytes_sync(&bytes, "application/pdf", &config).expect("should succeed");

    let extracted_len = extraction.content.len();
    assert!(extracted_len >= 50, "expected length >= 50, got {}", extracted_len);

    // Either word is sufficient; candidates are tried in order and the scan
    // stops at the first hit.
    let found_any = ["Mallori", "May"]
        .iter()
        .any(|needle| extraction.content.contains(*needle));
    assert!(found_any, "expected to contain at least one of the specified values");
}
|
||||
|
||||
/// Smoke test: extracts the PPTX fixture from disk via `extract_file_sync`
/// with an explicit MIME type and only requires that the call returns `Ok`.
#[test]
fn test_format_pptx() {
    // PPTX presentation extraction using extract_file_sync
    let path: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/../../test_documents/", "pptx/simple.pptx");
    // Already an `Option<&str>`, so it is passed as-is; calling `.as_deref()`
    // on it would be a no-op.
    let mime_type = Some(r#"application/vnd.openxmlformats-officedocument.presentationml.presentation"#);
    let config = Default::default();
    // The extraction result itself is not inspected; `expect` fails the test on `Err`.
    let _ = extract_file_sync(path, mime_type, &config).expect("should succeed");
}
|
||||
|
||||
/// Smoke test: extracts the XLSX fixture from disk via `extract_file_sync`
/// with an explicit MIME type and only requires that the call returns `Ok`.
#[test]
fn test_format_xlsx() {
    // XLSX spreadsheet extraction using extract_file_sync
    let path: &str = concat!(
        env!("CARGO_MANIFEST_DIR"),
        "/../../test_documents/",
        "xlsx/stanley_cups.xlsx"
    );
    // Already an `Option<&str>`, so it is passed as-is; calling `.as_deref()`
    // on it would be a no-op.
    let mime_type = Some(r#"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"#);
    let config = Default::default();
    // The extraction result itself is not inspected; `expect` fails the test on `Err`.
    let _ = extract_file_sync(path, mime_type, &config).expect("should succeed");
}
|
||||
Reference in New Issue
Block a user