Files
fil/e2e/rust/tests/format_specific_test.rs

94 lines
3.4 KiB
Rust
Raw Normal View History

2026-06-01 23:40:55 +02:00
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
//! E2e tests for category: format_specific
use kreuzberg::{extract_bytes_sync, extract_file_sync};
/// Feeds the `docx/fake.docx` fixture to `extract_bytes_sync` as an in-memory
/// buffer and checks that the extracted content is at least 20 long.
///
/// Panics (fails the test) if the fixture cannot be read, if extraction
/// returns an error, or if the content is shorter than the threshold.
#[test]
fn test_format_docx_standalone() {
    // Fixture location is resolved at compile time, relative to this crate's manifest.
    const FIXTURE: &str = concat!(
        env!("CARGO_MANIFEST_DIR"),
        "/../../test_documents/docx/fake.docx"
    );
    const MIME: &str = "application/vnd.openxmlformats-officedocument.wordprocessingml.document";

    let bytes = std::fs::read(FIXTURE).expect("test_documents/docx/fake.docx must exist");
    // The concrete config type is inferred from the extractor's signature.
    let options = Default::default();

    let extraction = extract_bytes_sync(&bytes, MIME, &options).expect("should succeed");

    let extracted_len = extraction.content.len();
    assert!(
        extracted_len >= 20,
        "expected length >= 20, got {}",
        extracted_len
    );
}
/// Feeds the `hwpx/simple.hwpx` fixture to `extract_bytes_sync` as an in-memory
/// buffer and checks both the size and the text of the extracted content.
///
/// Panics (fails the test) if the fixture cannot be read, if extraction
/// returns an error, if the content is shorter than 20, or if it does not
/// contain the phrase `Hello from HWPX`.
#[test]
fn test_format_hwpx_standalone() {
    // Standalone HWPX extraction using extract_bytes_sync
    let content = std::fs::read(concat!(
        env!("CARGO_MANIFEST_DIR"),
        "/../../test_documents/hwpx/simple.hwpx"
    ))
    .expect("test_documents/hwpx/simple.hwpx must exist");
    let mime_type = r#"application/haansofthwpx"#;
    // The concrete config type is inferred from the extractor's signature.
    let config = Default::default();
    let result = extract_bytes_sync(&content, mime_type, &config).expect("should succeed");

    assert!(
        result.content.len() >= 20,
        "expected length >= 20, got {}",
        result.content.len()
    );

    // Search the extracted text directly. Going through `format!("{:?}", ..)`
    // would allocate a Debug-escaped copy of the whole document and match
    // against the escaped form rather than the real text; the other tests in
    // this file already call `contains` on `result.content` itself.
    let expected = r#"Hello from HWPX"#;
    assert!(
        result.content.contains(expected),
        "expected to contain: {}",
        expected
    );
}
/// Feeds the `pdf/fake_memo.pdf` fixture to `extract_bytes_sync` as an
/// in-memory buffer and checks the extracted text.
///
/// The test passes when the content is at least 50 long and contains at
/// least one of `Mallori` or `May`. It panics if the fixture cannot be read
/// or if extraction returns an error.
#[test]
fn test_format_pdf_text() {
    // Fixture location is resolved at compile time, relative to this crate's manifest.
    const FIXTURE: &str = concat!(
        env!("CARGO_MANIFEST_DIR"),
        "/../../test_documents/pdf/fake_memo.pdf"
    );
    const MIME: &str = "application/pdf";
    // Any single one of these substrings is enough to satisfy the content check.
    const ACCEPTED: [&str; 2] = ["Mallori", "May"];

    let pdf_bytes = std::fs::read(FIXTURE).expect("test_documents/pdf/fake_memo.pdf must exist");
    // The concrete config type is inferred from the extractor's signature.
    let options = Default::default();

    let extraction = extract_bytes_sync(&pdf_bytes, MIME, &options).expect("should succeed");

    let extracted_len = extraction.content.len();
    assert!(
        extracted_len >= 50,
        "expected length >= 50, got {}",
        extracted_len
    );

    // `any` stops at the first hit, mirroring a short-circuiting `||` chain.
    let found_any = ACCEPTED
        .iter()
        .any(|needle| extraction.content.contains(*needle));
    assert!(
        found_any,
        "expected to contain at least one of the specified values"
    );
}
/// Runs `extract_file_sync` on the `pptx/simple.pptx` fixture with an explicit
/// PPTX MIME type and requires only that extraction returns `Ok`.
///
/// The extracted value itself is discarded; nothing about its content is checked.
#[test]
fn test_format_pptx() {
    // Fixture location is resolved at compile time, relative to this crate's manifest.
    const FIXTURE: &str = concat!(
        env!("CARGO_MANIFEST_DIR"),
        "/../../test_documents/",
        "pptx/simple.pptx"
    );
    const MIME: &str = "application/vnd.openxmlformats-officedocument.presentationml.presentation";

    let fixture_path: &str = FIXTURE;
    let mime_hint = Some(MIME);
    // The concrete config type is inferred from the extractor's signature.
    let options = Default::default();

    let outcome = extract_file_sync(fixture_path, mime_hint.as_deref(), &options);
    let _ = outcome.expect("should succeed");
}
/// Runs `extract_file_sync` on the `xlsx/stanley_cups.xlsx` fixture with an
/// explicit XLSX MIME type and requires only that extraction returns `Ok`.
///
/// The extracted value itself is discarded; nothing about its content is checked.
#[test]
fn test_format_xlsx() {
    // Fixture location is resolved at compile time, relative to this crate's manifest.
    const WORKBOOK: &str = concat!(
        env!("CARGO_MANIFEST_DIR"),
        "/../../test_documents/",
        "xlsx/stanley_cups.xlsx"
    );
    const MIME: &str = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet";

    let workbook_path: &str = WORKBOOK;
    let declared_mime = Some(MIME);
    // The concrete config type is inferred from the extractor's signature.
    let settings = Default::default();

    let extraction = extract_file_sync(workbook_path, declared_mime.as_deref(), &settings);
    let _ = extraction.expect("should succeed");
}