// This file is auto-generated by alef — DO NOT EDIT. // alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75 // To regenerate: alef generate // To verify freshness: alef verify --exit-code // Issues & docs: https://github.com/kreuzberg-dev/alef //! E2e tests for category: format_specific use kreuzberg::{extract_bytes_sync, extract_file_sync}; #[test] fn test_format_docx_standalone() { // Standalone DOCX extraction using extract_bytes_sync let content = std::fs::read(concat!( env!("CARGO_MANIFEST_DIR"), "/../../test_documents/docx/fake.docx" )) .expect("test_documents/docx/fake.docx must exist"); let mime_type = r#"application/vnd.openxmlformats-officedocument.wordprocessingml.document"#; let config = Default::default(); let result = extract_bytes_sync(&content, mime_type, &config).expect("should succeed"); assert!( result.content.len() >= 20, "expected length >= 20, got {}", result.content.len() ); } #[test] fn test_format_hwpx_standalone() { // Standalone HWPX extraction using extract_bytes_sync let content = std::fs::read(concat!( env!("CARGO_MANIFEST_DIR"), "/../../test_documents/hwpx/simple.hwpx" )) .expect("test_documents/hwpx/simple.hwpx must exist"); let mime_type = r#"application/haansofthwpx"#; let config = Default::default(); let result = extract_bytes_sync(&content, mime_type, &config).expect("should succeed"); assert!( result.content.len() >= 20, "expected length >= 20, got {}", result.content.len() ); assert!( format!("{:?}", result.content).contains(r#"Hello from HWPX"#), "expected to contain: {}", r#"Hello from HWPX"# ); } #[test] fn test_format_pdf_text() { // Standalone PDF text extraction using extract_bytes_sync let content = std::fs::read(concat!( env!("CARGO_MANIFEST_DIR"), "/../../test_documents/pdf/fake_memo.pdf" )) .expect("test_documents/pdf/fake_memo.pdf must exist"); let mime_type = r#"application/pdf"#; let config = Default::default(); let result = extract_bytes_sync(&content, mime_type, &config).expect("should succeed"); assert!( result.content.len() >= 50, "expected length >= 50, got {}", result.content.len() ); assert!( result.content.contains(r#"Mallori"#) || result.content.contains(r#"May"#), "expected to contain at least one of the specified values" ); } #[test] fn test_format_pptx() { // PPTX presentation extraction using extract_file_sync let path: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/../../test_documents/", "pptx/simple.pptx"); let mime_type = Some(r#"application/vnd.openxmlformats-officedocument.presentationml.presentation"#); let config = Default::default(); let _ = extract_file_sync(path, mime_type.as_deref(), &config).expect("should succeed"); } #[test] fn test_format_xlsx() { // XLSX spreadsheet extraction using extract_file_sync let path: &str = concat!( env!("CARGO_MANIFEST_DIR"), "/../../test_documents/", "xlsx/stanley_cups.xlsx" ); let mime_type = Some(r#"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"#); let config = Default::default(); let _ = extract_file_sync(path, mime_type.as_deref(), &config).expect("should succeed"); }