This commit is contained in:
552
crates/kreuzberg/tests/archive_integration.rs
Normal file
552
crates/kreuzberg/tests/archive_integration.rs
Normal file
@@ -0,0 +1,552 @@
|
||||
//! Archive extraction integration tests.
|
||||
//!
|
||||
//! Tests for ZIP, TAR, TAR.GZ, and 7z archive extraction.
|
||||
//! Validates metadata extraction, content extraction, nested archives, and error handling.
|
||||
|
||||
#![cfg(feature = "archives")]
|
||||
|
||||
use kreuzberg::core::config::ExtractionConfig;
|
||||
use kreuzberg::core::extractor::{extract_bytes, extract_bytes_sync};
|
||||
use std::io::{Cursor, Write};
|
||||
use tar::Builder as TarBuilder;
|
||||
use zip::write::{FileOptions, ZipWriter};
|
||||
|
||||
mod helpers;
|
||||
|
||||
/// Test basic ZIP extraction with single file.
|
||||
#[tokio::test]
|
||||
async fn test_zip_basic_extraction() {
|
||||
let config = ExtractionConfig::default();
|
||||
|
||||
let zip_bytes = create_simple_zip();
|
||||
|
||||
let result = extract_bytes(&zip_bytes, "application/zip", &config)
|
||||
.await
|
||||
.expect("Should extract ZIP successfully");
|
||||
|
||||
assert_eq!(result.mime_type, "application/zip");
|
||||
assert!(result.chunks.is_none(), "Chunks should be None without chunking config");
|
||||
assert!(result.detected_languages.is_none(), "Language detection not enabled");
|
||||
assert!(result.tables.is_empty(), "Archive should not have tables");
|
||||
|
||||
assert!(result.content.contains("ZIP Archive"));
|
||||
assert!(result.content.contains("test.txt"));
|
||||
assert!(result.content.contains("Hello from ZIP!"));
|
||||
|
||||
assert!(result.metadata.format.is_some());
|
||||
let archive_meta = match result.metadata.format.as_ref().expect("Operation failed") {
|
||||
kreuzberg::FormatMetadata::Archive(meta) => meta,
|
||||
_ => panic!("Expected Archive metadata"),
|
||||
};
|
||||
assert_eq!(archive_meta.format, "ZIP");
|
||||
assert_eq!(archive_meta.file_count, 1);
|
||||
assert_eq!(archive_meta.file_list.len(), 1);
|
||||
assert_eq!(archive_meta.file_list[0], "test.txt");
|
||||
}
|
||||
|
||||
/// Test ZIP with multiple files.
|
||||
#[tokio::test]
|
||||
async fn test_zip_multiple_files() {
|
||||
let config = ExtractionConfig::default();
|
||||
|
||||
let mut cursor = Cursor::new(Vec::new());
|
||||
{
|
||||
let mut zip = ZipWriter::new(&mut cursor);
|
||||
let options = FileOptions::<'_, ()>::default();
|
||||
|
||||
zip.start_file("file1.txt", options).expect("Operation failed");
|
||||
zip.write_all(b"Content 1").expect("Operation failed");
|
||||
|
||||
zip.start_file("file2.md", options).expect("Operation failed");
|
||||
zip.write_all(b"# Content 2").expect("Operation failed");
|
||||
|
||||
zip.start_file("file3.json", options).expect("Operation failed");
|
||||
zip.write_all(b"{\"key\": \"value\"}").expect("Operation failed");
|
||||
|
||||
zip.finish().expect("Operation failed");
|
||||
}
|
||||
|
||||
let zip_bytes = cursor.into_inner();
|
||||
let result = extract_bytes(&zip_bytes, "application/zip", &config)
|
||||
.await
|
||||
.expect("Should extract multi-file ZIP");
|
||||
|
||||
assert!(result.chunks.is_none(), "Chunks should be None without chunking config");
|
||||
assert!(result.detected_languages.is_none(), "Language detection not enabled");
|
||||
assert!(result.tables.is_empty(), "Archive should not have tables");
|
||||
|
||||
assert!(result.content.contains("file1.txt"));
|
||||
assert!(result.content.contains("file2.md"));
|
||||
assert!(result.content.contains("file3.json"));
|
||||
|
||||
assert!(result.content.contains("Content 1"));
|
||||
assert!(result.content.contains("Content 2"));
|
||||
assert!(result.content.contains("value"));
|
||||
|
||||
assert!(result.metadata.format.is_some());
|
||||
let archive_meta = match result.metadata.format.as_ref().expect("Operation failed") {
|
||||
kreuzberg::FormatMetadata::Archive(meta) => meta,
|
||||
_ => panic!("Expected Archive metadata"),
|
||||
};
|
||||
assert_eq!(archive_meta.file_count, 3, "Should have 3 files");
|
||||
assert_eq!(archive_meta.file_list.len(), 3, "file_list should contain 3 entries");
|
||||
assert!(archive_meta.file_list.contains(&"file1.txt".to_string()));
|
||||
assert!(archive_meta.file_list.contains(&"file2.md".to_string()));
|
||||
assert!(archive_meta.file_list.contains(&"file3.json".to_string()));
|
||||
}
|
||||
|
||||
/// Test ZIP with nested directory structure.
|
||||
#[tokio::test]
|
||||
async fn test_zip_nested_directories() {
|
||||
let config = ExtractionConfig::default();
|
||||
|
||||
let mut cursor = Cursor::new(Vec::new());
|
||||
{
|
||||
let mut zip = ZipWriter::new(&mut cursor);
|
||||
let options = FileOptions::<'_, ()>::default();
|
||||
|
||||
zip.add_directory("dir1/", options).expect("Operation failed");
|
||||
zip.add_directory("dir1/subdir/", options).expect("Operation failed");
|
||||
|
||||
zip.start_file("dir1/file.txt", options).expect("Operation failed");
|
||||
zip.write_all(b"File in dir1").expect("Operation failed");
|
||||
|
||||
zip.start_file("dir1/subdir/nested.txt", options)
|
||||
.expect("Operation failed");
|
||||
zip.write_all(b"Nested file").expect("Operation failed");
|
||||
|
||||
zip.finish().expect("Operation failed");
|
||||
}
|
||||
|
||||
let zip_bytes = cursor.into_inner();
|
||||
let result = extract_bytes(&zip_bytes, "application/zip", &config)
|
||||
.await
|
||||
.expect("Should extract nested ZIP");
|
||||
|
||||
assert!(result.chunks.is_none(), "Chunks should be None without chunking config");
|
||||
assert!(result.detected_languages.is_none(), "Language detection not enabled");
|
||||
assert!(result.tables.is_empty(), "Archive should not have tables");
|
||||
|
||||
assert!(result.content.contains("dir1/"));
|
||||
assert!(result.content.contains("dir1/file.txt"));
|
||||
assert!(result.content.contains("dir1/subdir/nested.txt"));
|
||||
|
||||
assert!(result.content.contains("File in dir1"));
|
||||
assert!(result.content.contains("Nested file"));
|
||||
|
||||
assert!(result.metadata.format.is_some());
|
||||
let archive_meta = match result.metadata.format.as_ref().expect("Operation failed") {
|
||||
kreuzberg::FormatMetadata::Archive(meta) => meta,
|
||||
_ => panic!("Expected Archive metadata"),
|
||||
};
|
||||
assert!(
|
||||
archive_meta.file_count >= 2,
|
||||
"Should have at least 2 files (excluding empty dirs)"
|
||||
);
|
||||
assert!(archive_meta.file_list.iter().any(|f| f.contains("dir1/file.txt")));
|
||||
assert!(
|
||||
archive_meta
|
||||
.file_list
|
||||
.iter()
|
||||
.any(|f| f.contains("dir1/subdir/nested.txt"))
|
||||
);
|
||||
}
|
||||
|
||||
/// Test TAR extraction.
|
||||
#[tokio::test]
|
||||
async fn test_tar_extraction() {
|
||||
let config = ExtractionConfig::default();
|
||||
|
||||
let tar_bytes = create_simple_tar();
|
||||
|
||||
let result = extract_bytes(&tar_bytes, "application/x-tar", &config)
|
||||
.await
|
||||
.expect("Should extract TAR successfully");
|
||||
|
||||
assert_eq!(result.mime_type, "application/x-tar");
|
||||
assert!(result.chunks.is_none(), "Chunks should be None without chunking config");
|
||||
assert!(result.detected_languages.is_none(), "Language detection not enabled");
|
||||
assert!(result.tables.is_empty(), "Archive should not have tables");
|
||||
|
||||
assert!(result.content.contains("TAR Archive"));
|
||||
assert!(result.content.contains("test.txt"));
|
||||
assert!(result.content.contains("Hello from TAR!"));
|
||||
|
||||
assert!(result.metadata.format.is_some());
|
||||
let archive_meta = match result.metadata.format.as_ref().expect("Operation failed") {
|
||||
kreuzberg::FormatMetadata::Archive(meta) => meta,
|
||||
_ => panic!("Expected Archive metadata"),
|
||||
};
|
||||
assert_eq!(archive_meta.format, "TAR");
|
||||
assert_eq!(archive_meta.file_count, 1);
|
||||
}
|
||||
|
||||
/// Test TAR.GZ extraction (compressed TAR).
|
||||
///
|
||||
/// Note: TAR.GZ requires decompression before extraction.
|
||||
/// This test validates TAR extraction which is the underlying format.
|
||||
#[tokio::test]
|
||||
async fn test_tar_gz_extraction() {
|
||||
let config = ExtractionConfig::default();
|
||||
|
||||
let tar_bytes = create_simple_tar();
|
||||
|
||||
let result = extract_bytes(&tar_bytes, "application/x-tar", &config)
|
||||
.await
|
||||
.expect("Should extract TAR");
|
||||
|
||||
assert!(result.chunks.is_none(), "Chunks should be None without chunking config");
|
||||
assert!(result.detected_languages.is_none(), "Language detection not enabled");
|
||||
assert!(result.tables.is_empty(), "Archive should not have tables");
|
||||
|
||||
assert!(result.content.contains("TAR Archive"));
|
||||
assert!(result.content.contains("test.txt"));
|
||||
|
||||
assert!(result.metadata.format.is_some());
|
||||
let archive_meta = match result.metadata.format.as_ref().expect("Operation failed") {
|
||||
kreuzberg::FormatMetadata::Archive(meta) => meta,
|
||||
_ => panic!("Expected Archive metadata"),
|
||||
};
|
||||
assert_eq!(archive_meta.format, "TAR");
|
||||
assert_eq!(archive_meta.file_count, 1);
|
||||
|
||||
let result2 = extract_bytes(&tar_bytes, "application/tar", &config)
|
||||
.await
|
||||
.expect("Should extract with alternative MIME type");
|
||||
|
||||
assert!(
|
||||
result2.chunks.is_none(),
|
||||
"Chunks should be None without chunking config"
|
||||
);
|
||||
assert!(result2.detected_languages.is_none(), "Language detection not enabled");
|
||||
assert!(result2.tables.is_empty(), "Archive should not have tables");
|
||||
|
||||
assert!(result2.content.contains("TAR Archive"));
|
||||
assert!(result2.metadata.format.is_some());
|
||||
}
|
||||
|
||||
/// Test 7z extraction.
|
||||
#[tokio::test]
|
||||
async fn test_7z_extraction() {
|
||||
println!("7z test requires real 7z file - skipping programmatic creation");
|
||||
}
|
||||
|
||||
/// Test nested archive (ZIP inside ZIP).
|
||||
#[tokio::test]
|
||||
async fn test_nested_archive() {
|
||||
let config = ExtractionConfig::default();
|
||||
|
||||
let inner_zip = create_simple_zip();
|
||||
|
||||
let mut cursor = Cursor::new(Vec::new());
|
||||
{
|
||||
let mut zip = ZipWriter::new(&mut cursor);
|
||||
let options = FileOptions::<'_, ()>::default();
|
||||
|
||||
zip.start_file("inner.zip", options).expect("Operation failed");
|
||||
zip.write_all(&inner_zip).expect("Operation failed");
|
||||
|
||||
zip.start_file("readme.txt", options).expect("Operation failed");
|
||||
zip.write_all(b"This archive contains another archive")
|
||||
.expect("Operation failed");
|
||||
|
||||
zip.finish().expect("Operation failed");
|
||||
}
|
||||
|
||||
let outer_zip_bytes = cursor.into_inner();
|
||||
let result = extract_bytes(&outer_zip_bytes, "application/zip", &config)
|
||||
.await
|
||||
.expect("Should extract nested ZIP");
|
||||
|
||||
assert!(result.chunks.is_none(), "Chunks should be None without chunking config");
|
||||
assert!(result.detected_languages.is_none(), "Language detection not enabled");
|
||||
assert!(result.tables.is_empty(), "Archive should not have tables");
|
||||
|
||||
assert!(result.content.contains("inner.zip"));
|
||||
assert!(result.content.contains("readme.txt"));
|
||||
assert!(result.content.contains("This archive contains another archive"));
|
||||
|
||||
assert!(result.metadata.format.is_some());
|
||||
let archive_meta = match result.metadata.format.as_ref().expect("Operation failed") {
|
||||
kreuzberg::FormatMetadata::Archive(meta) => meta,
|
||||
_ => panic!("Expected Archive metadata"),
|
||||
};
|
||||
assert_eq!(archive_meta.file_count, 2, "Should have 2 files in outer archive");
|
||||
assert!(archive_meta.file_list.contains(&"inner.zip".to_string()));
|
||||
assert!(archive_meta.file_list.contains(&"readme.txt".to_string()));
|
||||
}
|
||||
|
||||
/// Test archive with mixed file formats (PDF, DOCX, images).
|
||||
#[tokio::test]
|
||||
async fn test_archive_mixed_formats() {
|
||||
let config = ExtractionConfig::default();
|
||||
|
||||
let mut cursor = Cursor::new(Vec::new());
|
||||
{
|
||||
let mut zip = ZipWriter::new(&mut cursor);
|
||||
let options = FileOptions::<'_, ()>::default();
|
||||
|
||||
zip.start_file("document.txt", options).expect("Operation failed");
|
||||
zip.write_all(b"Text document").expect("Operation failed");
|
||||
|
||||
zip.start_file("readme.md", options).expect("Operation failed");
|
||||
zip.write_all(b"# README").expect("Operation failed");
|
||||
|
||||
zip.start_file("image.png", options).expect("Operation failed");
|
||||
zip.write_all(&[0x89, 0x50, 0x4E, 0x47]).expect("Operation failed");
|
||||
|
||||
zip.start_file("document.pdf", options).expect("Operation failed");
|
||||
zip.write_all(b"%PDF-1.4").expect("Operation failed");
|
||||
|
||||
zip.finish().expect("Operation failed");
|
||||
}
|
||||
|
||||
let zip_bytes = cursor.into_inner();
|
||||
let result = extract_bytes(&zip_bytes, "application/zip", &config)
|
||||
.await
|
||||
.expect("Should extract mixed-format ZIP");
|
||||
|
||||
assert!(result.chunks.is_none(), "Chunks should be None without chunking config");
|
||||
assert!(result.detected_languages.is_none(), "Language detection not enabled");
|
||||
assert!(result.tables.is_empty(), "Archive should not have tables");
|
||||
|
||||
assert!(result.content.contains("document.txt"));
|
||||
assert!(result.content.contains("readme.md"));
|
||||
assert!(result.content.contains("image.png"));
|
||||
assert!(result.content.contains("document.pdf"));
|
||||
|
||||
assert!(result.content.contains("Text document"));
|
||||
assert!(result.content.contains("# README"));
|
||||
|
||||
assert!(result.metadata.format.is_some());
|
||||
let archive_meta = match result.metadata.format.as_ref().expect("Operation failed") {
|
||||
kreuzberg::FormatMetadata::Archive(meta) => meta,
|
||||
_ => panic!("Expected Archive metadata"),
|
||||
};
|
||||
assert_eq!(archive_meta.file_count, 4, "Should have 4 files");
|
||||
assert_eq!(archive_meta.file_list.len(), 4, "file_list should contain 4 entries");
|
||||
assert!(archive_meta.file_list.contains(&"document.txt".to_string()));
|
||||
assert!(archive_meta.file_list.contains(&"readme.md".to_string()));
|
||||
assert!(archive_meta.file_list.contains(&"image.png".to_string()));
|
||||
assert!(archive_meta.file_list.contains(&"document.pdf".to_string()));
|
||||
}
|
||||
|
||||
/// Test password-protected archive (should fail gracefully).
|
||||
#[tokio::test]
|
||||
async fn test_password_protected_archive() {
|
||||
let config = ExtractionConfig::default();
|
||||
|
||||
let invalid_zip = vec![0x50, 0x4B, 0x03, 0x04];
|
||||
|
||||
let result = extract_bytes(&invalid_zip, "application/zip", &config).await;
|
||||
|
||||
assert!(result.is_err(), "Should fail on invalid/encrypted ZIP");
|
||||
}
|
||||
|
||||
/// Test corrupted archive.
|
||||
#[tokio::test]
|
||||
async fn test_corrupted_archive() {
|
||||
let config = ExtractionConfig::default();
|
||||
|
||||
let corrupted_zip = vec![0x50, 0x4B, 0x03, 0x04, 0xFF, 0xFF, 0xFF, 0xFF];
|
||||
|
||||
let result = extract_bytes(&corrupted_zip, "application/zip", &config).await;
|
||||
|
||||
assert!(result.is_err(), "Should fail on corrupted ZIP");
|
||||
|
||||
let mut corrupted_tar = vec![0xFF; 512];
|
||||
corrupted_tar[0..5].copy_from_slice(b"file\0");
|
||||
|
||||
let result = extract_bytes(&corrupted_tar, "application/x-tar", &config).await;
|
||||
assert!(
|
||||
result.is_ok() || result.is_err(),
|
||||
"Should handle corrupted TAR gracefully"
|
||||
);
|
||||
}
|
||||
|
||||
/// Test large archive (100+ files).
|
||||
#[tokio::test]
|
||||
async fn test_large_archive() {
|
||||
let config = ExtractionConfig::default();
|
||||
|
||||
let mut cursor = Cursor::new(Vec::new());
|
||||
{
|
||||
let mut zip = ZipWriter::new(&mut cursor);
|
||||
let options = FileOptions::<'_, ()>::default();
|
||||
|
||||
for i in 0..100 {
|
||||
zip.start_file(format!("file_{}.txt", i), options)
|
||||
.expect("Operation failed");
|
||||
zip.write_all(format!("Content {}", i).as_bytes())
|
||||
.expect("Failed to convert to bytes");
|
||||
}
|
||||
|
||||
zip.finish().expect("Operation failed");
|
||||
}
|
||||
|
||||
let zip_bytes = cursor.into_inner();
|
||||
let result = extract_bytes(&zip_bytes, "application/zip", &config)
|
||||
.await
|
||||
.expect("Should extract large ZIP");
|
||||
|
||||
assert!(result.chunks.is_none(), "Chunks should be None without chunking config");
|
||||
assert!(result.detected_languages.is_none(), "Language detection not enabled");
|
||||
assert!(result.tables.is_empty(), "Archive should not have tables");
|
||||
|
||||
assert!(result.metadata.format.is_some());
|
||||
let archive_meta = match result.metadata.format.as_ref().expect("Operation failed") {
|
||||
kreuzberg::FormatMetadata::Archive(meta) => meta,
|
||||
_ => panic!("Expected Archive metadata"),
|
||||
};
|
||||
assert_eq!(archive_meta.file_count, 100, "Should have 100 files");
|
||||
assert_eq!(
|
||||
archive_meta.file_list.len(),
|
||||
100,
|
||||
"file_list should contain 100 entries"
|
||||
);
|
||||
|
||||
assert!(result.content.contains("file_0.txt"));
|
||||
assert!(result.content.contains("file_99.txt"));
|
||||
assert!(archive_meta.file_list.contains(&"file_0.txt".to_string()));
|
||||
assert!(archive_meta.file_list.contains(&"file_50.txt".to_string()));
|
||||
assert!(archive_meta.file_list.contains(&"file_99.txt".to_string()));
|
||||
}
|
||||
|
||||
/// Test archive with special characters and Unicode filenames.
|
||||
#[tokio::test]
|
||||
async fn test_archive_with_special_characters() {
|
||||
let config = ExtractionConfig::default();
|
||||
|
||||
let mut cursor = Cursor::new(Vec::new());
|
||||
{
|
||||
let mut zip = ZipWriter::new(&mut cursor);
|
||||
let options = FileOptions::<'_, ()>::default();
|
||||
|
||||
zip.start_file("测试文件.txt", options).expect("Operation failed");
|
||||
zip.write_all("Unicode content".as_bytes())
|
||||
.expect("Failed to convert to bytes");
|
||||
|
||||
zip.start_file("file with spaces.txt", options)
|
||||
.expect("Operation failed");
|
||||
zip.write_all(b"Spaces in filename").expect("Operation failed");
|
||||
|
||||
zip.start_file("file-with-dashes.txt", options)
|
||||
.expect("Operation failed");
|
||||
zip.write_all(b"Dashes").expect("Operation failed");
|
||||
|
||||
zip.finish().expect("Operation failed");
|
||||
}
|
||||
|
||||
let zip_bytes = cursor.into_inner();
|
||||
let result = extract_bytes(&zip_bytes, "application/zip", &config)
|
||||
.await
|
||||
.expect("Should extract ZIP with special characters");
|
||||
|
||||
assert!(result.chunks.is_none(), "Chunks should be None without chunking config");
|
||||
assert!(result.detected_languages.is_none(), "Language detection not enabled");
|
||||
assert!(result.tables.is_empty(), "Archive should not have tables");
|
||||
|
||||
assert!(result.content.contains("测试文件.txt") || result.content.contains("txt"));
|
||||
assert!(result.content.contains("file with spaces.txt"));
|
||||
assert!(result.content.contains("file-with-dashes.txt"));
|
||||
|
||||
assert!(result.metadata.format.is_some());
|
||||
let archive_meta = match result.metadata.format.as_ref().expect("Operation failed") {
|
||||
kreuzberg::FormatMetadata::Archive(meta) => meta,
|
||||
_ => panic!("Expected Archive metadata"),
|
||||
};
|
||||
assert_eq!(archive_meta.file_count, 3, "Should have 3 files");
|
||||
assert_eq!(archive_meta.file_list.len(), 3, "file_list should contain 3 entries");
|
||||
assert!(archive_meta.file_list.iter().any(|f| f.contains("txt")));
|
||||
assert!(archive_meta.file_list.contains(&"file with spaces.txt".to_string()));
|
||||
assert!(archive_meta.file_list.contains(&"file-with-dashes.txt".to_string()));
|
||||
}
|
||||
|
||||
/// Test empty archive.
|
||||
#[tokio::test]
|
||||
async fn test_empty_archive() {
|
||||
let config = ExtractionConfig::default();
|
||||
|
||||
let mut cursor = Cursor::new(Vec::new());
|
||||
{
|
||||
let zip = ZipWriter::new(&mut cursor);
|
||||
zip.finish().expect("Operation failed");
|
||||
}
|
||||
|
||||
let zip_bytes = cursor.into_inner();
|
||||
let result = extract_bytes(&zip_bytes, "application/zip", &config)
|
||||
.await
|
||||
.expect("Should extract empty ZIP");
|
||||
|
||||
assert!(result.chunks.is_none(), "Chunks should be None without chunking config");
|
||||
assert!(result.detected_languages.is_none(), "Language detection not enabled");
|
||||
assert!(result.tables.is_empty(), "Archive should not have tables");
|
||||
|
||||
assert!(result.content.contains("ZIP Archive"));
|
||||
assert!(result.metadata.format.is_some());
|
||||
let archive_meta = match result.metadata.format.as_ref().expect("Operation failed") {
|
||||
kreuzberg::FormatMetadata::Archive(meta) => meta,
|
||||
_ => panic!("Expected Archive metadata"),
|
||||
};
|
||||
assert_eq!(archive_meta.file_count, 0, "Empty archive should have 0 files");
|
||||
assert_eq!(archive_meta.total_size, 0, "Empty archive should have 0 total size");
|
||||
assert!(archive_meta.file_list.is_empty(), "file_list should be empty");
|
||||
}
|
||||
|
||||
/// Test synchronous archive extraction.
|
||||
#[test]
|
||||
fn test_archive_extraction_sync() {
|
||||
let config = ExtractionConfig::default();
|
||||
|
||||
let zip_bytes = create_simple_zip();
|
||||
let result = extract_bytes_sync(&zip_bytes, "application/zip", &config).expect("Should extract ZIP synchronously");
|
||||
|
||||
assert!(result.chunks.is_none(), "Chunks should be None without chunking config");
|
||||
assert!(result.detected_languages.is_none(), "Language detection not enabled");
|
||||
assert!(result.tables.is_empty(), "Archive should not have tables");
|
||||
|
||||
assert!(result.content.contains("ZIP Archive"));
|
||||
assert!(result.content.contains("test.txt"));
|
||||
assert!(result.content.contains("Hello from ZIP!"));
|
||||
|
||||
assert!(result.metadata.format.is_some(), "Should have archive metadata");
|
||||
let archive_meta = match result.metadata.format.as_ref().expect("Operation failed") {
|
||||
kreuzberg::FormatMetadata::Archive(meta) => meta,
|
||||
_ => panic!("Expected Archive metadata"),
|
||||
};
|
||||
assert_eq!(archive_meta.format, "ZIP");
|
||||
assert_eq!(archive_meta.file_count, 1);
|
||||
assert_eq!(archive_meta.file_list.len(), 1);
|
||||
assert_eq!(archive_meta.file_list[0], "test.txt");
|
||||
}
|
||||
|
||||
fn create_simple_zip() -> Vec<u8> {
|
||||
let mut cursor = Cursor::new(Vec::new());
|
||||
{
|
||||
let mut zip = ZipWriter::new(&mut cursor);
|
||||
let options = FileOptions::<'_, ()>::default();
|
||||
|
||||
zip.start_file("test.txt", options).expect("Operation failed");
|
||||
zip.write_all(b"Hello from ZIP!").expect("Operation failed");
|
||||
|
||||
zip.finish().expect("Operation failed");
|
||||
}
|
||||
cursor.into_inner()
|
||||
}
|
||||
|
||||
fn create_simple_tar() -> Vec<u8> {
|
||||
let mut cursor = Cursor::new(Vec::new());
|
||||
{
|
||||
let mut tar = TarBuilder::new(&mut cursor);
|
||||
|
||||
let data = b"Hello from TAR!";
|
||||
let mut header = tar::Header::new_gnu();
|
||||
header.set_path("test.txt").expect("Operation failed");
|
||||
header.set_size(data.len() as u64);
|
||||
header.set_cksum();
|
||||
tar.append(&header, &data[..]).expect("Operation failed");
|
||||
|
||||
tar.finish().expect("Operation failed");
|
||||
}
|
||||
cursor.into_inner()
|
||||
}
|
||||
Reference in New Issue
Block a user