This commit is contained in:
133
crates/kreuzberg/tests/path_resolution.rs
Normal file
133
crates/kreuzberg/tests/path_resolution.rs
Normal file
@@ -0,0 +1,133 @@
|
||||
//! Integration tests for image path resolution in markup extractors.
|
||||
|
||||
use kreuzberg::ExtractionConfig;
|
||||
use kreuzberg::ImageExtractionConfig;
|
||||
use std::path::PathBuf;
|
||||
|
||||
fn fixtures_dir() -> PathBuf {
|
||||
PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("tests/path_resolution/fixtures")
|
||||
}
|
||||
|
||||
fn config_with_images() -> ExtractionConfig {
|
||||
ExtractionConfig {
|
||||
images: Some(ImageExtractionConfig {
|
||||
extract_images: true,
|
||||
..Default::default()
|
||||
}),
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_markdown_resolves_images() {
|
||||
let path = fixtures_dir().join("markdown_with_images.md");
|
||||
let config = config_with_images();
|
||||
let result = kreuzberg::extract_file(&path, None, &config).await.unwrap();
|
||||
|
||||
let images = result.images.as_ref().expect("should have images");
|
||||
// Should resolve the 2 local images but NOT the https:// URL
|
||||
assert_eq!(images.len(), 2, "expected 2 resolved images, got {}", images.len());
|
||||
|
||||
// Verify image data is non-empty
|
||||
for img in images {
|
||||
assert!(!img.data.is_empty(), "image data should not be empty");
|
||||
assert_eq!(img.format, "png");
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_markdown_bytes_no_resolution() {
|
||||
let path = fixtures_dir().join("markdown_with_images.md");
|
||||
let content = std::fs::read(&path).unwrap();
|
||||
let config = config_with_images();
|
||||
let result = kreuzberg::extract_bytes(&content, "text/markdown", &config)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// extract_bytes has no file path context, so no image resolution should happen
|
||||
let image_count = result.images.as_ref().map_or(0, |imgs| imgs.len());
|
||||
assert_eq!(image_count, 0, "extract_bytes should not resolve local images");
|
||||
}
|
||||
|
||||
/// Extracting the LaTeX fixture by file path is expected to produce exactly
/// two images. Only compiled when the `office` feature is enabled.
#[cfg(feature = "office")]
#[tokio::test]
async fn test_latex_resolves_images() {
    let latex_file = fixtures_dir().join("latex_with_images.tex");
    let settings = config_with_images();

    // No MIME hint is passed for this fixture.
    let extraction = kreuzberg::extract_file(&latex_file, None, &settings)
        .await
        .unwrap();

    let resolved = extraction.images.as_ref().expect("should have images");
    let resolved_count = resolved.len();
    assert_eq!(resolved_count, 2, "expected 2 resolved images, got {}", resolved_count);
}
|
||||
|
||||
/// Extracting the reStructuredText fixture by file path, with an explicit
/// `text/x-rst` MIME type, is expected to produce exactly two images.
/// Only compiled when the `office` feature is enabled.
#[cfg(feature = "office")]
#[tokio::test]
async fn test_rst_resolves_images() {
    let rst_file = fixtures_dir().join("rst_with_images.rst");
    let settings = config_with_images();

    let outcome = kreuzberg::extract_file(&rst_file, Some("text/x-rst"), &settings).await;
    let extraction = outcome.unwrap();

    let resolved = extraction.images.as_ref().expect("should have images");
    let resolved_count = resolved.len();
    assert_eq!(resolved_count, 2, "expected 2 resolved images, got {}", resolved_count);
}
|
||||
|
||||
/// Extracting the Org-mode fixture by file path, with an explicit
/// `text/x-org` MIME type, is expected to produce exactly two images.
/// Only compiled when the `office` feature is enabled.
#[cfg(feature = "office")]
#[tokio::test]
async fn test_orgmode_resolves_images() {
    let org_file = fixtures_dir().join("orgmode_with_images.org");
    let mime_hint = Some("text/x-org");
    let settings = config_with_images();

    let extraction = kreuzberg::extract_file(&org_file, mime_hint, &settings)
        .await
        .unwrap();

    let resolved = extraction.images.as_ref().expect("should have images");
    let resolved_count = resolved.len();
    assert_eq!(resolved_count, 2, "expected 2 resolved images, got {}", resolved_count);
}
|
||||
|
||||
/// Extracting the Typst fixture by file path, with an explicit
/// `application/x-typst` MIME type, is expected to produce exactly two images.
/// Only compiled when the `office` feature is enabled.
#[cfg(feature = "office")]
#[tokio::test]
async fn test_typst_resolves_images() {
    let typst_file = fixtures_dir().join("typst_with_images.typ");
    let settings = config_with_images();

    let outcome =
        kreuzberg::extract_file(&typst_file, Some("application/x-typst"), &settings).await;
    let extraction = outcome.unwrap();

    let resolved = extraction.images.as_ref().expect("should have images");
    let resolved_count = resolved.len();
    assert_eq!(resolved_count, 2, "expected 2 resolved images, got {}", resolved_count);
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_djot_resolves_images() {
|
||||
let path = fixtures_dir().join("djot_with_images.djot");
|
||||
let config = config_with_images();
|
||||
let result = kreuzberg::extract_file(&path, Some("text/djot"), &config)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let images = result.images.as_ref().expect("should have images");
|
||||
assert_eq!(images.len(), 2, "expected 2 resolved images, got {}", images.len());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_traversal_blocked() {
|
||||
// Create a temp markdown file that references a traversal path
|
||||
let tmp_dir = std::env::temp_dir().join("kreuzberg_path_test");
|
||||
std::fs::create_dir_all(&tmp_dir).unwrap();
|
||||
let md_path = tmp_dir.join("traversal.md");
|
||||
std::fs::write(&md_path, "\n\n").unwrap();
|
||||
|
||||
let config = config_with_images();
|
||||
let result = kreuzberg::extract_file(&md_path, Some("text/markdown"), &config)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Neither should resolve: traversal is blocked, and images/ doesn't exist in tmp
|
||||
let image_count = result.images.as_ref().map_or(0, |imgs| imgs.len());
|
||||
assert_eq!(image_count, 0, "traversal paths should not resolve to images");
|
||||
|
||||
// Cleanup
|
||||
let _ = std::fs::remove_dir_all(&tmp_dir);
|
||||
}
|
||||
Reference in New Issue
Block a user