Files
fil/crates/kreuzberg/tests/path_resolution.rs
Henrik Jess Nielsen b4c07d3693
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s
Nomad changes
2026-06-01 23:40:55 +02:00

134 lines
4.7 KiB
Rust

//! Integration tests for image path resolution in markup extractors.
use kreuzberg::ExtractionConfig;
use kreuzberg::ImageExtractionConfig;
use std::path::PathBuf;
fn fixtures_dir() -> PathBuf {
PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("tests/path_resolution/fixtures")
}
fn config_with_images() -> ExtractionConfig {
ExtractionConfig {
images: Some(ImageExtractionConfig {
extract_images: true,
..Default::default()
}),
..Default::default()
}
}
#[tokio::test]
async fn test_markdown_resolves_images() {
let path = fixtures_dir().join("markdown_with_images.md");
let config = config_with_images();
let result = kreuzberg::extract_file(&path, None, &config).await.unwrap();
let images = result.images.as_ref().expect("should have images");
// Should resolve the 2 local images but NOT the https:// URL
assert_eq!(images.len(), 2, "expected 2 resolved images, got {}", images.len());
// Verify image data is non-empty
for img in images {
assert!(!img.data.is_empty(), "image data should not be empty");
assert_eq!(img.format, "png");
}
}
#[tokio::test]
async fn test_markdown_bytes_no_resolution() {
let path = fixtures_dir().join("markdown_with_images.md");
let content = std::fs::read(&path).unwrap();
let config = config_with_images();
let result = kreuzberg::extract_bytes(&content, "text/markdown", &config)
.await
.unwrap();
// extract_bytes has no file path context, so no image resolution should happen
let image_count = result.images.as_ref().map_or(0, |imgs| imgs.len());
assert_eq!(image_count, 0, "extract_bytes should not resolve local images");
}
#[cfg(feature = "office")]
#[tokio::test]
async fn test_latex_resolves_images() {
let path = fixtures_dir().join("latex_with_images.tex");
let config = config_with_images();
let result = kreuzberg::extract_file(&path, None, &config).await.unwrap();
let images = result.images.as_ref().expect("should have images");
assert_eq!(images.len(), 2, "expected 2 resolved images, got {}", images.len());
}
#[cfg(feature = "office")]
#[tokio::test]
async fn test_rst_resolves_images() {
let path = fixtures_dir().join("rst_with_images.rst");
let config = config_with_images();
let result = kreuzberg::extract_file(&path, Some("text/x-rst"), &config)
.await
.unwrap();
let images = result.images.as_ref().expect("should have images");
assert_eq!(images.len(), 2, "expected 2 resolved images, got {}", images.len());
}
#[cfg(feature = "office")]
#[tokio::test]
async fn test_orgmode_resolves_images() {
let path = fixtures_dir().join("orgmode_with_images.org");
let config = config_with_images();
let result = kreuzberg::extract_file(&path, Some("text/x-org"), &config)
.await
.unwrap();
let images = result.images.as_ref().expect("should have images");
assert_eq!(images.len(), 2, "expected 2 resolved images, got {}", images.len());
}
#[cfg(feature = "office")]
#[tokio::test]
async fn test_typst_resolves_images() {
let path = fixtures_dir().join("typst_with_images.typ");
let config = config_with_images();
let result = kreuzberg::extract_file(&path, Some("application/x-typst"), &config)
.await
.unwrap();
let images = result.images.as_ref().expect("should have images");
assert_eq!(images.len(), 2, "expected 2 resolved images, got {}", images.len());
}
#[tokio::test]
async fn test_djot_resolves_images() {
let path = fixtures_dir().join("djot_with_images.djot");
let config = config_with_images();
let result = kreuzberg::extract_file(&path, Some("text/djot"), &config)
.await
.unwrap();
let images = result.images.as_ref().expect("should have images");
assert_eq!(images.len(), 2, "expected 2 resolved images, got {}", images.len());
}
#[tokio::test]
async fn test_traversal_blocked() {
// Create a temp markdown file that references a traversal path
let tmp_dir = std::env::temp_dir().join("kreuzberg_path_test");
std::fs::create_dir_all(&tmp_dir).unwrap();
let md_path = tmp_dir.join("traversal.md");
std::fs::write(&md_path, "![evil](../../../etc/passwd)\n![ok](images/test_image.png)\n").unwrap();
let config = config_with_images();
let result = kreuzberg::extract_file(&md_path, Some("text/markdown"), &config)
.await
.unwrap();
// Neither should resolve: traversal is blocked, and images/ doesn't exist in tmp
let image_count = result.images.as_ref().map_or(0, |imgs| imgs.len());
assert_eq!(image_count, 0, "traversal paths should not resolve to images");
// Cleanup
let _ = std::fs::remove_dir_all(&tmp_dir);
}