This commit is contained in:
136
crates/kreuzberg/tests/extraction_timeout_tests.rs
Normal file
136
crates/kreuzberg/tests/extraction_timeout_tests.rs
Normal file
@@ -0,0 +1,136 @@
|
||||
//! Regression tests for #830: extraction_timeout_secs silently ignored in single-file paths.
|
||||
|
||||
use kreuzberg::KreuzbergError;
|
||||
use kreuzberg::core::config::ExtractionConfig;
|
||||
use kreuzberg::core::extractor::{extract_bytes, extract_file};
|
||||
use std::time::Instant;
|
||||
|
||||
/// With `extraction_timeout_secs` set to 0, `extract_bytes` must either report a timeout
/// or finish successfully; any other error fails the test.
///
/// Plain-text input is used so the test needs no external binaries (Tesseract, PDF extractor).
#[cfg(feature = "tokio-runtime")]
#[tokio::test]
async fn test_extract_bytes_zero_timeout_returns_timeout_error() {
    let zero_timeout = ExtractionConfig {
        extraction_timeout_secs: Some(0),
        ..Default::default()
    };
    let payload = b"Hello world, this is a plain-text document.";

    let outcome = extract_bytes(payload, "text/plain", &zero_timeout).await;

    // `Ok` is tolerated on purpose: text/plain extraction is synchronous and may complete
    // before the timeout fires. Only a non-timeout error counts as a failure.
    if let Err(failure) = outcome {
        match failure {
            KreuzbergError::Timeout { limit_ms, .. } => {
                assert_eq!(limit_ms, 0, "limit_ms should reflect the configured 0-second timeout");
            }
            unexpected => panic!("Expected Ok or Timeout, got: {unexpected:?}"),
        }
    }
}
|
||||
|
||||
/// File-path counterpart of the zero-timeout check: `extract_file` with a 0-second timeout
/// must yield either `Ok` or a `Timeout` error whose `limit_ms` is 0.
#[cfg(feature = "tokio-runtime")]
#[tokio::test]
async fn test_extract_file_zero_timeout_returns_timeout_error() {
    let zero_timeout = ExtractionConfig {
        extraction_timeout_secs: Some(0),
        ..Default::default()
    };

    // Small on-disk fixture; the temp directory is removed when `scratch` is dropped.
    let scratch = tempfile::tempdir().expect("tempdir");
    let file_path = scratch.path().join("hello.txt");
    std::fs::write(&file_path, b"Hello world").expect("write");

    match extract_file(&file_path, None, &zero_timeout).await {
        // Synchronous text extraction may beat a 0s timeout.
        Ok(_) => {}
        Err(KreuzbergError::Timeout { limit_ms, .. }) => assert_eq!(limit_ms, 0),
        Err(unexpected) => panic!("Expected Ok or Timeout, got: {unexpected:?}"),
    }
}
|
||||
|
||||
/// Baseline: with the default configuration (no timeout set explicitly), `extract_bytes`
/// on plain text must return `Ok`.
#[cfg(feature = "tokio-runtime")]
#[tokio::test]
async fn test_extract_bytes_no_timeout_succeeds() {
    let payload = b"No timeout configured.";
    let defaults = ExtractionConfig::default();

    let result = extract_bytes(payload, "text/plain", &defaults).await;

    assert!(
        result.is_ok(),
        "extraction without timeout should succeed: {result:?}"
    );
}
|
||||
|
||||
/// Baseline: with the default configuration (no timeout set explicitly), `extract_file`
/// on a small text file must return `Ok`.
#[cfg(feature = "tokio-runtime")]
#[tokio::test]
async fn test_extract_file_no_timeout_succeeds() {
    let defaults = ExtractionConfig::default();

    // Fixture file inside a temp directory that is cleaned up when `scratch` is dropped.
    let scratch = tempfile::tempdir().expect("tempdir");
    let file_path = scratch.path().join("test.txt");
    std::fs::write(&file_path, b"No timeout configured.").expect("write");

    let result = extract_file(&file_path, None, &defaults).await;

    assert!(
        result.is_ok(),
        "extraction without timeout should succeed: {result:?}"
    );
}
|
||||
|
||||
/// A 0-second timeout must not make single-file extraction hang: the whole call has to
/// return in under one second of wall-clock time.
///
/// The extraction result itself is deliberately ignored. This test does not inspect the
/// error (or any elapsed value carried by it); it only bounds how long the call takes,
/// because plain-text extraction may legitimately finish before the timeout fires.
#[cfg(feature = "tokio-runtime")]
#[tokio::test]
async fn test_extract_bytes_timeout_elapsed_is_plausible() {
    let config = ExtractionConfig {
        extraction_timeout_secs: Some(0),
        ..Default::default()
    };
    let content = b"timing check";

    let start = Instant::now();
    let _ = extract_bytes(content, "text/plain", &config).await;
    // `as_millis` yields a `u128`; compare it directly instead of narrowing with `as`.
    let wall_ms = start.elapsed().as_millis();

    // We can't assert the timeout fired, but either way the call must return promptly.
    assert!(
        wall_ms < 1000,
        "single-file extraction with 0s timeout took too long: {wall_ms}ms"
    );
}
|
||||
|
||||
/// When no tokio-runtime is available, setting a timeout should return a Validation error.
|
||||
#[cfg(not(feature = "tokio-runtime"))]
|
||||
#[tokio::test]
|
||||
async fn test_extract_bytes_timeout_without_tokio_returns_validation_error() {
|
||||
let config = ExtractionConfig {
|
||||
extraction_timeout_secs: Some(5),
|
||||
..Default::default()
|
||||
};
|
||||
let content = b"testing";
|
||||
let result = extract_bytes(content, "text/plain", &config).await;
|
||||
match result {
|
||||
Err(KreuzbergError::Validation { message, .. }) => {
|
||||
assert!(message.contains("requires the 'tokio-runtime' feature"));
|
||||
}
|
||||
other => panic!("Expected Validation error, got {other:?}"),
|
||||
}
|
||||
}
|
||||
|
||||
/// When no tokio-runtime is available, setting a timeout should return a Validation error.
|
||||
#[cfg(not(feature = "tokio-runtime"))]
|
||||
#[tokio::test]
|
||||
async fn test_extract_file_timeout_without_tokio_returns_validation_error() {
|
||||
let dir = tempfile::tempdir().expect("tempdir");
|
||||
let file_path = dir.path().join("test.txt");
|
||||
std::fs::write(&file_path, b"testing").unwrap();
|
||||
|
||||
let config = ExtractionConfig {
|
||||
extraction_timeout_secs: Some(5),
|
||||
..Default::default()
|
||||
};
|
||||
let result = extract_file(&file_path, Some("text/plain"), &config).await;
|
||||
match result {
|
||||
Err(KreuzbergError::Validation { message, .. }) => {
|
||||
assert!(message.contains("requires the 'tokio-runtime' feature"));
|
||||
}
|
||||
other => panic!("Expected Validation error, got {other:?}"),
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user