137 lines
5.1 KiB
Rust
137 lines
5.1 KiB
Rust
|
|
//! Regression tests for #830: extraction_timeout_secs silently ignored in single-file paths.
|
||
|
|
|
||
|
|
use kreuzberg::KreuzbergError;
|
||
|
|
use kreuzberg::core::config::ExtractionConfig;
|
||
|
|
use kreuzberg::core::extractor::{extract_bytes, extract_file};
|
||
|
|
use std::time::Instant;
|
||
|
|
|
||
|
|
/// Zero-second timeout on the in-memory path (`extract_bytes`).
///
/// Plain-text input keeps the test free of external binaries (Tesseract, PDF extractor).
/// Two outcomes are tolerated: the call finishes with `Ok` before the timeout fires, or it
/// fails with `KreuzbergError::Timeout` carrying `limit_ms == 0`. Any other error fails the test.
#[cfg(feature = "tokio-runtime")]
#[tokio::test]
async fn test_extract_bytes_zero_timeout_returns_timeout_error() {
    let payload = b"Hello world, this is a plain-text document.";
    let zero_timeout = ExtractionConfig {
        extraction_timeout_secs: Some(0),
        ..Default::default()
    };

    // A successful extraction is acceptable: plain-text handling may complete before the
    // timeout gets a chance to fire. Only failures need further inspection.
    let Err(e) = extract_bytes(payload, "text/plain", &zero_timeout).await else {
        return;
    };

    match e {
        KreuzbergError::Timeout { limit_ms, .. } => {
            assert_eq!(limit_ms, 0, "limit_ms should reflect the configured 0-second timeout");
        }
        // Anything that is not a timeout is an unexpected error type.
        e => panic!("Expected Ok or Timeout, got: {e:?}"),
    }
}
|
||
|
|
|
||
|
|
/// Zero-second timeout on the file path (`extract_file`).
///
/// A small text file is written to a temporary directory and extracted with no explicit
/// MIME type. Either `Ok` or a `KreuzbergError::Timeout` with `limit_ms == 0` passes; any
/// other error fails the test.
#[cfg(feature = "tokio-runtime")]
#[tokio::test]
async fn test_extract_file_zero_timeout_returns_timeout_error() {
    // Fixture: one small text file inside a scratch directory.
    let scratch = tempfile::tempdir().expect("tempdir");
    let target = scratch.path().join("hello.txt");
    std::fs::write(&target, b"Hello world").expect("write");

    let zero_timeout = ExtractionConfig {
        extraction_timeout_secs: Some(0),
        ..Default::default()
    };

    // Keep only the failure, if any; a successful result needs no further checks.
    let failure = extract_file(&target, None, &zero_timeout).await.err();

    match failure {
        // Synchronous text extraction may beat a 0s timeout.
        None => {}
        Some(KreuzbergError::Timeout { limit_ms, .. }) => {
            assert_eq!(limit_ms, 0);
        }
        Some(e) => panic!("Expected Ok or Timeout, got: {e:?}"),
    }
}
|
||
|
|
|
||
|
|
/// Baseline for `extract_bytes`: with the default configuration (no timeout set explicitly),
/// plain-text extraction is expected to return `Ok`.
#[cfg(feature = "tokio-runtime")]
#[tokio::test]
async fn test_extract_bytes_no_timeout_succeeds() {
    let payload = b"No timeout configured.";
    let defaults = ExtractionConfig::default();

    let result = extract_bytes(payload, "text/plain", &defaults).await;

    // The full result is included in the message so a failure shows the actual error.
    assert!(result.is_ok(), "extraction without timeout should succeed: {result:?}");
}
|
||
|
|
|
||
|
|
/// Baseline for `extract_file`: with the default configuration (no timeout set explicitly),
/// extracting a small text file from a temporary directory is expected to return `Ok`.
#[cfg(feature = "tokio-runtime")]
#[tokio::test]
async fn test_extract_file_no_timeout_succeeds() {
    // Fixture: one small text file inside a scratch directory.
    let scratch = tempfile::tempdir().expect("tempdir");
    let target = scratch.path().join("test.txt");
    std::fs::write(&target, b"No timeout configured.").expect("write");

    let defaults = ExtractionConfig::default();

    // No MIME type is supplied for this call.
    let result = extract_file(&target, None, &defaults).await;

    // The full result is included in the message so a failure shows the actual error.
    assert!(result.is_ok(), "extraction without timeout should succeed: {result:?}");
}
|
||
|
|
|
||
|
|
/// A zero-second timeout must not make single-file extraction take a long time.
///
/// The outcome of the call is deliberately discarded: this test does not inspect the error
/// (or any elapsed time it may carry). It only measures wall-clock time around the call and
/// requires it to stay under one second.
#[cfg(feature = "tokio-runtime")]
#[tokio::test]
async fn test_extract_bytes_timeout_elapsed_is_plausible() {
    let config = ExtractionConfig {
        extraction_timeout_secs: Some(0),
        ..Default::default()
    };
    let content = b"timing check";

    let start = Instant::now();
    // Ok and Err are both fine here; only the duration of the call matters.
    let _ = extract_bytes(content, "text/plain", &config).await;
    // `as_millis` yields a u128; compare it directly instead of narrowing with `as`.
    let wall_ms = start.elapsed().as_millis();

    // We can't assert the timeout fired, but either way the call should return in <1 second.
    assert!(
        wall_ms < 1000,
        "single-file extraction with 0s timeout took too long: {wall_ms}ms"
    );
}
|
||
|
|
|
||
|
|
/// When no tokio-runtime is available, setting a timeout should return a Validation error.
|
||
|
|
#[cfg(not(feature = "tokio-runtime"))]
|
||
|
|
#[tokio::test]
|
||
|
|
async fn test_extract_bytes_timeout_without_tokio_returns_validation_error() {
|
||
|
|
let config = ExtractionConfig {
|
||
|
|
extraction_timeout_secs: Some(5),
|
||
|
|
..Default::default()
|
||
|
|
};
|
||
|
|
let content = b"testing";
|
||
|
|
let result = extract_bytes(content, "text/plain", &config).await;
|
||
|
|
match result {
|
||
|
|
Err(KreuzbergError::Validation { message, .. }) => {
|
||
|
|
assert!(message.contains("requires the 'tokio-runtime' feature"));
|
||
|
|
}
|
||
|
|
other => panic!("Expected Validation error, got {other:?}"),
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
/// When no tokio-runtime is available, setting a timeout should return a Validation error.
|
||
|
|
#[cfg(not(feature = "tokio-runtime"))]
|
||
|
|
#[tokio::test]
|
||
|
|
async fn test_extract_file_timeout_without_tokio_returns_validation_error() {
|
||
|
|
let dir = tempfile::tempdir().expect("tempdir");
|
||
|
|
let file_path = dir.path().join("test.txt");
|
||
|
|
std::fs::write(&file_path, b"testing").unwrap();
|
||
|
|
|
||
|
|
let config = ExtractionConfig {
|
||
|
|
extraction_timeout_secs: Some(5),
|
||
|
|
..Default::default()
|
||
|
|
};
|
||
|
|
let result = extract_file(&file_path, Some("text/plain"), &config).await;
|
||
|
|
match result {
|
||
|
|
Err(KreuzbergError::Validation { message, .. }) => {
|
||
|
|
assert!(message.contains("requires the 'tokio-runtime' feature"));
|
||
|
|
}
|
||
|
|
other => panic!("Expected Validation error, got {other:?}"),
|
||
|
|
}
|
||
|
|
}
|