796 lines
20 KiB
Rust
796 lines
20 KiB
Rust
|
|
//! Comprehensive validator plugin system tests.
|
||
|
|
//!
|
||
|
|
//! Tests custom validator registration, execution, validation logic,
|
||
|
|
//! error handling, and cleanup with real file extraction.
|
||
|
|
|
||
|
|
use async_trait::async_trait;
|
||
|
|
use kreuzberg::core::config::ExtractionConfig;
|
||
|
|
use kreuzberg::plugins::registry::get_validator_registry;
|
||
|
|
use kreuzberg::plugins::{Plugin, Validator};
|
||
|
|
use kreuzberg::types::ExtractionResult;
|
||
|
|
use kreuzberg::{KreuzbergError, Result, extract_file_sync};
|
||
|
|
use serial_test::serial;
|
||
|
|
use std::sync::Arc;
|
||
|
|
use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
|
||
|
|
|
||
|
|
struct MinLengthValidator {
|
||
|
|
name: String,
|
||
|
|
min_length: usize,
|
||
|
|
call_count: AtomicUsize,
|
||
|
|
}
|
||
|
|
|
||
|
|
impl Plugin for MinLengthValidator {
|
||
|
|
fn name(&self) -> &str {
|
||
|
|
&self.name
|
||
|
|
}
|
||
|
|
|
||
|
|
fn version(&self) -> String {
|
||
|
|
"1.0.0".to_string()
|
||
|
|
}
|
||
|
|
|
||
|
|
fn initialize(&self) -> Result<()> {
|
||
|
|
Ok(())
|
||
|
|
}
|
||
|
|
|
||
|
|
fn shutdown(&self) -> Result<()> {
|
||
|
|
Ok(())
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
#[async_trait]
|
||
|
|
impl Validator for MinLengthValidator {
|
||
|
|
async fn validate(&self, result: &ExtractionResult, _config: &ExtractionConfig) -> Result<()> {
|
||
|
|
self.call_count.fetch_add(1, Ordering::SeqCst);
|
||
|
|
|
||
|
|
if result.content.len() < self.min_length {
|
||
|
|
Err(KreuzbergError::validation(format!(
|
||
|
|
"Content too short: {} < {} characters",
|
||
|
|
result.content.len(),
|
||
|
|
self.min_length
|
||
|
|
)))
|
||
|
|
} else {
|
||
|
|
Ok(())
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
fn priority(&self) -> i32 {
|
||
|
|
50
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
struct PassingValidator {
|
||
|
|
name: String,
|
||
|
|
initialized: AtomicBool,
|
||
|
|
}
|
||
|
|
|
||
|
|
impl Plugin for PassingValidator {
|
||
|
|
fn name(&self) -> &str {
|
||
|
|
&self.name
|
||
|
|
}
|
||
|
|
|
||
|
|
fn version(&self) -> String {
|
||
|
|
"1.0.0".to_string()
|
||
|
|
}
|
||
|
|
|
||
|
|
fn initialize(&self) -> Result<()> {
|
||
|
|
self.initialized.store(true, Ordering::Release);
|
||
|
|
Ok(())
|
||
|
|
}
|
||
|
|
|
||
|
|
fn shutdown(&self) -> Result<()> {
|
||
|
|
self.initialized.store(false, Ordering::Release);
|
||
|
|
Ok(())
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
#[async_trait]
|
||
|
|
impl Validator for PassingValidator {
|
||
|
|
async fn validate(&self, _result: &ExtractionResult, _config: &ExtractionConfig) -> Result<()> {
|
||
|
|
Ok(())
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
struct MimeTypeValidator {
|
||
|
|
name: String,
|
||
|
|
allowed_mime: String,
|
||
|
|
}
|
||
|
|
|
||
|
|
impl Plugin for MimeTypeValidator {
|
||
|
|
fn name(&self) -> &str {
|
||
|
|
&self.name
|
||
|
|
}
|
||
|
|
|
||
|
|
fn version(&self) -> String {
|
||
|
|
"1.0.0".to_string()
|
||
|
|
}
|
||
|
|
|
||
|
|
fn initialize(&self) -> Result<()> {
|
||
|
|
Ok(())
|
||
|
|
}
|
||
|
|
|
||
|
|
fn shutdown(&self) -> Result<()> {
|
||
|
|
Ok(())
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
#[async_trait]
|
||
|
|
impl Validator for MimeTypeValidator {
|
||
|
|
async fn validate(&self, result: &ExtractionResult, _config: &ExtractionConfig) -> Result<()> {
|
||
|
|
if result.mime_type != self.allowed_mime {
|
||
|
|
Err(KreuzbergError::validation(format!(
|
||
|
|
"MIME type '{}' not allowed, expected '{}'",
|
||
|
|
result.mime_type, self.allowed_mime
|
||
|
|
)))
|
||
|
|
} else {
|
||
|
|
Ok(())
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
fn should_validate(&self, result: &ExtractionResult, _config: &ExtractionConfig) -> bool {
|
||
|
|
!result.mime_type.is_empty()
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
struct MetadataValidator {
|
||
|
|
name: String,
|
||
|
|
required_key: String,
|
||
|
|
}
|
||
|
|
|
||
|
|
impl Plugin for MetadataValidator {
|
||
|
|
fn name(&self) -> &str {
|
||
|
|
&self.name
|
||
|
|
}
|
||
|
|
|
||
|
|
fn version(&self) -> String {
|
||
|
|
"1.0.0".to_string()
|
||
|
|
}
|
||
|
|
|
||
|
|
fn initialize(&self) -> Result<()> {
|
||
|
|
Ok(())
|
||
|
|
}
|
||
|
|
|
||
|
|
fn shutdown(&self) -> Result<()> {
|
||
|
|
Ok(())
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
#[async_trait]
|
||
|
|
impl Validator for MetadataValidator {
|
||
|
|
async fn validate(&self, result: &ExtractionResult, _config: &ExtractionConfig) -> Result<()> {
|
||
|
|
if !result.metadata.additional.contains_key(self.required_key.as_str()) {
|
||
|
|
Err(KreuzbergError::validation(format!(
|
||
|
|
"Required metadata key '{}' missing",
|
||
|
|
self.required_key
|
||
|
|
)))
|
||
|
|
} else {
|
||
|
|
Ok(())
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
fn priority(&self) -> i32 {
|
||
|
|
100
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
struct FailingValidator {
|
||
|
|
name: String,
|
||
|
|
}
|
||
|
|
|
||
|
|
impl Plugin for FailingValidator {
|
||
|
|
fn name(&self) -> &str {
|
||
|
|
&self.name
|
||
|
|
}
|
||
|
|
|
||
|
|
fn version(&self) -> String {
|
||
|
|
"1.0.0".to_string()
|
||
|
|
}
|
||
|
|
|
||
|
|
fn initialize(&self) -> Result<()> {
|
||
|
|
Ok(())
|
||
|
|
}
|
||
|
|
|
||
|
|
fn shutdown(&self) -> Result<()> {
|
||
|
|
Ok(())
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
#[async_trait]
|
||
|
|
impl Validator for FailingValidator {
|
||
|
|
async fn validate(&self, _result: &ExtractionResult, _config: &ExtractionConfig) -> Result<()> {
|
||
|
|
Err(KreuzbergError::validation(
|
||
|
|
"Validation intentionally failed".to_string(),
|
||
|
|
))
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
struct TrackingValidator {
|
||
|
|
name: String,
|
||
|
|
called: AtomicBool,
|
||
|
|
}
|
||
|
|
|
||
|
|
impl Plugin for TrackingValidator {
|
||
|
|
fn name(&self) -> &str {
|
||
|
|
&self.name
|
||
|
|
}
|
||
|
|
|
||
|
|
fn version(&self) -> String {
|
||
|
|
"1.0.0".to_string()
|
||
|
|
}
|
||
|
|
|
||
|
|
fn initialize(&self) -> Result<()> {
|
||
|
|
Ok(())
|
||
|
|
}
|
||
|
|
|
||
|
|
fn shutdown(&self) -> Result<()> {
|
||
|
|
Ok(())
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
#[async_trait]
|
||
|
|
impl Validator for TrackingValidator {
|
||
|
|
async fn validate(&self, _result: &ExtractionResult, _config: &ExtractionConfig) -> Result<()> {
|
||
|
|
self.called.store(true, Ordering::Release);
|
||
|
|
Ok(())
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
#[test]
|
||
|
|
#[serial]
|
||
|
|
fn test_register_custom_validator() {
|
||
|
|
let registry = get_validator_registry();
|
||
|
|
|
||
|
|
{
|
||
|
|
let mut reg = registry.write();
|
||
|
|
reg.shutdown_all().expect("Operation failed");
|
||
|
|
}
|
||
|
|
|
||
|
|
let validator = Arc::new(PassingValidator {
|
||
|
|
name: "test-validator".to_string(),
|
||
|
|
initialized: AtomicBool::new(false),
|
||
|
|
});
|
||
|
|
|
||
|
|
{
|
||
|
|
let mut reg = registry.write();
|
||
|
|
let result = reg.register(Arc::clone(&validator) as Arc<dyn Validator>);
|
||
|
|
assert!(result.is_ok(), "Failed to register validator: {:?}", result.err());
|
||
|
|
}
|
||
|
|
|
||
|
|
assert!(
|
||
|
|
validator.initialized.load(Ordering::Acquire),
|
||
|
|
"Validator was not initialized"
|
||
|
|
);
|
||
|
|
|
||
|
|
let list = {
|
||
|
|
let reg = registry.read();
|
||
|
|
reg.list()
|
||
|
|
};
|
||
|
|
|
||
|
|
assert!(list.contains(&"test-validator".to_string()));
|
||
|
|
|
||
|
|
{
|
||
|
|
let mut reg = registry.write();
|
||
|
|
reg.shutdown_all().expect("Operation failed");
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
#[test]
|
||
|
|
#[serial]
|
||
|
|
fn test_validator_called_during_extraction() {
|
||
|
|
let test_file = "../../test_documents/text/fake_text.txt";
|
||
|
|
let registry = get_validator_registry();
|
||
|
|
|
||
|
|
{
|
||
|
|
let mut reg = registry.write();
|
||
|
|
reg.shutdown_all().expect("Operation failed");
|
||
|
|
}
|
||
|
|
|
||
|
|
let validator = Arc::new(MinLengthValidator {
|
||
|
|
name: "call-test-validator".to_string(),
|
||
|
|
min_length: 1,
|
||
|
|
call_count: AtomicUsize::new(0),
|
||
|
|
});
|
||
|
|
|
||
|
|
{
|
||
|
|
let mut reg = registry.write();
|
||
|
|
reg.register(Arc::clone(&validator) as Arc<dyn Validator>)
|
||
|
|
.expect("Operation failed");
|
||
|
|
}
|
||
|
|
|
||
|
|
let config = ExtractionConfig::default();
|
||
|
|
let result = extract_file_sync(test_file, None, &config);
|
||
|
|
|
||
|
|
assert!(result.is_ok(), "Extraction failed: {:?}", result.err());
|
||
|
|
|
||
|
|
assert_eq!(
|
||
|
|
validator.call_count.load(Ordering::SeqCst),
|
||
|
|
1,
|
||
|
|
"Validator was not called exactly once"
|
||
|
|
);
|
||
|
|
|
||
|
|
{
|
||
|
|
let mut reg = registry.write();
|
||
|
|
reg.shutdown_all().expect("Operation failed");
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
#[test]
|
||
|
|
#[serial]
|
||
|
|
fn test_validator_can_reject_invalid_input() {
|
||
|
|
let test_file = "../../test_documents/text/fake_text.txt";
|
||
|
|
let registry = get_validator_registry();
|
||
|
|
|
||
|
|
{
|
||
|
|
let mut reg = registry.write();
|
||
|
|
reg.shutdown_all().expect("Operation failed");
|
||
|
|
}
|
||
|
|
|
||
|
|
let validator = Arc::new(MinLengthValidator {
|
||
|
|
name: "reject-validator".to_string(),
|
||
|
|
min_length: 1_000_000,
|
||
|
|
call_count: AtomicUsize::new(0),
|
||
|
|
});
|
||
|
|
|
||
|
|
{
|
||
|
|
let mut reg = registry.write();
|
||
|
|
reg.register(validator as Arc<dyn Validator>).expect("Operation failed");
|
||
|
|
}
|
||
|
|
|
||
|
|
let config = ExtractionConfig::default();
|
||
|
|
let result = extract_file_sync(test_file, None, &config);
|
||
|
|
|
||
|
|
assert!(result.is_err(), "Expected validation to fail");
|
||
|
|
|
||
|
|
match result.expect_err("Operation failed") {
|
||
|
|
KreuzbergError::Validation { message, .. } => {
|
||
|
|
assert!(message.contains("Content too short"));
|
||
|
|
}
|
||
|
|
other => panic!("Expected Validation error, got: {:?}", other),
|
||
|
|
}
|
||
|
|
|
||
|
|
{
|
||
|
|
let mut reg = registry.write();
|
||
|
|
reg.shutdown_all().expect("Operation failed");
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
#[test]
|
||
|
|
#[serial]
|
||
|
|
fn test_validator_can_pass_valid_input() {
|
||
|
|
let test_file = "../../test_documents/text/fake_text.txt";
|
||
|
|
let registry = get_validator_registry();
|
||
|
|
|
||
|
|
{
|
||
|
|
let mut reg = registry.write();
|
||
|
|
reg.shutdown_all().expect("Operation failed");
|
||
|
|
}
|
||
|
|
|
||
|
|
let validator = Arc::new(MinLengthValidator {
|
||
|
|
name: "pass-validator".to_string(),
|
||
|
|
min_length: 10,
|
||
|
|
call_count: AtomicUsize::new(0),
|
||
|
|
});
|
||
|
|
|
||
|
|
{
|
||
|
|
let mut reg = registry.write();
|
||
|
|
reg.register(validator as Arc<dyn Validator>).expect("Operation failed");
|
||
|
|
}
|
||
|
|
|
||
|
|
let config = ExtractionConfig::default();
|
||
|
|
let result = extract_file_sync(test_file, None, &config);
|
||
|
|
|
||
|
|
assert!(result.is_ok(), "Validation should have passed: {:?}", result.err());
|
||
|
|
|
||
|
|
{
|
||
|
|
let mut reg = registry.write();
|
||
|
|
reg.shutdown_all().expect("Operation failed");
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
#[test]
|
||
|
|
#[serial]
|
||
|
|
fn test_validator_receives_correct_parameters() {
|
||
|
|
let test_file = "../../test_documents/text/fake_text.txt";
|
||
|
|
let registry = get_validator_registry();
|
||
|
|
|
||
|
|
{
|
||
|
|
let mut reg = registry.write();
|
||
|
|
reg.shutdown_all().expect("Operation failed");
|
||
|
|
}
|
||
|
|
|
||
|
|
let validator = Arc::new(MimeTypeValidator {
|
||
|
|
name: "mime-validator".to_string(),
|
||
|
|
allowed_mime: "text/plain".to_string(),
|
||
|
|
});
|
||
|
|
|
||
|
|
{
|
||
|
|
let mut reg = registry.write();
|
||
|
|
reg.register(validator as Arc<dyn Validator>).expect("Operation failed");
|
||
|
|
}
|
||
|
|
|
||
|
|
let config = ExtractionConfig::default();
|
||
|
|
let result = extract_file_sync(test_file, None, &config);
|
||
|
|
|
||
|
|
assert!(result.is_ok(), "Validation failed: {:?}", result.err());
|
||
|
|
|
||
|
|
let extraction_result = result.expect("Operation failed");
|
||
|
|
assert_eq!(extraction_result.mime_type, "text/plain");
|
||
|
|
|
||
|
|
{
|
||
|
|
let mut reg = registry.write();
|
||
|
|
reg.shutdown_all().expect("Operation failed");
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
#[test]
|
||
|
|
#[serial]
|
||
|
|
fn test_validator_rejects_wrong_mime_type() {
|
||
|
|
let test_file = "../../test_documents/text/fake_text.txt";
|
||
|
|
let registry = get_validator_registry();
|
||
|
|
|
||
|
|
{
|
||
|
|
let mut reg = registry.write();
|
||
|
|
reg.shutdown_all().expect("Operation failed");
|
||
|
|
}
|
||
|
|
|
||
|
|
let validator = Arc::new(MimeTypeValidator {
|
||
|
|
name: "strict-mime-validator".to_string(),
|
||
|
|
allowed_mime: "application/pdf".to_string(),
|
||
|
|
});
|
||
|
|
|
||
|
|
{
|
||
|
|
let mut reg = registry.write();
|
||
|
|
reg.register(validator as Arc<dyn Validator>).expect("Operation failed");
|
||
|
|
}
|
||
|
|
|
||
|
|
let config = ExtractionConfig::default();
|
||
|
|
let result = extract_file_sync(test_file, None, &config);
|
||
|
|
|
||
|
|
assert!(result.is_err(), "Expected MIME type validation to fail");
|
||
|
|
|
||
|
|
match result.expect_err("Operation failed") {
|
||
|
|
KreuzbergError::Validation { message, .. } => {
|
||
|
|
assert!(message.contains("MIME type"));
|
||
|
|
assert!(message.contains("not allowed"));
|
||
|
|
}
|
||
|
|
other => panic!("Expected Validation error, got: {:?}", other),
|
||
|
|
}
|
||
|
|
|
||
|
|
{
|
||
|
|
let mut reg = registry.write();
|
||
|
|
reg.shutdown_all().expect("Operation failed");
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
#[test]
|
||
|
|
#[serial]
|
||
|
|
fn test_unregister_validator() {
|
||
|
|
let registry = get_validator_registry();
|
||
|
|
|
||
|
|
{
|
||
|
|
let mut reg = registry.write();
|
||
|
|
reg.shutdown_all().expect("Operation failed");
|
||
|
|
}
|
||
|
|
|
||
|
|
let validator = Arc::new(FailingValidator {
|
||
|
|
name: "unregister-test".to_string(),
|
||
|
|
});
|
||
|
|
|
||
|
|
{
|
||
|
|
let mut reg = registry.write();
|
||
|
|
reg.register(validator as Arc<dyn Validator>).expect("Operation failed");
|
||
|
|
}
|
||
|
|
|
||
|
|
{
|
||
|
|
let mut reg = registry.write();
|
||
|
|
reg.remove("unregister-test").expect("Operation failed");
|
||
|
|
}
|
||
|
|
|
||
|
|
let list = {
|
||
|
|
let reg = registry.read();
|
||
|
|
reg.list()
|
||
|
|
};
|
||
|
|
|
||
|
|
assert!(!list.contains(&"unregister-test".to_string()));
|
||
|
|
|
||
|
|
let test_file = "../../test_documents/text/fake_text.txt";
|
||
|
|
let config = ExtractionConfig::default();
|
||
|
|
let result = extract_file_sync(test_file, None, &config);
|
||
|
|
|
||
|
|
assert!(
|
||
|
|
result.is_ok(),
|
||
|
|
"Extraction should succeed after unregistering validator"
|
||
|
|
);
|
||
|
|
|
||
|
|
{
|
||
|
|
let mut reg = registry.write();
|
||
|
|
reg.shutdown_all().expect("Operation failed");
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
#[test]
|
||
|
|
#[serial]
|
||
|
|
fn test_clear_all_validators() {
|
||
|
|
let registry = get_validator_registry();
|
||
|
|
|
||
|
|
{
|
||
|
|
let mut reg = registry.write();
|
||
|
|
reg.shutdown_all().expect("Operation failed");
|
||
|
|
}
|
||
|
|
|
||
|
|
let validator1 = Arc::new(FailingValidator {
|
||
|
|
name: "clear-test-1".to_string(),
|
||
|
|
});
|
||
|
|
|
||
|
|
let validator2 = Arc::new(FailingValidator {
|
||
|
|
name: "clear-test-2".to_string(),
|
||
|
|
});
|
||
|
|
|
||
|
|
{
|
||
|
|
let mut reg = registry.write();
|
||
|
|
reg.register(validator1 as Arc<dyn Validator>)
|
||
|
|
.expect("Operation failed");
|
||
|
|
reg.register(validator2 as Arc<dyn Validator>)
|
||
|
|
.expect("Operation failed");
|
||
|
|
}
|
||
|
|
|
||
|
|
{
|
||
|
|
let mut reg = registry.write();
|
||
|
|
reg.shutdown_all().expect("Operation failed");
|
||
|
|
}
|
||
|
|
|
||
|
|
let list = {
|
||
|
|
let reg = registry.read();
|
||
|
|
reg.list()
|
||
|
|
};
|
||
|
|
|
||
|
|
assert!(list.is_empty(), "Registry was not cleared");
|
||
|
|
|
||
|
|
let test_file = "../../test_documents/text/fake_text.txt";
|
||
|
|
let config = ExtractionConfig::default();
|
||
|
|
let result = extract_file_sync(test_file, None, &config);
|
||
|
|
|
||
|
|
assert!(result.is_ok(), "Extraction should succeed after clearing validators");
|
||
|
|
}
|
||
|
|
|
||
|
|
#[test]
|
||
|
|
#[serial]
|
||
|
|
fn test_validator_invalid_name() {
|
||
|
|
let registry = get_validator_registry();
|
||
|
|
|
||
|
|
{
|
||
|
|
let mut reg = registry.write();
|
||
|
|
reg.shutdown_all().expect("Operation failed");
|
||
|
|
}
|
||
|
|
|
||
|
|
let validator = Arc::new(PassingValidator {
|
||
|
|
name: "invalid name".to_string(),
|
||
|
|
initialized: AtomicBool::new(false),
|
||
|
|
});
|
||
|
|
|
||
|
|
{
|
||
|
|
let mut reg = registry.write();
|
||
|
|
let result = reg.register(validator);
|
||
|
|
|
||
|
|
assert!(result.is_err());
|
||
|
|
assert!(matches!(
|
||
|
|
result.expect_err("Operation failed"),
|
||
|
|
KreuzbergError::Validation { .. }
|
||
|
|
));
|
||
|
|
}
|
||
|
|
|
||
|
|
{
|
||
|
|
let mut reg = registry.write();
|
||
|
|
reg.shutdown_all().expect("Operation failed");
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
#[test]
|
||
|
|
#[serial]
|
||
|
|
fn test_validator_initialization_lifecycle() {
|
||
|
|
let registry = get_validator_registry();
|
||
|
|
|
||
|
|
{
|
||
|
|
let mut reg = registry.write();
|
||
|
|
reg.shutdown_all().expect("Operation failed");
|
||
|
|
}
|
||
|
|
|
||
|
|
let validator = Arc::new(PassingValidator {
|
||
|
|
name: "lifecycle-test".to_string(),
|
||
|
|
initialized: AtomicBool::new(false),
|
||
|
|
});
|
||
|
|
|
||
|
|
assert!(
|
||
|
|
!validator.initialized.load(Ordering::Acquire),
|
||
|
|
"Validator should not be initialized yet"
|
||
|
|
);
|
||
|
|
|
||
|
|
{
|
||
|
|
let mut reg = registry.write();
|
||
|
|
reg.register(Arc::clone(&validator) as Arc<dyn Validator>)
|
||
|
|
.expect("Operation failed");
|
||
|
|
}
|
||
|
|
|
||
|
|
assert!(
|
||
|
|
validator.initialized.load(Ordering::Acquire),
|
||
|
|
"Validator should be initialized after registration"
|
||
|
|
);
|
||
|
|
|
||
|
|
{
|
||
|
|
let mut reg = registry.write();
|
||
|
|
reg.shutdown_all().expect("Operation failed");
|
||
|
|
}
|
||
|
|
|
||
|
|
assert!(
|
||
|
|
!validator.initialized.load(Ordering::Acquire),
|
||
|
|
"Validator should be shutdown"
|
||
|
|
);
|
||
|
|
}
|
||
|
|
|
||
|
|
#[test]
|
||
|
|
#[serial]
|
||
|
|
fn test_multiple_validators_execution() {
|
||
|
|
let test_file = "../../test_documents/text/fake_text.txt";
|
||
|
|
let registry = get_validator_registry();
|
||
|
|
|
||
|
|
{
|
||
|
|
let mut reg = registry.write();
|
||
|
|
reg.shutdown_all().expect("Operation failed");
|
||
|
|
}
|
||
|
|
|
||
|
|
let validator1 = Arc::new(MinLengthValidator {
|
||
|
|
name: "multi-validator-1".to_string(),
|
||
|
|
min_length: 10,
|
||
|
|
call_count: AtomicUsize::new(0),
|
||
|
|
});
|
||
|
|
|
||
|
|
let validator2 = Arc::new(MimeTypeValidator {
|
||
|
|
name: "multi-validator-2".to_string(),
|
||
|
|
allowed_mime: "text/plain".to_string(),
|
||
|
|
});
|
||
|
|
|
||
|
|
{
|
||
|
|
let mut reg = registry.write();
|
||
|
|
reg.register(Arc::clone(&validator1) as Arc<dyn Validator>)
|
||
|
|
.expect("Operation failed");
|
||
|
|
reg.register(validator2 as Arc<dyn Validator>)
|
||
|
|
.expect("Operation failed");
|
||
|
|
}
|
||
|
|
|
||
|
|
let config = ExtractionConfig::default();
|
||
|
|
let result = extract_file_sync(test_file, None, &config);
|
||
|
|
|
||
|
|
assert!(result.is_ok(), "Both validators should pass");
|
||
|
|
assert_eq!(validator1.call_count.load(Ordering::SeqCst), 1);
|
||
|
|
|
||
|
|
{
|
||
|
|
let mut reg = registry.write();
|
||
|
|
reg.shutdown_all().expect("Operation failed");
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
#[test]
|
||
|
|
#[serial]
|
||
|
|
fn test_validator_priority_execution_order() {
|
||
|
|
let test_file = "../../test_documents/text/fake_text.txt";
|
||
|
|
let registry = get_validator_registry();
|
||
|
|
|
||
|
|
{
|
||
|
|
let mut reg = registry.write();
|
||
|
|
reg.shutdown_all().expect("Operation failed");
|
||
|
|
}
|
||
|
|
|
||
|
|
let high_priority = Arc::new(MetadataValidator {
|
||
|
|
name: "high-priority-validator".to_string(),
|
||
|
|
required_key: "nonexistent_key".to_string(),
|
||
|
|
});
|
||
|
|
|
||
|
|
let low_priority = Arc::new(PassingValidator {
|
||
|
|
name: "low-priority-validator".to_string(),
|
||
|
|
initialized: AtomicBool::new(false),
|
||
|
|
});
|
||
|
|
|
||
|
|
{
|
||
|
|
let mut reg = registry.write();
|
||
|
|
reg.register(high_priority as Arc<dyn Validator>)
|
||
|
|
.expect("Operation failed");
|
||
|
|
reg.register(low_priority as Arc<dyn Validator>)
|
||
|
|
.expect("Operation failed");
|
||
|
|
}
|
||
|
|
|
||
|
|
let config = ExtractionConfig::default();
|
||
|
|
let result = extract_file_sync(test_file, None, &config);
|
||
|
|
|
||
|
|
assert!(result.is_err(), "Expected high-priority validator to fail");
|
||
|
|
|
||
|
|
match result.expect_err("Operation failed") {
|
||
|
|
KreuzbergError::Validation { message, .. } => {
|
||
|
|
assert!(message.contains("Required metadata key"));
|
||
|
|
}
|
||
|
|
other => panic!("Expected Validation error, got: {:?}", other),
|
||
|
|
}
|
||
|
|
|
||
|
|
{
|
||
|
|
let mut reg = registry.write();
|
||
|
|
reg.shutdown_all().expect("Operation failed");
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
#[test]
|
||
|
|
#[serial]
|
||
|
|
fn test_validator_always_fails() {
|
||
|
|
let test_file = "../../test_documents/text/fake_text.txt";
|
||
|
|
let registry = get_validator_registry();
|
||
|
|
|
||
|
|
{
|
||
|
|
let mut reg = registry.write();
|
||
|
|
reg.shutdown_all().expect("Operation failed");
|
||
|
|
}
|
||
|
|
|
||
|
|
let validator = Arc::new(FailingValidator {
|
||
|
|
name: "always-fails".to_string(),
|
||
|
|
});
|
||
|
|
|
||
|
|
{
|
||
|
|
let mut reg = registry.write();
|
||
|
|
reg.register(validator as Arc<dyn Validator>).expect("Operation failed");
|
||
|
|
}
|
||
|
|
|
||
|
|
let config = ExtractionConfig::default();
|
||
|
|
let result = extract_file_sync(test_file, None, &config);
|
||
|
|
|
||
|
|
assert!(result.is_err(), "Validator should always fail");
|
||
|
|
|
||
|
|
match result.expect_err("Operation failed") {
|
||
|
|
KreuzbergError::Validation { message, .. } => {
|
||
|
|
assert!(message.contains("intentionally failed"));
|
||
|
|
}
|
||
|
|
other => panic!("Expected Validation error, got: {:?}", other),
|
||
|
|
}
|
||
|
|
|
||
|
|
{
|
||
|
|
let mut reg = registry.write();
|
||
|
|
reg.shutdown_all().expect("Operation failed");
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
#[test]
|
||
|
|
#[serial]
|
||
|
|
fn test_validator_registration_order_preserved_for_same_priority() {
|
||
|
|
let test_file = "../../test_documents/text/fake_text.txt";
|
||
|
|
let registry = get_validator_registry();
|
||
|
|
|
||
|
|
{
|
||
|
|
let mut reg = registry.write();
|
||
|
|
reg.shutdown_all().expect("Operation failed");
|
||
|
|
}
|
||
|
|
|
||
|
|
let tracker = Arc::new(TrackingValidator {
|
||
|
|
name: "order-second".to_string(),
|
||
|
|
called: AtomicBool::new(false),
|
||
|
|
});
|
||
|
|
|
||
|
|
{
|
||
|
|
let mut reg = registry.write();
|
||
|
|
reg.register(Arc::new(FailingValidator {
|
||
|
|
name: "order-first".to_string(),
|
||
|
|
}) as Arc<dyn Validator>)
|
||
|
|
.expect("Operation failed");
|
||
|
|
reg.register(tracker.clone() as Arc<dyn Validator>)
|
||
|
|
.expect("Operation failed");
|
||
|
|
}
|
||
|
|
|
||
|
|
let config = ExtractionConfig::default();
|
||
|
|
let result = extract_file_sync(test_file, None, &config);
|
||
|
|
|
||
|
|
assert!(result.is_err(), "Expected first validator to fail");
|
||
|
|
assert!(
|
||
|
|
!tracker.called.load(Ordering::Acquire),
|
||
|
|
"Second validator should not run once the first validator fails"
|
||
|
|
);
|
||
|
|
|
||
|
|
{
|
||
|
|
let mut reg = registry.write();
|
||
|
|
reg.shutdown_all().expect("Operation failed");
|
||
|
|
}
|
||
|
|
}
|