Files
fil/crates/kreuzberg/tests/registry_integration_tests.rs
Henrik Jess Nielsen b4c07d3693
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s
Nomad changes
2026-06-01 23:40:55 +02:00

590 lines
17 KiB
Rust

//! Plugin registry integration tests.
//!
//! Tests the core registry APIs for all plugin types:
//! - Validator registration/unregistration
//! - Extractor registration/unregistration
//! - Registry clearing and listing
//! - Error handling and edge cases
use async_trait::async_trait;
use kreuzberg::core::config::ExtractionConfig;
use kreuzberg::plugins::registry::{DocumentExtractorRegistry, ValidatorRegistry};
use kreuzberg::plugins::{DocumentExtractor, Plugin, Validator};
use kreuzberg::types::ExtractionResult;
use kreuzberg::types::internal::{ElementKind, InternalDocument, InternalElement};
use kreuzberg::{KreuzbergError, Result};
use std::path::Path;
use std::sync::Arc;
/// Configurable validator test double.
///
/// The name is whatever the test supplies, and `should_fail` decides whether
/// the validation call reports an error.
struct MockValidator {
    /// Identifier handed back when the plugin is asked for its name.
    name: String,
    /// When `true`, validation returns an error instead of `Ok(())`.
    should_fail: bool,
}
// Lifecycle hooks for `MockValidator`: the configured name, a fixed version
// string, and start/stop hooks that always succeed.
impl Plugin for MockValidator {
    /// Returns the name supplied at construction.
    fn name(&self) -> &str {
        self.name.as_str()
    }

    /// Always reports `1.0.0`.
    fn version(&self) -> String {
        String::from("1.0.0")
    }

    /// Initialization has nothing to do and always succeeds.
    fn initialize(&self) -> Result<()> {
        Ok(())
    }

    /// Shutdown has nothing to do and always succeeds.
    fn shutdown(&self) -> Result<()> {
        Ok(())
    }
}
// Validation behaviour for `MockValidator`: the outcome depends only on the
// `should_fail` flag; both arguments are ignored.
#[async_trait]
impl Validator for MockValidator {
    /// Succeeds unless `should_fail` is set, in which case the error built by
    /// `KreuzbergError::validation` is returned.
    async fn validate(&self, _result: &ExtractionResult, _config: &ExtractionConfig) -> Result<()> {
        if !self.should_fail {
            return Ok(());
        }
        Err(KreuzbergError::validation("Mock validation failed"))
    }

    /// Fixed priority of 50.
    fn priority(&self) -> i32 {
        50
    }
}
/// Validator test double whose initialization hook always reports an error.
struct FailingInitValidator {
    /// Identifier handed back when the plugin is asked for its name; it is
    /// also copied into the initialization error.
    name: String,
}
// Lifecycle hooks for `FailingInitValidator`: everything behaves normally
// except `initialize`, which always returns a plugin error.
impl Plugin for FailingInitValidator {
    /// Returns the name supplied at construction.
    fn name(&self) -> &str {
        self.name.as_str()
    }

    /// Always reports `1.0.0`.
    fn version(&self) -> String {
        String::from("1.0.0")
    }

    /// Always fails with `KreuzbergError::Plugin`, tagged with this plugin's name.
    fn initialize(&self) -> Result<()> {
        let plugin_name = self.name.clone();
        let message = String::from("Initialization failed");
        Err(KreuzbergError::Plugin { message, plugin_name })
    }

    /// Shutdown has nothing to do and always succeeds.
    fn shutdown(&self) -> Result<()> {
        Ok(())
    }
}
// Validation behaviour for `FailingInitValidator`. Only `validate` is
// implemented here; nothing else from the trait is overridden.
#[async_trait]
impl Validator for FailingInitValidator {
    /// Accepts every result unconditionally; both arguments are ignored.
    async fn validate(&self, _result: &ExtractionResult, _config: &ExtractionConfig) -> Result<()> {
        Ok(())
    }
}
/// Configurable document-extractor test double.
struct MockExtractor {
    /// Identifier handed back when the plugin is asked for its name.
    name: String,
    /// MIME types (or patterns such as `text/*`) this extractor advertises.
    mime_types: Vec<&'static str>,
    /// Value reported as the extractor's priority.
    priority: i32,
}
// Lifecycle hooks for `MockExtractor`: the configured name, a fixed version
// string, and start/stop hooks that always succeed.
impl Plugin for MockExtractor {
    /// Returns the name supplied at construction.
    fn name(&self) -> &str {
        self.name.as_str()
    }

    /// Always reports `1.0.0`.
    fn version(&self) -> String {
        String::from("1.0.0")
    }

    /// Initialization has nothing to do and always succeeds.
    fn initialize(&self) -> Result<()> {
        Ok(())
    }

    /// Shutdown has nothing to do and always succeeds.
    fn shutdown(&self) -> Result<()> {
        Ok(())
    }
}
// Extraction behaviour for `MockExtractor`: the input bytes are echoed back
// inside a single paragraph element, prefixed with the extractor's name.
#[async_trait]
impl DocumentExtractor for MockExtractor {
    /// Builds a one-paragraph document from `content`.
    ///
    /// The bytes are decoded lossily as UTF-8, and the document's `mime_type`
    /// is set to the requested `mime_type`. The configuration is ignored.
    async fn extract_bytes(
        &self,
        content: &[u8],
        mime_type: &str,
        _config: &ExtractionConfig,
    ) -> Result<InternalDocument> {
        let decoded = String::from_utf8_lossy(content);
        let paragraph = InternalElement::text(
            ElementKind::Paragraph,
            format!("Extracted by {}: {}", self.name, decoded),
            0,
        );

        let mut document = InternalDocument::new("mock");
        document.mime_type = mime_type.to_string();
        document.push_element(paragraph);
        Ok(document)
    }

    /// Reads the whole file at `path` and delegates to `extract_bytes`.
    ///
    /// A read failure is propagated through `?`.
    async fn extract_file(&self, path: &Path, mime_type: &str, config: &ExtractionConfig) -> Result<InternalDocument> {
        let bytes = std::fs::read(path)?;
        self.extract_bytes(bytes.as_slice(), mime_type, config).await
    }

    /// Returns the MIME types supplied at construction.
    fn supported_mime_types(&self) -> &[&str] {
        self.mime_types.as_slice()
    }

    /// Returns the priority supplied at construction.
    fn priority(&self) -> i32 {
        self.priority
    }
}
/// A registered validator must appear as the only entry in the listing.
#[test]
fn test_validator_registration_succeeds() {
    let mut registry = ValidatorRegistry::new();

    let outcome = registry.register(Arc::new(MockValidator {
        name: String::from("test-validator"),
        should_fail: false,
    }));
    assert!(outcome.is_ok(), "Validator registration should succeed");

    let names = registry.list();
    let expected = "test-validator".to_string();
    assert_eq!(names.len(), 1, "Should have one validator");
    assert!(names.contains(&expected), "Should contain registered validator");
}
/// Three distinct validators can be registered side by side and all are listed.
#[test]
fn test_register_multiple_validators_succeeds() {
    // Small factory so each registration reads as a single line.
    let make = |name: &str, should_fail: bool| {
        Arc::new(MockValidator {
            name: name.to_string(),
            should_fail,
        })
    };

    let mut registry = ValidatorRegistry::new();
    registry.register(make("validator-1", false)).expect("Operation failed");
    registry.register(make("validator-2", false)).expect("Operation failed");
    // The third validator is configured to fail validation; registration must not care.
    registry.register(make("validator-3", true)).expect("Operation failed");

    let list = registry.list();
    assert_eq!(list.len(), 3, "Should have three validators");
    assert!(list.contains(&"validator-1".to_string()));
    assert!(list.contains(&"validator-2".to_string()));
    assert!(list.contains(&"validator-3".to_string()));
}
/// Removing a registered validator succeeds and leaves the listing empty.
#[test]
fn test_validator_unregistration_succeeds() {
    let mut registry = ValidatorRegistry::new();
    registry
        .register(Arc::new(MockValidator {
            name: String::from("temp-validator"),
            should_fail: false,
        }))
        .expect("Operation failed");

    let registered = registry.list().len();
    assert_eq!(registered, 1);

    let removal = registry.remove("temp-validator");
    assert!(removal.is_ok(), "Unregistration should succeed");

    let remaining = registry.list().len();
    assert_eq!(remaining, 0, "Registry should be empty after removal");
}
/// Removing a name that was never registered is expected to succeed as a no-op.
#[test]
fn test_unregister_nonexistent_validator_succeeds() {
    let mut empty_registry = ValidatorRegistry::new();

    let removal = empty_registry.remove("nonexistent-validator");

    assert!(removal.is_ok(), "Removing non-existent validator should succeed (no-op)");
}
/// An empty validator name must be rejected with a `Validation` error whose
/// message mentions "empty".
#[test]
fn test_validator_registration_with_empty_name_fails() {
    let nameless = Arc::new(MockValidator {
        name: String::new(),
        should_fail: false,
    });
    let mut registry = ValidatorRegistry::new();

    let outcome = registry.register(nameless);
    assert!(outcome.is_err(), "Registration with empty name should fail");

    if let Err(KreuzbergError::Validation { message, .. }) = outcome {
        assert!(message.contains("empty"), "Error should mention empty name");
    } else {
        panic!("Expected Validation error");
    }
}
/// A validator name containing spaces must be rejected with a `Validation`
/// error whose message mentions "whitespace".
#[test]
fn test_validator_registration_with_whitespace_fails() {
    let spaced = Arc::new(MockValidator {
        name: String::from("validator with spaces"),
        should_fail: false,
    });
    let mut registry = ValidatorRegistry::new();

    let outcome = registry.register(spaced);
    assert!(outcome.is_err(), "Registration with whitespace should fail");

    // Pull the message out first so the content check stands on its own line.
    let message = match outcome {
        Err(KreuzbergError::Validation { message, .. }) => message,
        _ => panic!("Expected Validation error"),
    };
    assert!(message.contains("whitespace"), "Error should mention whitespace");
}
/// A validator whose `initialize` hook fails must be rejected with a `Plugin`
/// error and must not end up in the registry.
#[test]
fn test_validator_registration_with_failed_init_fails() {
    let mut registry = ValidatorRegistry::new();

    let outcome = registry.register(Arc::new(FailingInitValidator {
        name: String::from("failing-validator"),
    }));
    assert!(outcome.is_err(), "Registration with failed init should fail");

    if !matches!(outcome, Err(KreuzbergError::Plugin { .. })) {
        panic!("Expected Plugin error");
    }

    let registered = registry.list().len();
    assert_eq!(registered, 0, "Failed validator should not be registered");
}
/// After `shutdown_all`, the validator listing must be empty.
#[test]
fn test_clear_validators_succeeds() {
    let mut registry = ValidatorRegistry::new();

    for name in ["validator-1", "validator-2"].iter() {
        let validator = Arc::new(MockValidator {
            name: (*name).to_string(),
            should_fail: false,
        });
        registry.register(validator).expect("Operation failed");
    }
    let before = registry.list().len();
    assert_eq!(before, 2);

    let outcome = registry.shutdown_all();
    assert!(outcome.is_ok(), "Clear should succeed");

    let after = registry.list().len();
    assert_eq!(after, 0, "Registry should be empty after clear");
}
/// `get_all` is expected to return validators ordered from highest to lowest
/// priority, regardless of the order in which they were registered.
#[test]
fn test_get_all_validators_respects_priority() {
    /// Validator double whose priority is chosen by the test.
    struct PriorityValidator {
        name: String,
        priority: i32,
    }

    impl Plugin for PriorityValidator {
        fn name(&self) -> &str {
            self.name.as_str()
        }

        fn version(&self) -> String {
            String::from("1.0.0")
        }

        fn initialize(&self) -> Result<()> {
            Ok(())
        }

        fn shutdown(&self) -> Result<()> {
            Ok(())
        }
    }

    #[async_trait]
    impl Validator for PriorityValidator {
        async fn validate(&self, _result: &ExtractionResult, _config: &ExtractionConfig) -> Result<()> {
            Ok(())
        }

        fn priority(&self) -> i32 {
            self.priority
        }
    }

    let make = |name: &str, priority: i32| {
        Arc::new(PriorityValidator {
            name: name.to_string(),
            priority,
        })
    };

    // Register out of order on purpose so the result cannot be insertion order.
    let mut registry = ValidatorRegistry::new();
    registry.register(make("medium-priority", 50)).expect("Operation failed");
    registry.register(make("low-priority", 10)).expect("Operation failed");
    registry.register(make("high-priority", 100)).expect("Operation failed");

    let all = registry.get_all();
    assert_eq!(all.len(), 3, "Should have three validators");

    let expected_order = ["high-priority", "medium-priority", "low-priority"];
    for (position, expected) in expected_order.iter().enumerate() {
        assert_eq!(all[position].name(), *expected);
    }
}
/// A registered extractor must appear as the only entry in the listing.
#[test]
fn test_extractor_registration_succeeds() {
    let mut registry = DocumentExtractorRegistry::new();

    let outcome = registry.register(Arc::new(MockExtractor {
        name: String::from("test-extractor"),
        mime_types: vec!["text/plain"],
        priority: 50,
    }));
    assert!(outcome.is_ok(), "Extractor registration should succeed");

    let list = registry.list();
    assert_eq!(list.len(), 1, "Should have one extractor");
    assert!(list.contains(&"test-extractor".to_string()));
}
/// Looking up a MIME type returns the extractor registered for it.
#[test]
fn test_get_extractor_by_mime_type_succeeds() {
    let mut registry = DocumentExtractorRegistry::new();
    registry
        .register(Arc::new(MockExtractor {
            name: String::from("pdf-extractor"),
            mime_types: vec!["application/pdf"],
            priority: 50,
        }))
        .expect("Operation failed");

    let lookup = registry.get("application/pdf");
    assert!(lookup.is_ok(), "Should find extractor for PDF");

    let found = lookup.expect("Operation failed");
    assert_eq!(found.name(), "pdf-extractor");
}
/// Looking up a MIME type in an empty registry must fail with
/// `UnsupportedFormat` carrying the requested MIME type.
#[test]
fn test_get_extractor_for_unsupported_mime_fails() {
    let empty_registry = DocumentExtractorRegistry::new();

    let lookup = empty_registry.get("application/nonexistent");
    assert!(lookup.is_err(), "Should not find extractor for unsupported MIME type");

    if let Err(KreuzbergError::UnsupportedFormat(mime)) = lookup {
        assert_eq!(mime, "application/nonexistent");
    } else {
        panic!("Expected UnsupportedFormat error");
    }
}
/// When two extractors claim the same MIME type, the one with the larger
/// priority value is expected to be returned.
#[test]
fn test_extractor_priority_selection() {
    let make = |name: &str, priority: i32| {
        Arc::new(MockExtractor {
            name: name.to_string(),
            mime_types: vec!["text/plain"],
            priority,
        })
    };

    let mut registry = DocumentExtractorRegistry::new();
    registry.register(make("low-priority-extractor", 10)).expect("Operation failed");
    registry.register(make("high-priority-extractor", 100)).expect("Operation failed");

    let selected = registry.get("text/plain").expect("Value not found");
    assert_eq!(
        selected.name(),
        "high-priority-extractor",
        "Should select highest priority extractor"
    );
}
/// An extractor registered for `text/*` must serve concrete `text/...` types
/// and must not serve types outside that family.
#[test]
fn test_extractor_wildcard_mime_matching() {
    let mut registry = DocumentExtractorRegistry::new();
    registry
        .register(Arc::new(MockExtractor {
            name: String::from("text-extractor"),
            mime_types: vec!["text/*"],
            priority: 50,
        }))
        .expect("Operation failed");

    // Each entry pairs a MIME type that should match with its failure message.
    let should_match = [
        ("text/plain", "Should match text/plain with text/*"),
        ("text/html", "Should match text/html with text/*"),
    ];
    for &(mime, failure_note) in should_match.iter() {
        let lookup = registry.get(mime);
        assert!(lookup.is_ok(), "{}", failure_note);
        assert_eq!(lookup.expect("Operation failed").name(), "text-extractor");
    }

    let unrelated = registry.get("application/pdf");
    assert!(unrelated.is_err(), "Should not match application/pdf with text/*");
}
/// Removing a registered extractor succeeds, empties the listing, and makes
/// later lookups for its MIME type fail.
#[test]
fn test_extractor_unregistration_succeeds() {
    let mut registry = DocumentExtractorRegistry::new();
    registry
        .register(Arc::new(MockExtractor {
            name: String::from("temp-extractor"),
            mime_types: vec!["text/plain"],
            priority: 50,
        }))
        .expect("Operation failed");

    let registered = registry.list().len();
    assert_eq!(registered, 1);

    let removal = registry.remove("temp-extractor");
    assert!(removal.is_ok(), "Unregistration should succeed");

    let remaining = registry.list().len();
    assert_eq!(remaining, 0, "Registry should be empty after removal");

    let stale_lookup = registry.get("text/plain");
    assert!(stale_lookup.is_err(), "Should not find extractor after removal");
}
/// One extractor advertising several MIME types must be reachable through
/// each of them.
#[test]
fn test_extractor_multiple_mime_types() {
    let multi_format = Arc::new(MockExtractor {
        name: String::from("multi-format-extractor"),
        mime_types: vec!["application/pdf", "application/vnd.ms-excel", "text/csv"],
        priority: 50,
    });

    let mut registry = DocumentExtractorRegistry::new();
    registry.register(multi_format).expect("Operation failed");

    // Every advertised type resolves...
    assert!(registry.get("application/pdf").is_ok());
    assert!(registry.get("application/vnd.ms-excel").is_ok());
    assert!(registry.get("text/csv").is_ok());

    // ...and resolves to the same extractor.
    let pdf_handler = registry.get("application/pdf").expect("Value not found");
    assert_eq!(pdf_handler.name(), "multi-format-extractor");

    let csv_handler = registry.get("text/csv").expect("Value not found");
    assert_eq!(csv_handler.name(), "multi-format-extractor");
}
/// After `shutdown_all`, the extractor listing must be empty.
#[test]
fn test_clear_extractors_succeeds() {
    let make = |name: &str, mime: &'static str| {
        Arc::new(MockExtractor {
            name: name.to_string(),
            mime_types: vec![mime],
            priority: 50,
        })
    };

    let mut registry = DocumentExtractorRegistry::new();
    registry.register(make("extractor-1", "text/plain")).expect("Operation failed");
    registry.register(make("extractor-2", "application/pdf")).expect("Operation failed");

    let before = registry.list().len();
    assert_eq!(before, 2);

    let outcome = registry.shutdown_all();
    assert!(outcome.is_ok(), "Clear should succeed");

    let after = registry.list().len();
    assert_eq!(after, 0, "Registry should be empty after clear");
}
/// An empty extractor name must be rejected with a `Validation` error whose
/// message mentions "empty".
#[test]
fn test_extractor_registration_with_empty_name_fails() {
    let nameless = Arc::new(MockExtractor {
        name: String::new(),
        mime_types: vec!["text/plain"],
        priority: 50,
    });
    let mut registry = DocumentExtractorRegistry::new();

    let outcome = registry.register(nameless);
    assert!(outcome.is_err(), "Registration with empty name should fail");

    if let Err(KreuzbergError::Validation { message, .. }) = outcome {
        assert!(message.contains("empty"), "Error should mention empty name");
    } else {
        panic!("Expected Validation error");
    }
}
/// An extractor name containing spaces must be rejected with a `Validation`
/// error whose message mentions "whitespace".
#[test]
fn test_extractor_registration_with_whitespace_fails() {
    let spaced = Arc::new(MockExtractor {
        name: String::from("extractor with spaces"),
        mime_types: vec!["text/plain"],
        priority: 50,
    });
    let mut registry = DocumentExtractorRegistry::new();

    let outcome = registry.register(spaced);
    assert!(outcome.is_err(), "Registration with whitespace should fail");

    // Pull the message out first so the content check stands on its own line.
    let message = match outcome {
        Err(KreuzbergError::Validation { message, .. }) => message,
        _ => panic!("Expected Validation error"),
    };
    assert!(message.contains("whitespace"), "Error should mention whitespace");
}