//! MCP contract tests - verify MCP config matches Rust core
//!
//! This test suite validates that MCP (Model Context Protocol) configuration
//! produces identical JSON to the Rust core library when parsing configuration.
//! This ensures that MCP users get the same configuration behavior as CLI and SDK users.
|
|
|
|
use kreuzberg::core::config::ExtractionConfig;
|
|
use kreuzberg::core::config::OutputFormat;
|
|
use serde_json::json;
|
|
|
|
/// Checks that a config built directly in Rust and the same settings supplied
/// as MCP-style JSON serialize to the same value for every field set here.
///
/// The JSON side goes through a full deserialize -> serialize cycle so that the
/// comparison is made on what `ExtractionConfig` actually emits, not on the raw
/// input literal.
#[test]
fn test_mcp_basic_config_json_matches_rust_core() {
    // Reference side: the config as a Rust caller would construct it.
    let rust_config = ExtractionConfig {
        use_cache: true,
        enable_quality_processing: true,
        force_ocr: false,
        output_format: OutputFormat::Plain,
        result_format: kreuzberg::types::ResultFormat::Unified,
        ..Default::default()
    };
    let rust_json = serde_json::to_value(&rust_config).expect("Failed to serialize rust config");

    // Simulate MCP config parameter deserialization
    let mcp_json = json!({
        "use_cache": true,
        "enable_quality_processing": true,
        "force_ocr": false,
        "output_format": "plain",
        "result_format": "unified"
    });
    // The JSON value is not used again, so it is moved instead of cloned.
    let mcp_config: ExtractionConfig =
        serde_json::from_value(mcp_json).expect("Failed to deserialize MCP config");
    let mcp_serialized = serde_json::to_value(&mcp_config).expect("Failed to serialize MCP config");

    // Every field set on both sides must serialize identically. `result_format`
    // is included because it is set above and would otherwise go unchecked.
    for field in [
        "use_cache",
        "enable_quality_processing",
        "force_ocr",
        "output_format",
        "result_format",
    ] {
        assert_eq!(
            rust_json.get(field),
            mcp_serialized.get(field),
            "MCP {} must match Rust core",
            field
        );
    }
}
|
|
|
|
/// A nested `ocr` object supplied next to `force_ocr` must deserialize into
/// `ExtractionConfig` and still be present after serializing the config again.
#[test]
fn test_mcp_ocr_config_nested_matches_rust_core() {
    let payload = json!({
        "ocr": { "backend": "tesseract" },
        "force_ocr": true
    });

    let parsed: ExtractionConfig = serde_json::from_value(payload).expect("Failed to deserialize OCR config");

    // Presence of the nested section is checked first, then the flag, then the
    // backend value inside the section.
    let ocr = parsed.ocr.as_ref().expect("OCR config should be present");
    assert!(parsed.force_ocr, "force_ocr should be true");
    assert_eq!(ocr.backend, "tesseract", "OCR backend should be tesseract");

    // Roundtrip: the nested section must not be dropped on serialization.
    let reserialized = serde_json::to_value(&parsed).expect("Failed to serialize");
    assert!(
        reserialized.get("ocr").is_some(),
        "Serialized config should include ocr"
    );
}
|
|
|
|
/// A nested `chunking` object must deserialize into `ExtractionConfig` with its
/// size and overlap values intact, and must still be present after serializing.
///
/// NOTE(review): the JSON uses the keys `max_chars` / `max_overlap`, while the
/// struct fields read below are `max_characters` / `overlap`. Presumably the
/// chunking config declares serde aliases for these - confirm against its
/// definition.
#[test]
fn test_mcp_chunking_config_nested_matches_rust_core() {
    let payload = json!({
        "chunking": {
            "max_chars": 500,
            "max_overlap": 50,
            "strategy": "sliding_window"
        }
    });

    let parsed: ExtractionConfig =
        serde_json::from_value(payload).expect("Failed to deserialize chunking config");

    // The section must exist before its values can be inspected.
    let chunking = parsed
        .chunking
        .as_ref()
        .expect("Chunking config should be present");
    assert_eq!(chunking.max_characters, 500, "max_chars should be 500");
    assert_eq!(chunking.overlap, 50, "max_overlap should be 50");

    // Roundtrip: the nested section must not be dropped on serialization.
    let reserialized = serde_json::to_value(&parsed).expect("Failed to serialize");
    let chunking_survived = reserialized.get("chunking").is_some();
    assert!(chunking_survived, "Serialized config should include chunking");
}
|
|
|
|
/// Deserializes a config that sets scalar options together with nested `ocr`
/// and `chunking` sections, then checks the parsed struct and the
/// re-serialized JSON against the input.
#[test]
fn test_mcp_full_config_preserves_all_fields() {
    let input = json!({
        "use_cache": false,
        "enable_quality_processing": true,
        "force_ocr": true,
        "output_format": "markdown",
        "result_format": "unified",
        "max_concurrent_extractions": 8,
        "ocr": { "backend": "tesseract" },
        "chunking": {
            "max_chars": 1000,
            "max_overlap": 200
        }
    });

    // `input` is cloned because it is compared against the roundtrip output below.
    let parsed: ExtractionConfig =
        serde_json::from_value(input.clone()).expect("Failed to deserialize full config");
    let output = serde_json::to_value(&parsed).expect("Failed to serialize");

    // Top-level scalar fields on the parsed struct.
    assert!(!parsed.use_cache, "use_cache should be false");
    assert!(
        parsed.enable_quality_processing,
        "enable_quality_processing should be true"
    );
    assert!(parsed.force_ocr, "force_ocr should be true");
    assert_eq!(
        parsed.max_concurrent_extractions,
        Some(8),
        "max_concurrent_extractions should be 8"
    );

    // Nested sections only need to be present here.
    assert!(parsed.ocr.is_some(), "OCR config should be present");
    assert!(parsed.chunking.is_some(), "Chunking config should be present");

    // Roundtrip integrity: serialized values equal the original input values.
    for key in ["use_cache", "force_ocr", "max_concurrent_extractions"] {
        assert_eq!(
            output.get(key),
            input.get(key),
            "{} should survive roundtrip",
            key
        );
    }
}
|
|
|
|
/// An empty JSON object (an MCP request with no overrides) must deserialize to
/// a config whose serialized fields equal those of `ExtractionConfig::default()`.
#[test]
fn test_mcp_default_config_matches_rust_core_defaults() {
    // Rust-side baseline.
    let baseline = serde_json::to_value(&ExtractionConfig::default()).expect("Failed to serialize default");

    // MCP side: nothing supplied, so every field comes from serde defaults.
    let from_empty: ExtractionConfig =
        serde_json::from_value(json!({})).expect("Failed to deserialize empty config");
    let from_empty_json = serde_json::to_value(&from_empty).expect("Failed to serialize MCP default");

    // Field-by-field comparison, in the same order as the individual checks
    // this loop replaces.
    for field in [
        "use_cache",
        "enable_quality_processing",
        "force_ocr",
        "result_format",
        "output_format",
    ] {
        assert_eq!(
            from_empty_json.get(field),
            baseline.get(field),
            "{} default should match",
            field
        );
    }
}
|
|
|
|
/// Each listed lowercase `output_format` string must deserialize, and the
/// resulting format must render to a non-empty string.
#[test]
fn test_mcp_output_format_values_are_valid() {
    for format in ["plain", "markdown", "html"] {
        let payload = json!({ "output_format": format });

        let config = match serde_json::from_value::<ExtractionConfig>(payload) {
            Ok(config) => config,
            Err(_) => panic!("Format '{}' should deserialize successfully", format),
        };

        let rendered = config.output_format.to_string();
        assert!(
            !rendered.is_empty(),
            "Deserialized format should have valid string representation"
        );
    }
}
|
|
|
|
/// Each listed `result_format` string must be accepted when deserializing an
/// `ExtractionConfig`.
#[test]
fn test_mcp_result_format_values_are_valid() {
    for format in ["unified", "element_based"] {
        let outcome = serde_json::from_value::<ExtractionConfig>(json!({ "result_format": format }));

        assert!(
            outcome.is_ok(),
            "Result format '{}' should deserialize successfully",
            format
        );
    }
}
|
|
|
|
/// Supplying only `force_ocr` must set that field while `use_cache` and
/// `enable_quality_processing` remain enabled.
#[test]
fn test_mcp_partial_override_preserves_defaults() {
    // Only one field is overridden; everything else is left to defaults.
    let overrides = json!({ "force_ocr": true });

    let merged: ExtractionConfig =
        serde_json::from_value(overrides).expect("Failed to deserialize partial config");

    // The explicit override is honoured.
    assert!(merged.force_ocr, "force_ocr override should be applied");

    // Fields that were not mentioned keep their defaults.
    assert!(
        merged.use_cache,
        "use_cache should retain default when not overridden"
    );
    assert!(
        merged.enable_quality_processing,
        "enable_quality_processing should retain default when not overridden"
    );
}
|
|
|
|
/// Feeds an unrecognised `output_format` string to `ExtractionConfig` and
/// checks that the outcome is well-formed either way.
///
/// Two outcomes are accepted, because this file does not show how
/// `OutputFormat` treats unknown values:
/// - deserialization fails, or
/// - deserialization succeeds, in which case the resulting format must render
///   to a non-empty string (the same validity check applied to the known
///   format values elsewhere in this file).
#[test]
fn test_mcp_error_handling_for_invalid_json() {
    let invalid_json = json!({
        "output_format": "InvalidFormat"
    });

    match serde_json::from_value::<ExtractionConfig>(invalid_json) {
        // Rejecting the unknown value outright is acceptable.
        Err(_) => {}
        // If a fallback is applied instead, it has to be a usable format;
        // previously the rendered string was discarded without any check.
        Ok(config) => assert!(
            !config.output_format.to_string().is_empty(),
            "Fallback for an unknown output_format should have valid string representation"
        ),
    }
}
|
|
|
|
/// A `max_concurrent_extractions` value supplied in JSON must appear on the
/// parsed config as `Some` of that value.
#[test]
fn test_mcp_concurrent_extractions_override() {
    let parsed: ExtractionConfig = serde_json::from_value(json!({ "max_concurrent_extractions": 16 }))
        .expect("Failed to deserialize config with concurrent extractions");

    let limit = parsed.max_concurrent_extractions;
    assert_eq!(
        limit,
        Some(16),
        "max_concurrent_extractions should be overridden to 16"
    );
}
|
|
|
|
/// Verifies that config JSON keys are matched case-sensitively: snake_case keys
/// take effect, while camelCase spellings of the same keys do not.
///
/// Values are chosen as the negation of `ExtractionConfig::default()` so that a
/// key being silently ignored is distinguishable from a key being applied.
#[test]
fn test_mcp_config_json_keys_case_sensitive() {
    let defaults = ExtractionConfig::default();

    // snake_case keys: both values are flipped away from the defaults, so the
    // assertions below fail if either key is not recognised.
    let lowercase_json = json!({
        "use_cache": !defaults.use_cache,
        "force_ocr": !defaults.force_ocr
    });

    let config: ExtractionConfig =
        serde_json::from_value(lowercase_json).expect("Failed to deserialize lowercase config");

    assert_eq!(
        config.use_cache, !defaults.use_cache,
        "use_cache should take the value supplied under its snake_case key"
    );
    assert_eq!(
        config.force_ocr, !defaults.force_ocr,
        "force_ocr should take the value supplied under its snake_case key"
    );

    // camelCase keys: serde ignores unknown fields unless the type opts into
    // `#[serde(deny_unknown_fields)]`. Which of the two applies to
    // `ExtractionConfig` is not visible from this file, so both outcomes are
    // accepted: an error, or a successful parse in which the camelCase keys had
    // no effect on the snake_case fields.
    let camel_case_json = json!({
        "useCache": !defaults.use_cache,
        "forceOcr": !defaults.force_ocr
    });

    if let Ok(camel_config) = serde_json::from_value::<ExtractionConfig>(camel_case_json) {
        assert_eq!(
            camel_config.use_cache, defaults.use_cache,
            "useCache must not be treated as use_cache"
        );
        assert_eq!(
            camel_config.force_ocr, defaults.force_ocr,
            "forceOcr must not be treated as force_ocr"
        );
    }
}
|