This commit is contained in:
157
crates/kreuzberg/tests/instrumentation_test.rs
Normal file
157
crates/kreuzberg/tests/instrumentation_test.rs
Normal file
@@ -0,0 +1,157 @@
|
||||
//! TODO: Restored from 245539484 alef-migration cleanup. Currently exercises
|
||||
//! pub(crate) APIs that the migration deliberately narrowed; gated until
|
||||
//! either (a) these APIs are re-exposed publicly, or (b) the test is
|
||||
//! rewritten against the public extraction surface.
|
||||
|
||||
#![cfg(any())]
|
||||
|
||||
// Original content preserved below; recompiled once gating cfg drops.
|
||||
// Disabled by the file-level cfg(any()) above.
|
||||
|
||||
/*
|
||||
#![cfg(feature = "otel")]
|
||||
|
||||
use std::sync::{Arc, Mutex};
|
||||
use tracing::Subscriber;
|
||||
use tracing::span::{Attributes, Id};
|
||||
use tracing_subscriber::Layer;
|
||||
use tracing_subscriber::layer::{Context, SubscriberExt};
|
||||
use tracing_subscriber::registry::LookupSpan;
|
||||
|
||||
/// Simple span name collector for testing.
|
||||
///
|
||||
/// This layer collects span names as they are created to verify
|
||||
/// that instrumentation is working correctly.
|
||||
struct SpanCollector {
|
||||
spans: Arc<Mutex<Vec<String>>>,
|
||||
}
|
||||
|
||||
impl<S: Subscriber + for<'a> LookupSpan<'a>> Layer<S> for SpanCollector {
|
||||
fn on_new_span(&self, attrs: &Attributes<'_>, _id: &Id, _ctx: Context<'_, S>) {
|
||||
self.spans
|
||||
.lock()
|
||||
.expect("Operation failed")
|
||||
.push(attrs.metadata().name().to_string());
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_cache_instrumentation() {
|
||||
use kreuzberg::cache::GenericCache;
|
||||
use tempfile::tempdir;
|
||||
|
||||
let spans = Arc::new(Mutex::new(Vec::new()));
|
||||
let collector = SpanCollector { spans: spans.clone() };
|
||||
|
||||
let subscriber = tracing_subscriber::registry().with(collector);
|
||||
let _guard = tracing::subscriber::set_default(subscriber);
|
||||
|
||||
let temp_dir = tempdir().expect("Operation failed");
|
||||
let cache = GenericCache::new(
|
||||
"test".to_string(),
|
||||
Some(temp_dir.path().to_str().expect("Operation failed").to_string()),
|
||||
30.0,
|
||||
500.0,
|
||||
1000.0,
|
||||
)
|
||||
.expect("Operation failed");
|
||||
|
||||
cache
|
||||
.set_default("test_key", b"test data".to_vec(), None)
|
||||
.expect("Operation failed");
|
||||
|
||||
let _ = cache.get_default("test_key", None).expect("Value not found");
|
||||
|
||||
let span_names = spans.lock().expect("Operation failed");
|
||||
assert!(span_names.contains(&"set".to_string()), "Expected 'set' span");
|
||||
assert!(span_names.contains(&"get".to_string()), "Expected 'get' span");
|
||||
}
|
||||
|
||||
#[cfg(feature = "ocr")]
|
||||
#[tokio::test]
|
||||
async fn test_ocr_instrumentation() {
|
||||
use kreuzberg::ocr::processor::OcrProcessor;
|
||||
use kreuzberg::ocr::types::TesseractConfig;
|
||||
use tempfile::tempdir;
|
||||
|
||||
let spans = Arc::new(Mutex::new(Vec::new()));
|
||||
let collector = SpanCollector { spans: spans.clone() };
|
||||
|
||||
let subscriber = tracing_subscriber::registry().with(collector);
|
||||
let _guard = tracing::subscriber::set_default(subscriber);
|
||||
|
||||
let temp_dir = tempdir().expect("Operation failed");
|
||||
let processor = OcrProcessor::new(Some(temp_dir.path().to_path_buf())).expect("Operation failed");
|
||||
|
||||
let mut test_image = Vec::new();
|
||||
let img = image::ImageBuffer::from_fn(1, 1, |_, _| image::Rgb([255u8, 255u8, 255u8]));
|
||||
img.write_to(&mut std::io::Cursor::new(&mut test_image), image::ImageFormat::Png)
|
||||
.expect("Operation failed");
|
||||
|
||||
let config = TesseractConfig {
|
||||
output_format: "text".to_string(),
|
||||
use_cache: false,
|
||||
..TesseractConfig::default()
|
||||
};
|
||||
|
||||
let _ = processor.process_image(&test_image, &config);
|
||||
|
||||
let span_names = spans.lock().expect("Operation failed");
|
||||
assert!(
|
||||
span_names.contains(&"process_image".to_string()),
|
||||
"Expected 'process_image' span"
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_registry_instrumentation() {
|
||||
use kreuzberg::plugins::registry::DocumentExtractorRegistry;
|
||||
|
||||
let spans = Arc::new(Mutex::new(Vec::new()));
|
||||
let collector = SpanCollector { spans: spans.clone() };
|
||||
|
||||
let subscriber = tracing_subscriber::registry().with(collector);
|
||||
let _guard = tracing::subscriber::set_default(subscriber);
|
||||
|
||||
let registry = DocumentExtractorRegistry::new();
|
||||
|
||||
let _ = registry.get("application/pdf");
|
||||
|
||||
let span_names = spans.lock().expect("Operation failed");
|
||||
assert!(
|
||||
span_names.contains(&"get".to_string()),
|
||||
"Expected 'get' span from registry"
|
||||
);
|
||||
}
|
||||
|
||||
#[cfg(all(feature = "pdf", feature = "office"))]
|
||||
#[tokio::test]
|
||||
async fn test_span_hierarchy() {
|
||||
use kreuzberg::core::config::ExtractionConfig;
|
||||
use kreuzberg::core::extractor::extract_bytes;
|
||||
|
||||
let spans = Arc::new(Mutex::new(Vec::new()));
|
||||
let collector = SpanCollector { spans: spans.clone() };
|
||||
|
||||
let subscriber = tracing_subscriber::registry().with(collector);
|
||||
let _guard = tracing::subscriber::set_default(subscriber);
|
||||
|
||||
let test_content = b"Hello, World!";
|
||||
let config = ExtractionConfig::default();
|
||||
|
||||
let _ = extract_bytes(test_content, "text/plain", &config).await;
|
||||
|
||||
let span_names = spans.lock().expect("Operation failed");
|
||||
assert!(
|
||||
span_names.contains(&"extract_bytes".to_string()),
|
||||
"Expected 'extract_bytes' span"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_span_collector_creation() {
|
||||
let spans = Arc::new(Mutex::new(Vec::new()));
|
||||
let _collector = SpanCollector { spans };
|
||||
}
|
||||
|
||||
*/
|
||||
Reference in New Issue
Block a user