Files
fil/crates/kreuzberg/tests/instrumentation_test.rs
Henrik Jess Nielsen b4c07d3693
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s
Nomad changes
2026-06-01 23:40:55 +02:00

158 lines
5.0 KiB
Rust

//! TODO: Restored from 245539484 alef-migration cleanup. Currently exercises
//! pub(crate) APIs that the migration deliberately narrowed; gated until
//! either (a) these APIs are re-exposed publicly, or (b) the test is
//! rewritten against the public extraction surface.
#![cfg(any())]
// Original content preserved below inside a block comment. Re-enabling it
// requires removing both the file-level `#![cfg(any())]` above and the
// enclosing block-comment delimiters.
/*
#![cfg(feature = "otel")]
use std::sync::{Arc, Mutex};
use tracing::Subscriber;
use tracing::span::{Attributes, Id};
use tracing_subscriber::Layer;
use tracing_subscriber::layer::{Context, SubscriberExt};
use tracing_subscriber::registry::LookupSpan;
/// Test-only `tracing` layer state that gathers span names.
///
/// A test keeps its own clone of the `Arc` so it can inspect the collected
/// names after the instrumented code has run.
struct SpanCollector {
    /// Shared, mutex-guarded list of span names.
    spans: Arc<Mutex<Vec<String>>>,
}
/// Lets a `SpanCollector` be stacked onto a `tracing_subscriber` registry.
///
/// Only `on_new_span` is overridden here.
impl<S> Layer<S> for SpanCollector
where
    S: Subscriber + for<'a> LookupSpan<'a>,
{
    /// Appends the metadata name of the newly created span to the shared list.
    ///
    /// Panics with "Operation failed" if the mutex cannot be locked.
    fn on_new_span(&self, attrs: &Attributes<'_>, _id: &Id, _ctx: Context<'_, S>) {
        let mut recorded = self.spans.lock().expect("Operation failed");
        recorded.push(attrs.metadata().name().to_owned());
    }
}
/// Verifies that writing to and reading from a `GenericCache` creates spans
/// named `set` and `get`.
#[tokio::test]
async fn test_cache_instrumentation() {
    use kreuzberg::cache::GenericCache;
    use tempfile::tempdir;

    // Record span names through a subscriber that stays installed until
    // `_guard` is dropped at the end of the test.
    let recorded = Arc::new(Mutex::new(Vec::new()));
    let layer = SpanCollector { spans: Arc::clone(&recorded) };
    let _guard = tracing::subscriber::set_default(tracing_subscriber::registry().with(layer));

    // Back the cache with a throwaway directory.
    let cache_dir = tempdir().expect("Operation failed");
    let cache_name = "test".to_string();
    let cache_path = cache_dir.path().to_str().expect("Operation failed").to_string();
    let cache = GenericCache::new(cache_name, Some(cache_path), 30.0, 500.0, 1000.0)
        .expect("Operation failed");

    // One write followed by one read of the same key.
    let payload = b"test data".to_vec();
    cache
        .set_default("test_key", payload, None)
        .expect("Operation failed");
    let _ = cache.get_default("test_key", None).expect("Value not found");

    let names = recorded.lock().expect("Operation failed");
    let saw = |wanted: &str| names.iter().any(|name| name == wanted);
    assert!(saw("set"), "Expected 'set' span");
    assert!(saw("get"), "Expected 'get' span");
}
/// Verifies that `OcrProcessor::process_image` creates a span named
/// `process_image`. Only compiled when the `ocr` feature is enabled.
#[cfg(feature = "ocr")]
#[tokio::test]
async fn test_ocr_instrumentation() {
    use kreuzberg::ocr::processor::OcrProcessor;
    use kreuzberg::ocr::types::TesseractConfig;
    use tempfile::tempdir;

    // Record span names through a subscriber that stays installed until
    // `_guard` is dropped at the end of the test.
    let recorded = Arc::new(Mutex::new(Vec::new()));
    let layer = SpanCollector { spans: Arc::clone(&recorded) };
    let _guard = tracing::subscriber::set_default(tracing_subscriber::registry().with(layer));

    let work_dir = tempdir().expect("Operation failed");
    let processor =
        OcrProcessor::new(Some(work_dir.path().to_path_buf())).expect("Operation failed");

    // Encode a single white RGB pixel as an in-memory PNG.
    let mut png_bytes = Vec::new();
    let pixel = image::ImageBuffer::from_fn(1, 1, |_, _| image::Rgb([255u8, 255u8, 255u8]));
    let mut sink = std::io::Cursor::new(&mut png_bytes);
    pixel
        .write_to(&mut sink, image::ImageFormat::Png)
        .expect("Operation failed");

    let config = TesseractConfig {
        output_format: "text".to_string(),
        use_cache: false,
        ..TesseractConfig::default()
    };

    // The OCR result is deliberately discarded; only the span is asserted.
    let _ = processor.process_image(&png_bytes, &config);

    let names = recorded.lock().expect("Operation failed");
    let saw_process_image = names.iter().any(|name| name == "process_image");
    assert!(saw_process_image, "Expected 'process_image' span");
}
/// Verifies that a `DocumentExtractorRegistry` lookup creates a span named `get`.
#[tokio::test]
async fn test_registry_instrumentation() {
    use kreuzberg::plugins::registry::DocumentExtractorRegistry;

    // Record span names through a subscriber that stays installed until
    // `_guard` is dropped at the end of the test.
    let recorded = Arc::new(Mutex::new(Vec::new()));
    let layer = SpanCollector { spans: Arc::clone(&recorded) };
    let _guard = tracing::subscriber::set_default(tracing_subscriber::registry().with(layer));

    // The lookup result is deliberately discarded; only the span is asserted.
    let extractors = DocumentExtractorRegistry::new();
    let _ = extractors.get("application/pdf");

    let names = recorded.lock().expect("Operation failed");
    let saw_get = names.iter().any(|name| name == "get");
    assert!(saw_get, "Expected 'get' span from registry");
}
/// Verifies that `extract_bytes` creates a span named `extract_bytes`.
///
/// Despite the test name, only the presence of that span is asserted;
/// parent/child relationships between spans are not inspected.
// NOTE(review): gated on both `pdf` and `office` although the input is
// `text/plain` — confirm the gate is intended.
#[cfg(all(feature = "pdf", feature = "office"))]
#[tokio::test]
async fn test_span_hierarchy() {
    use kreuzberg::core::config::ExtractionConfig;
    use kreuzberg::core::extractor::extract_bytes;

    // Record span names through a subscriber that stays installed until
    // `_guard` is dropped at the end of the test.
    let recorded = Arc::new(Mutex::new(Vec::new()));
    let layer = SpanCollector { spans: Arc::clone(&recorded) };
    let _guard = tracing::subscriber::set_default(tracing_subscriber::registry().with(layer));

    let input = b"Hello, World!";
    let config = ExtractionConfig::default();

    // The extraction result is deliberately discarded; only the span is asserted.
    let _ = extract_bytes(input, "text/plain", &config).await;

    let names = recorded.lock().expect("Operation failed");
    let saw_extract = names.iter().any(|name| name == "extract_bytes");
    assert!(saw_extract, "Expected 'extract_bytes' span");
}
/// Smoke test: a `SpanCollector` can be built around an empty shared vector.
#[test]
fn test_span_collector_creation() {
    let _collector = SpanCollector {
        spans: Arc::new(Mutex::new(Vec::new())),
    };
}
*/