158 lines
5.0 KiB
Rust
158 lines
5.0 KiB
Rust
//! TODO: Restored from 245539484 alef-migration cleanup. Currently exercises
|
|
//! pub(crate) APIs that the migration deliberately narrowed; gated until
|
|
//! either (a) these APIs are re-exposed publicly, or (b) the test is
|
|
//! rewritten against the public extraction surface.
|
|
|
|
#![cfg(any())]
|
|
|
|
// Original content preserved below; recompiled once gating cfg drops.
|
|
// Disabled by the file-level cfg(any()) above.
|
|
|
|
/*
|
|
#![cfg(feature = "otel")]
|
|
|
|
use std::sync::{Arc, Mutex};
|
|
use tracing::Subscriber;
|
|
use tracing::span::{Attributes, Id};
|
|
use tracing_subscriber::Layer;
|
|
use tracing_subscriber::layer::{Context, SubscriberExt};
|
|
use tracing_subscriber::registry::LookupSpan;
|
|
|
|
/// Simple span name collector for testing.
|
|
///
|
|
/// This layer collects span names as they are created to verify
|
|
/// that instrumentation is working correctly.
|
|
struct SpanCollector {
|
|
spans: Arc<Mutex<Vec<String>>>,
|
|
}
|
|
|
|
impl<S: Subscriber + for<'a> LookupSpan<'a>> Layer<S> for SpanCollector {
|
|
fn on_new_span(&self, attrs: &Attributes<'_>, _id: &Id, _ctx: Context<'_, S>) {
|
|
self.spans
|
|
.lock()
|
|
.expect("Operation failed")
|
|
.push(attrs.metadata().name().to_string());
|
|
}
|
|
}
|
|
|
|
#[tokio::test]
|
|
async fn test_cache_instrumentation() {
|
|
use kreuzberg::cache::GenericCache;
|
|
use tempfile::tempdir;
|
|
|
|
let spans = Arc::new(Mutex::new(Vec::new()));
|
|
let collector = SpanCollector { spans: spans.clone() };
|
|
|
|
let subscriber = tracing_subscriber::registry().with(collector);
|
|
let _guard = tracing::subscriber::set_default(subscriber);
|
|
|
|
let temp_dir = tempdir().expect("Operation failed");
|
|
let cache = GenericCache::new(
|
|
"test".to_string(),
|
|
Some(temp_dir.path().to_str().expect("Operation failed").to_string()),
|
|
30.0,
|
|
500.0,
|
|
1000.0,
|
|
)
|
|
.expect("Operation failed");
|
|
|
|
cache
|
|
.set_default("test_key", b"test data".to_vec(), None)
|
|
.expect("Operation failed");
|
|
|
|
let _ = cache.get_default("test_key", None).expect("Value not found");
|
|
|
|
let span_names = spans.lock().expect("Operation failed");
|
|
assert!(span_names.contains(&"set".to_string()), "Expected 'set' span");
|
|
assert!(span_names.contains(&"get".to_string()), "Expected 'get' span");
|
|
}
|
|
|
|
#[cfg(feature = "ocr")]
|
|
#[tokio::test]
|
|
async fn test_ocr_instrumentation() {
|
|
use kreuzberg::ocr::processor::OcrProcessor;
|
|
use kreuzberg::ocr::types::TesseractConfig;
|
|
use tempfile::tempdir;
|
|
|
|
let spans = Arc::new(Mutex::new(Vec::new()));
|
|
let collector = SpanCollector { spans: spans.clone() };
|
|
|
|
let subscriber = tracing_subscriber::registry().with(collector);
|
|
let _guard = tracing::subscriber::set_default(subscriber);
|
|
|
|
let temp_dir = tempdir().expect("Operation failed");
|
|
let processor = OcrProcessor::new(Some(temp_dir.path().to_path_buf())).expect("Operation failed");
|
|
|
|
let mut test_image = Vec::new();
|
|
let img = image::ImageBuffer::from_fn(1, 1, |_, _| image::Rgb([255u8, 255u8, 255u8]));
|
|
img.write_to(&mut std::io::Cursor::new(&mut test_image), image::ImageFormat::Png)
|
|
.expect("Operation failed");
|
|
|
|
let config = TesseractConfig {
|
|
output_format: "text".to_string(),
|
|
use_cache: false,
|
|
..TesseractConfig::default()
|
|
};
|
|
|
|
let _ = processor.process_image(&test_image, &config);
|
|
|
|
let span_names = spans.lock().expect("Operation failed");
|
|
assert!(
|
|
span_names.contains(&"process_image".to_string()),
|
|
"Expected 'process_image' span"
|
|
);
|
|
}
|
|
|
|
#[tokio::test]
|
|
async fn test_registry_instrumentation() {
|
|
use kreuzberg::plugins::registry::DocumentExtractorRegistry;
|
|
|
|
let spans = Arc::new(Mutex::new(Vec::new()));
|
|
let collector = SpanCollector { spans: spans.clone() };
|
|
|
|
let subscriber = tracing_subscriber::registry().with(collector);
|
|
let _guard = tracing::subscriber::set_default(subscriber);
|
|
|
|
let registry = DocumentExtractorRegistry::new();
|
|
|
|
let _ = registry.get("application/pdf");
|
|
|
|
let span_names = spans.lock().expect("Operation failed");
|
|
assert!(
|
|
span_names.contains(&"get".to_string()),
|
|
"Expected 'get' span from registry"
|
|
);
|
|
}
|
|
|
|
#[cfg(all(feature = "pdf", feature = "office"))]
|
|
#[tokio::test]
|
|
async fn test_span_hierarchy() {
|
|
use kreuzberg::core::config::ExtractionConfig;
|
|
use kreuzberg::core::extractor::extract_bytes;
|
|
|
|
let spans = Arc::new(Mutex::new(Vec::new()));
|
|
let collector = SpanCollector { spans: spans.clone() };
|
|
|
|
let subscriber = tracing_subscriber::registry().with(collector);
|
|
let _guard = tracing::subscriber::set_default(subscriber);
|
|
|
|
let test_content = b"Hello, World!";
|
|
let config = ExtractionConfig::default();
|
|
|
|
let _ = extract_bytes(test_content, "text/plain", &config).await;
|
|
|
|
let span_names = spans.lock().expect("Operation failed");
|
|
assert!(
|
|
span_names.contains(&"extract_bytes".to_string()),
|
|
"Expected 'extract_bytes' span"
|
|
);
|
|
}
|
|
|
|
#[test]
|
|
fn test_span_collector_creation() {
|
|
let spans = Arc::new(Mutex::new(Vec::new()));
|
|
let _collector = SpanCollector { spans };
|
|
}
|
|
|
|
*/
|