// fil/tools/benchmark-harness/tests/aggregate_schema.rs

use benchmark_harness::aggregate::aggregate_new_format;
use benchmark_harness::types::{
    BenchmarkResult, ErrorKind, FrameworkCapabilities, OcrStatus, OutputFormat, PerformanceMetrics, QualityMetrics,
};
use std::path::PathBuf;
use std::time::Duration;

fn make_benchmark_result(
    framework: &str,
    output_format: OutputFormat,
    file_name: &str,
    ocr: bool,
    success: bool,
    quality: Option<QualityMetrics>,
) -> BenchmarkResult {
    BenchmarkResult {
        framework: framework.to_string(),
        output_format,
        file_path: PathBuf::from(file_name),
        file_size: 10240,
        success,
        error_message: if success { None } else { Some("test error".to_string()) },
        error_kind: if success {
            ErrorKind::None
        } else {
            ErrorKind::FrameworkError
        },
        duration: Duration::from_millis(100),
        extraction_duration: Some(Duration::from_millis(80)),
        subprocess_overhead: Some(Duration::from_millis(20)),
        metrics: PerformanceMetrics {
            peak_memory_bytes: 100_000_000,
            avg_cpu_percent: 50.0,
            throughput_bytes_per_sec: 102_400.0,
            p50_memory_bytes: 90_000_000,
            p95_memory_bytes: 95_000_000,
            p99_memory_bytes: 99_000_000,
        },
        quality,
        iterations: vec![],
        statistics: None,
        cold_start_duration: Some(Duration::from_millis(500)),
        file_extension: "pdf".to_string(),
        framework_capabilities: FrameworkCapabilities::default(),
        pdf_metadata: None,
        ocr_status: if ocr { OcrStatus::Used } else { OcrStatus::NotUsed },
        extracted_text: None,
    }
}

/// Aggregating a single successful Markdown result must produce a report
/// whose `schema_version` is `"2.4.0"`.
#[test]
fn test_schema_version_2_4_0() {
    let quality = QualityMetrics {
        f1_score_text: 0.95,
        f1_score_numeric: 0.90,
        f1_score_layout: Some(0.88),
        quality_score: 0.91,
        missing_tokens: vec![],
        extra_tokens: vec![],
        correct: true,
    };
    let only_result = make_benchmark_result(
        "kreuzberg-markdown-baseline",
        OutputFormat::Markdown,
        "test.pdf",
        false,
        true,
        Some(quality),
    );
    let results = vec![only_result];

    let report = aggregate_new_format(&results);

    assert_eq!(report.schema_version, "2.4.0");
}

/// Two successful Markdown results for different files must yield two
/// per-fixture rows, identified as `fixture_1` and `fixture_2`, each keeping
/// the Markdown output format.
#[test]
fn test_per_fixture_results_populated() {
    let first = make_benchmark_result(
        "kreuzberg-markdown-baseline",
        OutputFormat::Markdown,
        "fixture_1.pdf",
        false,
        true,
        Some(QualityMetrics {
            f1_score_text: 0.95,
            f1_score_numeric: 0.90,
            f1_score_layout: Some(0.88),
            quality_score: 0.91,
            missing_tokens: vec![],
            extra_tokens: vec![],
            correct: true,
        }),
    );
    let second = make_benchmark_result(
        "kreuzberg-markdown-baseline",
        OutputFormat::Markdown,
        "fixture_2.pdf",
        false,
        true,
        Some(QualityMetrics {
            f1_score_text: 0.92,
            f1_score_numeric: 0.88,
            f1_score_layout: Some(0.85),
            quality_score: 0.88,
            missing_tokens: vec![],
            extra_tokens: vec![],
            correct: true,
        }),
    );
    let results = vec![first, second];

    let report = aggregate_new_format(&results);
    let rows = &report.per_fixture_results;

    assert!(!rows.is_empty());
    assert_eq!(rows.len(), 2);

    // The fixture id is expected to be the file name without its extension.
    for expected_id in ["fixture_1", "fixture_2"] {
        assert!(rows.iter().any(|row| row.fixture_id == expected_id));
    }

    // Every row must keep the output format of the result it came from.
    for row in rows {
        assert_eq!(row.output_format, OutputFormat::Markdown);
    }
}

/// Plaintext results carry no layout F1 score, so the aggregated quality
/// block for them must report `None` for every layout percentile.
///
/// Each lookup on the way to the quality block uses `expect`, so a missing
/// entry fails the test instead of silently skipping the layout assertions.
#[test]
fn test_plaintext_has_no_layout_percentiles() {
    let results = vec![
        make_benchmark_result(
            "pdfplumber",
            OutputFormat::Plaintext,
            "fixture_1.pdf",
            false,
            true,
            Some(QualityMetrics {
                f1_score_text: 0.90,
                f1_score_numeric: 0.85,
                f1_score_layout: None, // Plaintext mode has no layout
                quality_score: 0.88,
                missing_tokens: vec![],
                extra_tokens: vec![],
                correct: true,
            }),
        ),
        make_benchmark_result(
            "pdfplumber",
            OutputFormat::Plaintext,
            "fixture_2.pdf",
            false,
            true,
            Some(QualityMetrics {
                f1_score_text: 0.91,
                f1_score_numeric: 0.86,
                f1_score_layout: None,
                quality_score: 0.89,
                missing_tokens: vec![],
                extra_tokens: vec![],
                correct: true,
            }),
        ),
    ];

    let aggregated = aggregate_new_format(&results);

    // Find the plaintext aggregation.
    let plaintext_key = aggregated
        .by_framework_mode
        .keys()
        .find(|k| k.contains("plaintext"))
        .cloned()
        .expect("Expected to find plaintext aggregation key");

    // Walk down to the quality block for non-OCR PDF results. The fixtures
    // above are built without OCR and the helper labels them with the "pdf"
    // extension, so every step is required to be present.
    let agg = aggregated
        .by_framework_mode
        .get(&plaintext_key)
        .expect("Expected an aggregation for the plaintext key");
    let pdf_ft = agg
        .by_file_type
        .get("pdf")
        .expect("Expected a `pdf` file-type entry in the plaintext aggregation");
    let perf = pdf_ft
        .no_ocr
        .as_ref()
        .expect("Expected non-OCR metrics for plaintext PDF results");
    let quality = perf
        .quality
        .as_ref()
        .expect("Expected aggregated quality metrics for plaintext PDF results");

    assert_eq!(quality.f1_layout_p50, None);
    assert_eq!(quality.f1_layout_p95, None);
    assert_eq!(quality.f1_layout_p99, None);
}

/// The same framework run in Markdown and in Plaintext mode must end up under
/// aggregation keys that mention `markdown` and `plaintext` respectively.
#[test]
fn test_output_format_in_aggregation_key() {
    let markdown_run = make_benchmark_result(
        "kreuzberg",
        OutputFormat::Markdown,
        "test.pdf",
        false,
        true,
        Some(QualityMetrics {
            f1_score_text: 0.95,
            f1_score_numeric: 0.90,
            f1_score_layout: Some(0.88),
            quality_score: 0.91,
            missing_tokens: vec![],
            extra_tokens: vec![],
            correct: true,
        }),
    );
    let plaintext_run = make_benchmark_result(
        "kreuzberg",
        OutputFormat::Plaintext,
        "test.pdf",
        false,
        true,
        Some(QualityMetrics {
            f1_score_text: 0.92,
            f1_score_numeric: 0.88,
            f1_score_layout: None,
            quality_score: 0.90,
            missing_tokens: vec![],
            extra_tokens: vec![],
            correct: true,
        }),
    );
    let results = vec![markdown_run, plaintext_run];

    let report = aggregate_new_format(&results);

    // One aggregation key per output format is expected.
    let has_markdown_key = report.by_framework_mode.keys().any(|key| key.contains("markdown"));
    let has_plaintext_key = report.by_framework_mode.keys().any(|key| key.contains("plaintext"));

    assert!(has_markdown_key, "Expected markdown aggregation");
    assert!(has_plaintext_key, "Expected plaintext aggregation");
}

/// The Markdown SF1 ranking for PDFs must list the Markdown framework and
/// must not list the plaintext-only `pdfplumber` framework.
#[test]
fn test_plaintext_frameworks_excluded_from_sf1_ranking() {
    // Markdown framework for PDF.
    let markdown_framework = make_benchmark_result(
        "kreuzberg-markdown",
        OutputFormat::Markdown,
        "test.pdf",
        false,
        true,
        Some(QualityMetrics {
            f1_score_text: 0.95,
            f1_score_numeric: 0.90,
            f1_score_layout: Some(0.88),
            quality_score: 0.91,
            missing_tokens: vec![],
            extra_tokens: vec![],
            correct: true,
        }),
    );
    // Plaintext-only framework.
    let plaintext_framework = make_benchmark_result(
        "pdfplumber",
        OutputFormat::Plaintext,
        "test.pdf",
        false,
        true,
        Some(QualityMetrics {
            f1_score_text: 0.92,
            f1_score_numeric: 0.88,
            f1_score_layout: None,
            quality_score: 0.90,
            missing_tokens: vec![],
            extra_tokens: vec![],
            correct: true,
        }),
    );
    let results = vec![markdown_framework, plaintext_framework];

    let report = aggregate_new_format(&results);
    let ranking = &report.comparison.pdf_sf1_ranking_markdown;

    // Plaintext frameworks should NOT appear in pdf_sf1_ranking_markdown.
    assert!(ranking.iter().all(|entry| !entry.framework_mode.contains("pdfplumber")));

    // Markdown frameworks SHOULD appear in pdf_sf1_ranking_markdown.
    let has_markdown = ranking
        .iter()
        .any(|entry| entry.framework_mode.contains("kreuzberg-markdown"));
    assert!(has_markdown, "Expected markdown framework in SF1 ranking");
}

/// With three Markdown results that all carry quality metrics, at least one
/// aggregated performance entry must expose text-F1 and quality-score
/// percentiles.
///
/// The p50 and p95 values are required to be strictly positive; the p99
/// values are only required to be non-negative.
#[test]
fn test_quality_percentiles_all_three() {
    let strong = make_benchmark_result(
        "test-framework",
        OutputFormat::Markdown,
        "fixture_1.pdf",
        false,
        true,
        Some(QualityMetrics {
            f1_score_text: 0.95,
            f1_score_numeric: 0.90,
            f1_score_layout: Some(0.88),
            quality_score: 0.91,
            missing_tokens: vec![],
            extra_tokens: vec![],
            correct: true,
        }),
    );
    let weak = make_benchmark_result(
        "test-framework",
        OutputFormat::Markdown,
        "fixture_2.pdf",
        false,
        true,
        Some(QualityMetrics {
            f1_score_text: 0.80,
            f1_score_numeric: 0.75,
            f1_score_layout: Some(0.70),
            quality_score: 0.75,
            missing_tokens: vec![],
            extra_tokens: vec![],
            correct: false,
        }),
    );
    let middling = make_benchmark_result(
        "test-framework",
        OutputFormat::Markdown,
        "fixture_3.pdf",
        false,
        true,
        Some(QualityMetrics {
            f1_score_text: 0.92,
            f1_score_numeric: 0.87,
            f1_score_layout: Some(0.85),
            quality_score: 0.88,
            missing_tokens: vec![],
            extra_tokens: vec![],
            correct: true,
        }),
    );
    let results = vec![strong, weak, middling];

    let report = aggregate_new_format(&results);

    // Flatten framework -> file type -> (no_ocr, with_ocr) -> quality and look
    // for any quality block that satisfies all six percentile checks.
    let has_quality_percentiles = report
        .by_framework_mode
        .values()
        .flat_map(|agg| agg.by_file_type.values())
        .flat_map(|ft| ft.no_ocr.iter().chain(ft.with_ocr.iter()))
        .filter_map(|perf| perf.quality.as_ref())
        .any(|q| {
            let text_ok = q.f1_text_p50 > 0.0 && q.f1_text_p95 > 0.0 && q.f1_text_p99 >= 0.0;
            let score_ok =
                q.quality_score_p50 > 0.0 && q.quality_score_p95 > 0.0 && q.quality_score_p99 >= 0.0;
            text_ok && score_ok
        });

    assert!(
        has_quality_percentiles,
        "Expected quality percentiles with p50, p95, p99"
    );
}

/// The per-fixture rows must carry the OCR flag of the result they came from:
/// `false` for the result built without OCR and `true` for the one built with
/// OCR.
///
/// Rows are looked up with `expect`, so a missing row fails with a message
/// naming the fixture rather than a bare `unwrap` panic.
#[test]
fn test_ocr_flag_in_per_fixture() {
    let results = vec![
        make_benchmark_result(
            "test-framework",
            OutputFormat::Markdown,
            "no_ocr.pdf",
            false,
            true,
            None,
        ),
        make_benchmark_result(
            "test-framework",
            OutputFormat::Markdown,
            "with_ocr.png",
            true,
            true,
            None,
        ),
    ];

    let aggregated = aggregate_new_format(&results);
    let rows = &aggregated.per_fixture_results;

    let no_ocr_row = rows
        .iter()
        .find(|r| r.fixture_id == "no_ocr")
        .expect("Expected a per-fixture row for `no_ocr`");
    let with_ocr_row = rows
        .iter()
        .find(|r| r.fixture_id == "with_ocr")
        .expect("Expected a per-fixture row for `with_ocr`");

    assert!(!no_ocr_row.ocr, "Expected `no_ocr` row to have ocr == false");
    assert!(with_ocr_row.ocr, "Expected `with_ocr` row to have ocr == true");
}

/// Aggregating zero results must still produce a report stamped with schema
/// version `2.4.0`, with no framework aggregations, no per-fixture rows and a
/// total result count of zero.
#[test]
fn test_empty_results() {
    let no_results = Vec::new();

    let report = aggregate_new_format(&no_results);

    assert_eq!(report.schema_version, "2.4.0");
    assert_eq!(report.metadata.total_results, 0);
    assert!(report.by_framework_mode.is_empty());
    assert!(report.per_fixture_results.is_empty());
}