crates/kreuzberg/tests/pdf_hierarchy_quality.rs

//! TODO: Restored from 245539484 alef-migration cleanup. Currently exercises
//! pub(crate) APIs that the migration deliberately narrowed; gated until
//! either (a) these APIs are re-exposed publicly, or (b) the test is
//! rewritten against the public extraction surface.

#![cfg(any())]

// Original content preserved below inside a block comment.
// It stays disabled until BOTH the file-level cfg(any()) above and the
// surrounding block-comment delimiters are removed.

/*
//! PDF hierarchy quality assessment tests.
//!
//! This module tests PDF text hierarchy extraction quality by comparing against ground truth annotations.
//! Measures precision, recall, F1 score, and level accuracy to ensure the hierarchy detection
//! algorithm works well on real document structures.
//!
//! Test philosophy:
//! - Define ground truth hierarchies for representative PDF documents
//! - Measure how well extracted hierarchies match ground truth
//! - Assert minimum quality thresholds for precision/recall/F1
//! - Verify correct hierarchy level assignments

#![cfg(feature = "pdf")]

use kreuzberg::pdf::hierarchy::{
    BoundingBox, HierarchyLevel, KMeansResult, TextBlock, assign_hierarchy_levels,
    assign_hierarchy_levels_from_clusters, cluster_font_sizes,
};
use serde::{Deserialize, Serialize};
use std::fs;
use std::path::Path;

/// A bounding box annotation from ground truth.
///
/// Holds the four edge coordinates of an axis-aligned rectangle as `f32`
/// values and is (de)serialized with serde. `to_bbox` copies the fields
/// one-to-one into the library's `BoundingBox`.
///
/// NOTE(review): the coordinate units and origin convention are not visible
/// in this file — confirm against the annotation data.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct GroundTruthBBox {
    /// Left edge coordinate.
    left: f32,
    /// Top edge coordinate.
    top: f32,
    /// Right edge coordinate.
    right: f32,
    /// Bottom edge coordinate.
    bottom: f32,
}

impl GroundTruthBBox {
    /// Convert to kreuzberg BoundingBox
    fn to_bbox(&self) -> BoundingBox {
        BoundingBox {
            left: self.left,
            top: self.top,
            right: self.right,
            bottom: self.bottom,
        }
    }
}

/// A ground truth text block with hierarchy level annotation.
///
/// One annotated block of a page: its text, the expected hierarchy label and
/// the rectangle it occupies.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct GroundTruthBlock {
    /// Text content of the annotated block.
    text: String,
    /// Expected hierarchy label. `parse_level` maps "H1" through "H6" to the
    /// matching heading level and every other value to `HierarchyLevel::Body`.
    level: String,
    /// Rectangle of the block on the page.
    bbox: GroundTruthBBox,
}

/// A page of ground truth annotations.
///
/// The quality test evaluates each page independently and produces one set of
/// metrics per page.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct GroundTruthPage {
    /// Page number as stored in the annotation file; only used for log output here.
    /// NOTE(review): whether numbering is 0- or 1-based is not visible in this file.
    page_number: u32,
    /// Annotated blocks found on this page.
    blocks: Vec<GroundTruthBlock>,
}

/// A document with ground truth hierarchy annotations.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct GroundTruthDocument {
    /// Name of the annotated PDF. In this file it is only printed; the PDF
    /// itself is never opened by these tests.
    pdf_file: String,
    /// Per-page annotations for the document.
    pages: Vec<GroundTruthPage>,
}

/// Root structure for ground truth JSON file.
///
/// This is the type `load_ground_truth` deserializes the whole file into.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct GroundTruthFile {
    /// Every annotated document contained in the file.
    documents: Vec<GroundTruthDocument>,
}

/// Quality metrics for hierarchy extraction.
///
/// The four `f64` fields are ratios in the range 0.0..=1.0 derived by
/// `QualityMetrics::new` from the raw counts stored alongside them; each ratio
/// is 0.0 when its denominator is zero.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct QualityMetrics {
    /// Precision: `true_positives / (true_positives + false_positives)`.
    pub precision: f64,
    /// Recall: `true_positives / (true_positives + false_negatives)`.
    pub recall: f64,
    /// F1 Score: harmonic mean of precision and recall.
    pub f1_score: f64,
    /// Level accuracy: `correct_levels / total_blocks`, a 0–1 ratio (not a percentage).
    pub level_accuracy: f64,
    /// Number of extracted blocks that were matched to a ground truth block.
    pub true_positives: usize,
    /// Number of extracted blocks with no matching ground truth block.
    pub false_positives: usize,
    /// Number of ground truth blocks that no extracted block matched.
    pub false_negatives: usize,
    /// Number of matched blocks whose hierarchy level equals the ground truth level.
    pub correct_levels: usize,
    /// Denominator used for `level_accuracy`, as supplied by the caller.
    pub total_blocks: usize,
}

impl QualityMetrics {
    /// Derive the ratio metrics from raw match counts.
    ///
    /// * `precision` = TP / (TP + FP)
    /// * `recall` = TP / (TP + FN)
    /// * `f1_score` = harmonic mean of precision and recall
    /// * `level_accuracy` = `correct_levels` / `total_blocks`
    ///
    /// Any ratio whose denominator is zero is reported as `0.0`, so the
    /// constructor never produces NaN from empty inputs.
    fn new(
        true_positives: usize,
        false_positives: usize,
        false_negatives: usize,
        correct_levels: usize,
        total_blocks: usize,
    ) -> Self {
        // Share of `part` in `whole`; an empty `whole` yields 0.0 instead of dividing by zero.
        let ratio = |part: usize, whole: usize| -> f64 {
            if whole > 0 {
                part as f64 / whole as f64
            } else {
                0.0
            }
        };

        let precision = ratio(true_positives, true_positives + false_positives);
        let recall = ratio(true_positives, true_positives + false_negatives);
        let level_accuracy = ratio(correct_levels, total_blocks);

        let combined = precision + recall;
        let f1_score = if combined > 0.0 {
            2.0 * precision * recall / combined
        } else {
            0.0
        };

        Self {
            precision,
            recall,
            f1_score,
            level_accuracy,
            true_positives,
            false_positives,
            false_negatives,
            correct_levels,
            total_blocks,
        }
    }
}

/// Translate a ground truth level label into a `HierarchyLevel`.
///
/// The labels "H1" through "H6" map to the heading level of the same name.
/// The comparison is exact (case-sensitive, no trimming); any other label is
/// treated as `HierarchyLevel::Body`.
fn parse_level(level: &str) -> HierarchyLevel {
    // Every label that denotes a heading, paired with its level.
    const HEADINGS: [(&str, HierarchyLevel); 6] = [
        ("H1", HierarchyLevel::H1),
        ("H2", HierarchyLevel::H2),
        ("H3", HierarchyLevel::H3),
        ("H4", HierarchyLevel::H4),
        ("H5", HierarchyLevel::H5),
        ("H6", HierarchyLevel::H6),
    ];

    HEADINGS
        .iter()
        .find(|(label, _)| *label == level)
        .map_or(HierarchyLevel::Body, |&(_, heading)| heading)
}

/// Read a ground truth JSON file and deserialize it.
///
/// # Arguments
///
/// * `path` - Location of the ground truth JSON file
///
/// # Returns
///
/// The parsed `GroundTruthFile`, or a `String` describing the failure:
/// "Failed to read file: …" when the file cannot be read and
/// "Failed to parse JSON: …" when its content does not deserialize.
fn load_ground_truth<P: AsRef<Path>>(path: P) -> Result<GroundTruthFile, String> {
    fs::read_to_string(path)
        .map_err(|io_err| format!("Failed to read file: {}", io_err))
        .and_then(|raw| {
            serde_json::from_str::<GroundTruthFile>(&raw)
                .map_err(|json_err| format!("Failed to parse JSON: {}", json_err))
        })
}

/// Score extracted hierarchy blocks against ground truth annotations.
///
/// Matching is greedy and purely geometric: extracted blocks are visited in
/// order, and each one claims the not-yet-claimed ground truth block with the
/// highest bounding box IoU, provided that IoU is strictly greater than 0.3.
/// On equal IoU the earlier ground truth block wins. Block text is not compared.
///
/// Resulting counts:
/// - true positives: extracted blocks that claimed a ground truth block
/// - false positives: extracted blocks that claimed nothing
/// - false negatives: ground truth blocks that were never claimed
/// - correct levels: matched pairs whose hierarchy levels are equal
/// - total blocks: the larger of the two input lengths
///
/// # Arguments
///
/// * `extracted_blocks` - Slice of extracted HierarchyBlock values
/// * `ground_truth_blocks` - Slice of ground truth blocks
///
/// # Returns
///
/// QualityMetrics with precision, recall, F1, and level accuracy derived from the counts above
fn calculate_quality_metrics(
    extracted_blocks: &[kreuzberg::pdf::hierarchy::HierarchyBlock],
    ground_truth_blocks: &[GroundTruthBlock],
) -> QualityMetrics {
    let mut true_positives = 0;
    let mut correct_levels = 0;

    // claimed[i] is true once ground truth block i has been matched.
    let mut claimed = vec![false; ground_truth_blocks.len()];

    for candidate in extracted_blocks {
        // Starting the running best at the acceptance threshold means a single
        // strict comparison enforces both "above threshold" and "better than best so far".
        let mut best_score = 0.3;
        let mut winner = None;

        for (gt_idx, gt_block) in ground_truth_blocks.iter().enumerate() {
            if claimed[gt_idx] {
                continue;
            }

            let score = candidate.bbox.iou(&gt_block.bbox.to_bbox());
            if score > best_score {
                best_score = score;
                winner = Some(gt_idx);
            }
        }

        if let Some(gt_idx) = winner {
            claimed[gt_idx] = true;
            true_positives += 1;

            let gt_level = parse_level(&ground_truth_blocks[gt_idx].level);
            if candidate.hierarchy_level == gt_level {
                correct_levels += 1;
            }
        }
    }

    // Each true positive consumes exactly one extracted block and one ground
    // truth block, so the leftovers on either side are the FP and FN counts.
    let false_positives = extracted_blocks.len() - true_positives;
    let false_negatives = ground_truth_blocks.len() - true_positives;
    let total_blocks = extracted_blocks.len().max(ground_truth_blocks.len());

    QualityMetrics::new(
        true_positives,
        false_positives,
        false_negatives,
        correct_levels,
        total_blocks,
    )
}

/// Create test text blocks from ground truth.
///
/// Each ground truth block becomes one `TextBlock` with:
/// - the same bounding box,
/// - a synthetic font size chosen from the annotated level
///   (H1 = 28, H2 = 24, H3 = 20, H4 = 16, H5 = 14, H6 = 12, anything else = 10),
/// - the original text, or, when the text is longer than 50 characters, its
///   first 50 characters followed by " (Block <index>)".
///
/// The length check counts characters rather than bytes so it agrees with the
/// character-based truncation; text containing multi-byte characters is only
/// marked as truncated when it really is.
fn create_text_blocks_from_ground_truth(gt_blocks: &[GroundTruthBlock]) -> Vec<TextBlock> {
    // Maximum number of characters kept from a block's text.
    const MAX_TEXT_CHARS: usize = 50;

    gt_blocks
        .iter()
        .enumerate()
        .map(|(idx, gt_block)| {
            let bbox = gt_block.bbox.to_bbox();

            // The font size is synthesized from the annotated level, not measured from the bbox.
            let font_size = match gt_block.level.as_str() {
                "H1" => 28.0,
                "H2" => 24.0,
                "H3" => 20.0,
                "H4" => 16.0,
                "H5" => 14.0,
                "H6" => 12.0,
                _ => 10.0, // Body
            };

            // `nth(MAX_TEXT_CHARS)` yields the byte offset of the first character
            // beyond the limit, which exists only when the text is too long.
            let text = match gt_block.text.char_indices().nth(MAX_TEXT_CHARS) {
                Some((cut, _)) => format!("{} (Block {})", &gt_block.text[..cut], idx),
                None => gt_block.text.clone(),
            };

            TextBlock { text, bbox, font_size }
        })
        .collect()
}

/// Runs clustering and level assignment on every annotated page and checks
/// that the F1 score averaged over all pages is above 0.85.
///
/// Text blocks are synthesized from the annotations themselves, so this
/// exercises the clustering and level-assignment steps rather than PDF parsing.
/// When the ground truth contains no pages, nothing is asserted.
#[test]
fn test_hierarchy_quality_on_ground_truth() {
    let ground_truth_file =
        load_ground_truth("tests/data/hierarchy_ground_truth.json").expect("Failed to load ground truth file");

    println!(
        "\nLoaded {} documents from ground truth",
        ground_truth_file.documents.len()
    );

    // One entry per evaluated page, across all documents.
    let mut all_metrics: Vec<QualityMetrics> = Vec::new();

    for doc in &ground_truth_file.documents {
        println!("\nProcessing document: {}", doc.pdf_file);

        for page in &doc.pages {
            println!("  Page {}: {} blocks", page.page_number, page.blocks.len());

            let text_blocks = create_text_blocks_from_ground_truth(&page.blocks);

            // Roughly one cluster per three blocks, bounded to 1..=6.
            let k = (text_blocks.len() / 3).clamp(1, 6);
            let clusters = cluster_font_sizes(&text_blocks, k).expect("Failed to cluster font sizes");

            println!(
                "    Created {} clusters from {} blocks",
                clusters.len(),
                text_blocks.len()
            );

            let leveled = assign_hierarchy_levels_from_clusters(&text_blocks, &clusters);

            // Repackage each (block, level) pair as a HierarchyBlock for scoring.
            let mut extracted_blocks = Vec::with_capacity(leveled.len());
            for (block, level) in leveled.iter() {
                extracted_blocks.push(kreuzberg::pdf::hierarchy::HierarchyBlock {
                    text: block.text.clone(),
                    bbox: block.bbox,
                    font_size: block.font_size,
                    hierarchy_level: *level,
                });
            }

            let metrics = calculate_quality_metrics(&extracted_blocks, &page.blocks);

            println!("    Precision:      {:.4}", metrics.precision);
            println!("    Recall:         {:.4}", metrics.recall);
            println!("    F1 Score:       {:.4}", metrics.f1_score);
            println!("    Level Accuracy: {:.4}", metrics.level_accuracy);

            all_metrics.push(metrics);
        }
    }

    if all_metrics.is_empty() {
        return;
    }

    // Unweighted mean of one metric over all pages.
    let page_count = all_metrics.len() as f64;
    let mean = |pick: fn(&QualityMetrics) -> f64| all_metrics.iter().map(pick).sum::<f64>() / page_count;

    let avg_precision = mean(|m| m.precision);
    let avg_recall = mean(|m| m.recall);
    let avg_f1 = mean(|m| m.f1_score);
    let avg_level_acc = mean(|m| m.level_accuracy);

    println!("\n=== AVERAGE METRICS ACROSS ALL PAGES ===");
    println!("Average Precision:      {:.4}", avg_precision);
    println!("Average Recall:         {:.4}", avg_recall);
    println!("Average F1 Score:       {:.4}", avg_f1);
    println!("Average Level Accuracy: {:.4}", avg_level_acc);

    assert!(
        avg_f1 > 0.85,
        "F1 score ({:.4}) must be greater than 0.85. Metrics: precision={:.4}, recall={:.4}, level_accuracy={:.4}",
        avg_f1,
        avg_precision,
        avg_recall,
        avg_level_acc
    );
}

/// Four blocks with four distinct font sizes, clustered with k = 4, must be
/// labelled H1, H2, H3 and Body in descending size order, and scoring that
/// result against identical ground truth boxes must give an F1 of at least 0.8.
#[test]
fn test_hierarchy_clustering_consistency() {
    // Every block spans x = 0..100; only the vertical extent differs.
    let bbox = |top: f32, bottom: f32| BoundingBox {
        left: 0.0,
        top,
        right: 100.0,
        bottom,
    };
    let truth = |text: &str, level: &str, top: f32, bottom: f32| GroundTruthBlock {
        text: String::from(text),
        level: String::from(level),
        bbox: GroundTruthBBox {
            left: 0.0,
            top,
            right: 100.0,
            bottom,
        },
    };

    // Arrange: a simple document with a clear size hierarchy.
    let blocks = vec![
        TextBlock {
            text: String::from("Title"),
            bbox: bbox(0.0, 28.0),
            font_size: 28.0,
        },
        TextBlock {
            text: String::from("Subtitle"),
            bbox: bbox(30.0, 54.0),
            font_size: 24.0,
        },
        TextBlock {
            text: String::from("Section"),
            bbox: bbox(60.0, 80.0),
            font_size: 20.0,
        },
        TextBlock {
            text: String::from("Body paragraph"),
            bbox: bbox(90.0, 102.0),
            font_size: 10.0,
        },
    ];

    // Act: cluster by font size and derive levels from the clusters.
    let clusters = cluster_font_sizes(&blocks, 4).expect("Clustering failed");
    let assignments = assign_hierarchy_levels_from_clusters(&blocks, &clusters);

    // Assert: levels follow descending font size.
    assert_eq!(assignments.len(), 4);
    assert_eq!(assignments[0].1, HierarchyLevel::H1, "Largest text should be H1");
    assert_eq!(assignments[1].1, HierarchyLevel::H2, "Second largest should be H2");
    assert_eq!(assignments[2].1, HierarchyLevel::H3, "Third largest should be H3");
    assert_eq!(assignments[3].1, HierarchyLevel::Body, "Smallest text should be Body");

    // Assert: scoring against ground truth with the same boxes clears the 0.8 F1 bar.
    let extracted: Vec<_> = assignments
        .iter()
        .map(|(b, l)| kreuzberg::pdf::hierarchy::HierarchyBlock {
            text: b.text.clone(),
            bbox: b.bbox,
            font_size: b.font_size,
            hierarchy_level: *l,
        })
        .collect();
    let ground_truth = [
        truth("Title", "H1", 0.0, 28.0),
        truth("Subtitle", "H2", 30.0, 54.0),
        truth("Section", "H3", 60.0, 80.0),
        truth("Body paragraph", "Body", 90.0, 102.0),
    ];
    let quality_metrics = calculate_quality_metrics(&extracted, &ground_truth);

    println!("Consistency Test - F1 Score: {:.4}", quality_metrics.f1_score);
    assert!(
        quality_metrics.f1_score >= 0.8,
        "F1 score for simple hierarchy should be >= 0.8"
    );
}

/// Three blocks with a precomputed `KMeansResult` (labels 0, 1, 2) must come
/// back from `assign_hierarchy_levels` as H1, H2 and H3 in input order.
#[test]
fn test_hierarchy_level_assignment() {
    // Arrange: three blocks of decreasing font size plus their cluster labels.
    let bbox = |left: f32, top: f32, right: f32, bottom: f32| BoundingBox {
        left,
        top,
        right,
        bottom,
    };
    let blocks = vec![
        TextBlock {
            text: String::from("Main Title"),
            bbox: bbox(50.0, 50.0, 150.0, 100.0),
            font_size: 28.0,
        },
        TextBlock {
            text: String::from("Section Title"),
            bbox: bbox(50.0, 120.0, 150.0, 160.0),
            font_size: 20.0,
        },
        TextBlock {
            text: String::from("Regular body text"),
            bbox: bbox(50.0, 180.0, 200.0, 200.0),
            font_size: 12.0,
        },
    ];
    let kmeans_result = KMeansResult { labels: vec![0, 1, 2] };

    // Act
    let result = assign_hierarchy_levels(&blocks, &kmeans_result);

    // Assert: one output per input, with the expected level at each position.
    let expected = [HierarchyLevel::H1, HierarchyLevel::H2, HierarchyLevel::H3];
    assert_eq!(result.len(), 3);
    for (idx, want) in expected.iter().enumerate() {
        assert_eq!(result[idx].hierarchy_level, *want);
    }
}

/// Scoring two extracted blocks against ground truth with identical boxes and
/// levels must yield two true positives, no errors, and near-perfect ratios.
#[test]
fn test_quality_metrics_calculation() {
    // Both sides use the same two rectangles: x = 0..100, y = 0..20 and y = 30..50.
    let extracted_bbox = |top: f32, bottom: f32| BoundingBox {
        left: 0.0,
        top,
        right: 100.0,
        bottom,
    };
    let truth = |text: &str, level: &str, top: f32, bottom: f32| GroundTruthBlock {
        text: String::from(text),
        level: String::from(level),
        bbox: GroundTruthBBox {
            left: 0.0,
            top,
            right: 100.0,
            bottom,
        },
    };

    // Arrange
    let extracted = vec![
        kreuzberg::pdf::hierarchy::HierarchyBlock {
            text: String::from("Title"),
            bbox: extracted_bbox(0.0, 20.0),
            font_size: 28.0,
            hierarchy_level: HierarchyLevel::H1,
        },
        kreuzberg::pdf::hierarchy::HierarchyBlock {
            text: String::from("Body"),
            bbox: extracted_bbox(30.0, 50.0),
            font_size: 12.0,
            hierarchy_level: HierarchyLevel::Body,
        },
    ];
    let ground_truth = vec![
        truth("Title", "H1", 0.0, 20.0),
        truth("Body", "Body", 30.0, 50.0),
    ];

    // Act
    let metrics = calculate_quality_metrics(&extracted, &ground_truth);

    // Assert: raw counts first, then the ratios derived from them.
    assert_eq!(metrics.true_positives, 2);
    assert_eq!(metrics.false_positives, 0);
    assert_eq!(metrics.false_negatives, 0);
    assert_eq!(metrics.correct_levels, 2);
    assert!(metrics.precision > 0.99);
    assert!(metrics.recall > 0.99);
    assert!(metrics.f1_score > 0.99);
}

*/
Nomad changes 2026-06-01 23:40:55 +02:00			`//! TODO: Restored from 245539484 alef-migration cleanup. Currently exercises`
			`//! pub(crate) APIs that the migration deliberately narrowed; gated until`
			`//! either (a) these APIs are re-exposed publicly, or (b) the test is`
			`//! rewritten against the public extraction surface.`

			`#![cfg(any())]`

			`// Original content preserved below; recompiled once gating cfg drops.`
			`// Disabled by the file-level cfg(any()) above.`

			`/*`
			`//! PDF hierarchy quality assessment tests.`
			`//!`
			`//! This module tests PDF text hierarchy extraction quality by comparing against ground truth annotations.`
			`//! Measures precision, recall, F1 score, and level accuracy to ensure the hierarchy detection`
			`//! algorithm works well on real document structures.`
			`//!`
			`//! Test philosophy:`
			`//! - Define ground truth hierarchies for representative PDF documents`
			`//! - Measure how well extracted hierarchies match ground truth`
			`//! - Assert minimum quality thresholds for precision/recall/F1`
			`//! - Verify correct hierarchy level assignments`

			`#![cfg(feature = "pdf")]`

			`use kreuzberg::pdf::hierarchy::{`
			`BoundingBox, HierarchyLevel, KMeansResult, TextBlock, assign_hierarchy_levels,`
			`assign_hierarchy_levels_from_clusters, cluster_font_sizes,`
			`};`
			`use serde::{Deserialize, Serialize};`
			`use std::fs;`
			`use std::path::Path;`

			`/// A bounding box annotation from ground truth.`
			`#[derive(Debug, Clone, Serialize, Deserialize)]`
			`struct GroundTruthBBox {`
			`left: f32,`
			`top: f32,`
			`right: f32,`
			`bottom: f32,`
			`}`

			`impl GroundTruthBBox {`
			`/// Convert to kreuzberg BoundingBox`
			`fn to_bbox(&self) -> BoundingBox {`
			`BoundingBox {`
			`left: self.left,`
			`top: self.top,`
			`right: self.right,`
			`bottom: self.bottom,`
			`}`
			`}`
			`}`

			`/// A ground truth text block with hierarchy level annotation.`
			`#[derive(Debug, Clone, Serialize, Deserialize)]`
			`struct GroundTruthBlock {`
			`text: String,`
			`level: String,`
			`bbox: GroundTruthBBox,`
			`}`

			`/// A page of ground truth annotations.`
			`#[derive(Debug, Clone, Serialize, Deserialize)]`
			`struct GroundTruthPage {`
			`page_number: u32,`
			`blocks: Vec<GroundTruthBlock>,`
			`}`

			`/// A document with ground truth hierarchy annotations.`
			`#[derive(Debug, Clone, Serialize, Deserialize)]`
			`struct GroundTruthDocument {`
			`pdf_file: String,`
			`pages: Vec<GroundTruthPage>,`
			`}`

			`/// Root structure for ground truth JSON file.`
			`#[derive(Debug, Clone, Serialize, Deserialize)]`
			`struct GroundTruthFile {`
			`documents: Vec<GroundTruthDocument>,`
			`}`

			`/// Quality metrics for hierarchy extraction.`
			`#[derive(Debug, Clone, Serialize, Deserialize)]`
			`pub struct QualityMetrics {`
			`/// Precision: (correctly identified hierarchies) / (total extracted hierarchies)`
			`pub precision: f64,`
			`/// Recall: (correctly identified hierarchies) / (total ground truth hierarchies)`
			`pub recall: f64,`
			`/// F1 Score: harmonic mean of precision and recall`
			`pub f1_score: f64,`
			`/// Level accuracy: percentage of blocks assigned to correct hierarchy level`
			`pub level_accuracy: f64,`
			`/// Number of correctly identified hierarchy blocks`
			`pub true_positives: usize,`
			`/// Number of incorrectly extracted hierarchy blocks`
			`pub false_positives: usize,`
			`/// Number of missed ground truth hierarchy blocks`
			`pub false_negatives: usize,`
			`/// Number of blocks with correct hierarchy level`
			`pub correct_levels: usize,`
			`/// Total number of blocks evaluated`
			`pub total_blocks: usize,`
			`}`

			`impl QualityMetrics {`
			`/// Create new quality metrics from test results.`
			`fn new(`
			`true_positives: usize,`
			`false_positives: usize,`
			`false_negatives: usize,`
			`correct_levels: usize,`
			`total_blocks: usize,`
			`) -> Self {`
			`let precision = if true_positives + false_positives > 0 {`
			`true_positives as f64 / (true_positives + false_positives) as f64`
			`} else {`
			`0.0`
			`};`

			`let recall = if true_positives + false_negatives > 0 {`
			`true_positives as f64 / (true_positives + false_negatives) as f64`
			`} else {`
			`0.0`
			`};`

			`let f1_score = if precision + recall > 0.0 {`
			`2.0 * precision * recall / (precision + recall)`
			`} else {`
			`0.0`
			`};`

			`let level_accuracy = if total_blocks > 0 {`
			`correct_levels as f64 / total_blocks as f64`
			`} else {`
			`0.0`
			`};`

			`Self {`
			`precision,`
			`recall,`
			`f1_score,`
			`level_accuracy,`
			`true_positives,`
			`false_positives,`
			`false_negatives,`
			`correct_levels,`
			`total_blocks,`
			`}`
			`}`
			`}`

			`/// Convert hierarchy level string to HierarchyLevel enum.`
			`fn parse_level(level: &str) -> HierarchyLevel {`
			`match level {`
			`"H1" => HierarchyLevel::H1,`
			`"H2" => HierarchyLevel::H2,`
			`"H3" => HierarchyLevel::H3,`
			`"H4" => HierarchyLevel::H4,`
			`"H5" => HierarchyLevel::H5,`
			`"H6" => HierarchyLevel::H6,`
			`_ => HierarchyLevel::Body,`
			`}`
			`}`

			`/// Load ground truth annotations from JSON file.`
			`///`
			`/// Reads the hierarchy_ground_truth.json file and parses document annotations.`
			`///`
			`/// # Arguments`
			`///`
			/// * `path` - Path to the ground truth JSON file
			`///`
			`/// # Returns`
			`///`
			`/// Result containing the parsed GroundTruthFile or error message`
			`fn load_ground_truth<P: AsRef<Path>>(path: P) -> Result<GroundTruthFile, String> {`
			`let content = fs::read_to_string(path).map_err(\|e\| format!("Failed to read file: {}", e))?;`
			`serde_json::from_str(&content).map_err(\|e\| format!("Failed to parse JSON: {}", e))`
			`}`

			`/// Calculate quality metrics by comparing extracted hierarchies to ground truth.`
			`///`
			`/// Compares extracted text blocks with their hierarchy assignments to ground truth annotations.`
			`/// Measures:`
			`/// - Precision: correctly identified hierarchies / total extracted`
			`/// - Recall: correctly identified hierarchies / total ground truth`
			`/// - F1 Score: harmonic mean of precision and recall`
			`/// - Level Accuracy: percentage of blocks with correct hierarchy level`
			`///`
			`/// # Arguments`
			`///`
			/// * `extracted_blocks` - Vector of extracted HierarchyBlock objects
			/// * `ground_truth_blocks` - Vector of ground truth blocks
			`///`
			`/// # Returns`
			`///`
			`/// QualityMetrics struct with calculated precision, recall, F1, and level accuracy`
			`fn calculate_quality_metrics(`
			`extracted_blocks: &[kreuzberg::pdf::hierarchy::HierarchyBlock],`
			`ground_truth_blocks: &[GroundTruthBlock],`
			`) -> QualityMetrics {`
			`let mut true_positives = 0;`
			`let mut false_positives = 0;`
			`let mut correct_levels = 0;`

			`// For matching blocks, we use bounding box overlap and text similarity`
			`let mut matched_gt_indices: Vec<bool> = vec![false; ground_truth_blocks.len()];`

			`for extracted in extracted_blocks {`
			`let mut best_match_idx: Option<usize> = None;`
			`let mut best_overlap = 0.0;`

			`// Find the best matching ground truth block by bounding box overlap`
			`for (gt_idx, gt_block) in ground_truth_blocks.iter().enumerate() {`
			`if matched_gt_indices[gt_idx] {`
			`continue; // Already matched`
			`}`

			`let gt_bbox = gt_block.bbox.to_bbox();`
			`let overlap = extracted.bbox.iou(&gt_bbox);`

			`if overlap > best_overlap && overlap > 0.3 {`
			`best_overlap = overlap;`
			`best_match_idx = Some(gt_idx);`
			`}`
			`}`

			`if let Some(gt_idx) = best_match_idx {`
			`matched_gt_indices[gt_idx] = true;`
			`true_positives += 1;`

			`// Check if the hierarchy level matches`
			`let gt_level = parse_level(&ground_truth_blocks[gt_idx].level);`
			`if extracted.hierarchy_level == gt_level {`
			`correct_levels += 1;`
			`}`
			`} else {`
			`false_positives += 1;`
			`}`
			`}`

			`// Count unmatched ground truth blocks as false negatives`
			`let false_negatives = matched_gt_indices.iter().filter(\|&&m\| !m).count();`

			`let total_blocks = extracted_blocks.len().max(ground_truth_blocks.len());`

			`QualityMetrics::new(`
			`true_positives,`
			`false_positives,`
			`false_negatives,`
			`correct_levels,`
			`total_blocks,`
			`)`
			`}`

			`/// Create test text blocks from ground truth.`
			`fn create_text_blocks_from_ground_truth(gt_blocks: &[GroundTruthBlock]) -> Vec<TextBlock> {`
			`gt_blocks`
			`.iter()`
			`.enumerate()`
			`.map(\|(idx, gt_block)\| {`
			`// Estimate font size from bbox height`
			`let bbox = gt_block.bbox.to_bbox();`
			`let font_size = match gt_block.level.as_str() {`
			`"H1" => 28.0,`
			`"H2" => 24.0,`
			`"H3" => 20.0,`
			`"H4" => 16.0,`
			`"H5" => 14.0,`
			`"H6" => 12.0,`
			`_ => 10.0, // Body`
			`};`

			`TextBlock {`
			`text: if gt_block.text.len() > 50 {`
			`format!("{} (Block {})", gt_block.text.chars().take(50).collect::<String>(), idx)`
			`} else {`
			`gt_block.text.clone()`
			`},`
			`bbox,`
			`font_size,`
			`}`
			`})`
			`.collect()`
			`}`

			`#[test]`
			`fn test_hierarchy_quality_on_ground_truth() {`
			`// Load ground truth data`
			`let ground_truth_path = "tests/data/hierarchy_ground_truth.json";`
			`let ground_truth_file = load_ground_truth(ground_truth_path).expect("Failed to load ground truth file");`

			`println!(`
			`"\nLoaded {} documents from ground truth",`
			`ground_truth_file.documents.len()`
			`);`

			`let mut all_metrics: Vec<QualityMetrics> = Vec::new();`

			`// Process each document`
			`for doc in &ground_truth_file.documents {`
			`println!("\nProcessing document: {}", doc.pdf_file);`

			`for page in &doc.pages {`
			`println!(" Page {}: {} blocks", page.page_number, page.blocks.len());`

			`// Create text blocks from ground truth`
			`let text_blocks = create_text_blocks_from_ground_truth(&page.blocks);`

			`// Cluster by font size`
			`let k = (text_blocks.len() / 3).clamp(1, 6); // Estimate k clusters`
			`let clusters = cluster_font_sizes(&text_blocks, k).expect("Failed to cluster font sizes");`

			`println!(`
			`" Created {} clusters from {} blocks",`
			`clusters.len(),`
			`text_blocks.len()`
			`);`

			`// Assign hierarchy levels from clusters`
			`let hierarchy_assignments = assign_hierarchy_levels_from_clusters(&text_blocks, &clusters);`

			`// Convert to HierarchyBlock format`
			`let extracted_blocks: Vec<kreuzberg::pdf::hierarchy::HierarchyBlock> = hierarchy_assignments`
			`.iter()`
			`.map(\|(block, level)\| kreuzberg::pdf::hierarchy::HierarchyBlock {`
			`text: block.text.clone(),`
			`bbox: block.bbox,`
			`font_size: block.font_size,`
			`hierarchy_level: *level,`
			`})`
			`.collect();`

			`// Calculate quality metrics`
			`let metrics = calculate_quality_metrics(&extracted_blocks, &page.blocks);`
			`all_metrics.push(metrics.clone());`

			`println!(" Precision: {:.4}", metrics.precision);`
			`println!(" Recall: {:.4}", metrics.recall);`
			`println!(" F1 Score: {:.4}", metrics.f1_score);`
			`println!(" Level Accuracy: {:.4}", metrics.level_accuracy);`
			`}`
			`}`

			`// Calculate average metrics`
			`if !all_metrics.is_empty() {`
			`let avg_precision = all_metrics.iter().map(\|m\| m.precision).sum::<f64>() / all_metrics.len() as f64;`
			`let avg_recall = all_metrics.iter().map(\|m\| m.recall).sum::<f64>() / all_metrics.len() as f64;`
			`let avg_f1 = all_metrics.iter().map(\|m\| m.f1_score).sum::<f64>() / all_metrics.len() as f64;`
			`let avg_level_acc = all_metrics.iter().map(\|m\| m.level_accuracy).sum::<f64>() / all_metrics.len() as f64;`

			`println!("\n=== AVERAGE METRICS ACROSS ALL PAGES ===");`
			`println!("Average Precision: {:.4}", avg_precision);`
			`println!("Average Recall: {:.4}", avg_recall);`
			`println!("Average F1 Score: {:.4}", avg_f1);`
			`println!("Average Level Accuracy: {:.4}", avg_level_acc);`

			`// Assert minimum F1 threshold`
			`assert!(`
			`avg_f1 > 0.85,`
			`"F1 score ({:.4}) must be greater than 0.85. Metrics: precision={:.4}, recall={:.4}, level_accuracy={:.4}",`
			`avg_f1,`
			`avg_precision,`
			`avg_recall,`
			`avg_level_acc`
			`);`
			`}`
			`}`

			/// Clusters four text blocks with distinct font sizes (28, 24, 20, 10) into
			/// four clusters and checks that the assigned levels come back as H1, H2, H3
			/// and Body, in input order.
			///
			/// The assignments are then scored against ground truth built from the same
			/// texts and bounding boxes; the resulting F1 score must be at least 0.8.
			#[test]
			fn test_hierarchy_clustering_consistency() {
				// Arrange: one fixture table drives both the input blocks and the ground
				// truth. Columns: text, top, bottom, font size, annotated level.
				// Every box spans left = 0.0 .. right = 100.0.
				let rows = [
					("Title", 0.0, 28.0, 28.0, "H1"),
					("Subtitle", 30.0, 54.0, 24.0, "H2"),
					("Section", 60.0, 80.0, 20.0, "H3"),
					("Body paragraph", 90.0, 102.0, 10.0, "Body"),
				];

				let blocks: Vec<TextBlock> = rows
					.iter()
					.map(|&(text, top, bottom, font_size, _)| TextBlock {
						text: text.to_string(),
						bbox: BoundingBox {
							left: 0.0,
							top,
							right: 100.0,
							bottom,
						},
						font_size,
					})
					.collect();

				// Act: cluster the font sizes and derive a level per block.
				let clusters = cluster_font_sizes(&blocks, 4).expect("Clustering failed");
				let assignments = assign_hierarchy_levels_from_clusters(&blocks, &clusters);

				// Assert: one assignment per block, with levels ordered by font size.
				assert_eq!(assignments.len(), 4);
				let expected_levels = [
					(HierarchyLevel::H1, "Largest text should be H1"),
					(HierarchyLevel::H2, "Second largest should be H2"),
					(HierarchyLevel::H3, "Third largest should be H3"),
					(HierarchyLevel::Body, "Smallest text should be Body"),
				];
				for (idx, (level, why)) in expected_levels.iter().enumerate() {
					assert_eq!(assignments[idx].1, *level, "{}", why);
				}

				// Assert: score the assignments against ground truth that mirrors the
				// input texts and boxes; the F1 score has to clear the 0.8 threshold.
				let extracted: Vec<kreuzberg::pdf::hierarchy::HierarchyBlock> = assignments
					.iter()
					.map(|(b, l)| kreuzberg::pdf::hierarchy::HierarchyBlock {
						text: b.text.clone(),
						bbox: b.bbox,
						font_size: b.font_size,
						hierarchy_level: *l,
					})
					.collect();

				let ground_truth: Vec<GroundTruthBlock> = rows
					.iter()
					.map(|&(text, top, bottom, _, level)| GroundTruthBlock {
						text: text.to_string(),
						level: level.to_string(),
						bbox: GroundTruthBBox {
							left: 0.0,
							top,
							right: 100.0,
							bottom,
						},
					})
					.collect();

				let quality_metrics = calculate_quality_metrics(&extracted, &ground_truth);

				println!("Consistency Test - F1 Score: {:.4}", quality_metrics.f1_score);
				assert!(
					quality_metrics.f1_score >= 0.8,
					"F1 score for simple hierarchy should be >= 0.8"
				);
			}

			/// Feeds three text blocks and a `KMeansResult` with labels `[0, 1, 2]` to
			/// `assign_hierarchy_levels` and checks that the three results carry the
			/// levels H1, H2 and H3, in that order.
			#[test]
			fn test_hierarchy_level_assignment() {
				// Arrange: text, (left, top, right, bottom), font size.
				let rows = [
					("Main Title", (50.0, 50.0, 150.0, 100.0), 28.0),
					("Section Title", (50.0, 120.0, 150.0, 160.0), 20.0),
					("Regular body text", (50.0, 180.0, 200.0, 200.0), 12.0),
				];

				let blocks: Vec<TextBlock> = rows
					.iter()
					.map(|&(text, (left, top, right, bottom), font_size)| TextBlock {
						text: text.to_string(),
						bbox: BoundingBox {
							left,
							top,
							right,
							bottom,
						},
						font_size,
					})
					.collect();

				// One label per block, in block order.
				let kmeans_result = KMeansResult { labels: vec![0, 1, 2] };

				// Act: assign hierarchy levels using the KMeans result.
				let result = assign_hierarchy_levels(&blocks, &kmeans_result);

				// Assert: one result per block, each with the expected level.
				assert_eq!(result.len(), 3);
				let expected_levels = [HierarchyLevel::H1, HierarchyLevel::H2, HierarchyLevel::H3];
				for (idx, level) in expected_levels.iter().enumerate() {
					assert_eq!(result[idx].hierarchy_level, *level);
				}
			}

			/// Scores two extracted blocks against ground truth that has the same texts,
			/// boxes and levels, and checks the resulting counters and ratios: two true
			/// positives, no false positives or negatives, two correct levels, and
			/// precision, recall and F1 all above 0.99.
			#[test]
			fn test_quality_metrics_calculation() {
				// Arrange: a single table feeds both sides of the comparison.
				// Columns: text, top, bottom, font size, extracted level, annotated level.
				// Every box spans left = 0.0 .. right = 100.0.
				let rows = [
					("Title", 0.0, 20.0, 28.0, HierarchyLevel::H1, "H1"),
					("Body", 30.0, 50.0, 12.0, HierarchyLevel::Body, "Body"),
				];

				let extracted: Vec<kreuzberg::pdf::hierarchy::HierarchyBlock> = rows
					.iter()
					.map(
						|&(text, top, bottom, font_size, hierarchy_level, _)| kreuzberg::pdf::hierarchy::HierarchyBlock {
							text: text.to_string(),
							bbox: BoundingBox {
								left: 0.0,
								top,
								right: 100.0,
								bottom,
							},
							font_size,
							hierarchy_level,
						},
					)
					.collect();

				let ground_truth: Vec<GroundTruthBlock> = rows
					.iter()
					.map(|&(text, top, bottom, _, _, level)| GroundTruthBlock {
						text: text.to_string(),
						level: level.to_string(),
						bbox: GroundTruthBBox {
							left: 0.0,
							top,
							right: 100.0,
							bottom,
						},
					})
					.collect();

				// Act: calculate metrics.
				let metrics = calculate_quality_metrics(&extracted, &ground_truth);

				// Assert: counters first, then the derived ratios.
				assert_eq!(metrics.true_positives, 2);
				assert_eq!(metrics.false_positives, 0);
				assert_eq!(metrics.false_negatives, 0);
				assert_eq!(metrics.correct_levels, 2);
				assert!(metrics.precision > 0.99);
				assert!(metrics.recall > 0.99);
				assert!(metrics.f1_score > 0.99);
			}

			`*/`