crates/kreuzberg/tests/orgmode_extractor_tests.rs

//! Comprehensive TDD test suite for Org Mode extraction
//!
//! This test suite validates Org Mode extraction capabilities.
//! Each test extracts an Org Mode file and validates:
//!
//! - Metadata extraction (title, author, date from #+TITLE, #+AUTHOR, #+DATE)
//! - Heading hierarchy (* ** ***)
//! - Table parsing with proper structure
//! - List extraction (ordered, unordered, nested)
//! - Inline formatting (*bold*, /italic/, =code=, ~strikethrough~)
//! - Properties drawer extraction (:PROPERTIES: ... :END:)
//! - Link syntax ([[url][description]])
//! - Code blocks (#+BEGIN_SRC ... #+END_SRC)
//! - Unicode and special character handling
//! - Content quality validation

#![cfg(feature = "office")]

use kreuzberg::core::config::ExtractionConfig;
use kreuzberg::core::extractor::extract_bytes;
use std::path::PathBuf;

/// Builds the path of an Org Mode fixture inside the workspace.
///
/// The workspace root is taken to be two directory levels above this crate's
/// `CARGO_MANIFEST_DIR`; `filename` is appended to `test_documents/org/`
/// beneath it. The resulting path is not checked for existence.
///
/// # Panics
///
/// Panics if the manifest directory has fewer than two ancestors.
fn get_test_orgmode_path(filename: &str) -> PathBuf {
    let manifest_dir = std::path::Path::new(env!("CARGO_MANIFEST_DIR"));
    // `ancestors()` yields the path itself first, so index 2 is the grandparent.
    let workspace_root = manifest_dir.ancestors().nth(2).expect("Operation failed");
    let relative = format!("test_documents/org/{}", filename);
    workspace_root.join(relative)
}

/// Asserts that `content` contains `needle`, ignoring case.
///
/// Both strings are lowercased before the substring search. On failure the
/// panic message echoes `needle`, `description`, and a preview of at most the
/// first 200 bytes of `content`.
///
/// The preview is shortened to the nearest preceding UTF-8 character boundary,
/// so content whose 200th byte falls inside a multi-byte character still
/// produces the intended failure message instead of a slicing panic.
///
/// # Panics
///
/// Panics when `needle` is not found in `content`. The panic location is
/// attributed to the caller.
#[track_caller]
fn assert_contains_ci(content: &str, needle: &str, description: &str) {
    if content.to_lowercase().contains(&needle.to_lowercase()) {
        return;
    }

    // Back off from byte 200 until we land on a character boundary; index 0 is
    // always a boundary, so this loop terminates.
    let mut preview_end = content.len().min(200);
    while !content.is_char_boundary(preview_end) {
        preview_end -= 1;
    }

    panic!(
        "Content should contain '{}' ({}). Content: {}",
        needle,
        description,
        &content[..preview_end]
    );
}

/// Fails the test when `needle` occurs in `content`, ignoring case.
///
/// Both strings are lowercased before the substring search. `description`
/// is echoed in the panic message to say what was being checked.
fn assert_not_contains_ci(content: &str, needle: &str, description: &str) {
    let haystack = content.to_lowercase();
    let lowered_needle = needle.to_lowercase();
    if haystack.contains(&lowered_needle) {
        panic!("Content should NOT contain '{}' ({})", needle, description);
    }
}

/// Test 1: Basic Org Mode extraction from the `tables.org` fixture
///
/// Validates:
/// - Extraction with the `text/x-org` MIME type succeeds
/// - The extracted content is non-empty and longer than 50 bytes
/// - Raw `#+TITLE` and `#+BEGIN_` markup is absent (case-insensitive)
///
/// The test returns early without failing when the fixture is missing.
#[tokio::test]
async fn test_orgmode_basic_extraction() {
    let fixture = get_test_orgmode_path("tables.org");
    if !fixture.exists() {
        println!("Skipping test: Test file not found at {:?}", fixture);
        return;
    }

    let raw = std::fs::read(&fixture).expect("Should read Org Mode file");
    let config = ExtractionConfig::default();
    let extraction = extract_bytes(&raw, "text/x-org", &config)
        .await
        .expect("Should extract Org Mode successfully");
    let text: &str = &extraction.content;

    assert!(!text.is_empty(), "Content should not be empty for Org Mode file");
    assert!(text.len() > 50, "Content should have substantial length");

    for (marker, why) in [
        ("#+TITLE", "Should not contain raw #+TITLE"),
        ("#+BEGIN_", "Should not contain raw #+BEGIN_"),
    ] {
        assert_not_contains_ci(text, marker, why);
    }

    println!("✅ Org Mode basic extraction test passed!");
    println!("   Content length: {} bytes", text.len());
}

/// Test 2: Document carrying `#+TITLE`, `#+AUTHOR` and `#+DATE` keywords
///
/// Validates:
/// - A document that starts with metadata keywords extracts without error
/// - The extracted content is non-empty
/// - The section heading and body text appear in the content
///
/// The number of additional metadata fields is printed for inspection only;
/// no assertion is made on the metadata values themselves.
#[tokio::test]
async fn test_orgmode_metadata_extraction() {
    let source = r#"#+TITLE: Test Document
#+AUTHOR: John Doe
#+DATE: 2024-01-15

* First Section
  Document content here.
"#;

    let config = ExtractionConfig::default();
    let extraction = extract_bytes(source.as_bytes(), "text/x-org", &config)
        .await
        .expect("Should extract metadata from Org Mode");
    let text: &str = &extraction.content;

    assert!(
        !text.is_empty(),
        "Content should be extracted from Org Mode with metadata"
    );

    for (needle, why) in [
        ("First Section", "Should contain section heading"),
        ("content", "Should contain document content"),
    ] {
        assert_contains_ci(text, needle, why);
    }

    println!("✅ Org Mode metadata extraction test passed!");
    println!("   Metadata fields: {}", extraction.metadata.additional.len());
    println!("   Content length: {} bytes", text.len());
}

/// Test 3: Heading hierarchy extraction
///
/// Validates that the text of headings at levels one through four
/// (`*`, `**`, `***`, `****`) appears in the extracted content
/// (case-insensitive match).
#[tokio::test]
async fn test_orgmode_headings() {
    let source = r#"* Top Level Heading
Text under top level.

** Second Level Heading
Text under second level.

*** Third Level Heading
Text under third level.

**** Fourth Level Heading
Deep nested content.
"#;

    let config = ExtractionConfig::default();
    let extraction = extract_bytes(source.as_bytes(), "text/x-org", &config)
        .await
        .expect("Should extract headings from Org Mode");
    let text: &str = &extraction.content;

    let expected_headings = [
        ("Top Level Heading", "Should contain level 1 heading"),
        ("Second Level Heading", "Should contain level 2 heading"),
        ("Third Level Heading", "Should contain level 3 heading"),
        ("Fourth Level Heading", "Should contain level 4 heading"),
    ];
    for (heading, why) in expected_headings {
        assert_contains_ci(text, heading, why);
    }

    println!("✅ Org Mode headings test passed!");
    println!("   All heading levels extracted successfully");
}

/// Test 4: Table extraction from the `tables.org` fixture
///
/// Validates:
/// - A header label (`Right` or `Left`) appears in the content
/// - A data value (`12` or `123`) appears in the content
/// - The string `Right` occurs at least once; its occurrence count is
///   printed as an approximate table count
///
/// The test returns early without failing when the fixture is missing.
#[tokio::test]
async fn test_orgmode_tables() {
    let fixture = get_test_orgmode_path("tables.org");
    if !fixture.exists() {
        println!("Skipping test: Test file not found at {:?}", fixture);
        return;
    }

    let raw = std::fs::read(&fixture).expect("Should read Org Mode file");
    let config = ExtractionConfig::default();
    let extraction = extract_bytes(&raw, "text/x-org", &config)
        .await
        .expect("Should extract tables from Org Mode");
    let text: &str = &extraction.content;

    let has_header = ["Right", "Left"].iter().any(|label| text.contains(*label));
    assert!(has_header, "Should contain table headers");

    let has_data = ["12", "123"].iter().any(|cell| text.contains(*cell));
    assert!(has_data, "Should contain table data");

    let right_hits = text.matches("Right").count();
    assert!(right_hits >= 1, "Should extract at least one table from document");

    println!("✅ Org Mode tables test passed!");
    println!("   Found approximately {} table(s)", right_hits);
}

/// Test 5: Multiline table headers and cells in the `tables.org` fixture
///
/// Validates:
/// - At least one of the header strings `Centered Header`, `Left Aligned`
///   or `Right Aligned` appears in the content
/// - At least one of the cell strings `span multiple lines`, `First` or
///   `Second` appears in the content
///
/// The test returns early without failing when the fixture is missing.
#[tokio::test]
async fn test_orgmode_tables_complex() {
    let fixture = get_test_orgmode_path("tables.org");
    if !fixture.exists() {
        println!("Skipping test: Test file not found at {:?}", fixture);
        return;
    }

    let raw = std::fs::read(&fixture).expect("Should read Org Mode file");
    let config = ExtractionConfig::default();
    let extraction = extract_bytes(&raw, "text/x-org", &config)
        .await
        .expect("Should extract complex tables from Org Mode");
    let text: &str = &extraction.content;

    let header_found = ["Centered Header", "Left Aligned", "Right Aligned"]
        .iter()
        .any(|header| text.contains(*header));
    assert!(header_found, "Should contain multiline table headers");

    let cell_found = ["span multiple lines", "First", "Second"]
        .iter()
        .any(|cell| text.contains(*cell));
    assert!(cell_found, "Should contain multiline table cell content");

    println!("✅ Org Mode complex tables test passed!");
}

/// Test 6: Ordered, unordered and nested list extraction
///
/// Validates that item text from each list flavour in the input appears in
/// the extracted content (case-insensitive match):
/// - Unordered items (`- First item`, `- Second item`)
/// - Ordered items (`1. One`, `2. Two`)
/// - Nested items and their parent (`Nested`, `Item A`)
#[tokio::test]
async fn test_orgmode_lists() {
    let source = r#"* Lists Section

** Unordered List
- First item
- Second item
- Third item

** Ordered List
1. One
2. Two
3. Three

** Mixed and Nested
- Item A
  - Nested A1
  - Nested A2
- Item B
  1. Sub-ordered
  2. Another sub
"#;

    let config = ExtractionConfig::default();
    let extraction = extract_bytes(source.as_bytes(), "text/x-org", &config)
        .await
        .expect("Should extract lists from Org Mode");
    let text: &str = &extraction.content;

    let expected_items = [
        ("First item", "Should contain unordered list items"),
        ("Second item", "Should contain unordered list items"),
        ("One", "Should contain ordered list items"),
        ("Two", "Should contain ordered list items"),
        ("Nested", "Should contain nested list items"),
        ("Item A", "Should contain parent list items"),
    ];
    for (item, why) in expected_items {
        assert_contains_ci(text, item, why);
    }

    println!("✅ Org Mode lists test passed!");
}

/// Test 7: Inline formatting markers
///
/// Validates that the words inside `*...*`, `/.../`, `=...=` and `~...~`
/// spans survive extraction (case-insensitive match on `bold`, `italic`,
/// `code`, `emphasis` and `strikethrough`).
///
/// The input also contains an `_underlined_` span and a nested
/// `*bold /italic/ text*` span; neither has a dedicated assertion.
#[tokio::test]
async fn test_orgmode_inline_formatting() {
    let source = r#"* Formatting Test

This text has *bold emphasis* and /italic text/.

We also have =inline code= and ~strikethrough text~.

Some text with _underlined_ content.

Mixed formatting like *bold /italic/ text* is also supported.
"#;

    let config = ExtractionConfig::default();
    let extraction = extract_bytes(source.as_bytes(), "text/x-org", &config)
        .await
        .expect("Should extract inline formatting from Org Mode");
    let text: &str = &extraction.content;

    let expected_words = [
        ("bold", "Should contain bold text"),
        ("italic", "Should contain italic text"),
        ("code", "Should contain code text"),
        ("emphasis", "Should preserve text content"),
        ("strikethrough", "Should preserve strikethrough text"),
    ];
    for (word, why) in expected_words {
        assert_contains_ci(text, word, why);
    }

    println!("✅ Org Mode inline formatting test passed!");
}

/// Test 8: Heading followed by a `:PROPERTIES:` drawer
///
/// Validates:
/// - A document containing a properties drawer extracts without error
/// - The heading text and the paragraph after the drawer appear in the
///   content
///
/// The property keys and values themselves are not asserted on.
#[tokio::test]
async fn test_orgmode_properties() {
    let source = r#"* Task with Properties
:PROPERTIES:
:ID:       12345-abcde-67890
:CUSTOM:   custom-value
:STATUS:   active
:END:

This is content after properties.
"#;

    let config = ExtractionConfig::default();
    let extraction = extract_bytes(source.as_bytes(), "text/x-org", &config)
        .await
        .expect("Should extract properties from Org Mode");
    let text: &str = &extraction.content;

    for (needle, why) in [
        ("Task with Properties", "Should contain heading"),
        ("content", "Should contain main content"),
    ] {
        assert_contains_ci(text, needle, why);
    }

    println!("✅ Org Mode properties test passed!");
}

/// Test 9: Link text extraction from the `links.org` fixture
///
/// Validates that the strings `AT&T`, `URL`, `email`, `ampersand` and
/// `Links` all appear in the extracted content (case-insensitive match).
/// The assertion messages treat these as link descriptions and a section
/// header of the fixture.
///
/// The test returns early without failing when the fixture is missing.
#[tokio::test]
async fn test_orgmode_links() {
    let fixture = get_test_orgmode_path("links.org");
    if !fixture.exists() {
        println!("Skipping test: Test file not found at {:?}", fixture);
        return;
    }

    let raw = std::fs::read(&fixture).expect("Should read Org Mode file");
    let config = ExtractionConfig::default();
    let extraction = extract_bytes(&raw, "text/x-org", &config)
        .await
        .expect("Should extract links from Org Mode");
    let text: &str = &extraction.content;

    let expected_fragments = [
        ("AT&T", "Should contain AT&T link description"),
        ("URL", "Should contain 'URL' link description"),
        ("email", "Should contain 'email' link description"),
        ("ampersand", "Should contain ampersand reference"),
        ("Links", "Should contain Links section header"),
    ];
    for (fragment, why) in expected_fragments {
        assert_contains_ci(text, fragment, why);
    }

    println!("✅ Org Mode links test passed!");
}

/// Test 10: Code block extraction
///
/// Reads the fixture requested as `../misc/readme.org` (relative to the Org
/// Mode fixture directory) and validates that the extracted content contains
/// either `curl` or `bash`, i.e. code block content or its language label.
///
/// The test returns early without failing when the fixture is missing.
#[tokio::test]
async fn test_orgmode_code_blocks() {
    let test_file = get_test_orgmode_path("../misc/readme.org");
    if !test_file.exists() {
        println!("Skipping test: Test file not found at {:?}", test_file);
        return;
    }

    let content = std::fs::read(&test_file).expect("Should read Org Mode file");
    let result = extract_bytes(&content, "text/x-org", &ExtractionConfig::default())
        .await
        .expect("Should extract code blocks from Org Mode");

    // The original condition tested for "bash" twice; one check is sufficient.
    assert!(
        result.content.contains("curl") || result.content.contains("bash"),
        "Should contain code block content or language specification"
    );

    println!("✅ Org Mode code blocks test passed!");
}

/// Test 11: Code blocks in several languages (`code-blocks.org` fixture)
///
/// Validates that the strings `Python`, `Bash` and `JavaScript` all appear
/// in the extracted content (case-insensitive match).
///
/// The test returns early without failing when the fixture is missing.
#[tokio::test]
async fn test_orgmode_code_blocks_multilang() {
    let fixture = get_test_orgmode_path("code-blocks.org");
    if !fixture.exists() {
        println!("Skipping test: Test file not found at {:?}", fixture);
        return;
    }

    let raw = std::fs::read(&fixture).expect("Should read Org Mode file");
    let config = ExtractionConfig::default();
    let extraction = extract_bytes(&raw, "text/x-org", &config)
        .await
        .expect("Should extract multi-language code blocks");
    let text: &str = &extraction.content;

    let expected_languages = [
        ("Python", "Should contain Python code reference"),
        ("Bash", "Should contain Bash code reference"),
        ("JavaScript", "Should contain JavaScript code reference"),
    ];
    for (language, why) in expected_languages {
        assert_contains_ci(text, language, why);
    }

    println!("✅ Org Mode multi-language code blocks test passed!");
}

/// Test 12: Unicode character handling
///
/// Feeds a document containing accented Latin text, Cyrillic, mathematical
/// symbols, copyright-style symbols and emoji through the extractor and
/// validates that:
/// - Extraction succeeds
/// - The French line is present (`Café`, or at least its ASCII prefix `Caf`)
/// - The degree line is present (`°`, or at least its label `Degrees`)
/// - The copyright line is present (`©`, or at least its label `Copyright`)
///
/// Each check deliberately accepts an ASCII fallback, so this test confirms
/// that the lines survive extraction rather than that every non-ASCII
/// character is preserved verbatim.
#[tokio::test]
async fn test_orgmode_unicode() {
    let org_content = r#"* Unicode Test

French: Café, naïve, résumé
German: Äpfel, Zürich
Spanish: Niño, Español
Russian: Привет

Mathematical: ∈ ∉ ⊂ ∪ ∩
Copyright: © ® ™
Degrees: 25°C

Emoji: 🎉 ✨ 📚 🌟
"#;

    let result = extract_bytes(org_content.as_bytes(), "text/x-org", &ExtractionConfig::default())
        .await
        .expect("Should extract unicode characters from Org Mode");

    assert!(
        result.content.contains("Café") || result.content.contains("Caf"),
        "Should contain French text"
    );
    assert!(
        result.content.contains("°") || result.content.contains("Degrees"),
        "Should contain degree symbol"
    );
    assert!(
        result.content.contains("©") || result.content.contains("Copyright"),
        "Should contain copyright symbol"
    );

    println!("✅ Org Mode unicode test passed!");
}

/// Test 13: Special characters in body text
///
/// Feeds text containing `&`, `<`, `>`, brackets, braces, `@`, `%`, a
/// backslash, `|`, `~` and a backtick through the extractor and validates
/// that `ampersand`, `AT&T` and `bracket` appear in the extracted content
/// (case-insensitive match).
#[tokio::test]
async fn test_orgmode_special_characters() {
    let source = r#"* Special Characters

This contains & ampersand, < less than, > greater than.

We have [brackets] and {braces} in text.

AT&T has an ampersand. Check prices @ 50%.

Backslash: \ and other symbols: | ~ `
"#;

    let config = ExtractionConfig::default();
    let extraction = extract_bytes(source.as_bytes(), "text/x-org", &config)
        .await
        .expect("Should extract special characters from Org Mode");
    let text: &str = &extraction.content;

    let expected_fragments = [
        ("ampersand", "Should contain ampersand text"),
        ("AT&T", "Should preserve ampersands in company names"),
        ("bracket", "Should contain bracket text"),
    ];
    for (fragment, why) in expected_fragments {
        assert_contains_ci(text, fragment, why);
    }

    println!("✅ Org Mode special characters test passed!");
}

/// Test 14: Content extraction quality for the `tables.org` fixture
///
/// Validates:
/// - Content is non-empty and has at least one character
/// - Fewer than five control characters other than `\n`, `\t` and `\r`
/// - No raw `#+TITLE:` directive
/// - `#+BEGIN_SRC` and `#+END_SRC` are not both present (the check passes
///   when only one of the two markers remains)
///
/// The test returns early without failing when the fixture is missing.
#[tokio::test]
async fn test_orgmode_content_quality() {
    let fixture = get_test_orgmode_path("tables.org");
    if !fixture.exists() {
        println!("Skipping test: Test file not found at {:?}", fixture);
        return;
    }

    let raw = std::fs::read(&fixture).expect("Should read Org Mode file");
    let config = ExtractionConfig::default();
    let extraction = extract_bytes(&raw, "text/x-org", &config)
        .await
        .expect("Should extract Org Mode content successfully");
    let text: &str = &extraction.content;

    assert!(!text.is_empty(), "Content should not be empty");

    let char_total = text.chars().count();
    assert!(char_total > 0, "Content should have valid UTF-8 characters");

    let stray_controls = text
        .chars()
        .filter(|ch| ch.is_control() && !matches!(*ch, '\n' | '\t' | '\r'))
        .count();
    assert!(
        stray_controls < 5,
        "Should not have excessive control characters (found {})",
        stray_controls
    );

    assert!(!text.contains("#+TITLE:"), "Should not contain raw #+TITLE directive");

    // Fails only when a complete raw block (both markers) is still present.
    let raw_src_block_left = text.contains("#+BEGIN_SRC") && text.contains("#+END_SRC");
    assert!(
        !raw_src_block_left,
        "Should not contain unprocessed code block markers"
    );

    println!("✅ Org Mode content quality test passed!");
    println!("   Extracted {} bytes", text.len());
    println!("   Valid UTF-8: ✓");
    println!("   Control chars: ✓ (found {})", stray_controls);
}

/// Test 15: MIME type handling
///
/// Validates that extracting a small document with the `text/x-org` MIME
/// type hint succeeds and that the result reports `text/x-org` as its MIME
/// type.
#[tokio::test]
async fn test_orgmode_mime_type() {
    let source = r#"* Test Document
Content here.
"#;

    let config = ExtractionConfig::default();
    let extraction = extract_bytes(source.as_bytes(), "text/x-org", &config)
        .await
        .expect("Should extract with correct MIME type");

    assert_eq!(
        extraction.mime_type, "text/x-org",
        "MIME type should be preserved as text/x-org"
    );

    println!("✅ Org Mode MIME type test passed!");
}

/// Test 16: Content compliance for the `tables.org` fixture
///
/// Validates:
/// - No raw `#+TITLE`, `#+AUTHOR` or `#+DATE` directive remains
/// - `#+BEGIN_` and `#+END_` are not both present (the check passes when
///   only one of the two markers remains)
/// - More than 100 bytes of content were extracted
/// - The content contains a `#` character or the word `Table`
///
/// The test returns early without failing when the fixture is missing.
#[tokio::test]
async fn test_orgmode_content_compliance() {
    let fixture = get_test_orgmode_path("tables.org");
    if !fixture.exists() {
        println!("Skipping test: Test file not found at {:?}", fixture);
        return;
    }

    let raw = std::fs::read(&fixture).expect("Should read Org Mode file");
    let config = ExtractionConfig::default();
    let extraction = extract_bytes(&raw, "text/x-org", &config)
        .await
        .expect("Should extract Org Mode successfully for baseline comparison");
    let text: &str = &extraction.content;

    let forbidden_directives = [
        ("#+TITLE", "Should not contain raw #+TITLE directive"),
        ("#+AUTHOR", "Should not contain raw #+AUTHOR directive"),
        ("#+DATE", "Should not contain raw #+DATE directive"),
    ];
    for (directive, message) in forbidden_directives {
        assert!(!text.contains(directive), "{}", message);
    }

    // Fails only when both an opening and a closing block marker remain.
    let raw_block_left = text.contains("#+BEGIN_") && text.contains("#+END_");
    assert!(!raw_block_left, "Should have processed BEGIN/END blocks");

    assert!(text.len() > 100, "Should have substantial content extracted");

    let has_structure = ["#", "Table"].iter().any(|token| text.contains(*token));
    assert!(
        has_structure,
        "Should have heading structure or document content"
    );

    println!("✅ Org Mode content compliance test passed!");
    println!("   Raw markup: ✓ (not found)");
    println!("   UTF-8 encoding: ✓");
    println!("   Content structure: ✓");
}

/// Test 17: Empty document handling
///
/// Validates that extracting zero bytes with the `text/x-org` MIME type
/// succeeds and that the result still reports `text/x-org` as its MIME
/// type. The extracted content itself is not inspected.
#[tokio::test]
async fn test_orgmode_empty_document() {
    let source = "";

    let config = ExtractionConfig::default();
    let extraction = extract_bytes(source.as_bytes(), "text/x-org", &config)
        .await
        .expect("Should handle empty Org Mode document");

    assert_eq!(
        extraction.mime_type, "text/x-org",
        "MIME type should be set even for empty documents"
    );

    println!("✅ Org Mode empty document test passed!");
}

/// Test 18: Document with only metadata keywords
///
/// Validates that a document consisting solely of `#+TITLE`, `#+AUTHOR`
/// and `#+DATE` lines extracts without error and reports `text/x-org` as
/// its MIME type. Neither the content nor the metadata values are asserted
/// on.
#[tokio::test]
async fn test_orgmode_metadata_only() {
    let source = r#"#+TITLE: Document Title
#+AUTHOR: Author Name
#+DATE: 2024-01-01
"#;

    let config = ExtractionConfig::default();
    let extraction = extract_bytes(source.as_bytes(), "text/x-org", &config)
        .await
        .expect("Should handle metadata-only document");

    assert_eq!(extraction.mime_type, "text/x-org");

    println!("✅ Org Mode metadata-only document test passed!");
}

/// Test 19: Deeply nested heading structure
///
/// Feeds a document with six heading levels through the extractor and
/// validates that the strings `Level 1`, `Level 2` and `Level 6` appear in
/// the extracted content (case-insensitive match). Levels 3 to 5 are not
/// individually asserted.
#[tokio::test]
async fn test_orgmode_deep_nesting() {
    let source = r#"* Level 1
Text at level 1
** Level 2
Text at level 2
*** Level 3
Text at level 3
**** Level 4
Text at level 4
***** Level 5
Text at level 5
****** Level 6
Text at level 6
"#;

    let config = ExtractionConfig::default();
    let extraction = extract_bytes(source.as_bytes(), "text/x-org", &config)
        .await
        .expect("Should handle deeply nested structure");
    let text: &str = &extraction.content;

    let expected_levels = [
        ("Level 1", "Should contain level 1"),
        ("Level 2", "Should contain level 2"),
        ("Level 6", "Should contain level 6"),
    ];
    for (level, why) in expected_levels {
        assert_contains_ci(text, level, why);
    }

    println!("✅ Org Mode deep nesting test passed!");
}

/// Test 20: Mixed-feature document (`comprehensive.org` fixture)
///
/// Validates that a set of section names and text fragments all appear in
/// the extracted content (case-insensitive match): `Headers`, `Paragraphs`,
/// `Block Quotes`, `Level 2`, `emphasis`, `embedded link`, `AT&T` and
/// `special`.
///
/// The test returns early without failing when the fixture is missing.
#[tokio::test]
async fn test_orgmode_comprehensive_document() {
    let fixture = get_test_orgmode_path("comprehensive.org");
    if !fixture.exists() {
        println!("Skipping test: Test file not found at {:?}", fixture);
        return;
    }

    let raw = std::fs::read(&fixture).expect("Should read Org Mode file");
    let config = ExtractionConfig::default();
    let extraction = extract_bytes(&raw, "text/x-org", &config)
        .await
        .expect("Should extract comprehensive document");
    let text: &str = &extraction.content;

    let expected_fragments = [
        ("Headers", "Should contain Headers section"),
        ("Paragraphs", "Should contain Paragraphs section"),
        ("Block Quotes", "Should contain Block Quotes section"),
        ("Level 2", "Should contain Level 2 heading"),
        ("emphasis", "Should contain emphasis/formatted text"),
        ("embedded link", "Should contain 'embedded link' link description"),
        ("AT&T", "Should contain AT&T link description"),
        ("special", "Should contain special characters section"),
    ];
    for (fragment, why) in expected_fragments {
        assert_contains_ci(text, fragment, why);
    }

    println!("✅ Org Mode comprehensive document test passed!");
    println!("   Content extracted: {} bytes", text.len());
}

/// Test 21: Extraction statistics and summary
///
/// Runs the extractor over a fixed list of fixtures and prints a per-file
/// report (content size, metadata field count and keys, rough structure
/// hints) followed by aggregate totals and averages.
///
/// The test makes no assertions: missing fixtures, read errors and
/// extraction errors are reported on stdout and skipped.
#[tokio::test]
async fn test_orgmode_extraction_statistics() {
    let fixtures = ["tables.org", "../misc/readme.org"];

    println!("\n╔════════════════════════════════════════════════════════════╗");
    println!("║        Org Mode Extraction Statistics Report              ║");
    println!("╚════════════════════════════════════════════════════════════╝\n");

    let mut processed: usize = 0;
    let mut content_bytes_sum: usize = 0;
    let mut metadata_fields_sum: usize = 0;

    for name in fixtures {
        let path = get_test_orgmode_path(name);
        if !path.exists() {
            println!("⚠ SKIP: {} (not found)", name);
            continue;
        }

        let raw = match std::fs::read(&path) {
            Ok(bytes) => bytes,
            Err(e) => {
                println!("✗ {} - Read error: {:?}", name, e);
                println!();
                continue;
            }
        };

        let extraction = match extract_bytes(&raw, "text/x-org", &ExtractionConfig::default()).await {
            Ok(extraction) => extraction,
            Err(e) => {
                println!("✗ {} - Error: {:?}", name, e);
                println!();
                continue;
            }
        };

        let content_len = extraction.content.len();
        let field_count = extraction.metadata.additional.len();

        processed += 1;
        content_bytes_sum += content_len;
        metadata_fields_sum += field_count;

        println!("✓ {}", name);
        println!("  Content: {} bytes", content_len);
        println!("  Metadata fields: {}", field_count);

        if !extraction.metadata.additional.is_empty() {
            let keys: Vec<String> = extraction
                .metadata
                .additional
                .keys()
                .map(|k| k.to_string())
                .collect();
            println!("  Keys: {}", keys.join(", "));
        }

        // Rough, character-based hints only; they do not parse the content.
        if extraction.content.contains("#") {
            println!("  Structure: ✓ (headings detected)");
        }
        if extraction.content.contains("|") {
            println!("  Tables: ✓ (detected)");
        }
        if extraction.content.contains("-") || extraction.content.contains("1.") {
            println!("  Lists: ✓ (detected)");
        }

        println!();
    }

    // `checked_div` yields `None` when no file was processed; report 0 then.
    let average_content = content_bytes_sum.checked_div(processed).unwrap_or(0);
    let average_metadata = metadata_fields_sum.checked_div(processed).unwrap_or(0);

    println!("╔════════════════════════════════════════════════════════════╗");
    println!("║                    Summary Statistics                      ║");
    println!("╠════════════════════════════════════════════════════════════╣");
    println!("║ Total files processed: {:44} ║", processed);
    println!("║ Total content bytes:   {:44} ║", content_bytes_sum);
    println!("║ Total metadata fields: {:44} ║", metadata_fields_sum);
    println!("║ Average content size:  {:44} ║", average_content);
    println!("║ Average metadata/file: {:44} ║", average_metadata);
    println!("╚════════════════════════════════════════════════════════════╝\n");

    println!("✅ Org Mode extraction statistics generated successfully!");
}