// crates/kreuzberg/tests/csv_integration.rs

//! CSV and spreadsheet integration tests.
//!
//! Tests for CSV and TSV extraction.
//! Validates data extraction, custom delimiters, quoted fields, and edge cases.

use kreuzberg::core::config::ExtractionConfig;
use kreuzberg::core::extractor::extract_bytes;

mod helpers;

/// Basic CSV extraction: a comma-separated document with one header row and
/// two data rows.
///
/// Verifies the reported MIME type, that exactly one non-empty table with a
/// markdown rendering is produced, and that every header and cell value is
/// present in the extracted content. If `extract_bytes` returns an error the
/// test prints a skip notice and returns without asserting anything.
#[tokio::test]
async fn test_csv_basic_extraction() {
    let input = b"Name,Age,City\nAlice,30,NYC\nBob,25,LA";
    let config = ExtractionConfig::default();

    let Ok(extraction) = extract_bytes(input, "text/csv", &config).await else {
        println!("Skipping test: CSV extraction not available");
        return;
    };

    assert_eq!(extraction.mime_type, "text/csv");

    // Optional features are off in the default configuration.
    assert!(
        extraction.chunks.is_none(),
        "Chunks should be None without chunking config"
    );
    assert!(
        extraction.detected_languages.is_none(),
        "Language detection not enabled"
    );

    // Exactly one table, with rows and a markdown rendering.
    assert!(!extraction.tables.is_empty(), "CSV should produce table structures");
    assert_eq!(extraction.tables.len(), 1, "CSV should have one table");
    let table = &extraction.tables[0];
    assert!(!table.cells.is_empty(), "Table should have rows");
    assert!(!table.markdown.is_empty(), "Table should have markdown representation");

    // Headers first, then each data row, in the same order as the input.
    let content = &extraction.content;
    let expected = [
        ("Name", "Should contain 'Name' header"),
        ("Age", "Should contain 'Age' header"),
        ("City", "Should contain 'City' header"),
        ("Alice", "Should contain Alice row"),
        ("30", "Should contain Alice's age"),
        ("NYC", "Should contain Alice's city"),
        ("Bob", "Should contain Bob row"),
        ("25", "Should contain Bob's age"),
        ("LA", "Should contain Bob's city"),
    ];
    for (needle, message) in expected {
        assert!(content.contains(needle), "{}", message);
    }
}

/// CSV whose first row is a header line, followed by three product rows.
///
/// Verifies that one non-empty table with markdown is produced and that the
/// header names and every value of each data row appear in the extracted
/// content. Returns early with a skip notice if extraction fails.
#[tokio::test]
async fn test_csv_with_headers() {
    let input = b"Product,Price,Quantity\nApple,1.50,100\nBanana,0.75,200\nOrange,2.00,150";
    let config = ExtractionConfig::default();

    let Ok(extraction) = extract_bytes(input, "text/csv", &config).await else {
        println!("Skipping test: CSV extraction not available");
        return;
    };

    // Optional features are off in the default configuration.
    assert!(
        extraction.chunks.is_none(),
        "Chunks should be None without chunking config"
    );
    assert!(
        extraction.detected_languages.is_none(),
        "Language detection not enabled"
    );

    // Exactly one table, with rows and a markdown rendering.
    assert!(!extraction.tables.is_empty(), "CSV should produce table structures");
    assert_eq!(extraction.tables.len(), 1, "CSV should have one table");
    let table = &extraction.tables[0];
    assert!(!table.cells.is_empty(), "Table should have rows");
    assert!(!table.markdown.is_empty(), "Table should have markdown representation");

    let content = &extraction.content;

    let headers = [
        ("Product", "Should contain Product header"),
        ("Price", "Should contain Price header"),
        ("Quantity", "Should contain Quantity header"),
    ];
    for (header, message) in headers {
        assert!(content.contains(header), "{}", message);
    }

    // Each data row must be fully represented: name, price and quantity.
    assert!(content.contains("Apple") && content.contains("1.50") && content.contains("100"));
    assert!(content.contains("Banana") && content.contains("0.75") && content.contains("200"));
    assert!(content.contains("Orange") && content.contains("2.00") && content.contains("150"));
}

/// CSV using a semicolon as the field delimiter.
///
/// The input is submitted with the `text/csv` MIME type and the default
/// configuration. Verifies that one non-empty table with markdown is produced
/// and that the values of the first data row appear in the extracted content.
/// Returns early with a skip notice if extraction fails.
#[tokio::test]
async fn test_csv_custom_delimiter() {
    let input = b"Name;Age;City\nAlice;30;NYC\nBob;25;LA";
    let config = ExtractionConfig::default();

    let Ok(extraction) = extract_bytes(input, "text/csv", &config).await else {
        println!("Skipping test: CSV extraction not available");
        return;
    };

    // Optional features are off in the default configuration.
    assert!(
        extraction.chunks.is_none(),
        "Chunks should be None without chunking config"
    );
    assert!(
        extraction.detected_languages.is_none(),
        "Language detection not enabled"
    );

    // Exactly one table, with rows and a markdown rendering.
    assert!(!extraction.tables.is_empty(), "CSV should produce table structures");
    assert_eq!(extraction.tables.len(), 1, "CSV should have one table");
    let table = &extraction.tables[0];
    assert!(!table.cells.is_empty(), "Table should have rows");
    assert!(!table.markdown.is_empty(), "Table should have markdown representation");

    let content = &extraction.content;
    assert!(!content.is_empty(), "Content should be extracted");

    let first_row = [
        ("Alice", "Should contain Alice"),
        ("30", "Should contain age"),
        ("NYC", "Should contain city"),
    ];
    for (value, message) in first_row {
        assert!(content.contains(value), "{}", message);
    }
}

/// Test TSV (Tab-Separated Values) extraction.
///
/// Submits a tab-delimited document with the `text/tab-separated-values`
/// MIME type and verifies that the MIME type is echoed back, that exactly one
/// non-empty table with a markdown rendering is produced, and that all headers
/// and cell values appear in the extracted content. Returns early with a skip
/// notice if extraction fails.
#[tokio::test]
async fn test_tsv_file() {
    let config = ExtractionConfig::default();

    let tsv_content = b"Name\tAge\tCity\nAlice\t30\tNYC\nBob\t25\tLA";

    let extraction = match extract_bytes(tsv_content, "text/tab-separated-values", &config).await {
        Ok(result) => result,
        Err(_) => {
            println!("Skipping test: TSV extraction not available");
            return;
        }
    };

    assert_eq!(extraction.mime_type, "text/tab-separated-values");
    assert!(
        extraction.chunks.is_none(),
        "Chunks should be None without chunking config"
    );
    assert!(
        extraction.detected_languages.is_none(),
        "Language detection not enabled"
    );
    // Failure messages name TSV so a failing run points at the right format.
    assert!(!extraction.tables.is_empty(), "TSV should produce table structures");
    assert_eq!(extraction.tables.len(), 1, "TSV should have one table");
    assert!(!extraction.tables[0].cells.is_empty(), "Table should have rows");
    assert!(
        !extraction.tables[0].markdown.is_empty(),
        "Table should have markdown representation"
    );

    assert!(extraction.content.contains("Name"), "Should contain Name header");
    assert!(extraction.content.contains("Age"), "Should contain Age header");
    assert!(extraction.content.contains("City"), "Should contain City header");
    assert!(extraction.content.contains("Alice"), "Should contain Alice");
    assert!(extraction.content.contains("Bob"), "Should contain Bob");
    assert!(extraction.content.contains("30") && extraction.content.contains("NYC"));
    assert!(extraction.content.contains("25") && extraction.content.contains("LA"));
}

/// CSV with double-quoted fields that contain embedded commas.
///
/// Verifies that one non-empty table with markdown is produced and that the
/// pieces of the quoted names, at least part of each quoted description, and
/// the prices appear in the extracted content. Returns early with a skip
/// notice if extraction fails.
#[tokio::test]
async fn test_csv_quoted_fields() {
    let input =
        b"Name,Description,Price\n\"Smith, John\",\"Product A, premium\",100\n\"Doe, Jane\",\"Product B, standard\",50";
    let config = ExtractionConfig::default();

    let Ok(extraction) = extract_bytes(input, "text/csv", &config).await else {
        println!("Skipping test: CSV extraction not available");
        return;
    };

    // Optional features are off in the default configuration.
    assert!(
        extraction.chunks.is_none(),
        "Chunks should be None without chunking config"
    );
    assert!(
        extraction.detected_languages.is_none(),
        "Language detection not enabled"
    );

    // Exactly one table, with rows and a markdown rendering.
    assert!(!extraction.tables.is_empty(), "CSV should produce table structures");
    assert_eq!(extraction.tables.len(), 1, "CSV should have one table");
    let table = &extraction.tables[0];
    assert!(!table.cells.is_empty(), "Table should have rows");
    assert!(!table.markdown.is_empty(), "Table should have markdown representation");

    let content = &extraction.content;

    let name_parts = [
        ("Smith", "Should contain Smith"),
        ("John", "Should contain John"),
        ("Doe", "Should contain Doe"),
        ("Jane", "Should contain Jane"),
    ];
    for (part, message) in name_parts {
        assert!(content.contains(part), "{}", message);
    }

    // Either half of each quoted description is accepted.
    assert!(content.contains("Product A") || content.contains("premium"));
    assert!(content.contains("Product B") || content.contains("standard"));

    assert!(content.contains("100") && content.contains("50"));
}

/// CSV containing non-ASCII text: CJK, accented Latin, Cyrillic and emoji.
///
/// Verifies that one non-empty table with markdown is produced, that the ASCII
/// names survive extraction, and that the Tokyo and Paris cells are present in
/// at least one accepted spelling. Returns early with a skip notice if
/// extraction fails.
#[tokio::test]
async fn test_csv_special_characters() {
    let input = "Name,City,Emoji\nAlice,Tokyo 東京,🎉\nBob,París,✅\nCarlos,Москва,🌍".as_bytes();
    let config = ExtractionConfig::default();

    let Ok(extraction) = extract_bytes(input, "text/csv", &config).await else {
        println!("Skipping test: CSV extraction not available");
        return;
    };

    // Optional features are off in the default configuration.
    assert!(
        extraction.chunks.is_none(),
        "Chunks should be None without chunking config"
    );
    assert!(
        extraction.detected_languages.is_none(),
        "Language detection not enabled"
    );

    // Exactly one table, with rows and a markdown rendering.
    assert!(!extraction.tables.is_empty(), "CSV should produce table structures");
    assert_eq!(extraction.tables.len(), 1, "CSV should have one table");
    let table = &extraction.tables[0];
    assert!(!table.cells.is_empty(), "Table should have rows");
    assert!(!table.markdown.is_empty(), "Table should have markdown representation");

    let content = &extraction.content;
    assert!(!content.is_empty(), "Special characters should be handled");

    let names = [
        ("Alice", "Should contain Alice"),
        ("Bob", "Should contain Bob"),
        ("Carlos", "Should contain Carlos"),
    ];
    for (name, message) in names {
        assert!(content.contains(name), "{}", message);
    }

    // Accept either script for Tokyo and either accented/unaccented Paris.
    assert!(content.contains("Tokyo") || content.contains("東京"));
    assert!(content.contains("París") || content.contains("Paris"));
}

/// Large CSV: a header line followed by 10,000 generated data rows.
///
/// Verifies that one non-empty table with markdown is produced, that the
/// extracted content is longer than 1000 bytes, and that rows from the start,
/// middle and end of the input can be found in it. Returns early with a skip
/// notice if extraction fails.
#[tokio::test]
async fn test_csv_large_file() {
    let config = ExtractionConfig::default();

    // Row `i` is "<i>,Item<i>,<i * 10>.00".
    let data_rows: String = (1..=10_000)
        .map(|i| format!("{},Item{},{}.00\n", i, i, i * 10))
        .collect();
    let csv_content = format!("ID,Name,Value\n{}", data_rows);

    let Ok(extraction) = extract_bytes(csv_content.as_bytes(), "text/csv", &config).await else {
        println!("Skipping test: CSV extraction not available");
        return;
    };

    // Optional features are off in the default configuration.
    assert!(
        extraction.chunks.is_none(),
        "Chunks should be None without chunking config"
    );
    assert!(
        extraction.detected_languages.is_none(),
        "Language detection not enabled"
    );

    // Exactly one table, with rows and a markdown rendering.
    assert!(!extraction.tables.is_empty(), "CSV should produce table structures");
    assert_eq!(extraction.tables.len(), 1, "CSV should have one table");
    let table = &extraction.tables[0];
    assert!(!table.cells.is_empty(), "Table should have rows");
    assert!(!table.markdown.is_empty(), "Table should have markdown representation");

    let content = &extraction.content;
    assert!(!content.is_empty(), "Large CSV should be processed");
    assert!(content.len() > 1000, "Large CSV content should be substantial");

    // Spot-check the first, middle and last generated rows.
    assert!(content.contains("Item1") || content.contains("10.00"));
    assert!(content.contains("Item5000") || content.contains("50000.00"));
    assert!(content.contains("Item10000") || content.contains("100000.00"));
}

/// Test malformed CSV - rows with inconsistent column counts.
///
/// The input has a three-column header, one short row (two fields) and one
/// long row (four fields). Either outcome of `extract_bytes` is accepted:
/// an error means the input was rejected, and a success must still yield
/// non-empty content. The test therefore fails only if extraction panics or
/// succeeds with empty content.
#[tokio::test]
async fn test_csv_malformed() {
    let config = ExtractionConfig::default();

    let csv_content = b"Name,Age,City\nAlice,30\nBob,25,LA,Extra\nCarlos,35,SF";

    match extract_bytes(csv_content, "text/csv", &config).await {
        Ok(extraction) => assert!(
            !extraction.content.is_empty(),
            "Malformed CSV that extracts successfully should still yield content"
        ),
        // Rejecting malformed input with an error is acceptable behavior.
        Err(_) => {}
    }
}

/// Test empty CSV file (zero bytes).
///
/// Either outcome of `extract_bytes` is accepted; the main purpose is to
/// confirm that empty input does not panic. When extraction succeeds, the
/// result is checked against the same default-configuration invariants the
/// other tests in this file assert: no chunks and no detected languages.
#[tokio::test]
async fn test_csv_empty() {
    let config = ExtractionConfig::default();

    let empty_csv = b"";

    match extract_bytes(empty_csv, "text/csv", &config).await {
        Ok(extraction) => {
            assert!(
                extraction.chunks.is_none(),
                "Chunks should be None without chunking config"
            );
            assert!(
                extraction.detected_languages.is_none(),
                "Language detection not enabled"
            );
        }
        // Rejecting empty input with an error is acceptable behavior.
        Err(_) => {}
    }
}

/// Test CSV with only a header row and no data rows.
///
/// Verifies that one non-empty table with a markdown rendering is produced and
/// that every header name appears in the extracted content. Returns early with
/// a skip notice if extraction fails.
#[tokio::test]
async fn test_csv_headers_only() {
    let config = ExtractionConfig::default();

    let csv_content = b"Name,Age,City";

    let extraction = match extract_bytes(csv_content, "text/csv", &config).await {
        Ok(result) => result,
        Err(_) => {
            println!("Skipping test: CSV extraction not available");
            return;
        }
    };

    assert!(
        extraction.chunks.is_none(),
        "Chunks should be None without chunking config"
    );
    assert!(
        extraction.detected_languages.is_none(),
        "Language detection not enabled"
    );
    assert!(!extraction.tables.is_empty(), "CSV should produce table structures");
    assert_eq!(extraction.tables.len(), 1, "CSV should have one table");
    assert!(!extraction.tables[0].cells.is_empty(), "Table should have rows");
    assert!(
        !extraction.tables[0].markdown.is_empty(),
        "Table should have markdown representation"
    );

    // Check each header explicitly; a bare non-empty check would pass even if
    // the header text were lost.
    for header in ["Name", "Age", "City"] {
        assert!(
            extraction.content.contains(header),
            "Headers should be extracted (missing '{}')",
            header
        );
    }
}

/// CSV with empty lines interleaved between data rows.
///
/// Verifies that one non-empty table with markdown is produced and that at
/// least one of the first two data rows' names appears in the extracted
/// content. Returns early with a skip notice if extraction fails.
#[tokio::test]
async fn test_csv_blank_lines() {
    let input = b"Name,Age\nAlice,30\n\nBob,25\n\nCarlos,35";
    let config = ExtractionConfig::default();

    let Ok(extraction) = extract_bytes(input, "text/csv", &config).await else {
        println!("Skipping test: CSV extraction not available");
        return;
    };

    // Optional features are off in the default configuration.
    assert!(
        extraction.chunks.is_none(),
        "Chunks should be None without chunking config"
    );
    assert!(
        extraction.detected_languages.is_none(),
        "Language detection not enabled"
    );

    // Exactly one table, with rows and a markdown rendering.
    assert!(!extraction.tables.is_empty(), "CSV should produce table structures");
    assert_eq!(extraction.tables.len(), 1, "CSV should have one table");
    let table = &extraction.tables[0];
    assert!(!table.cells.is_empty(), "Table should have rows");
    assert!(!table.markdown.is_empty(), "Table should have markdown representation");

    // Finding either name is enough.
    let content = &extraction.content;
    assert!(["Alice", "Bob"].iter().any(|&name| content.contains(name)));
}

/// CSV whose data cells are all numeric (integers and decimals).
///
/// Verifies that one non-empty table with markdown is produced, that the
/// Price, Quantity and Discount headers are present, and that the numeric
/// values checked below appear textually in the extracted content. Returns
/// early with a skip notice if extraction fails.
#[tokio::test]
async fn test_csv_numeric_data() {
    let input = b"ID,Price,Quantity,Discount\n1,19.99,100,0.15\n2,29.99,50,0.20\n3,9.99,200,0.10";
    let config = ExtractionConfig::default();

    let Ok(extraction) = extract_bytes(input, "text/csv", &config).await else {
        println!("Skipping test: CSV extraction not available");
        return;
    };

    // Optional features are off in the default configuration.
    assert!(
        extraction.chunks.is_none(),
        "Chunks should be None without chunking config"
    );
    assert!(
        extraction.detected_languages.is_none(),
        "Language detection not enabled"
    );

    // Exactly one table, with rows and a markdown rendering.
    assert!(!extraction.tables.is_empty(), "CSV should produce table structures");
    assert_eq!(extraction.tables.len(), 1, "CSV should have one table");
    let table = &extraction.tables[0];
    assert!(!table.cells.is_empty(), "Table should have rows");
    assert!(!table.markdown.is_empty(), "Table should have markdown representation");

    // Headers first, then the values of rows one to three, in input order.
    let content = &extraction.content;
    let expected = [
        ("Price", "Should contain Price header"),
        ("Quantity", "Should contain Quantity header"),
        ("Discount", "Should contain Discount header"),
        ("19.99", "Should contain first price"),
        ("100", "Should contain first quantity"),
        ("0.15", "Should contain first discount"),
        ("29.99", "Should contain second price"),
        ("50", "Should contain second quantity"),
        ("9.99", "Should contain third price"),
        ("200", "Should contain third quantity"),
    ];
    for (needle, message) in expected {
        assert!(content.contains(needle), "{}", message);
    }
}
Nomad changes 2026-06-01 23:40:55 +02:00			`//! CSV and spreadsheet integration tests.`
			`//!`
			`//! Tests for CSV and TSV extraction.`
			`//! Validates data extraction, custom delimiters, quoted fields, and edge cases.`

			`use kreuzberg::core::config::ExtractionConfig;`
			`use kreuzberg::core::extractor::extract_bytes;`

			`mod helpers;`

			`/// Test basic CSV extraction - simple comma-separated values.`
			`#[tokio::test]`
			`async fn test_csv_basic_extraction() {`
			`let config = ExtractionConfig::default();`

			`let csv_content = b"Name,Age,City\nAlice,30,NYC\nBob,25,LA";`

			`let extraction = match extract_bytes(csv_content, "text/csv", &config).await {`
			`Ok(result) => result,`
			`Err(_) => {`
			`println!("Skipping test: CSV extraction not available");`
			`return;`
			`}`
			`};`

			`assert_eq!(extraction.mime_type, "text/csv");`
			`assert!(`
			`extraction.chunks.is_none(),`
			`"Chunks should be None without chunking config"`
			`);`
			`assert!(`
			`extraction.detected_languages.is_none(),`
			`"Language detection not enabled"`
			`);`
			`assert!(!extraction.tables.is_empty(), "CSV should produce table structures");`
			`assert_eq!(extraction.tables.len(), 1, "CSV should have one table");`
			`assert!(!extraction.tables[0].cells.is_empty(), "Table should have rows");`
			`assert!(`
			`!extraction.tables[0].markdown.is_empty(),`
			`"Table should have markdown representation"`
			`);`

			`assert!(extraction.content.contains("Name"), "Should contain 'Name' header");`
			`assert!(extraction.content.contains("Age"), "Should contain 'Age' header");`
			`assert!(extraction.content.contains("City"), "Should contain 'City' header");`

			`assert!(extraction.content.contains("Alice"), "Should contain Alice row");`
			`assert!(extraction.content.contains("30"), "Should contain Alice's age");`
			`assert!(extraction.content.contains("NYC"), "Should contain Alice's city");`

			`assert!(extraction.content.contains("Bob"), "Should contain Bob row");`
			`assert!(extraction.content.contains("25"), "Should contain Bob's age");`
			`assert!(extraction.content.contains("LA"), "Should contain Bob's city");`
			`}`

			`/// Test CSV with headers - first row as headers.`
			`#[tokio::test]`
			`async fn test_csv_with_headers() {`
			`let config = ExtractionConfig::default();`

			`let csv_content = b"Product,Price,Quantity\nApple,1.50,100\nBanana,0.75,200\nOrange,2.00,150";`

			`let extraction = match extract_bytes(csv_content, "text/csv", &config).await {`
			`Ok(result) => result,`
			`Err(_) => {`
			`println!("Skipping test: CSV extraction not available");`
			`return;`
			`}`
			`};`

			`assert!(`
			`extraction.chunks.is_none(),`
			`"Chunks should be None without chunking config"`
			`);`
			`assert!(`
			`extraction.detected_languages.is_none(),`
			`"Language detection not enabled"`
			`);`
			`assert!(!extraction.tables.is_empty(), "CSV should produce table structures");`
			`assert_eq!(extraction.tables.len(), 1, "CSV should have one table");`
			`assert!(!extraction.tables[0].cells.is_empty(), "Table should have rows");`
			`assert!(`
			`!extraction.tables[0].markdown.is_empty(),`
			`"Table should have markdown representation"`
			`);`

			`assert!(extraction.content.contains("Product"), "Should contain Product header");`
			`assert!(extraction.content.contains("Price"), "Should contain Price header");`
			`assert!(`
			`extraction.content.contains("Quantity"),`
			`"Should contain Quantity header"`
			`);`

			`assert!(`
			`extraction.content.contains("Apple")`
			`&& extraction.content.contains("1.50")`
			`&& extraction.content.contains("100")`
			`);`
			`assert!(`
			`extraction.content.contains("Banana")`
			`&& extraction.content.contains("0.75")`
			`&& extraction.content.contains("200")`
			`);`
			`assert!(`
			`extraction.content.contains("Orange")`
			`&& extraction.content.contains("2.00")`
			`&& extraction.content.contains("150")`
			`);`
			`}`

			`/// Test CSV with custom delimiter - tab and semicolon.`
			`#[tokio::test]`
			`async fn test_csv_custom_delimiter() {`
			`let config = ExtractionConfig::default();`

			`let csv_content = b"Name;Age;City\nAlice;30;NYC\nBob;25;LA";`

			`let extraction = match extract_bytes(csv_content, "text/csv", &config).await {`
			`Ok(result) => result,`
			`Err(_) => {`
			`println!("Skipping test: CSV extraction not available");`
			`return;`
			`}`
			`};`

			`assert!(`
			`extraction.chunks.is_none(),`
			`"Chunks should be None without chunking config"`
			`);`
			`assert!(`
			`extraction.detected_languages.is_none(),`
			`"Language detection not enabled"`
			`);`
			`assert!(!extraction.tables.is_empty(), "CSV should produce table structures");`
			`assert_eq!(extraction.tables.len(), 1, "CSV should have one table");`
			`assert!(!extraction.tables[0].cells.is_empty(), "Table should have rows");`
			`assert!(`
			`!extraction.tables[0].markdown.is_empty(),`
			`"Table should have markdown representation"`
			`);`

			`assert!(!extraction.content.is_empty(), "Content should be extracted");`

			`assert!(extraction.content.contains("Alice"), "Should contain Alice");`
			`assert!(extraction.content.contains("30"), "Should contain age");`
			`assert!(extraction.content.contains("NYC"), "Should contain city");`
			`}`

			`/// Test TSV (Tab-Separated Values) file.`
			`#[tokio::test]`
			`async fn test_tsv_file() {`
			`let config = ExtractionConfig::default();`

			`let tsv_content = b"Name\tAge\tCity\nAlice\t30\tNYC\nBob\t25\tLA";`

			`let extraction = match extract_bytes(tsv_content, "text/tab-separated-values", &config).await {`
			`Ok(result) => result,`
			`Err(_) => {`
			`println!("Skipping test: TSV extraction not available");`
			`return;`
			`}`
			`};`

			`assert_eq!(extraction.mime_type, "text/tab-separated-values");`
			`assert!(`
			`extraction.chunks.is_none(),`
			`"Chunks should be None without chunking config"`
			`);`
			`assert!(`
			`extraction.detected_languages.is_none(),`
			`"Language detection not enabled"`
			`);`
			`assert!(!extraction.tables.is_empty(), "CSV should produce table structures");`
			`assert_eq!(extraction.tables.len(), 1, "CSV should have one table");`
			`assert!(!extraction.tables[0].cells.is_empty(), "Table should have rows");`
			`assert!(`
			`!extraction.tables[0].markdown.is_empty(),`
			`"Table should have markdown representation"`
			`);`

			`assert!(extraction.content.contains("Name"), "Should contain Name header");`
			`assert!(extraction.content.contains("Age"), "Should contain Age header");`
			`assert!(extraction.content.contains("City"), "Should contain City header");`
			`assert!(extraction.content.contains("Alice"), "Should contain Alice");`
			`assert!(extraction.content.contains("Bob"), "Should contain Bob");`
			`assert!(extraction.content.contains("30") && extraction.content.contains("NYC"));`
			`assert!(extraction.content.contains("25") && extraction.content.contains("LA"));`
			`}`

			`/// Test CSV with quoted fields - fields containing commas.`
			`#[tokio::test]`
			`async fn test_csv_quoted_fields() {`
			`let config = ExtractionConfig::default();`

			`let csv_content =`
			`b"Name,Description,Price\n\"Smith, John\",\"Product A, premium\",100\n\"Doe, Jane\",\"Product B, standard\",50";`

			`let extraction = match extract_bytes(csv_content, "text/csv", &config).await {`
			`Ok(result) => result,`
			`Err(_) => {`
			`println!("Skipping test: CSV extraction not available");`
			`return;`
			`}`
			`};`

			`assert!(`
			`extraction.chunks.is_none(),`
			`"Chunks should be None without chunking config"`
			`);`
			`assert!(`
			`extraction.detected_languages.is_none(),`
			`"Language detection not enabled"`
			`);`
			`assert!(!extraction.tables.is_empty(), "CSV should produce table structures");`
			`assert_eq!(extraction.tables.len(), 1, "CSV should have one table");`
			`assert!(!extraction.tables[0].cells.is_empty(), "Table should have rows");`
			`assert!(`
			`!extraction.tables[0].markdown.is_empty(),`
			`"Table should have markdown representation"`
			`);`

			`assert!(extraction.content.contains("Smith"), "Should contain Smith");`
			`assert!(extraction.content.contains("John"), "Should contain John");`
			`assert!(extraction.content.contains("Doe"), "Should contain Doe");`
			`assert!(extraction.content.contains("Jane"), "Should contain Jane");`

			`assert!(extraction.content.contains("Product A") \|\| extraction.content.contains("premium"));`
			`assert!(extraction.content.contains("Product B") \|\| extraction.content.contains("standard"));`

			`assert!(extraction.content.contains("100") && extraction.content.contains("50"));`
			`}`

			`/// Test CSV with special characters - Unicode, newlines in fields.`
			`#[tokio::test]`
			`async fn test_csv_special_characters() {`
			`let config = ExtractionConfig::default();`

			`let csv_content = "Name,City,Emoji\nAlice,Tokyo 東京,🎉\nBob,París,✅\nCarlos,Москва,🌍".as_bytes();`

			`let extraction = match extract_bytes(csv_content, "text/csv", &config).await {`
			`Ok(result) => result,`
			`Err(_) => {`
			`println!("Skipping test: CSV extraction not available");`
			`return;`
			`}`
			`};`

			`assert!(`
			`extraction.chunks.is_none(),`
			`"Chunks should be None without chunking config"`
			`);`
			`assert!(`
			`extraction.detected_languages.is_none(),`
			`"Language detection not enabled"`
			`);`
			`assert!(!extraction.tables.is_empty(), "CSV should produce table structures");`
			`assert_eq!(extraction.tables.len(), 1, "CSV should have one table");`
			`assert!(!extraction.tables[0].cells.is_empty(), "Table should have rows");`
			`assert!(`
			`!extraction.tables[0].markdown.is_empty(),`
			`"Table should have markdown representation"`
			`);`

			`assert!(!extraction.content.is_empty(), "Special characters should be handled");`

			`assert!(extraction.content.contains("Alice"), "Should contain Alice");`
			`assert!(extraction.content.contains("Bob"), "Should contain Bob");`
			`assert!(extraction.content.contains("Carlos"), "Should contain Carlos");`

			`assert!(extraction.content.contains("Tokyo") \|\| extraction.content.contains("東京"));`
			`assert!(extraction.content.contains("París") \|\| extraction.content.contains("Paris"));`
			`}`

			`/// Test CSV with large file - 10,000+ rows (streaming).`
			`#[tokio::test]`
			`async fn test_csv_large_file() {`
			`let config = ExtractionConfig::default();`

			`let mut csv_content = "ID,Name,Value\n".to_string();`
			`for i in 1..=10_000 {`
			`csv_content.push_str(&format!("{},Item{},{}.00\n", i, i, i * 10));`
			`}`

			`let extraction = match extract_bytes(csv_content.as_bytes(), "text/csv", &config).await {`
			`Ok(result) => result,`
			`Err(_) => {`
			`println!("Skipping test: CSV extraction not available");`
			`return;`
			`}`
			`};`

			`assert!(`
			`extraction.chunks.is_none(),`
			`"Chunks should be None without chunking config"`
			`);`
			`assert!(`
			`extraction.detected_languages.is_none(),`
			`"Language detection not enabled"`
			`);`
			`assert!(!extraction.tables.is_empty(), "CSV should produce table structures");`
			`assert_eq!(extraction.tables.len(), 1, "CSV should have one table");`
			`assert!(!extraction.tables[0].cells.is_empty(), "Table should have rows");`
			`assert!(`
			`!extraction.tables[0].markdown.is_empty(),`
			`"Table should have markdown representation"`
			`);`

			`assert!(!extraction.content.is_empty(), "Large CSV should be processed");`

			`assert!(`
			`extraction.content.len() > 1000,`
			`"Large CSV content should be substantial"`
			`);`

			`assert!(extraction.content.contains("Item1") \|\| extraction.content.contains("10.00"));`

			`assert!(extraction.content.contains("Item5000") \|\| extraction.content.contains("50000.00"));`

			`assert!(extraction.content.contains("Item10000") \|\| extraction.content.contains("100000.00"));`
			`}`

			`/// Test malformed CSV - inconsistent columns.`
			`#[tokio::test]`
			`async fn test_csv_malformed() {`
			`let config = ExtractionConfig::default();`

			`let csv_content = b"Name,Age,City\nAlice,30\nBob,25,LA,Extra\nCarlos,35,SF";`

			`let result = extract_bytes(csv_content, "text/csv", &config).await;`

			`assert!(`
			`result.is_ok() \|\| result.is_err(),`
			`"Should handle malformed CSV gracefully"`
			`);`

			`if let Ok(extraction) = result {`
			`assert!(!extraction.content.is_empty());`
			`}`
			`}`

			`/// Test empty CSV file.`
			`#[tokio::test]`
			`async fn test_csv_empty() {`
			`let config = ExtractionConfig::default();`

			`let empty_csv = b"";`

			`let result = extract_bytes(empty_csv, "text/csv", &config).await;`

			`assert!(result.is_ok() \|\| result.is_err(), "Should handle empty CSV gracefully");`
			`}`

			`/// Test CSV with only headers.`
			`#[tokio::test]`
			`async fn test_csv_headers_only() {`
			`let config = ExtractionConfig::default();`

			`let csv_content = b"Name,Age,City";`

			`let extraction = match extract_bytes(csv_content, "text/csv", &config).await {`
			`Ok(result) => result,`
			`Err(_) => {`
			`println!("Skipping test: CSV extraction not available");`
			`return;`
			`}`
			`};`

			`assert!(`
			`extraction.chunks.is_none(),`
			`"Chunks should be None without chunking config"`
			`);`
			`assert!(`
			`extraction.detected_languages.is_none(),`
			`"Language detection not enabled"`
			`);`
			`assert!(!extraction.tables.is_empty(), "CSV should produce table structures");`
			`assert_eq!(extraction.tables.len(), 1, "CSV should have one table");`
			`assert!(!extraction.tables[0].cells.is_empty(), "Table should have rows");`
			`assert!(`
			`!extraction.tables[0].markdown.is_empty(),`
			`"Table should have markdown representation"`
			`);`

			`assert!(`
			`extraction.content.contains("Name") \|\| !extraction.content.is_empty(),`
			`"Headers should be extracted"`
			`);`
			`}`

			`/// Test CSV with blank lines.`
			`#[tokio::test]`
			`async fn test_csv_blank_lines() {`
			`let config = ExtractionConfig::default();`

			`let csv_content = b"Name,Age\nAlice,30\n\nBob,25\n\nCarlos,35";`

			`let extraction = match extract_bytes(csv_content, "text/csv", &config).await {`
			`Ok(result) => result,`
			`Err(_) => {`
			`println!("Skipping test: CSV extraction not available");`
			`return;`
			`}`
			`};`

			`assert!(`
			`extraction.chunks.is_none(),`
			`"Chunks should be None without chunking config"`
			`);`
			`assert!(`
			`extraction.detected_languages.is_none(),`
			`"Language detection not enabled"`
			`);`
			`assert!(!extraction.tables.is_empty(), "CSV should produce table structures");`
			`assert_eq!(extraction.tables.len(), 1, "CSV should have one table");`
			`assert!(!extraction.tables[0].cells.is_empty(), "Table should have rows");`
			`assert!(`
			`!extraction.tables[0].markdown.is_empty(),`
			`"Table should have markdown representation"`
			`);`

			`assert!(extraction.content.contains("Alice") \|\| extraction.content.contains("Bob"));`
			`}`

			`/// Test CSV with numeric data.`
			`#[tokio::test]`
			`async fn test_csv_numeric_data() {`
			`let config = ExtractionConfig::default();`

			`let csv_content = b"ID,Price,Quantity,Discount\n1,19.99,100,0.15\n2,29.99,50,0.20\n3,9.99,200,0.10";`

			`let extraction = match extract_bytes(csv_content, "text/csv", &config).await {`
			`Ok(result) => result,`
			`Err(_) => {`
			`println!("Skipping test: CSV extraction not available");`
			`return;`
			`}`
			`};`

			`assert!(`
			`extraction.chunks.is_none(),`
			`"Chunks should be None without chunking config"`
			`);`
			`assert!(`
			`extraction.detected_languages.is_none(),`
			`"Language detection not enabled"`
			`);`
			`assert!(!extraction.tables.is_empty(), "CSV should produce table structures");`
			`assert_eq!(extraction.tables.len(), 1, "CSV should have one table");`
			`assert!(!extraction.tables[0].cells.is_empty(), "Table should have rows");`
			`assert!(`
			`!extraction.tables[0].markdown.is_empty(),`
			`"Table should have markdown representation"`
			`);`

			`assert!(extraction.content.contains("Price"), "Should contain Price header");`
			`assert!(`
			`extraction.content.contains("Quantity"),`
			`"Should contain Quantity header"`
			`);`
			`assert!(`
			`extraction.content.contains("Discount"),`
			`"Should contain Discount header"`
			`);`

			`assert!(extraction.content.contains("19.99"), "Should contain first price");`
			`assert!(extraction.content.contains("100"), "Should contain first quantity");`
			`assert!(extraction.content.contains("0.15"), "Should contain first discount");`

			`assert!(extraction.content.contains("29.99"), "Should contain second price");`
			`assert!(extraction.content.contains("50"), "Should contain second quantity");`

			`assert!(extraction.content.contains("9.99"), "Should contain third price");`
			`assert!(extraction.content.contains("200"), "Should contain third quantity");`
			`}`