crates/kreuzberg/tests/latex_extractor_tests.rs

//! Comprehensive LaTeX Extractor Tests
//!
//! This test suite defines the expected behavior for LaTeX extraction.
//!
//! Test Coverage:
//! - Basic content extraction (minimal.tex)
//! - Section hierarchy (basic_sections.tex)
//! - Text formatting (formatting.tex)
//! - Mathematical expressions (math.tex)
//! - Tables (tables.tex)
//! - Lists (lists.tex)
//! - Unicode handling (unicode.tex)
//!
//! Success Criteria:
//! - All tests passing (100%)
//! - No content loss (extract meaningful content)

#![cfg(feature = "office")]

use kreuzberg::core::config::ExtractionConfig;
use kreuzberg::extraction::derive::derive_extraction_result;
use kreuzberg::extractors::latex::LatexExtractor;
use kreuzberg::plugins::DocumentExtractor;
use std::fs;
use std::path::PathBuf;

/// Builds the path of a LaTeX fixture named `filename`.
///
/// Starts at this crate's `CARGO_MANIFEST_DIR`, climbs two directory levels,
/// and then descends into `test_documents/latex/`.
///
/// # Panics
///
/// Panics with "Operation failed" if the manifest directory has fewer than
/// two parent directories.
fn test_file_path(filename: &str) -> PathBuf {
    let crate_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    let one_up = crate_dir.parent().expect("Operation failed");
    let two_up = one_up.parent().expect("Operation failed");

    let mut fixture = two_up.join("test_documents");
    fixture.push("latex");
    fixture.push(filename);
    fixture
}

/// Extracts `minimal.tex` and checks that the plain-text output is non-empty
/// and contains the sentence "Hello World from LaTeX!".
#[tokio::test]
async fn test_latex_minimal_extraction() {
    let source = fs::read(test_file_path("minimal.tex")).expect("Failed to read minimal.tex");
    let config = ExtractionConfig::default();

    let latex = LatexExtractor;
    let doc = latex
        .extract_bytes(&source, "text/x-tex", &config)
        .await
        .expect("Should extract minimal LaTeX");
    let result = derive_extraction_result(doc, false, kreuzberg::OutputFormat::Plain);
    let text = &result.content;

    // Guard against a completely empty extraction before looking for specific text.
    assert!(
        !text.is_empty(),
        "FAIL: Extracted 0 bytes (current bug). Should extract content from minimal.tex"
    );

    assert!(
        text.contains("Hello World from LaTeX!"),
        "FAIL: Should extract 'Hello World from LaTeX!' but got: '{}'",
        text
    );
}

/// Extracts `basic_sections.tex` and checks the `title`, `author` and `date`
/// entries stored in `metadata.additional`.
#[tokio::test]
async fn test_latex_metadata_extraction() {
    let source = fs::read(test_file_path("basic_sections.tex")).expect("Failed to read basic_sections.tex");
    let config = ExtractionConfig::default();

    let latex = LatexExtractor;
    let doc = latex
        .extract_bytes(&source, "text/x-tex", &config)
        .await
        .expect("Should extract LaTeX with metadata");
    let result = derive_extraction_result(doc, false, kreuzberg::OutputFormat::Plain);
    let additional = &result.metadata.additional;

    // Each field is looked up by key and compared as a string slice.
    let title = additional.get("title").and_then(|v| v.as_str());
    assert_eq!(
        title,
        Some("Test Document"),
        "FAIL: Should extract title 'Test Document' from \\title{{}} command"
    );

    let author = additional.get("author").and_then(|v| v.as_str());
    assert_eq!(
        author,
        Some("John Doe"),
        "FAIL: Should extract author 'John Doe' from \\author{{}} command"
    );

    let date = additional.get("date").and_then(|v| v.as_str());
    assert_eq!(
        date,
        Some("2025-12-07"),
        "FAIL: Should extract date '2025-12-07' from \\date{{}} command"
    );
}

/// Extracts `basic_sections.tex` and checks that section, subsection and
/// subsubsection titles, plus one body paragraph, appear in the plain text.
#[tokio::test]
async fn test_latex_section_hierarchy() {
    let source = fs::read(test_file_path("basic_sections.tex")).expect("Failed to read basic_sections.tex");
    let config = ExtractionConfig::default();

    let latex = LatexExtractor;
    let doc = latex
        .extract_bytes(&source, "text/x-tex", &config)
        .await
        .expect("Should extract LaTeX sections");
    let result = derive_extraction_result(doc, false, kreuzberg::OutputFormat::Plain);
    let text = &result.content;

    // Top-level sections.
    assert!(
        text.contains("Introduction"),
        "FAIL: Should extract \\section{{Introduction}} as text"
    );
    assert!(
        text.contains("Methods"),
        "FAIL: Should extract \\section{{Methods}} as text"
    );
    assert!(
        text.contains("Results"),
        "FAIL: Should extract \\section{{Results}} as text"
    );

    // Nested headings.
    assert!(
        text.contains("Background"),
        "FAIL: Should extract \\subsection{{Background}} as text"
    );
    assert!(
        text.contains("Historical Context"),
        "FAIL: Should extract \\subsubsection{{Historical Context}} as text"
    );

    // Body text.
    assert!(
        text.contains("This is the introduction paragraph"),
        "FAIL: Should extract paragraph text from document body"
    );
}

/// Extracts `formatting.tex` and checks that the text wrapped in formatting
/// commands (bold, italic, underline, emphasis, monospace, nested) is present
/// in the plain-text output.
#[tokio::test]
async fn test_latex_text_formatting() {
    let source = fs::read(test_file_path("formatting.tex")).expect("Failed to read formatting.tex");
    let config = ExtractionConfig::default();

    let latex = LatexExtractor;
    let doc = latex
        .extract_bytes(&source, "text/x-tex", &config)
        .await
        .expect("Should extract LaTeX formatting");
    let result = derive_extraction_result(doc, false, kreuzberg::OutputFormat::Plain);
    let text = &result.content;

    // Heading and unformatted paragraph.
    assert!(
        text.contains("Text Formatting"),
        "FAIL: Should extract \\section{{Text Formatting}}"
    );
    assert!(
        text.contains("This is normal text"),
        "FAIL: Should extract plain paragraph text"
    );

    // Single formatting commands.
    assert!(
        text.contains("bold text"),
        "FAIL: Should extract text from \\textbf{{bold text}}"
    );
    assert!(
        text.contains("italic text"),
        "FAIL: Should extract text from \\textit{{italic text}}"
    );
    assert!(
        text.contains("underlined text"),
        "FAIL: Should extract text from \\underline{{underlined text}}"
    );
    assert!(
        text.contains("emphasized text"),
        "FAIL: Should extract text from \\emph{{emphasized text}}"
    );
    assert!(
        text.contains("monospace text"),
        "FAIL: Should extract text from \\texttt{{monospace text}}"
    );

    // Nested formatting commands.
    assert!(
        text.contains("bold and italic"),
        "FAIL: Should extract text from nested formatting commands"
    );
}

/// Extracts `math.tex` and checks that headings, the prose around inline
/// math, and some trace of the math content appear in the plain text.
#[tokio::test]
async fn test_latex_math_extraction() {
    let source = fs::read(test_file_path("math.tex")).expect("Failed to read math.tex");
    let config = ExtractionConfig::default();

    let latex = LatexExtractor;
    let doc = latex
        .extract_bytes(&source, "text/x-tex", &config)
        .await
        .expect("Should extract LaTeX math");
    let result = derive_extraction_result(doc, false, kreuzberg::OutputFormat::Plain);
    let text = &result.content;

    // Headings.
    assert!(
        text.contains("Math Formulas"),
        "FAIL: Should extract \\section{{Math Formulas}}"
    );
    assert!(
        text.contains("Inline Math"),
        "FAIL: Should extract \\subsection{{Inline Math}}"
    );
    assert!(
        text.contains("Display Math"),
        "FAIL: Should extract \\subsection{{Display Math}}"
    );

    // Inline math: either spelling is accepted.
    // NOTE(review): any text containing "mc²" also contains "mc", so the
    // second candidate never changes the outcome.
    assert!(
        ["mc", "mc²"].iter().any(|needle| text.contains(*needle)),
        "FAIL: Should extract inline math content from $E = mc^2$"
    );

    // Prose on both sides of the inline formula.
    assert!(
        text.contains("The equation"),
        "FAIL: Should extract text before inline math"
    );
    assert!(
        text.contains("is famous"),
        "FAIL: Should extract text after inline math"
    );

    // Display math: accept the command name or the integral symbol.
    assert!(
        ["int", "∫"].iter().any(|needle| text.contains(*needle)),
        "FAIL: Should extract display math environment content"
    );
}

/// Extracts `tables.tex` and checks that the section heading, table headers,
/// table cells and the table caption all appear in the plain text.
#[tokio::test]
async fn test_latex_table_extraction() {
    let source = fs::read(test_file_path("tables.tex")).expect("Failed to read tables.tex");
    let config = ExtractionConfig::default();

    let latex = LatexExtractor;
    let doc = latex
        .extract_bytes(&source, "text/x-tex", &config)
        .await
        .expect("Should extract LaTeX tables");
    let result = derive_extraction_result(doc, false, kreuzberg::OutputFormat::Plain);
    let text = &result.content;

    assert!(text.contains("Tables"), "FAIL: Should extract \\section{{Tables}}");

    // Headers and cells, checked in order; the first missing entry fails the test.
    let cells = [
        ("Name", "FAIL: Should extract table header 'Name' from tabular"),
        ("Age", "FAIL: Should extract table header 'Age' from tabular"),
        ("Score", "FAIL: Should extract table header 'Score' from tabular"),
        ("Alice", "FAIL: Should extract table cell 'Alice'"),
        ("30", "FAIL: Should extract table cell '30'"),
        ("95", "FAIL: Should extract table cell '95'"),
        ("Bob", "FAIL: Should extract table cell 'Bob'"),
        ("Charlie", "FAIL: Should extract table cell 'Charlie'"),
        ("Column 1", "FAIL: Should extract 'Column 1' from second table"),
        ("Column 2", "FAIL: Should extract 'Column 2' from second table"),
    ];
    for (needle, failure) in cells {
        assert!(text.contains(needle), "{}", failure);
    }

    assert!(
        text.contains("Sample table with caption"),
        "FAIL: Should extract table caption from \\caption{{}}"
    );
}

/// Extracts `lists.tex` and checks that the four itemize entries appear in
/// the plain text.
#[tokio::test]
async fn test_latex_list_itemize() {
    let source = fs::read(test_file_path("lists.tex")).expect("Failed to read lists.tex");
    let config = ExtractionConfig::default();

    let latex = LatexExtractor;
    let doc = latex
        .extract_bytes(&source, "text/x-tex", &config)
        .await
        .expect("Should extract LaTeX lists");
    let result = derive_extraction_result(doc, false, kreuzberg::OutputFormat::Plain);
    let text = &result.content;

    // Checked in order; the first missing entry fails the test.
    let items = [
        ("First item", "FAIL: Should extract \\item First item from itemize"),
        ("Second item", "FAIL: Should extract \\item Second item from itemize"),
        (
            "Third item with nested list",
            "FAIL: Should extract \\item Third item with nested list",
        ),
        ("Fourth item", "FAIL: Should extract \\item Fourth item from itemize"),
    ];
    for (needle, failure) in items {
        assert!(text.contains(needle), "{}", failure);
    }
}

/// Extracts `lists.tex` and checks that both nested list entries appear in
/// the plain text.
#[tokio::test]
async fn test_latex_list_nested() {
    let source = fs::read(test_file_path("lists.tex")).expect("Failed to read lists.tex");
    let config = ExtractionConfig::default();

    let latex = LatexExtractor;
    let doc = latex
        .extract_bytes(&source, "text/x-tex", &config)
        .await
        .expect("Should extract LaTeX nested lists");
    let result = derive_extraction_result(doc, false, kreuzberg::OutputFormat::Plain);
    let text = &result.content;

    // Checked in order; the first missing entry fails the test.
    let nested = [
        ("Nested item 1", "FAIL: Should extract nested \\item Nested item 1"),
        ("Nested item 2", "FAIL: Should extract nested \\item Nested item 2"),
    ];
    for (needle, failure) in nested {
        assert!(text.contains(needle), "{}", failure);
    }
}

/// Extracts `lists.tex` and checks that the three enumerate entries appear in
/// the plain text.
#[tokio::test]
async fn test_latex_list_enumerate() {
    let source = fs::read(test_file_path("lists.tex")).expect("Failed to read lists.tex");
    let config = ExtractionConfig::default();

    let latex = LatexExtractor;
    let doc = latex
        .extract_bytes(&source, "text/x-tex", &config)
        .await
        .expect("Should extract LaTeX enumerate");
    let result = derive_extraction_result(doc, false, kreuzberg::OutputFormat::Plain);
    let text = &result.content;

    // Checked in order; the first missing entry fails the test.
    let numbered = [
        (
            "First numbered item",
            "FAIL: Should extract \\item First numbered item from enumerate",
        ),
        (
            "Second numbered item",
            "FAIL: Should extract \\item Second numbered item from enumerate",
        ),
        (
            "Third numbered item",
            "FAIL: Should extract \\item Third numbered item from enumerate",
        ),
    ];
    for (needle, failure) in numbered {
        assert!(text.contains(needle), "{}", failure);
    }
}

/// Extracts `lists.tex` and checks that both description-list terms and
/// their definitions appear in the plain text.
#[tokio::test]
async fn test_latex_list_description() {
    let source = fs::read(test_file_path("lists.tex")).expect("Failed to read lists.tex");
    let config = ExtractionConfig::default();

    let latex = LatexExtractor;
    let doc = latex
        .extract_bytes(&source, "text/x-tex", &config)
        .await
        .expect("Should extract LaTeX description lists");
    let result = derive_extraction_result(doc, false, kreuzberg::OutputFormat::Plain);
    let text = &result.content;

    // Term followed by its definition, checked in order.
    let entries = [
        ("Term 1", "FAIL: Should extract \\item[Term 1] from description list"),
        (
            "Definition of term 1",
            "FAIL: Should extract definition text from description list",
        ),
        ("Term 2", "FAIL: Should extract \\item[Term 2] from description list"),
        (
            "Definition of term 2",
            "FAIL: Should extract definition text from description list",
        ),
    ];
    for (needle, failure) in entries {
        assert!(text.contains(needle), "{}", failure);
    }
}

#[tokio::test]
async fn test_latex_lists_pandoc_parity() {
    let content = fs::read(test_file_path("lists.tex")).expect("Failed to read lists.tex");

    let extractor = LatexExtractor;
    let _result = extractor
        .extract_bytes(&content, "text/x-tex", &ExtractionConfig::default())
        .await
        .expect("Should extract LaTeX lists");
}

/// Extracts `unicode.tex` and checks that the plain text contains either the
/// Hebrew sample word or the literal word "Hebrew", and is non-empty.
#[tokio::test]
async fn test_latex_unicode_handling() {
    let source = fs::read(test_file_path("unicode.tex")).expect("Failed to read unicode.tex");
    let config = ExtractionConfig::default();

    let latex = LatexExtractor;
    let doc = latex
        .extract_bytes(&source, "text/x-tex", &config)
        .await
        .expect("Should extract LaTeX with Unicode");
    let result = derive_extraction_result(doc, false, kreuzberg::OutputFormat::Plain);
    let text = &result.content;

    // Either the original script or its label is accepted.
    let has_hebrew = ["אֳרָנִים", "Hebrew"].iter().any(|needle| text.contains(*needle));
    assert!(
        has_hebrew,
        "FAIL: Should extract Hebrew characters or 'Hebrew' text"
    );

    assert!(
        !text.is_empty(),
        "FAIL: Should extract non-zero content from unicode.tex"
    );
}

/// Regression check on `minimal.tex`: the plain-text output must be non-empty
/// and at least 10 bytes long.
#[tokio::test]
async fn test_latex_no_content_loss_bug() {
    let source = fs::read(test_file_path("minimal.tex")).expect("Failed to read minimal.tex");
    let config = ExtractionConfig::default();

    let latex = LatexExtractor;
    let doc = latex
        .extract_bytes(&source, "text/x-tex", &config)
        .await
        .expect("Should extract minimal LaTeX");
    let result = derive_extraction_result(doc, false, kreuzberg::OutputFormat::Plain);
    let text = &result.content;

    assert!(
        !text.is_empty(),
        "FAIL: CRITICAL BUG - Extracted 0 bytes from minimal.tex. Current LaTeX extractor is completely broken."
    );

    // `len()` counts bytes, not characters.
    let extracted_bytes = text.len();
    assert!(
        extracted_bytes >= 10,
        "FAIL: Extracted only {} bytes, expected at least 10. Content: '{}'",
        extracted_bytes,
        text
    );
}

/// Extracts `minimal.tex` twice with the same extractor and checks that both
/// runs produce equal content and equal `metadata.additional`.
#[tokio::test]
async fn test_latex_extraction_deterministic() {
    let source = fs::read(test_file_path("minimal.tex")).expect("Failed to read minimal.tex");
    let latex = LatexExtractor;

    // First run.
    let first_doc = latex
        .extract_bytes(&source, "text/x-tex", &ExtractionConfig::default())
        .await
        .expect("Should extract LaTeX (first run)");
    let first = derive_extraction_result(first_doc, false, kreuzberg::OutputFormat::Plain);

    // Second run over the same bytes.
    let second_doc = latex
        .extract_bytes(&source, "text/x-tex", &ExtractionConfig::default())
        .await
        .expect("Should extract LaTeX (second run)");
    let second = derive_extraction_result(second_doc, false, kreuzberg::OutputFormat::Plain);

    assert_eq!(
        first.content, second.content,
        "FAIL: Extraction is not deterministic. Same input produced different outputs."
    );
    assert_eq!(
        first.metadata.additional, second.metadata.additional,
        "FAIL: Metadata extraction is not deterministic."
    );
}

/// Extracts an inline document whose `document` environment has no body and
/// checks that the plain-text output is empty after trimming whitespace.
#[tokio::test]
async fn test_latex_empty_document_handling() {
    let source: &[u8] = b"\\documentclass{article}\n\\begin{document}\n\\end{document}";
    let config = ExtractionConfig::default();

    let latex = LatexExtractor;
    let doc = latex
        .extract_bytes(source, "text/x-tex", &config)
        .await
        .expect("Should handle empty LaTeX without panicking");
    let result = derive_extraction_result(doc, false, kreuzberg::OutputFormat::Plain);
    let text = &result.content;

    // Whitespace-only output still counts as empty.
    assert!(
        text.trim().is_empty(),
        "Empty document should produce empty content (got: '{}')",
        text
    );
}
Nomad changes 2026-06-01 23:40:55 +02:00			`//! Comprehensive LaTeX Extractor Tests`
			`//!`
			`//! This test suite defines the expected behavior for LaTeX extraction.`
			`//!`
			`//! Test Coverage:`
			`//! - Basic content extraction (minimal.tex)`
			`//! - Section hierarchy (basic_sections.tex)`
			`//! - Text formatting (formatting.tex)`
			`//! - Mathematical expressions (math.tex)`
			`//! - Tables (tables.tex)`
			`//! - Lists (lists.tex)`
			`//! - Unicode handling (unicode.tex)`
			`//!`
			`//! Success Criteria:`
			`//! - All tests passing (100%)`
			`//! - No content loss (extract meaningful content)`

			`#![cfg(feature = "office")]`

			`use kreuzberg::core::config::ExtractionConfig;`
			`use kreuzberg::extraction::derive::derive_extraction_result;`
			`use kreuzberg::extractors::latex::LatexExtractor;`
			`use kreuzberg::plugins::DocumentExtractor;`
			`use std::fs;`
			`use std::path::PathBuf;`

			`/// Helper to get absolute path to test documents`
			`fn test_file_path(filename: &str) -> PathBuf {`
			`let manifest_dir = env!("CARGO_MANIFEST_DIR");`
			`PathBuf::from(manifest_dir)`
			`.parent()`
			`.expect("Operation failed")`
			`.parent()`
			`.expect("Operation failed")`
			`.join("test_documents")`
			`.join("latex")`
			`.join(filename)`
			`}`

			`#[tokio::test]`
			`async fn test_latex_minimal_extraction() {`
			`let content = fs::read(test_file_path("minimal.tex")).expect("Failed to read minimal.tex");`

			`let extractor = LatexExtractor;`
			`let doc = extractor`
			`.extract_bytes(&content, "text/x-tex", &ExtractionConfig::default())`
			`.await`
			`.expect("Should extract minimal LaTeX");`
			`let result = derive_extraction_result(doc, false, kreuzberg::OutputFormat::Plain);`

			`assert!(`
			`!result.content.is_empty(),`
			`"FAIL: Extracted 0 bytes (current bug). Should extract content from minimal.tex"`
			`);`

			`assert!(`
			`result.content.contains("Hello World from LaTeX!"),`
			`"FAIL: Should extract 'Hello World from LaTeX!' but got: '{}'",`
			`result.content`
			`);`
			`}`

			`#[tokio::test]`
			`async fn test_latex_metadata_extraction() {`
			`let content = fs::read(test_file_path("basic_sections.tex")).expect("Failed to read basic_sections.tex");`

			`let extractor = LatexExtractor;`
			`let doc = extractor`
			`.extract_bytes(&content, "text/x-tex", &ExtractionConfig::default())`
			`.await`
			`.expect("Should extract LaTeX with metadata");`
			`let result = derive_extraction_result(doc, false, kreuzberg::OutputFormat::Plain);`

			`assert_eq!(`
			`result.metadata.additional.get("title").and_then(\|v\| v.as_str()),`
			`Some("Test Document"),`
			`"FAIL: Should extract title 'Test Document' from \\title{{}} command"`
			`);`

			`assert_eq!(`
			`result.metadata.additional.get("author").and_then(\|v\| v.as_str()),`
			`Some("John Doe"),`
			`"FAIL: Should extract author 'John Doe' from \\author{{}} command"`
			`);`

			`assert_eq!(`
			`result.metadata.additional.get("date").and_then(\|v\| v.as_str()),`
			`Some("2025-12-07"),`
			`"FAIL: Should extract date '2025-12-07' from \\date{{}} command"`
			`);`
			`}`

			`#[tokio::test]`
			`async fn test_latex_section_hierarchy() {`
			`let content = fs::read(test_file_path("basic_sections.tex")).expect("Failed to read basic_sections.tex");`

			`let extractor = LatexExtractor;`
			`let doc = extractor`
			`.extract_bytes(&content, "text/x-tex", &ExtractionConfig::default())`
			`.await`
			`.expect("Should extract LaTeX sections");`
			`let result = derive_extraction_result(doc, false, kreuzberg::OutputFormat::Plain);`

			`assert!(`
			`result.content.contains("Introduction"),`
			`"FAIL: Should extract \\section{{Introduction}} as text"`
			`);`

			`assert!(`
			`result.content.contains("Methods"),`
			`"FAIL: Should extract \\section{{Methods}} as text"`
			`);`

			`assert!(`
			`result.content.contains("Results"),`
			`"FAIL: Should extract \\section{{Results}} as text"`
			`);`

			`assert!(`
			`result.content.contains("Background"),`
			`"FAIL: Should extract \\subsection{{Background}} as text"`
			`);`

			`assert!(`
			`result.content.contains("Historical Context"),`
			`"FAIL: Should extract \\subsubsection{{Historical Context}} as text"`
			`);`

			`assert!(`
			`result.content.contains("This is the introduction paragraph"),`
			`"FAIL: Should extract paragraph text from document body"`
			`);`
			`}`

			`#[tokio::test]`
			`async fn test_latex_text_formatting() {`
			`let content = fs::read(test_file_path("formatting.tex")).expect("Failed to read formatting.tex");`

			`let extractor = LatexExtractor;`
			`let doc = extractor`
			`.extract_bytes(&content, "text/x-tex", &ExtractionConfig::default())`
			`.await`
			`.expect("Should extract LaTeX formatting");`
			`let result = derive_extraction_result(doc, false, kreuzberg::OutputFormat::Plain);`

			`assert!(`
			`result.content.contains("Text Formatting"),`
			`"FAIL: Should extract \\section{{Text Formatting}}"`
			`);`

			`assert!(`
			`result.content.contains("This is normal text"),`
			`"FAIL: Should extract plain paragraph text"`
			`);`

			`assert!(`
			`result.content.contains("bold text"),`
			`"FAIL: Should extract text from \\textbf{{bold text}}"`
			`);`

			`assert!(`
			`result.content.contains("italic text"),`
			`"FAIL: Should extract text from \\textit{{italic text}}"`
			`);`

			`assert!(`
			`result.content.contains("underlined text"),`
			`"FAIL: Should extract text from \\underline{{underlined text}}"`
			`);`

			`assert!(`
			`result.content.contains("emphasized text"),`
			`"FAIL: Should extract text from \\emph{{emphasized text}}"`
			`);`

			`assert!(`
			`result.content.contains("monospace text"),`
			`"FAIL: Should extract text from \\texttt{{monospace text}}"`
			`);`

			`assert!(`
			`result.content.contains("bold and italic"),`
			`"FAIL: Should extract text from nested formatting commands"`
			`);`
			`}`

			`#[tokio::test]`
			`async fn test_latex_math_extraction() {`
			`let content = fs::read(test_file_path("math.tex")).expect("Failed to read math.tex");`

			`let extractor = LatexExtractor;`
			`let doc = extractor`
			`.extract_bytes(&content, "text/x-tex", &ExtractionConfig::default())`
			`.await`
			`.expect("Should extract LaTeX math");`
			`let result = derive_extraction_result(doc, false, kreuzberg::OutputFormat::Plain);`

			`assert!(`
			`result.content.contains("Math Formulas"),`
			`"FAIL: Should extract \\section{{Math Formulas}}"`
			`);`

			`assert!(`
			`result.content.contains("Inline Math"),`
			`"FAIL: Should extract \\subsection{{Inline Math}}"`
			`);`

			`assert!(`
			`result.content.contains("Display Math"),`
			`"FAIL: Should extract \\subsection{{Display Math}}"`
			`);`

			`assert!(`
			`result.content.contains("mc") \|\| result.content.contains("mc²"),`
			`"FAIL: Should extract inline math content from $E = mc^2$"`
			`);`

			`assert!(`
			`result.content.contains("The equation"),`
			`"FAIL: Should extract text before inline math"`
			`);`

			`assert!(`
			`result.content.contains("is famous"),`
			`"FAIL: Should extract text after inline math"`
			`);`

			`assert!(`
			`result.content.contains("int") \|\| result.content.contains("∫"),`
			`"FAIL: Should extract display math environment content"`
			`);`
			`}`

			`#[tokio::test]`
			`async fn test_latex_table_extraction() {`
			`let content = fs::read(test_file_path("tables.tex")).expect("Failed to read tables.tex");`

			`let extractor = LatexExtractor;`
			`let doc = extractor`
			`.extract_bytes(&content, "text/x-tex", &ExtractionConfig::default())`
			`.await`
			`.expect("Should extract LaTeX tables");`
			`let result = derive_extraction_result(doc, false, kreuzberg::OutputFormat::Plain);`

			`assert!(`
			`result.content.contains("Tables"),`
			`"FAIL: Should extract \\section{{Tables}}"`
			`);`

			`assert!(`
			`result.content.contains("Name"),`
			`"FAIL: Should extract table header 'Name' from tabular"`
			`);`

			`assert!(`
			`result.content.contains("Age"),`
			`"FAIL: Should extract table header 'Age' from tabular"`
			`);`

			`assert!(`
			`result.content.contains("Score"),`
			`"FAIL: Should extract table header 'Score' from tabular"`
			`);`

			`assert!(`
			`result.content.contains("Alice"),`
			`"FAIL: Should extract table cell 'Alice'"`
			`);`

			`assert!(result.content.contains("30"), "FAIL: Should extract table cell '30'");`

			`assert!(result.content.contains("95"), "FAIL: Should extract table cell '95'");`

			`assert!(result.content.contains("Bob"), "FAIL: Should extract table cell 'Bob'");`

			`assert!(`
			`result.content.contains("Charlie"),`
			`"FAIL: Should extract table cell 'Charlie'"`
			`);`

			`assert!(`
			`result.content.contains("Column 1"),`
			`"FAIL: Should extract 'Column 1' from second table"`
			`);`

			`assert!(`
			`result.content.contains("Column 2"),`
			`"FAIL: Should extract 'Column 2' from second table"`
			`);`

			`assert!(`
			`result.content.contains("Sample table with caption"),`
			`"FAIL: Should extract table caption from \\caption{{}}"`
			`);`
			`}`

			`#[tokio::test]`
			`async fn test_latex_list_itemize() {`
			`let content = fs::read(test_file_path("lists.tex")).expect("Failed to read lists.tex");`

			`let extractor = LatexExtractor;`
			`let doc = extractor`
			`.extract_bytes(&content, "text/x-tex", &ExtractionConfig::default())`
			`.await`
			`.expect("Should extract LaTeX lists");`
			`let result = derive_extraction_result(doc, false, kreuzberg::OutputFormat::Plain);`

			`assert!(`
			`result.content.contains("First item"),`
			`"FAIL: Should extract \\item First item from itemize"`
			`);`

			`assert!(`
			`result.content.contains("Second item"),`
			`"FAIL: Should extract \\item Second item from itemize"`
			`);`

			`assert!(`
			`result.content.contains("Third item with nested list"),`
			`"FAIL: Should extract \\item Third item with nested list"`
			`);`

			`assert!(`
			`result.content.contains("Fourth item"),`
			`"FAIL: Should extract \\item Fourth item from itemize"`
			`);`
			`}`

			`#[tokio::test]`
			`async fn test_latex_list_nested() {`
			`let content = fs::read(test_file_path("lists.tex")).expect("Failed to read lists.tex");`

			`let extractor = LatexExtractor;`
			`let doc = extractor`
			`.extract_bytes(&content, "text/x-tex", &ExtractionConfig::default())`
			`.await`
			`.expect("Should extract LaTeX nested lists");`
			`let result = derive_extraction_result(doc, false, kreuzberg::OutputFormat::Plain);`

			`assert!(`
			`result.content.contains("Nested item 1"),`
			`"FAIL: Should extract nested \\item Nested item 1"`
			`);`

			`assert!(`
			`result.content.contains("Nested item 2"),`
			`"FAIL: Should extract nested \\item Nested item 2"`
			`);`
			`}`

			`#[tokio::test]`
			`async fn test_latex_list_enumerate() {`
			`let content = fs::read(test_file_path("lists.tex")).expect("Failed to read lists.tex");`

			`let extractor = LatexExtractor;`
			`let doc = extractor`
			`.extract_bytes(&content, "text/x-tex", &ExtractionConfig::default())`
			`.await`
			`.expect("Should extract LaTeX enumerate");`
			`let result = derive_extraction_result(doc, false, kreuzberg::OutputFormat::Plain);`

			`assert!(`
			`result.content.contains("First numbered item"),`
			`"FAIL: Should extract \\item First numbered item from enumerate"`
			`);`

			`assert!(`
			`result.content.contains("Second numbered item"),`
			`"FAIL: Should extract \\item Second numbered item from enumerate"`
			`);`

			`assert!(`
			`result.content.contains("Third numbered item"),`
			`"FAIL: Should extract \\item Third numbered item from enumerate"`
			`);`
			`}`

			`#[tokio::test]`
			`async fn test_latex_list_description() {`
			`let content = fs::read(test_file_path("lists.tex")).expect("Failed to read lists.tex");`

			`let extractor = LatexExtractor;`
			`let doc = extractor`
			`.extract_bytes(&content, "text/x-tex", &ExtractionConfig::default())`
			`.await`
			`.expect("Should extract LaTeX description lists");`
			`let result = derive_extraction_result(doc, false, kreuzberg::OutputFormat::Plain);`

			`assert!(`
			`result.content.contains("Term 1"),`
			`"FAIL: Should extract \\item[Term 1] from description list"`
			`);`

			`assert!(`
			`result.content.contains("Definition of term 1"),`
			`"FAIL: Should extract definition text from description list"`
			`);`

			`assert!(`
			`result.content.contains("Term 2"),`
			`"FAIL: Should extract \\item[Term 2] from description list"`
			`);`

			`assert!(`
			`result.content.contains("Definition of term 2"),`
			`"FAIL: Should extract definition text from description list"`
			`);`
			`}`

			`#[tokio::test]`
			`async fn test_latex_lists_pandoc_parity() {`
			`let content = fs::read(test_file_path("lists.tex")).expect("Failed to read lists.tex");`

			`let extractor = LatexExtractor;`
			`let _result = extractor`
			`.extract_bytes(&content, "text/x-tex", &ExtractionConfig::default())`
			`.await`
			`.expect("Should extract LaTeX lists");`
			`}`

			`#[tokio::test]`
			`async fn test_latex_unicode_handling() {`
			`let content = fs::read(test_file_path("unicode.tex")).expect("Failed to read unicode.tex");`

			`let extractor = LatexExtractor;`
			`let doc = extractor`
			`.extract_bytes(&content, "text/x-tex", &ExtractionConfig::default())`
			`.await`
			`.expect("Should extract LaTeX with Unicode");`
			`let result = derive_extraction_result(doc, false, kreuzberg::OutputFormat::Plain);`

			`assert!(`
			`result.content.contains("אֳרָנִים") \|\| result.content.contains("Hebrew"),`
			`"FAIL: Should extract Hebrew characters or 'Hebrew' text"`
			`);`

			`assert!(`
			`!result.content.is_empty(),`
			`"FAIL: Should extract non-zero content from unicode.tex"`
			`);`
			`}`

			`#[tokio::test]`
			`async fn test_latex_no_content_loss_bug() {`
			`let content = fs::read(test_file_path("minimal.tex")).expect("Failed to read minimal.tex");`

			`let extractor = LatexExtractor;`
			`let doc = extractor`
			`.extract_bytes(&content, "text/x-tex", &ExtractionConfig::default())`
			`.await`
			`.expect("Should extract minimal LaTeX");`
			`let result = derive_extraction_result(doc, false, kreuzberg::OutputFormat::Plain);`

			`assert!(`
			`!result.content.is_empty(),`
			`"FAIL: CRITICAL BUG - Extracted 0 bytes from minimal.tex. Current LaTeX extractor is completely broken."`
			`);`

			`assert!(`
			`result.content.len() >= 10,`
			`"FAIL: Extracted only {} bytes, expected at least 10. Content: '{}'",`
			`result.content.len(),`
			`result.content`
			`);`
			`}`

			`#[tokio::test]`
			`async fn test_latex_extraction_deterministic() {`
			`let content = fs::read(test_file_path("minimal.tex")).expect("Failed to read minimal.tex");`

			`let extractor = LatexExtractor;`

			`let doc_result1 = extractor`
			`.extract_bytes(&content, "text/x-tex", &ExtractionConfig::default())`
			`.await`
			`.expect("Should extract LaTeX (first run)");`
			`let result1 = derive_extraction_result(doc_result1, false, kreuzberg::OutputFormat::Plain);`

			`let doc_result2 = extractor`
			`.extract_bytes(&content, "text/x-tex", &ExtractionConfig::default())`
			`.await`
			`.expect("Should extract LaTeX (second run)");`
			`let result2 = derive_extraction_result(doc_result2, false, kreuzberg::OutputFormat::Plain);`

			`assert_eq!(`
			`result1.content, result2.content,`
			`"FAIL: Extraction is not deterministic. Same input produced different outputs."`
			`);`

			`assert_eq!(`
			`result1.metadata.additional, result2.metadata.additional,`
			`"FAIL: Metadata extraction is not deterministic."`
			`);`
			`}`

			`#[tokio::test]`
			`async fn test_latex_empty_document_handling() {`
			`let empty_latex = b"\\documentclass{article}\n\\begin{document}\n\\end{document}";`

			`let extractor = LatexExtractor;`
			`let doc = extractor`
			`.extract_bytes(empty_latex, "text/x-tex", &ExtractionConfig::default())`
			`.await`
			`.expect("Should handle empty LaTeX without panicking");`
			`let result = derive_extraction_result(doc, false, kreuzberg::OutputFormat::Plain);`

			`assert!(`
			`result.content.trim().is_empty(),`
			`"Empty document should produce empty content (got: '{}')",`
			`result.content`
			`);`
			`}`