crates/kreuzberg/tests/typst_extractor_tests.rs

//! Comprehensive TDD test suite for Typst document extraction.
//!
//! This test suite validates Typst document extraction against expected outputs.
//! The tests verify:
//! - Document metadata extraction (title, author, date, keywords)
//! - Heading hierarchy parsing (=, ==, ===, etc.)
//! - Inline formatting (bold, italic, code)
//! - Table extraction and parsing
//! - List handling (ordered and unordered)
//! - Link extraction
//! - Mathematical notation preservation
//!
//! Each test document is extracted and validated for correct content extraction.

#![cfg(feature = "office")]

use kreuzberg::core::config::ExtractionConfig;
use kreuzberg::core::extractor::extract_bytes;
use std::{fs, path::PathBuf};

/// Builds the path of a Typst fixture file.
///
/// The result is `<CARGO_MANIFEST_DIR>/../../test_documents/typst/<name>`. The path is
/// only assembled here; nothing checks that the file exists.
fn typst_fixture(name: &str) -> PathBuf {
    let mut fixture = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    fixture.push("../../test_documents/typst");
    fixture.push(name);
    fixture
}

/// Test simple.typ - end-to-end extraction of the basic fixture
///
/// Checks performed on the extraction result:
/// - the reported MIME type is still `text/x-typst`
/// - content is non-empty, and the metadata carries a title and authors
/// - the words "Introduction", "Features" and "Conclusion" appear
/// - each of the eight heading patterns occurs exactly once
/// - bold and list content is present, either as markup or as plain text
///
/// The test returns early (without failing) when the fixture cannot be read or when
/// extraction returns an error.
///
/// NOTE(review): the heading patterns are plain substrings, so `"= Features"` is also
/// satisfied by a `== Features` line. The level named in each assertion message is
/// therefore not strictly verified - confirm against the fixture.
#[tokio::test]
async fn test_simple_typst_document_extraction() {
    let config = ExtractionConfig::default();

    let bytes = match fs::read(typst_fixture("simple.typ")) {
        Ok(bytes) => bytes,
        Err(e) => {
            eprintln!("Warning: Could not read simple.typ: {}. Skipping test.", e);
            return;
        }
    };

    let extraction = match extract_bytes(&bytes, "text/x-typst", &config).await {
        Ok(extraction) => extraction,
        Err(_) => {
            println!("Skipping test: Typst extractor may not be available");
            return;
        }
    };
    let text = &extraction.content;

    assert_eq!(extraction.mime_type, "text/x-typst", "MIME type should be preserved");

    assert!(!text.is_empty(), "Extracted content should not be empty");

    assert!(
        extraction.metadata.title.is_some(),
        "Document title should be extracted from #set document()"
    );

    assert!(
        extraction.metadata.authors.is_some(),
        "Document author should be extracted"
    );

    // Heading text must be present regardless of how the markup is rendered.
    for word in ["Introduction", "Features", "Conclusion"] {
        assert!(text.contains(word), "Should extract '{}' heading", word);
    }

    // Each heading pattern must occur exactly once in the extracted text.
    let expected_headings = [
        ("= Introduction", "Should extract 'Introduction' (level 1)"),
        ("== Subsection", "Should extract 'Subsection' (level 2)"),
        ("= Features", "Should extract 'Features' (level 1)"),
        ("== Lists", "Should extract 'Lists' (level 2)"),
        ("== Code", "Should extract 'Code' (level 2)"),
        ("== Tables", "Should extract 'Tables' (level 2)"),
        ("== Links", "Should extract 'Links' (level 2)"),
        ("= Conclusion", "Should extract 'Conclusion' (level 1)"),
    ];
    for (pattern, message) in expected_headings {
        assert_eq!(text.matches(pattern).count(), 1, "{}", message);
    }

    assert!(
        ["*", "bold"].iter().any(|needle| text.contains(*needle)),
        "Should preserve bold formatting or text"
    );

    assert!(
        ["-", "First", "item"].iter().any(|needle| text.contains(*needle)),
        "Should extract list content"
    );

    println!(
        "✓ simple.typ: Successfully extracted {} characters with all 8 headings",
        text.len()
    );
}

/// Test minimal.typ - smallest fixture, extracted as `application/x-typst`
///
/// Passes when the extracted content is non-empty and contains "Hello" or "World".
/// Returns early (without failing) when the fixture cannot be read or when extraction
/// returns an error.
#[tokio::test]
async fn test_minimal_typst_document_extraction() {
    let config = ExtractionConfig::default();

    let bytes = match fs::read(typst_fixture("minimal.typ")) {
        Ok(bytes) => bytes,
        Err(e) => {
            eprintln!("Warning: Could not read minimal.typ: {}. Skipping test.", e);
            return;
        }
    };

    let extraction = match extract_bytes(&bytes, "application/x-typst", &config).await {
        Ok(extraction) => extraction,
        Err(_) => {
            println!("Skipping test: Typst extractor may not be available");
            return;
        }
    };
    let text = &extraction.content;

    assert!(!text.is_empty(), "Minimal document should extract content");

    assert!(
        ["Hello", "World"].iter().any(|needle| text.contains(*needle)),
        "Should extract heading content"
    );

    println!("✓ minimal.typ: Successfully extracted {} characters", text.len());
}

/// Test headings.typ - heading levels 1 through 6
///
/// For every level `n` in 1..=6 the test works with the pattern made of `n` equals
/// signs followed by ` Level n` (for example `=== Level 3`):
/// 1. first pass: the pattern, or the plain text `Level n Heading`, must be present;
/// 2. second pass: the pattern must occur exactly once.
///
/// Returns early (without failing) when the fixture cannot be read or when extraction
/// returns an error.
#[tokio::test]
async fn test_heading_hierarchy_extraction() {
    let config = ExtractionConfig::default();

    let bytes = match fs::read(typst_fixture("headings.typ")) {
        Ok(bytes) => bytes,
        Err(e) => {
            eprintln!("Warning: Could not read headings.typ: {}. Skipping test.", e);
            return;
        }
    };

    let extraction = match extract_bytes(&bytes, "text/x-typst", &config).await {
        Ok(extraction) => extraction,
        Err(_) => {
            println!("Skipping test: Typst extractor may not be available");
            return;
        }
    };
    let text = &extraction.content;

    assert!(!text.is_empty(), "Document should extract content");

    // Pass 1: lenient presence check (markup form or plain heading text).
    for level in 1..=6usize {
        let marked = format!("{} Level {}", "=".repeat(level), level);
        let plain = format!("Level {} Heading", level);
        assert!(
            text.contains(marked.as_str()) || text.contains(plain.as_str()),
            "Should extract level {} heading",
            level
        );
    }

    // Pass 2: strict check, the markup form must appear exactly once per level.
    for level in 1..=6usize {
        let marked = format!("{} Level {}", "=".repeat(level), level);
        assert_eq!(
            text.matches(marked.as_str()).count(),
            1,
            "Should extract exactly one level {} heading",
            level
        );
    }

    println!(
        "✓ headings.typ: Successfully extracted {} characters with heading structure",
        text.len()
    );
}

/// Test metadata.typ - metadata lookups on an `application/x-typst` extraction
///
/// Checks, each only when the value is present:
/// - `metadata.additional["title"]` renders to text containing "Metadata" or "Example"
/// - `metadata.additional["author"]` renders to text containing "John" or "Doe"
/// - `metadata.keywords` is not empty
///
/// The extracted content must always be non-empty.
///
/// NOTE(review): because every metadata check sits behind `if let Some(..)`, a result
/// that carries none of these values still passes - confirm whether that is intended.
///
/// Returns early (without failing) when the fixture cannot be read or when extraction
/// returns an error.
#[tokio::test]
async fn test_metadata_extraction() {
    let config = ExtractionConfig::default();

    let bytes = match fs::read(typst_fixture("metadata.typ")) {
        Ok(bytes) => bytes,
        Err(e) => {
            eprintln!("Warning: Could not read metadata.typ: {}. Skipping test.", e);
            return;
        }
    };

    let extraction = match extract_bytes(&bytes, "application/x-typst", &config).await {
        Ok(extraction) => extraction,
        Err(_) => {
            println!("Skipping test: Typst extractor may not be available");
            return;
        }
    };
    let metadata = &extraction.metadata;

    if let Some(title) = metadata.additional.get("title") {
        let rendered = title.to_string();
        assert!(
            rendered.contains("Metadata") || rendered.contains("Example"),
            "Title should contain expected text"
        );
    }

    if let Some(author) = metadata.additional.get("author") {
        let rendered = author.to_string();
        assert!(
            rendered.contains("John") || rendered.contains("Doe"),
            "Author should contain expected text"
        );
    }

    if let Some(keywords) = metadata.keywords.as_ref() {
        assert!(!keywords.is_empty(), "Keywords should be present");
    }

    assert!(!extraction.content.is_empty(), "Document should extract content");

    println!(
        "✓ metadata.typ: Successfully extracted metadata and {} characters of content",
        extraction.content.len()
    );
}

/// Test advanced.typ - broad smoke test over a feature-rich fixture
///
/// Requires a metadata title and non-empty content, then looks for at least one
/// marker from each group in the extracted text:
/// - math: `$`, "equation", "math"
/// - section headings: "Mathematical", "Formatting", "Features"
/// - code: "python", "def", "fibonacci", or a triple-backtick fence
/// - table: "Name", "Alice", "Table"
/// - link: "example", "link", "http"
///
/// It also requires at least three `=` characters anywhere in the content as a loose
/// proxy for nested headings.
///
/// Returns early (without failing) when the fixture cannot be read or when extraction
/// returns an error.
#[tokio::test]
async fn test_advanced_typst_document_extraction() {
    let config = ExtractionConfig::default();

    let bytes = match fs::read(typst_fixture("advanced.typ")) {
        Ok(bytes) => bytes,
        Err(e) => {
            eprintln!("Warning: Could not read advanced.typ: {}. Skipping test.", e);
            return;
        }
    };

    let extraction = match extract_bytes(&bytes, "text/x-typst", &config).await {
        Ok(extraction) => extraction,
        Err(_) => {
            println!("Skipping test: Typst extractor may not be available");
            return;
        }
    };
    let text = &extraction.content;

    // True when at least one of the given needles occurs in the extracted text.
    let has_any = |needles: &[&str]| needles.iter().any(|needle| text.contains(*needle));

    assert!(extraction.metadata.title.is_some(), "Title should be extracted");

    assert!(!text.is_empty(), "Advanced document should extract content");

    assert!(
        has_any(&["$", "equation", "math"]),
        "Should extract or preserve mathematical notation"
    );

    assert!(
        has_any(&["Mathematical", "Formatting", "Features"]),
        "Should extract section headings"
    );

    assert!(
        has_any(&["python", "def", "fibonacci", "```"]),
        "Should extract code block content"
    );

    let equals_signs = text.matches("=").count();
    assert!(equals_signs >= 3, "Should preserve nested heading hierarchy");

    assert!(
        has_any(&["Name", "Alice", "Table"]),
        "Should extract table content"
    );

    assert!(
        has_any(&["example", "link", "http"]),
        "Should extract link content"
    );

    println!(
        "✓ advanced.typ: Successfully extracted {} characters with complex formatting",
        text.len()
    );
}

/// Test typst-reader.typ - extraction of the Pandoc-derived fixture
///
/// Passes when the content extracted as `application/x-typst` is non-empty and
/// contains either an `=` character or the word "Fibonacci".
///
/// Returns early (without failing) when the fixture cannot be read or when extraction
/// returns an error.
#[tokio::test]
async fn test_typst_reader_extraction() {
    let config = ExtractionConfig::default();

    let bytes = match fs::read(typst_fixture("typst-reader.typ")) {
        Ok(bytes) => bytes,
        Err(e) => {
            eprintln!("Warning: Could not read typst-reader.typ: {}. Skipping test.", e);
            return;
        }
    };

    let extraction = match extract_bytes(&bytes, "application/x-typst", &config).await {
        Ok(extraction) => extraction,
        Err(_) => {
            println!("Skipping test: Typst extractor may not be available");
            return;
        }
    };
    let text = &extraction.content;

    assert!(!text.is_empty(), "Should extract content from Pandoc test file");

    assert!(
        ["=", "Fibonacci"].iter().any(|needle| text.contains(*needle)),
        "Should extract heading or content from test file"
    );

    println!(
        "✓ typst-reader.typ: Successfully extracted {} characters",
        text.len()
    );
}

/// Test undergradmath.typ - extraction of the math-heavy Pandoc fixture
///
/// Requires non-empty content. When `metadata.additional["title"]` exists, its
/// rendered text must not be empty. Finally the content must contain `=` or "Typst",
/// or be longer than 100 bytes.
///
/// Returns early (without failing) when the fixture cannot be read or when extraction
/// returns an error.
#[tokio::test]
async fn test_undergradmath_extraction() {
    let config = ExtractionConfig::default();

    let bytes = match fs::read(typst_fixture("undergradmath.typ")) {
        Ok(bytes) => bytes,
        Err(e) => {
            eprintln!("Warning: Could not read undergradmath.typ: {}. Skipping test.", e);
            return;
        }
    };

    let extraction = match extract_bytes(&bytes, "text/x-typst", &config).await {
        Ok(extraction) => extraction,
        Err(_) => {
            println!("Skipping test: Typst extractor may not be available");
            return;
        }
    };
    let text = &extraction.content;

    assert!(
        !text.is_empty(),
        "Should extract content from complex math document"
    );

    if let Some(title) = extraction.metadata.additional.get("title") {
        let rendered = title.to_string();
        assert!(!rendered.is_empty(), "Title should be extracted");
    }

    let has_structure = ["=", "Typst"].iter().any(|needle| text.contains(*needle));
    assert!(
        has_structure || text.len() > 100,
        "Should extract document structure or content"
    );

    println!(
        "✓ undergradmath.typ: Successfully extracted {} characters from math document",
        text.len()
    );
}

/// Test extraction of simple.typ under several MIME type labels
///
/// Runs the same bytes through `extract_bytes` as `application/x-typst`,
/// `text/x-typst` and `text/plain`. Every successful extraction must yield non-empty
/// content; a MIME type whose extraction returns an error is skipped without failing.
///
/// Returns early (without failing) when the fixture cannot be read.
#[tokio::test]
async fn test_typst_mime_type_variants() {
    let config = ExtractionConfig::default();

    let bytes = match fs::read(typst_fixture("simple.typ")) {
        Ok(bytes) => bytes,
        Err(e) => {
            eprintln!("Warning: Could not read simple.typ: {}. Skipping test.", e);
            return;
        }
    };

    for mime_type in ["application/x-typst", "text/x-typst", "text/plain"] {
        // Errors are tolerated here: only successful extractions are inspected.
        let extraction = match extract_bytes(&bytes, mime_type, &config).await {
            Ok(extraction) => extraction,
            Err(_) => continue,
        };

        assert!(
            !extraction.content.is_empty(),
            "Should extract content with MIME type: {}",
            mime_type
        );
        println!(
            "✓ MIME type '{}': Successfully extracted {} characters",
            mime_type,
            extraction.content.len()
        );
    }
}

/// Test inline formatting on simple.typ
///
/// For bold, italic and code, the extracted content must contain either the markup
/// character (`*`, `_`, or a backtick) or the corresponding word ("bold", "italic",
/// "code").
///
/// Returns early (without failing) when the fixture cannot be read or when extraction
/// returns an error.
#[tokio::test]
async fn test_formatting_preservation() {
    let config = ExtractionConfig::default();

    let bytes = match fs::read(typst_fixture("simple.typ")) {
        Ok(bytes) => bytes,
        Err(e) => {
            eprintln!("Warning: Could not read simple.typ: {}. Skipping test.", e);
            return;
        }
    };

    let extraction = match extract_bytes(&bytes, "text/x-typst", &config).await {
        Ok(extraction) => extraction,
        Err(_) => {
            println!("Skipping test: Typst extractor may not be available");
            return;
        }
    };
    let text = &extraction.content;

    // (markup character, word naming the style) - either one satisfies the check.
    for (marker, style) in [("*", "bold"), ("_", "italic"), ("`", "code")] {
        assert!(
            text.contains(marker) || text.contains(style),
            "Should preserve {} formatting or text",
            style
        );
    }

    println!("✓ Formatting preservation: All markers/content found in extracted text");
}

/// Test extraction of undergradmath.typ as a large input
///
/// The only requirement is that extraction yields non-empty content.
///
/// Returns early (without failing) when the fixture cannot be read or when extraction
/// returns an error.
#[tokio::test]
async fn test_large_document_extraction() {
    let config = ExtractionConfig::default();

    let bytes = match fs::read(typst_fixture("undergradmath.typ")) {
        Ok(bytes) => bytes,
        Err(e) => {
            eprintln!("Warning: Could not read undergradmath.typ: {}. Skipping test.", e);
            return;
        }
    };

    let extraction = match extract_bytes(&bytes, "text/x-typst", &config).await {
        Ok(extraction) => extraction,
        Err(_) => {
            println!("Skipping test: Typst extractor may not be available");
            return;
        }
    };
    let text = &extraction.content;

    assert!(!text.is_empty(), "Should extract content from large document");

    println!(
        "✓ Large document: Extracted {} bytes of content from source file",
        text.len()
    );
}

/// Test extraction of an empty byte slice
///
/// Both outcomes are accepted: a successful extraction and an error each just produce
/// a log line. The test makes no assertions; it only fails if `extract_bytes` panics.
#[tokio::test]
async fn test_empty_content_handling() {
    let config = ExtractionConfig::default();

    let no_bytes = b"";
    let report = match extract_bytes(no_bytes, "text/x-typst", &config).await {
        Ok(extraction) => format!(
            "✓ Empty content: Handled gracefully, extracted {} bytes",
            extraction.content.len()
        ),
        Err(e) => format!("✓ Empty content: Resulted in expected error: {}", e),
    };

    println!("{}", report);
}

/// Test the priority reported by the Typst extractor
///
/// `TypstExtractor::priority()` must return 50.
#[tokio::test]
async fn test_typst_extractor_priority() {
    use kreuzberg::extractors::TypstExtractor;
    use kreuzberg::plugins::DocumentExtractor;

    let reported = TypstExtractor.priority();

    assert_eq!(reported, 50, "Typst extractor should have priority 50");
    println!("✓ Typst extractor priority: {}", reported);
}

/// Test the MIME types advertised by the Typst extractor
///
/// The list returned by `TypstExtractor::supported_mime_types()` must include both
/// `application/x-typst` and `text/x-typst`.
#[tokio::test]
async fn test_supported_mime_types() {
    use kreuzberg::extractors::TypstExtractor;
    use kreuzberg::plugins::DocumentExtractor;

    let advertised = TypstExtractor.supported_mime_types();

    for expected in ["application/x-typst", "text/x-typst"] {
        assert!(advertised.contains(&expected), "Should support {}", expected);
    }

    println!("✓ Supported MIME types: {:?}", advertised);
}
Nomad changes 2026-06-01 23:40:55 +02:00			`//! Comprehensive TDD test suite for Typst document extraction.`
			`//!`
			`//! This test suite validates Typst document extraction against expected outputs.`
			`//! The tests verify:`
			`//! - Document metadata extraction (title, author, date, keywords)`
			`//! - Heading hierarchy parsing (=, ==, ===, etc.)`
			`//! - Inline formatting (bold, italic, code)`
			`//! - Table extraction and parsing`
			`//! - List handling (ordered and unordered)`
			`//! - Link extraction`
			`//! - Mathematical notation preservation`
			`//!`
			`//! Each test document is extracted and validated for correct content extraction.`

			`#![cfg(feature = "office")]`

			`use kreuzberg::core::config::ExtractionConfig;`
			`use kreuzberg::core::extractor::extract_bytes;`
			`use std::{fs, path::PathBuf};`

			`fn typst_fixture(name: &str) -> PathBuf {`
			`PathBuf::from(env!("CARGO_MANIFEST_DIR"))`
			`.join("../../test_documents/typst")`
			`.join(name)`
			`}`

			`/// Test simple.typ - Basic Typst document with fundamental formatting`
			`///`
			`/// Document contains:`
			`/// - Document metadata: title, author, date`
			`/// - Level 1 heading: "Introduction"`
			`/// - Level 2 headings: "Subsection", "Features", "Lists", "Code", "Tables", "Links", "Conclusion"`
			/// - Inline formatting: bold, _italic_, `inline code`
			`/// - Unordered list with 3 items`
			`/// - Code snippet`
			`/// - 2x2 table with headers`
			`/// - Link to Typst website`
			`///`
			`/// Expected: Document should extract text, preserve headings, metadata, and formatting markers`
			`#[tokio::test]`
			`async fn test_simple_typst_document_extraction() {`
			`let config = ExtractionConfig::default();`

			`let doc_path = typst_fixture("simple.typ");`
			`let content = match fs::read(doc_path) {`
			`Ok(c) => c,`
			`Err(e) => {`
			`eprintln!("Warning: Could not read simple.typ: {}. Skipping test.", e);`
			`return;`
			`}`
			`};`

			`let result = extract_bytes(&content, "text/x-typst", &config).await;`
			`if result.is_err() {`
			`println!("Skipping test: Typst extractor may not be available");`
			`return;`
			`}`

			`let extraction = result.expect("Operation failed");`

			`assert_eq!(extraction.mime_type, "text/x-typst", "MIME type should be preserved");`

			`assert!(!extraction.content.is_empty(), "Extracted content should not be empty");`

			`assert!(`
			`extraction.metadata.title.is_some(),`
			`"Document title should be extracted from #set document()"`
			`);`

			`assert!(`
			`extraction.metadata.authors.is_some(),`
			`"Document author should be extracted"`
			`);`

			`assert!(`
			`extraction.content.contains("Introduction"),`
			`"Should extract 'Introduction' heading"`
			`);`
			`assert!(`
			`extraction.content.contains("Features"),`
			`"Should extract 'Features' heading"`
			`);`
			`assert!(`
			`extraction.content.contains("Conclusion"),`
			`"Should extract 'Conclusion' heading"`
			`);`

			`let intro_count = extraction.content.matches("= Introduction").count();`
			`let subsection_count = extraction.content.matches("== Subsection").count();`
			`let features_count = extraction.content.matches("= Features").count();`
			`let lists_count = extraction.content.matches("== Lists").count();`
			`let code_count = extraction.content.matches("== Code").count();`
			`let tables_count = extraction.content.matches("== Tables").count();`
			`let links_count = extraction.content.matches("== Links").count();`
			`let conclusion_count = extraction.content.matches("= Conclusion").count();`

			`assert_eq!(intro_count, 1, "Should extract 'Introduction' (level 1)");`
			`assert_eq!(subsection_count, 1, "Should extract 'Subsection' (level 2)");`
			`assert_eq!(features_count, 1, "Should extract 'Features' (level 1)");`
			`assert_eq!(lists_count, 1, "Should extract 'Lists' (level 2)");`
			`assert_eq!(code_count, 1, "Should extract 'Code' (level 2)");`
			`assert_eq!(tables_count, 1, "Should extract 'Tables' (level 2)");`
			`assert_eq!(links_count, 1, "Should extract 'Links' (level 2)");`
			`assert_eq!(conclusion_count, 1, "Should extract 'Conclusion' (level 1)");`

			`assert!(`
			`extraction.content.contains("*") \|\| extraction.content.contains("bold"),`
			`"Should preserve bold formatting or text"`
			`);`

			`assert!(`
			`extraction.content.contains("-") \|\| extraction.content.contains("First") \|\| extraction.content.contains("item"),`
			`"Should extract list content"`
			`);`

			`println!(`
			`"✓ simple.typ: Successfully extracted {} characters with all 8 headings",`
			`extraction.content.len()`
			`);`
			`}`

			`/// Test minimal.typ - Minimal Typst document`
			`///`
			`/// Document contains:`
			`/// - Single level 1 heading: "Hello World"`
			`/// - Simple text content`
			`///`
			`/// Expected: Basic heading and content extraction`
			`#[tokio::test]`
			`async fn test_minimal_typst_document_extraction() {`
			`let config = ExtractionConfig::default();`

			`let doc_path = typst_fixture("minimal.typ");`
			`let content = match fs::read(doc_path) {`
			`Ok(c) => c,`
			`Err(e) => {`
			`eprintln!("Warning: Could not read minimal.typ: {}. Skipping test.", e);`
			`return;`
			`}`
			`};`

			`let result = extract_bytes(&content, "application/x-typst", &config).await;`
			`if result.is_err() {`
			`println!("Skipping test: Typst extractor may not be available");`
			`return;`
			`}`

			`let extraction = result.expect("Operation failed");`

			`assert!(`
			`!extraction.content.is_empty(),`
			`"Minimal document should extract content"`
			`);`

			`assert!(`
			`extraction.content.contains("Hello") \|\| extraction.content.contains("World"),`
			`"Should extract heading content"`
			`);`

			`println!(`
			`"✓ minimal.typ: Successfully extracted {} characters",`
			`extraction.content.len()`
			`);`
			`}`

			`/// Test headings.typ - Document focusing on heading hierarchy`
			`///`
			`/// Document contains:`
			`/// - 6 heading levels (=, ==, ===, ====, =====, ======)`
			`/// - Content under each heading level`
			`///`
			`/// Expected: Heading structure should be preserved with level information`
			`#[tokio::test]`
			`async fn test_heading_hierarchy_extraction() {`
			`let config = ExtractionConfig::default();`

			`let doc_path = typst_fixture("headings.typ");`
			`let content = match fs::read(doc_path) {`
			`Ok(c) => c,`
			`Err(e) => {`
			`eprintln!("Warning: Could not read headings.typ: {}. Skipping test.", e);`
			`return;`
			`}`
			`};`

			`let result = extract_bytes(&content, "text/x-typst", &config).await;`
			`if result.is_err() {`
			`println!("Skipping test: Typst extractor may not be available");`
			`return;`
			`}`

			`let extraction = result.expect("Operation failed");`

			`assert!(!extraction.content.is_empty(), "Document should extract content");`

			`assert!(`
			`extraction.content.contains("= Level 1") \|\| extraction.content.contains("Level 1 Heading"),`
			`"Should extract level 1 heading"`
			`);`

			`assert!(`
			`extraction.content.contains("== Level 2") \|\| extraction.content.contains("Level 2 Heading"),`
			`"Should extract level 2 heading"`
			`);`

			`assert!(`
			`extraction.content.contains("=== Level 3") \|\| extraction.content.contains("Level 3 Heading"),`
			`"Should extract level 3 heading"`
			`);`

			`assert!(`
			`extraction.content.contains("==== Level 4") \|\| extraction.content.contains("Level 4 Heading"),`
			`"Should extract level 4 heading"`
			`);`

			`assert!(`
			`extraction.content.contains("===== Level 5") \|\| extraction.content.contains("Level 5 Heading"),`
			`"Should extract level 5 heading"`
			`);`

			`assert!(`
			`extraction.content.contains("====== Level 6") \|\| extraction.content.contains("Level 6 Heading"),`
			`"Should extract level 6 heading"`
			`);`

			`let level_1_count = extraction.content.matches("= Level 1").count();`
			`let level_2_count = extraction.content.matches("== Level 2").count();`
			`let level_3_count = extraction.content.matches("=== Level 3").count();`
			`let level_4_count = extraction.content.matches("==== Level 4").count();`
			`let level_5_count = extraction.content.matches("===== Level 5").count();`
			`let level_6_count = extraction.content.matches("====== Level 6").count();`

			`assert_eq!(level_1_count, 1, "Should extract exactly one level 1 heading");`
			`assert_eq!(level_2_count, 1, "Should extract exactly one level 2 heading");`
			`assert_eq!(level_3_count, 1, "Should extract exactly one level 3 heading");`
			`assert_eq!(level_4_count, 1, "Should extract exactly one level 4 heading");`
			`assert_eq!(level_5_count, 1, "Should extract exactly one level 5 heading");`
			`assert_eq!(level_6_count, 1, "Should extract exactly one level 6 heading");`

			`println!(`
			`"✓ headings.typ: Successfully extracted {} characters with heading structure",`
			`extraction.content.len()`
			`);`
			`}`

			`/// Test metadata.typ - Document with comprehensive metadata`
			`///`
			`/// Document contains:`
			`/// - #set document() with: title, author, subject, keywords`
			`/// - Content sections`
			`///`
			`/// Expected: All metadata fields should be extracted correctly`
			`#[tokio::test]`
			`async fn test_metadata_extraction() {`
			`let config = ExtractionConfig::default();`

			`let doc_path = typst_fixture("metadata.typ");`
			`let content = match fs::read(doc_path) {`
			`Ok(c) => c,`
			`Err(e) => {`
			`eprintln!("Warning: Could not read metadata.typ: {}. Skipping test.", e);`
			`return;`
			`}`
			`};`

			`let result = extract_bytes(&content, "application/x-typst", &config).await;`
			`if result.is_err() {`
			`println!("Skipping test: Typst extractor may not be available");`
			`return;`
			`}`

			`let extraction = result.expect("Operation failed");`

			`if let Some(title) = extraction.metadata.additional.get("title") {`
			`assert!(`
			`title.to_string().contains("Metadata") \|\| title.to_string().contains("Example"),`
			`"Title should contain expected text"`
			`);`
			`}`

			`if let Some(author) = extraction.metadata.additional.get("author") {`
			`assert!(`
			`author.to_string().contains("John") \|\| author.to_string().contains("Doe"),`
			`"Author should contain expected text"`
			`);`
			`}`

			`if let Some(keywords) = &extraction.metadata.keywords {`
			`assert!(!keywords.is_empty(), "Keywords should be present");`
			`}`

			`assert!(!extraction.content.is_empty(), "Document should extract content");`

			`println!(`
			`"✓ metadata.typ: Successfully extracted metadata and {} characters of content",`
			`extraction.content.len()`
			`);`
			`}`

			`/// Test advanced.typ - Complex Typst document with multiple features`
			`///`
			`/// Document contains:`
			`/// - Metadata: title, author, keywords, date`
			`/// - Heading numbering configuration`
			`/// - Mathematical notation (inline and display)`
			`/// - Nested heading levels (level 1, 2, 3, 4)`
			`/// - Code blocks (Python example)`
			`/// - Complex tables with 3 columns and 4 rows`
			`/// - Multiple paragraph sections`
			`/// - Links with text`
			`/// - Multiple formatting combinations`
			`///`
			`/// Expected: Comprehensive extraction of all document elements`
			`#[tokio::test]`
			`async fn test_advanced_typst_document_extraction() {`
			`let config = ExtractionConfig::default();`

			`let doc_path = typst_fixture("advanced.typ");`
			`let content = match fs::read(doc_path) {`
			`Ok(c) => c,`
			`Err(e) => {`
			`eprintln!("Warning: Could not read advanced.typ: {}. Skipping test.", e);`
			`return;`
			`}`
			`};`

			`let result = extract_bytes(&content, "text/x-typst", &config).await;`
			`if result.is_err() {`
			`println!("Skipping test: Typst extractor may not be available");`
			`return;`
			`}`

			`let extraction = result.expect("Operation failed");`

			`assert!(extraction.metadata.title.is_some(), "Title should be extracted");`

			`assert!(`
			`!extraction.content.is_empty(),`
			`"Advanced document should extract content"`
			`);`

			`assert!(`
			`extraction.content.contains("$")`
			`\|\| extraction.content.contains("equation")`
			`\|\| extraction.content.contains("math"),`
			`"Should extract or preserve mathematical notation"`
			`);`

			`assert!(`
			`extraction.content.contains("Mathematical")`
			`\|\| extraction.content.contains("Formatting")`
			`\|\| extraction.content.contains("Features"),`
			`"Should extract section headings"`
			`);`

			`assert!(`
			`extraction.content.contains("python")`
			`\|\| extraction.content.contains("def")`
			`\|\| extraction.content.contains("fibonacci")`
			\|\| extraction.content.contains("```"),
			`"Should extract code block content"`
			`);`

			`let level_count = extraction.content.matches("=").count();`
			`assert!(level_count >= 3, "Should preserve nested heading hierarchy");`

			`assert!(`
			`extraction.content.contains("Name")`
			`\|\| extraction.content.contains("Alice")`
			`\|\| extraction.content.contains("Table"),`
			`"Should extract table content"`
			`);`

			`assert!(`
			`extraction.content.contains("example")`
			`\|\| extraction.content.contains("link")`
			`\|\| extraction.content.contains("http"),`
			`"Should extract link content"`
			`);`

			`println!(`
			`"✓ advanced.typ: Successfully extracted {} characters with complex formatting",`
			`extraction.content.len()`
			`);`
			`}`

			`/// Test typst-reader.typ - Pandoc test file`
			`///`
			`/// Document from Pandoc test suite demonstrating Typst reader functionality`
			`///`
			`/// Expected: Proper extraction of Typst-specific syntax`
			`#[tokio::test]`
			`async fn test_typst_reader_extraction() {`
			`let config = ExtractionConfig::default();`

			`let doc_path = typst_fixture("typst-reader.typ");`
			`let content = match fs::read(doc_path) {`
			`Ok(c) => c,`
			`Err(e) => {`
			`eprintln!("Warning: Could not read typst-reader.typ: {}. Skipping test.", e);`
			`return;`
			`}`
			`};`

			`let result = extract_bytes(&content, "application/x-typst", &config).await;`
			`if result.is_err() {`
			`println!("Skipping test: Typst extractor may not be available");`
			`return;`
			`}`

			`let extraction = result.expect("Operation failed");`

			`assert!(`
			`!extraction.content.is_empty(),`
			`"Should extract content from Pandoc test file"`
			`);`

			`assert!(`
			`extraction.content.contains("=") \|\| extraction.content.contains("Fibonacci"),`
			`"Should extract heading or content from test file"`
			`);`

			`println!(`
			`"✓ typst-reader.typ: Successfully extracted {} characters",`
			`extraction.content.len()`
			`);`
			`}`

			`/// Test undergradmath.typ - Pandoc test file with complex math`
			`///`
			`/// Document from Pandoc test suite with extensive mathematical notation`
			`/// and complex formatting`
			`///`
			`/// Expected: Handling of complex Typst syntax with metadata and content`
			`#[tokio::test]`
			`async fn test_undergradmath_extraction() {`
			`let config = ExtractionConfig::default();`

			`let doc_path = typst_fixture("undergradmath.typ");`
			`let content = match fs::read(doc_path) {`
			`Ok(c) => c,`
			`Err(e) => {`
			`eprintln!("Warning: Could not read undergradmath.typ: {}. Skipping test.", e);`
			`return;`
			`}`
			`};`

			`let result = extract_bytes(&content, "text/x-typst", &config).await;`
			`if result.is_err() {`
			`println!("Skipping test: Typst extractor may not be available");`
			`return;`
			`}`

			`let extraction = result.expect("Operation failed");`

			`assert!(`
			`!extraction.content.is_empty(),`
			`"Should extract content from complex math document"`
			`);`

			`if let Some(title) = extraction.metadata.additional.get("title") {`
			`assert!(!title.to_string().is_empty(), "Title should be extracted");`
			`}`

			`assert!(`
			`extraction.content.contains("=") \|\| extraction.content.contains("Typst") \|\| extraction.content.len() > 100,`
			`"Should extract document structure or content"`
			`);`

			`println!(`
			`"✓ undergradmath.typ: Successfully extracted {} characters from math document",`
			`extraction.content.len()`
			`);`
			`}`

			`/// Test MIME type detection and fallback`
			`///`
			`/// Verifies that Typst documents can be extracted with different MIME type specifications`
			`#[tokio::test]`
			`async fn test_typst_mime_type_variants() {`
			`let config = ExtractionConfig::default();`

			`let doc_path = typst_fixture("simple.typ");`
			`let content = match fs::read(doc_path) {`
			`Ok(c) => c,`
			`Err(e) => {`
			`eprintln!("Warning: Could not read simple.typ: {}. Skipping test.", e);`
			`return;`
			`}`
			`};`

			`let mime_types = vec!["application/x-typst", "text/x-typst", "text/plain"];`

			`for mime_type in mime_types {`
			`let result = extract_bytes(&content, mime_type, &config).await;`

			`if let Ok(extraction) = result {`
			`assert!(`
			`!extraction.content.is_empty(),`
			`"Should extract content with MIME type: {}",`
			`mime_type`
			`);`
			`println!(`
			`"✓ MIME type '{}': Successfully extracted {} characters",`
			`mime_type,`
			`extraction.content.len()`
			`);`
			`}`
			`}`
			`}`

			`/// Test formatting preservation`
			`///`
			`/// Validates that inline formatting markers are preserved in extracted content`
			`#[tokio::test]`
			`async fn test_formatting_preservation() {`
			`let config = ExtractionConfig::default();`

			`let doc_path = typst_fixture("simple.typ");`
			`let content = match fs::read(doc_path) {`
			`Ok(c) => c,`
			`Err(e) => {`
			`eprintln!("Warning: Could not read simple.typ: {}. Skipping test.", e);`
			`return;`
			`}`
			`};`

			`let result = extract_bytes(&content, "text/x-typst", &config).await;`
			`if result.is_err() {`
			`println!("Skipping test: Typst extractor may not be available");`
			`return;`
			`}`

			`let extraction = result.expect("Operation failed");`

			`assert!(`
			`extraction.content.contains("*") \|\| extraction.content.contains("bold"),`
			`"Should preserve bold formatting or text"`
			`);`

			`assert!(`
			`extraction.content.contains("_") \|\| extraction.content.contains("italic"),`
			`"Should preserve italic formatting or text"`
			`);`

			`assert!(`
			extraction.content.contains("`") \|\| extraction.content.contains("code"),
			`"Should preserve code formatting or text"`
			`);`

			`println!("✓ Formatting preservation: All markers/content found in extracted text");`
			`}`

			`/// Test large document handling`
			`///`
			`/// Validates extraction of the large undergradmath document`
			`#[tokio::test]`
			`async fn test_large_document_extraction() {`
			`let config = ExtractionConfig::default();`

			`let doc_path = typst_fixture("undergradmath.typ");`
			`let content = match fs::read(doc_path) {`
			`Ok(c) => c,`
			`Err(e) => {`
			`eprintln!("Warning: Could not read undergradmath.typ: {}. Skipping test.", e);`
			`return;`
			`}`
			`};`

			`let result = extract_bytes(&content, "text/x-typst", &config).await;`
			`if result.is_err() {`
			`println!("Skipping test: Typst extractor may not be available");`
			`return;`
			`}`

			`let extraction = result.expect("Operation failed");`

			`assert!(`
			`!extraction.content.is_empty(),`
			`"Should extract content from large document"`
			`);`

			`println!(`
			`"✓ Large document: Extracted {} bytes of content from source file",`
			`extraction.content.len()`
			`);`
			`}`

			`/// Test empty/whitespace handling`
			`///`
			`/// Validates graceful handling of edge cases`
			`#[tokio::test]`
			`async fn test_empty_content_handling() {`
			`let config = ExtractionConfig::default();`

			`let empty_content = b"";`
			`let result = extract_bytes(empty_content, "text/x-typst", &config).await;`

			`match result {`
			`Ok(extraction) => {`
			`println!(`
			`"✓ Empty content: Handled gracefully, extracted {} bytes",`
			`extraction.content.len()`
			`);`
			`}`
			`Err(e) => {`
			`println!("✓ Empty content: Resulted in expected error: {}", e);`
			`}`
			`}`
			`}`

			/// Test extractor priority
			///
			/// Asserts that `TypstExtractor::priority()` reports the value 50.
			#[tokio::test]
			async fn test_typst_extractor_priority() {
			    use kreuzberg::extractors::TypstExtractor;
			    use kreuzberg::plugins::DocumentExtractor;

			    let reported = TypstExtractor.priority();

			    assert_eq!(reported, 50, "Typst extractor should have priority 50");
			    println!("✓ Typst extractor priority: {}", reported);
			}

			`/// Test supported MIME types`
			`///`
			`/// Validates that extractor claims to support Typst MIME types`
			`#[tokio::test]`
			`async fn test_supported_mime_types() {`
			`use kreuzberg::extractors::TypstExtractor;`
			`use kreuzberg::plugins::DocumentExtractor;`

			`let extractor = TypstExtractor;`
			`let mime_types = extractor.supported_mime_types();`

			`assert!(`
			`mime_types.contains(&"application/x-typst"),`
			`"Should support application/x-typst"`
			`);`
			`assert!(mime_types.contains(&"text/x-typst"), "Should support text/x-typst");`

			`println!("✓ Supported MIME types: {:?}", mime_types);`
			`}`