// fil/crates/kreuzberg/tests/fictionbook_extractor_tests.rs

#![cfg(feature = "office")]

use kreuzberg::core::config::{ExtractionConfig, OutputFormat};
use std::path::PathBuf;

/// Builds the absolute path of a FictionBook test fixture.
///
/// The result is `<root>/test_documents/fictionbook/<filename>`, where `<root>`
/// is the directory two levels above this crate's `CARGO_MANIFEST_DIR` (read at
/// compile time). Nothing is checked on disk here; the path is only assembled.
///
/// # Panics
///
/// Panics if `CARGO_MANIFEST_DIR` has fewer than two parent directories.
fn test_file_path(filename: &str) -> PathBuf {
    // `ancestors()` yields the path itself first, so index 2 is the grandparent,
    // i.e. the same directory as `.parent().parent()`.
    PathBuf::from(env!("CARGO_MANIFEST_DIR"))
        .ancestors()
        .nth(2)
        .expect("CARGO_MANIFEST_DIR must be nested at least two directories deep")
        .join("test_documents")
        .join("fictionbook")
        .join(filename)
}

/// Extracts `meta.fb2` with the default configuration and checks that the
/// resulting content includes the text "Book title".
#[tokio::test]
async fn test_fictionbook_extract_metadata_title() {
    let fixture = test_file_path("meta.fb2");
    let config = ExtractionConfig::default();

    let extraction = kreuzberg::extract_file(&fixture, None, &config)
        .await
        .expect("Failed to extract FB2 file");

    let has_title = extraction.content.contains("Book title");
    assert!(has_title, "Book title should be extracted from FB2 content");
}

/// Extracts `meta.fb2` with the default configuration and checks that the
/// `subject` metadata field is left unset.
///
/// NOTE(review): the test name mentions genre, yet the only assertion is that
/// `metadata.subject` is `None` — confirm this is the intended expectation.
#[tokio::test]
async fn test_fictionbook_extract_metadata_genre() {
    let path = test_file_path("meta.fb2");
    let result = kreuzberg::extract_file(&path, None, &ExtractionConfig::default())
        .await
        .expect("Failed to extract FB2 file");

    // Carry a failure message, like every other assertion in this file.
    assert!(
        result.metadata.subject.is_none(),
        "metadata.subject should not be populated for meta.fb2"
    );
}

/// Extracts `titles.fb2` with the default configuration and checks that the
/// content includes both "Simple title" and "Emphasized".
#[tokio::test]
async fn test_fictionbook_extract_content_sections() {
    let fixture = test_file_path("titles.fb2");
    let extraction = kreuzberg::extract_file(&fixture, None, &ExtractionConfig::default())
        .await
        .expect("Failed to extract FB2 file");

    // Checked in order; the first missing snippet fails with its own message.
    let expectations = [
        ("Simple title", "Section titles should be extracted"),
        ("Emphasized", "Section with emphasis should be extracted"),
    ];
    for (needle, message) in expectations {
        assert!(extraction.content.contains(needle), "{}", message);
    }
}

/// Extracts `basic.fb2` with the default configuration and checks that the
/// content includes "Top-level title", "Section" and "Subsection".
#[tokio::test]
async fn test_fictionbook_extract_section_hierarchy() {
    let config = ExtractionConfig::default();
    let fixture = test_file_path("basic.fb2");
    let extraction = kreuzberg::extract_file(&fixture, None, &config)
        .await
        .expect("Failed to extract FB2 file");

    // Small predicate so each assertion reads as "content has <snippet>".
    let has = |needle: &str| extraction.content.contains(needle);

    assert!(has("Top-level title"), "Top-level section should be extracted");
    assert!(has("Section"), "Nested section should be extracted");
    assert!(has("Subsection"), "Nested subsection should be extracted");
}

/// Extracts `emphasis.fb2` with the default configuration and checks, case
/// insensitively, that the content mentions "plain", "strong", "emphasis" and
/// "strikethrough".
#[tokio::test]
async fn test_fictionbook_extract_inline_markup() {
    let fixture = test_file_path("emphasis.fb2");
    let extraction = kreuzberg::extract_file(&fixture, None, &ExtractionConfig::default())
        .await
        .expect("Failed to extract FB2 file");

    // Lower-case once so the snippet checks below ignore capitalisation.
    let lowered = extraction.content.to_lowercase();

    let expectations = [
        ("plain", "Plain text should be extracted"),
        ("strong", "Strong emphasis should be extracted"),
        ("emphasis", "Emphasis should be extracted"),
        ("strikethrough", "Strikethrough should be extracted"),
    ];
    for (needle, message) in expectations {
        assert!(lowered.contains(needle), "{}", message);
    }
}

/// Extracts `basic.fb2` with the default configuration and checks that the
/// content includes the word "emphasized".
#[tokio::test]
async fn test_fictionbook_extract_emphasis() {
    let config = ExtractionConfig::default();
    let extraction = kreuzberg::extract_file(&test_file_path("basic.fb2"), None, &config)
        .await
        .expect("Failed to extract FB2 file");

    let found = extraction.content.contains("emphasized");
    assert!(found, "Emphasized text should be extracted");
}

/// Extracts `basic.fb2` with the default configuration and checks that the
/// content includes the word "strong".
#[tokio::test]
async fn test_fictionbook_extract_strong() {
    let fixture = test_file_path("basic.fb2");
    let config = ExtractionConfig::default();
    let extraction = kreuzberg::extract_file(&fixture, None, &config)
        .await
        .expect("Failed to extract FB2 file");

    let text = &extraction.content;
    assert!(text.contains("strong"), "Strong text should be extracted");
}

/// Extracts `basic.fb2` with the default configuration and checks that the
/// content includes the word "verbatim".
#[tokio::test]
async fn test_fictionbook_extract_code() {
    let fixture = test_file_path("basic.fb2");
    let extraction = kreuzberg::extract_file(&fixture, None, &ExtractionConfig::default())
        .await
        .expect("Failed to extract FB2 file");

    let has_code = extraction.content.contains("verbatim");
    assert!(has_code, "Code content should be extracted");
}

/// Extracts `basic.fb2` with the default configuration and checks that the
/// content includes the word "Blockquote".
#[tokio::test]
async fn test_fictionbook_extract_blockquote() {
    let config = ExtractionConfig::default();
    let fixture = test_file_path("basic.fb2");
    let extraction = kreuzberg::extract_file(&fixture, None, &config)
        .await
        .expect("Failed to extract FB2 file");

    let text = &extraction.content;
    assert!(text.contains("Blockquote"), "Blockquote should be extracted");
}

/// Extracts `tables.fb2` with the default configuration and checks only that
/// some content came back; nothing about the table contents is asserted.
#[tokio::test]
async fn test_fictionbook_extract_tables() {
    let fixture = test_file_path("tables.fb2");
    let config = ExtractionConfig::default();
    let extraction = kreuzberg::extract_file(&fixture, None, &config)
        .await
        .expect("Failed to extract FB2 file");

    let has_content = !extraction.content.is_empty();
    assert!(has_content, "Content should be extracted from file with tables");
}

/// Extracts `emphasis.fb2` with `OutputFormat::Markdown` and checks that the
/// output contains `**strong**`, `*emphasis*`, `~~deleted~~` and `` `code` ``.
#[tokio::test]
async fn test_fictionbook_markdown_formatting_preservation() {
    let markdown_config = ExtractionConfig {
        output_format: OutputFormat::Markdown,
        ..Default::default()
    };
    let fixture = test_file_path("emphasis.fb2");
    let extraction = kreuzberg::extract_file(&fixture, None, &markdown_config)
        .await
        .expect("Failed to extract FB2 file");

    let rendered = &extraction.content;

    // Checked in order; the first missing marker fails with its own message.
    let expectations = [
        (
            "**strong**",
            "Strong text should be formatted as **bold** in markdown",
        ),
        (
            "*emphasis*",
            "Emphasis text should be formatted as *italic* in markdown",
        ),
        (
            "~~deleted~~",
            "Strikethrough text should be formatted as ~~strikethrough~~ in markdown",
        ),
        (
            "`code`",
            "Code text should be wrapped in backticks in markdown",
        ),
    ];
    for (marker, message) in expectations {
        assert!(rendered.contains(marker), "{}", message);
    }
}

/// Extracts `basic.fb2` with `OutputFormat::Markdown` and checks that the
/// output contains `*emphasized*`, `**strong**` and `` `verbatim` ``.
#[tokio::test]
async fn test_fictionbook_formatting_in_body_paragraphs() {
    let fixture = test_file_path("basic.fb2");
    let markdown_config = ExtractionConfig {
        output_format: OutputFormat::Markdown,
        ..Default::default()
    };
    let extraction = kreuzberg::extract_file(&fixture, None, &markdown_config)
        .await
        .expect("Failed to extract FB2 file");

    // Small predicate so each assertion reads as "output has <marker>".
    let has = |marker: &str| extraction.content.contains(marker);

    assert!(
        has("*emphasized*"),
        "Emphasis formatting should be preserved in body content"
    );
    assert!(
        has("**strong**"),
        "Strong formatting should be preserved in body content"
    );
    assert!(
        has("`verbatim`"),
        "Code formatting should be preserved in body content"
    );
}