Nomad changes
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s

This commit is contained in:
Henrik Jess Nielsen
2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions

View File

@@ -0,0 +1,201 @@
#![cfg(feature = "office")]
use kreuzberg::core::config::{ExtractionConfig, OutputFormat};
use std::path::PathBuf;
/// Builds the path of an FB2 fixture named `filename`.
///
/// The fixture directory is `test_documents/fictionbook`, resolved relative to
/// the directory two levels above this crate's `CARGO_MANIFEST_DIR` (captured
/// at compile time).
///
/// # Panics
///
/// Panics if the manifest directory does not have two parent directories.
fn test_file_path(filename: &str) -> PathBuf {
    let crate_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    let parent_dir = crate_dir.parent().expect("Operation failed");
    let fixtures_root = parent_dir.parent().expect("Operation failed");

    fixtures_root
        .join("test_documents")
        .join("fictionbook")
        .join(filename)
}
/// Extracts `meta.fb2` with the default configuration and asserts that the
/// resulting content contains the text "Book title".
#[tokio::test]
async fn test_fictionbook_extract_metadata_title() {
    let fixture = test_file_path("meta.fb2");
    let config = ExtractionConfig::default();

    let extraction = kreuzberg::extract_file(&fixture, None, &config)
        .await
        .expect("Failed to extract FB2 file");

    let has_title = extraction.content.contains("Book title");
    assert!(has_title, "Book title should be extracted from FB2 content");
}
/// Extracts `meta.fb2` with the default configuration and asserts that the
/// `subject` metadata field is `None`.
///
/// NOTE(review): the test name mentions genre extraction, yet the assertion
/// checks that `subject` is absent — confirm this is the intended contract.
#[tokio::test]
async fn test_fictionbook_extract_metadata_genre() {
    let fixture = test_file_path("meta.fb2");
    let config = ExtractionConfig::default();

    let result = kreuzberg::extract_file(&fixture, None, &config)
        .await
        .expect("Failed to extract FB2 file");

    assert!(result.metadata.subject.is_none());
}
/// Extracts `titles.fb2` with the default configuration and asserts that the
/// content contains both "Simple title" and "Emphasized".
#[tokio::test]
async fn test_fictionbook_extract_content_sections() {
    let fixture = test_file_path("titles.fb2");
    let config = ExtractionConfig::default();

    let extraction = kreuzberg::extract_file(&fixture, None, &config)
        .await
        .expect("Failed to extract FB2 file");
    let text = &extraction.content;

    assert!(text.contains("Simple title"), "Section titles should be extracted");
    assert!(
        text.contains("Emphasized"),
        "Section with emphasis should be extracted"
    );
}
/// Extracts `basic.fb2` with the default configuration and asserts that the
/// content contains "Top-level title", "Section" and "Subsection".
#[tokio::test]
async fn test_fictionbook_extract_section_hierarchy() {
    let fixture = test_file_path("basic.fb2");
    let config = ExtractionConfig::default();

    let extraction = kreuzberg::extract_file(&fixture, None, &config)
        .await
        .expect("Failed to extract FB2 file");
    let text = &extraction.content;

    assert!(
        text.contains("Top-level title"),
        "Top-level section should be extracted"
    );
    assert!(text.contains("Section"), "Nested section should be extracted");
    assert!(
        text.contains("Subsection"),
        "Nested subsection should be extracted"
    );
}
/// Extracts `emphasis.fb2` with the default configuration and asserts, on a
/// lower-cased copy of the content, that the words "plain", "strong",
/// "emphasis" and "strikethrough" are all present.
#[tokio::test]
async fn test_fictionbook_extract_inline_markup() {
    let fixture = test_file_path("emphasis.fb2");
    let config = ExtractionConfig::default();

    let extraction = kreuzberg::extract_file(&fixture, None, &config)
        .await
        .expect("Failed to extract FB2 file");

    // Compare case-insensitively by lower-casing the extracted text once.
    let lowered = extraction.content.to_lowercase();

    assert!(lowered.contains("plain"), "Plain text should be extracted");
    assert!(lowered.contains("strong"), "Strong emphasis should be extracted");
    assert!(lowered.contains("emphasis"), "Emphasis should be extracted");
    assert!(
        lowered.contains("strikethrough"),
        "Strikethrough should be extracted"
    );
}
/// Extracts `basic.fb2` with the default configuration and asserts that the
/// content contains the word "emphasized".
#[tokio::test]
async fn test_fictionbook_extract_emphasis() {
    let fixture = test_file_path("basic.fb2");
    let config = ExtractionConfig::default();

    let extraction = kreuzberg::extract_file(&fixture, None, &config)
        .await
        .expect("Failed to extract FB2 file");

    let found = extraction.content.contains("emphasized");
    assert!(found, "Emphasized text should be extracted");
}
/// Extracts `basic.fb2` with the default configuration and asserts that the
/// content contains the word "strong".
#[tokio::test]
async fn test_fictionbook_extract_strong() {
    let fixture = test_file_path("basic.fb2");
    let config = ExtractionConfig::default();

    let extraction = kreuzberg::extract_file(&fixture, None, &config)
        .await
        .expect("Failed to extract FB2 file");

    let found = extraction.content.contains("strong");
    assert!(found, "Strong text should be extracted");
}
/// Extracts `basic.fb2` with the default configuration and asserts that the
/// content contains the word "verbatim".
#[tokio::test]
async fn test_fictionbook_extract_code() {
    let fixture = test_file_path("basic.fb2");
    let config = ExtractionConfig::default();

    let extraction = kreuzberg::extract_file(&fixture, None, &config)
        .await
        .expect("Failed to extract FB2 file");

    let found = extraction.content.contains("verbatim");
    assert!(found, "Code content should be extracted");
}
/// Extracts `basic.fb2` with the default configuration and asserts that the
/// content contains the word "Blockquote".
#[tokio::test]
async fn test_fictionbook_extract_blockquote() {
    let fixture = test_file_path("basic.fb2");
    let config = ExtractionConfig::default();

    let extraction = kreuzberg::extract_file(&fixture, None, &config)
        .await
        .expect("Failed to extract FB2 file");

    let found = extraction.content.contains("Blockquote");
    assert!(found, "Blockquote should be extracted");
}
/// Extracts `tables.fb2` with the default configuration and asserts only that
/// the resulting content is non-empty.
#[tokio::test]
async fn test_fictionbook_extract_tables() {
    let fixture = test_file_path("tables.fb2");
    let config = ExtractionConfig::default();

    let extraction = kreuzberg::extract_file(&fixture, None, &config)
        .await
        .expect("Failed to extract FB2 file");

    let has_content = !extraction.content.is_empty();
    assert!(
        has_content,
        "Content should be extracted from file with tables"
    );
}
#[tokio::test]
async fn test_fictionbook_markdown_formatting_preservation() {
let path = test_file_path("emphasis.fb2");
let config = ExtractionConfig {
output_format: OutputFormat::Markdown,
..Default::default()
};
let result = kreuzberg::extract_file(&path, None, &config)
.await
.expect("Failed to extract FB2 file");
let md = &result.content;
assert!(
md.contains("**strong**"),
"Strong text should be formatted as **bold** in markdown"
);
assert!(
md.contains("*emphasis*"),
"Emphasis text should be formatted as *italic* in markdown"
);
assert!(
md.contains("~~deleted~~"),
"Strikethrough text should be formatted as ~~strikethrough~~ in markdown"
);
assert!(
md.contains("`code`"),
"Code text should be wrapped in backticks in markdown"
);
}
#[tokio::test]
async fn test_fictionbook_formatting_in_body_paragraphs() {
let path = test_file_path("basic.fb2");
let config = ExtractionConfig {
output_format: OutputFormat::Markdown,
..Default::default()
};
let result = kreuzberg::extract_file(&path, None, &config)
.await
.expect("Failed to extract FB2 file");
let md = &result.content;
assert!(
md.contains("*emphasized*"),
"Emphasis formatting should be preserved in body content"
);
assert!(
md.contains("**strong**"),
"Strong formatting should be preserved in body content"
);
assert!(
md.contains("`verbatim`"),
"Code formatting should be preserved in body content"
);
}