//! Integration tests for the new rendering layer. //! //! These tests construct `InternalDocument` instances via the builder API, //! run them through `derive_extraction_result`, and verify the rendered //! output in each supported `OutputFormat`. //! //! Usage: //! cargo test -p kreuzberg --test rendering_integration_tests mod helpers; use kreuzberg::core::config::OutputFormat; use kreuzberg::extraction::derive::derive_extraction_result; use kreuzberg::types::document_structure::{AnnotationKind, TextAnnotation}; use kreuzberg::types::internal_builder::InternalDocumentBuilder; // ============================================================================ // Helpers // ============================================================================ /// Build a rich document containing a heading, paragraph, list, code block, /// and table — the structural elements every format must handle. fn build_rich_document() -> kreuzberg::types::internal::InternalDocument { let mut b = InternalDocumentBuilder::new("test"); b.push_heading(1, "Main Heading", None, None); b.push_paragraph("This is a paragraph with some text.", vec![], None, None); b.push_list(false); b.push_list_item("First item", false, vec![], None, None); b.push_list_item("Second item", false, vec![], None, None); b.push_list_item("Third item", false, vec![], None, None); b.end_list(); b.push_code("fn main() {\n println!(\"hello\");\n}", Some("rust"), None, None); b.push_table_from_cells( &[ vec!["Name".to_string(), "Value".to_string()], vec!["alpha".to_string(), "1".to_string()], vec!["beta".to_string(), "2".to_string()], ], None, None, ); b.build() } /// Derive an `ExtractionResult` from a document in the given format. fn derive( doc: kreuzberg::types::internal::InternalDocument, format: OutputFormat, ) -> kreuzberg::types::ExtractionResult { derive_extraction_result(doc, false, format) } /// Return the "effective content" — `formatted_content` when present, /// otherwise the plain-text `content`. fn effective_content(result: &kreuzberg::types::ExtractionResult) -> &str { result.formatted_content.as_deref().unwrap_or(&result.content) } // ============================================================================ // 1. Markdown output preserves structure // ============================================================================ #[tokio::test] async fn test_markdown_output_preserves_structure() { let doc = build_rich_document(); let result = derive(doc, OutputFormat::Markdown); let md = effective_content(&result); // Heading assert!( md.contains("# Main Heading"), "Markdown should contain an ATX heading, got:\n{md}" ); // Paragraph assert!( md.contains("This is a paragraph"), "Markdown should contain the paragraph text" ); // List items assert!(md.contains("First item"), "Markdown should contain list items"); // Code block assert!(md.contains("```"), "Markdown should contain a fenced code block"); assert!(md.contains("fn main()"), "Markdown code block should contain the code"); // Table (pipe-delimited) assert!(md.contains('|'), "Markdown should contain pipe-delimited table syntax"); assert!(md.contains("Name"), "Markdown table should contain header cells"); } // ============================================================================ // 2. Djot output format through pipeline // ============================================================================ #[tokio::test] async fn test_djot_output_preserves_structure() { let doc = build_rich_document(); let result = derive(doc, OutputFormat::Djot); let djot = effective_content(&result); // Djot headings use `#` just like markdown assert!( djot.contains("# Main Heading"), "Djot should contain a heading, got:\n{djot}" ); // Paragraph text assert!( djot.contains("This is a paragraph"), "Djot should contain the paragraph text" ); // Code (djot uses ``` fences too) assert!(djot.contains("fn main()"), "Djot should contain the code content"); } // ============================================================================ // 3. HTML output format through pipeline // ============================================================================ #[tokio::test] async fn test_html_output_preserves_structure() { let doc = build_rich_document(); let result = derive(doc, OutputFormat::Html); let html = effective_content(&result); assert!(html.contains(" 50_000, "{format:?}: 1000-paragraph document should produce substantial output, got {} bytes", text.len() ); } }