// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// swift-format-ignore-file

import XCTest
import Foundation
#if canImport(FoundationNetworking)
import FoundationNetworking
#endif
import Kreuzberg
import RustBridge

/// End-to-end tests for the `smoke` category.
///
/// Every test extracts one fixture document through the `Kreuzberg` module and
/// checks the reported MIME type and, where applicable, the extracted text.
final class SmokeTests: XCTestCase {
    /// Switches the process working directory to the `test_documents` folder
    /// so the relative fixture paths used by the tests resolve.
    ///
    /// The folder is looked up five directory levels above this source file.
    /// When it does not exist the working directory is left unchanged.
    override class func setUp() {
        super.setUp()

        // Strip the file name plus four parent directories from this file's path.
        var baseURL = URL(fileURLWithPath: #filePath)
        for _ in 0..<5 {
            baseURL.deleteLastPathComponent()
        }
        let fixturesURL = baseURL.appendingPathComponent("test_documents")

        guard FileManager.default.fileExists(atPath: fixturesURL.path) else { return }
        FileManager.default.changeCurrentDirectoryPath(fixturesURL.path)
    }

    /// OCR: PNG image extraction with OCR enabled.
    ///
    /// In WASM this exercises the Uint8Array bridge parameter and Promise await
    /// in the generated OcrBackend bridge.
    func testOcrImagePng() throws {
        // NOTE(review): the first argument is a fixture path string rather than raw
        // bytes — confirm this matches the `extractBytes` signature.
        let mimeType = "image/png"
        let extraction = try Kreuzberg.extractBytes("images/test_hello_world.png", mimeType, "{}")

        XCTAssertEqual(extraction.mimeType.trimmingCharacters(in: .whitespacesAndNewlines), mimeType)

        let text = extraction.content
        XCTAssertGreaterThanOrEqual(text.count, 1)

        let keywords = ["Hello", "World", "hello", "world"]
        XCTAssertTrue(
            keywords.contains(where: { text.contains($0) }),
            "expected to contain at least one of the specified values"
        )
    }

    /// Smoke test: DOCX with formatted text.
    func testSmokeDocxBasic() async throws {
        let mimeType = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
        let extraction = try await Kreuzberg.extractFile("docx/fake.docx", mimeType, "{}")

        XCTAssertEqual(extraction.mimeType.trimmingCharacters(in: .whitespacesAndNewlines), mimeType)

        let text = extraction.content
        XCTAssertGreaterThanOrEqual(text.count, 20)

        let keywords = ["Lorem", "ipsum", "document", "text"]
        XCTAssertTrue(
            keywords.contains(where: { text.contains($0) }),
            "expected to contain at least one of the specified values"
        )
    }

    /// Smoke test: HTML table extraction.
    func testSmokeHtmlBasic() async throws {
        let mimeType = "text/html"
        let extraction = try await Kreuzberg.extractFile("html/simple_table.html", mimeType, "{}")

        XCTAssertEqual(extraction.mimeType.trimmingCharacters(in: .whitespacesAndNewlines), mimeType)

        let text = extraction.content
        XCTAssertGreaterThanOrEqual(text.count, 10)

        let keywords = ["Sample Data Table", "Laptop", "Electronics", "Product"]
        XCTAssertTrue(
            keywords.contains(where: { text.contains($0) }),
            "expected to contain at least one of the specified values"
        )
    }

    /// Smoke test: PNG image (without OCR, metadata only).
    ///
    /// No MIME type is passed in; the test only checks the one reported back.
    func testSmokeImagePng() async throws {
        let extraction = try await Kreuzberg.extractFile("images/sample.png", nil, "{\"disable_ocr\":true}")

        let reportedMime = extraction.mimeType.trimmingCharacters(in: .whitespacesAndNewlines)
        XCTAssertEqual(reportedMime, "image/png")
    }

    /// Smoke test: JSON file extraction.
    func testSmokeJsonBasic() async throws {
        let mimeType = "application/json"
        let extraction = try await Kreuzberg.extractFile("json/simple.json", mimeType, "{}")

        XCTAssertEqual(extraction.mimeType.trimmingCharacters(in: .whitespacesAndNewlines), mimeType)
        XCTAssertGreaterThanOrEqual(extraction.content.count, 5)
    }

    /// Smoke test: PDF with simple text extraction.
    func testSmokePdfBasic() async throws {
        let mimeType = "application/pdf"
        let extraction = try await Kreuzberg.extractFile("pdf/fake_memo.pdf", mimeType, "{}")

        XCTAssertEqual(extraction.mimeType.trimmingCharacters(in: .whitespacesAndNewlines), mimeType)

        let text = extraction.content
        XCTAssertGreaterThanOrEqual(text.count, 50)

        let keywords = ["May 5, 2023", "To Whom it May Concern"]
        XCTAssertTrue(
            keywords.contains(where: { text.contains($0) }),
            "expected to contain at least one of the specified values"
        )
    }

    /// Smoke test: Plain text file.
    func testSmokeTxtBasic() async throws {
        let mimeType = "text/plain"
        let extraction = try await Kreuzberg.extractFile("text/report.txt", mimeType, "{}")

        XCTAssertEqual(extraction.mimeType.trimmingCharacters(in: .whitespacesAndNewlines), mimeType)
        XCTAssertGreaterThanOrEqual(extraction.content.count, 5)
    }

    /// Smoke test: XLSX with basic spreadsheet data including tables.
    func testSmokeXlsxBasic() async throws {
        let mimeType = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
        let extraction = try await Kreuzberg.extractFile("xlsx/stanley_cups.xlsx", mimeType, "{}")

        XCTAssertEqual(extraction.mimeType.trimmingCharacters(in: .whitespacesAndNewlines), mimeType)

        let text = extraction.content
        XCTAssertGreaterThanOrEqual(text.count, 100)

        // Each value is asserted on its own so every missing one is reported.
        let requiredValues = [
            "Team", "Location", "Stanley Cups",
            "Blues", "Flyers", "Maple Leafs",
            "STL", "PHI", "TOR",
        ]
        for value in requiredValues {
            XCTAssertTrue(text.contains(value), "expected to contain: \(value)")
        }

        // skipped: field 'tables' not available on result type
        // skipped: field 'metadata.format.excel.sheet_count' not available on result type
        // skipped: field 'metadata.format.excel.sheet_names' not available on result type
    }
}