e2e/swift_e2e/Tests/KreuzbergE2ETests/SmokeTests.swift

// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// swift-format-ignore-file

import XCTest
import Foundation
#if canImport(FoundationNetworking)
import FoundationNetworking
#endif
import Kreuzberg
import RustBridge

/// End-to-end smoke tests (category: smoke).
///
/// Each test runs one fixture through the `Kreuzberg` module and checks the
/// reported MIME type, plus (where listed) a minimum content length and a set
/// of expected substrings. Fixture paths are relative, so `setUp()` first tries
/// to make a `test_documents` directory the current working directory.
final class SmokeTests: XCTestCase {
    /// Switches the process working directory to `test_documents` when it exists.
    ///
    /// The directory is looked up five path levels above this source file.
    /// When it is absent the working directory is left untouched.
    override class func setUp() {
        super.setUp()

        // Drop the file name and the four directories that enclose it.
        var fixturesRoot = URL(fileURLWithPath: #filePath)
        for _ in 0..<5 {
            fixturesRoot = fixturesRoot.deletingLastPathComponent()
        }
        fixturesRoot = fixturesRoot.appendingPathComponent("test_documents")

        let fileManager = FileManager.default
        guard fileManager.fileExists(atPath: fixturesRoot.path) else { return }
        fileManager.changeCurrentDirectoryPath(fixturesRoot.path)
    }

    /// OCR: PNG image extraction with OCR enabled. In WASM this exercises the
    /// Uint8Array bridge parameter and Promise await in the generated OcrBackend bridge.
    func testOcrImagePng() throws {
        let result = try Kreuzberg.extractBytes("images/test_hello_world.png", "image/png", "{}")

        let mimeType = result.mimeType.trimmingCharacters(in: .whitespacesAndNewlines)
        XCTAssertEqual(mimeType, "image/png")

        let text = result.content
        XCTAssertGreaterThanOrEqual(text.count, 1)

        // Passing requires at least one of these substrings.
        let anyOf = ["Hello", "World", "hello", "world"]
        XCTAssertTrue(anyOf.contains(where: { text.contains($0) }), "expected to contain at least one of the specified values")
    }

    /// Smoke test: DOCX with formatted text.
    func testSmokeDocxBasic() async throws {
        let result = try await Kreuzberg.extractFile("docx/fake.docx", "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "{}")

        let mimeType = result.mimeType.trimmingCharacters(in: .whitespacesAndNewlines)
        XCTAssertEqual(mimeType, "application/vnd.openxmlformats-officedocument.wordprocessingml.document")

        let text = result.content
        XCTAssertGreaterThanOrEqual(text.count, 20)

        // Passing requires at least one of these substrings.
        let anyOf = ["Lorem", "ipsum", "document", "text"]
        XCTAssertTrue(anyOf.contains(where: { text.contains($0) }), "expected to contain at least one of the specified values")
    }

    /// Smoke test: HTML table extraction.
    func testSmokeHtmlBasic() async throws {
        let result = try await Kreuzberg.extractFile("html/simple_table.html", "text/html", "{}")

        let mimeType = result.mimeType.trimmingCharacters(in: .whitespacesAndNewlines)
        XCTAssertEqual(mimeType, "text/html")

        let text = result.content
        XCTAssertGreaterThanOrEqual(text.count, 10)

        // Passing requires at least one of these substrings.
        let anyOf = ["Sample Data Table", "Laptop", "Electronics", "Product"]
        XCTAssertTrue(anyOf.contains(where: { text.contains($0) }), "expected to contain at least one of the specified values")
    }

    /// Smoke test: PNG image (without OCR, metadata only).
    func testSmokeImagePng() async throws {
        // No MIME type is supplied here (`nil`); only the reported type is checked.
        let result = try await Kreuzberg.extractFile("images/sample.png", nil, "{\"disable_ocr\":true}")

        let mimeType = result.mimeType.trimmingCharacters(in: .whitespacesAndNewlines)
        XCTAssertEqual(mimeType, "image/png")
    }

    /// Smoke test: JSON file extraction.
    func testSmokeJsonBasic() async throws {
        let result = try await Kreuzberg.extractFile("json/simple.json", "application/json", "{}")

        let mimeType = result.mimeType.trimmingCharacters(in: .whitespacesAndNewlines)
        XCTAssertEqual(mimeType, "application/json")
        XCTAssertGreaterThanOrEqual(result.content.count, 5)
    }

    /// Smoke test: PDF with simple text extraction.
    func testSmokePdfBasic() async throws {
        let result = try await Kreuzberg.extractFile("pdf/fake_memo.pdf", "application/pdf", "{}")

        let mimeType = result.mimeType.trimmingCharacters(in: .whitespacesAndNewlines)
        XCTAssertEqual(mimeType, "application/pdf")

        let text = result.content
        XCTAssertGreaterThanOrEqual(text.count, 50)

        // Passing requires at least one of these substrings.
        let anyOf = ["May 5, 2023", "To Whom it May Concern"]
        XCTAssertTrue(anyOf.contains(where: { text.contains($0) }), "expected to contain at least one of the specified values")
    }

    /// Smoke test: Plain text file.
    func testSmokeTxtBasic() async throws {
        let result = try await Kreuzberg.extractFile("text/report.txt", "text/plain", "{}")

        let mimeType = result.mimeType.trimmingCharacters(in: .whitespacesAndNewlines)
        XCTAssertEqual(mimeType, "text/plain")
        XCTAssertGreaterThanOrEqual(result.content.count, 5)
    }

    /// Smoke test: XLSX with basic spreadsheet data including tables.
    func testSmokeXlsxBasic() async throws {
        let result = try await Kreuzberg.extractFile("xlsx/stanley_cups.xlsx", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "{}")

        let mimeType = result.mimeType.trimmingCharacters(in: .whitespacesAndNewlines)
        XCTAssertEqual(mimeType, "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")

        let text = result.content
        XCTAssertGreaterThanOrEqual(text.count, 100)

        // Every one of these substrings must be present; each is asserted separately.
        let allOf = ["Team", "Location", "Stanley Cups", "Blues", "Flyers", "Maple Leafs", "STL", "PHI", "TOR"]
        for needle in allOf {
            XCTAssertTrue(text.contains(needle), "expected to contain: \(needle)")
        }
        // skipped: field 'tables' not available on result type
        // skipped: field 'metadata.format.excel.sheet_count' not available on result type
        // skipped: field 'metadata.format.excel.sheet_names' not available on result type
    }

}
Nomad changes 2026-06-01 23:40:55 +02:00			`// This file is auto-generated by alef — DO NOT EDIT.`
			`// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75`
			`// To regenerate: alef generate`
			`// To verify freshness: alef verify --exit-code`
			`// Issues & docs: https://github.com/kreuzberg-dev/alef`
			`// swift-format-ignore-file`

			`import XCTest`
			`import Foundation`
			`#if canImport(FoundationNetworking)`
			`import FoundationNetworking`
			`#endif`
			`import Kreuzberg`
			`import RustBridge`

			`/// E2e tests for category: smoke.`
			`final class SmokeTests: XCTestCase {`
			`override class func setUp() {`
			`super.setUp()`
			`let _testDocs = URL(fileURLWithPath: #filePath)`
			`.deletingLastPathComponent() // <Module>Tests/`
			`.deletingLastPathComponent() // Tests/`
			`.deletingLastPathComponent() // swift/`
			`.deletingLastPathComponent() // packages/`
			`.deletingLastPathComponent() // <repo root>`
			`.appendingPathComponent("test_documents")`
			`if FileManager.default.fileExists(atPath: _testDocs.path) {`
			`FileManager.default.changeCurrentDirectoryPath(_testDocs.path)`
			`}`
			`}`

			`func testOcrImagePng() throws {`
			`// OCR: PNG image extraction with OCR enabled. In WASM this exercises the Uint8Array bridge parameter and Promise await in the generated OcrBackend bridge.`
			`let result = try Kreuzberg.extractBytes("images/test_hello_world.png", "image/png", "{}")`
			`XCTAssertEqual(result.mimeType.trimmingCharacters(in: CharacterSet.whitespacesAndNewlines), "image/png")`
			`XCTAssertGreaterThanOrEqual(result.content.count, 1)`
			`XCTAssertTrue(result.content.contains("Hello") \|\| result.content.contains("World") \|\| result.content.contains("hello") \|\| result.content.contains("world"), "expected to contain at least one of the specified values")`
			`}`

			`func testSmokeDocxBasic() async throws {`
			`// Smoke test: DOCX with formatted text`
			`let result = try await Kreuzberg.extractFile("docx/fake.docx", "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "{}")`
			`XCTAssertEqual(result.mimeType.trimmingCharacters(in: CharacterSet.whitespacesAndNewlines), "application/vnd.openxmlformats-officedocument.wordprocessingml.document")`
			`XCTAssertGreaterThanOrEqual(result.content.count, 20)`
			`XCTAssertTrue(result.content.contains("Lorem") \|\| result.content.contains("ipsum") \|\| result.content.contains("document") \|\| result.content.contains("text"), "expected to contain at least one of the specified values")`
			`}`

			`func testSmokeHtmlBasic() async throws {`
			`// Smoke test: HTML table extraction`
			`let result = try await Kreuzberg.extractFile("html/simple_table.html", "text/html", "{}")`
			`XCTAssertEqual(result.mimeType.trimmingCharacters(in: CharacterSet.whitespacesAndNewlines), "text/html")`
			`XCTAssertGreaterThanOrEqual(result.content.count, 10)`
			`XCTAssertTrue(result.content.contains("Sample Data Table") \|\| result.content.contains("Laptop") \|\| result.content.contains("Electronics") \|\| result.content.contains("Product"), "expected to contain at least one of the specified values")`
			`}`

			`func testSmokeImagePng() async throws {`
			`// Smoke test: PNG image (without OCR, metadata only)`
			`let result = try await Kreuzberg.extractFile("images/sample.png", nil, "{\"disable_ocr\":true}")`
			`XCTAssertEqual(result.mimeType.trimmingCharacters(in: CharacterSet.whitespacesAndNewlines), "image/png")`
			`}`

			`func testSmokeJsonBasic() async throws {`
			`// Smoke test: JSON file extraction`
			`let result = try await Kreuzberg.extractFile("json/simple.json", "application/json", "{}")`
			`XCTAssertEqual(result.mimeType.trimmingCharacters(in: CharacterSet.whitespacesAndNewlines), "application/json")`
			`XCTAssertGreaterThanOrEqual(result.content.count, 5)`
			`}`

			`func testSmokePdfBasic() async throws {`
			`// Smoke test: PDF with simple text extraction`
			`let result = try await Kreuzberg.extractFile("pdf/fake_memo.pdf", "application/pdf", "{}")`
			`XCTAssertEqual(result.mimeType.trimmingCharacters(in: CharacterSet.whitespacesAndNewlines), "application/pdf")`
			`XCTAssertGreaterThanOrEqual(result.content.count, 50)`
			`XCTAssertTrue(result.content.contains("May 5, 2023") \|\| result.content.contains("To Whom it May Concern"), "expected to contain at least one of the specified values")`
			`}`

			`func testSmokeTxtBasic() async throws {`
			`// Smoke test: Plain text file`
			`let result = try await Kreuzberg.extractFile("text/report.txt", "text/plain", "{}")`
			`XCTAssertEqual(result.mimeType.trimmingCharacters(in: CharacterSet.whitespacesAndNewlines), "text/plain")`
			`XCTAssertGreaterThanOrEqual(result.content.count, 5)`
			`}`

			`func testSmokeXlsxBasic() async throws {`
			`// Smoke test: XLSX with basic spreadsheet data including tables`
			`let result = try await Kreuzberg.extractFile("xlsx/stanley_cups.xlsx", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "{}")`
			`XCTAssertEqual(result.mimeType.trimmingCharacters(in: CharacterSet.whitespacesAndNewlines), "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")`
			`XCTAssertGreaterThanOrEqual(result.content.count, 100)`
			`XCTAssertTrue(result.content.contains("Team"), "expected to contain: \("Team")")`
			`XCTAssertTrue(result.content.contains("Location"), "expected to contain: \("Location")")`
			`XCTAssertTrue(result.content.contains("Stanley Cups"), "expected to contain: \("Stanley Cups")")`
			`XCTAssertTrue(result.content.contains("Blues"), "expected to contain: \("Blues")")`
			`XCTAssertTrue(result.content.contains("Flyers"), "expected to contain: \("Flyers")")`
			`XCTAssertTrue(result.content.contains("Maple Leafs"), "expected to contain: \("Maple Leafs")")`
			`XCTAssertTrue(result.content.contains("STL"), "expected to contain: \("STL")")`
			`XCTAssertTrue(result.content.contains("PHI"), "expected to contain: \("PHI")")`
			`XCTAssertTrue(result.content.contains("TOR"), "expected to contain: \("TOR")")`
			`// skipped: field 'tables' not available on result type`
			`// skipped: field 'metadata.format.excel.sheet_count' not available on result type`
			`// skipped: field 'metadata.format.excel.sheet_names' not available on result type`
			`}`

			`}`