Files
fil/e2e/swift_e2e/Tests/KreuzbergE2ETests/SmokeTests.swift

104 lines
6.2 KiB
Swift
Raw Normal View History

2026-06-01 23:40:55 +02:00
// This file is auto-generated by alef DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// swift-format-ignore-file
import XCTest
import Foundation
#if canImport(FoundationNetworking)
import FoundationNetworking
#endif
import Kreuzberg
import RustBridge
/// End-to-end tests for the `smoke` category.
///
/// Every test extracts a single fixture through the `Kreuzberg` module and
/// checks the reported MIME type, plus (where the fixture specifies them) a
/// minimum content length and expected substrings. Fixture paths are relative,
/// so `setUp()` attempts to switch the working directory to `test_documents`
/// before the tests run.
final class SmokeTests: XCTestCase {
    /// Points the process working directory at the shared `test_documents` folder.
    ///
    /// The folder is located by climbing five path components up from this
    /// source file (the first hop drops the file name, the other four climb
    /// directories) and appending `test_documents`. If that folder does not
    /// exist, the working directory is left untouched.
    ///
    /// NOTE(review): the five hops assume a layout of
    /// `<root>/<dir>/<package>/Tests/<Module>Tests/<file>` — confirm against the
    /// actual location of this package in the repository.
    override class func setUp() {
        super.setUp()
        let fixturesURL = (0..<5)
            .reduce(URL(fileURLWithPath: #filePath)) { url, _ in url.deletingLastPathComponent() }
            .appendingPathComponent("test_documents")
        guard FileManager.default.fileExists(atPath: fixturesURL.path) else { return }
        FileManager.default.changeCurrentDirectoryPath(fixturesURL.path)
    }

    /// OCR fixture: PNG image extraction with OCR enabled.
    ///
    /// Fixture note: in WASM this case exercises the Uint8Array bridge parameter
    /// and Promise await in the generated OcrBackend bridge.
    ///
    /// NOTE(review): the first argument to `extractBytes` is the fixture's
    /// relative path as a string literal — confirm the binding expects a path
    /// here rather than the file's raw bytes.
    func testOcrImagePng() throws {
        let extraction = try Kreuzberg.extractBytes("images/test_hello_world.png", "image/png", "{}")

        let mimeType = extraction.mimeType.trimmingCharacters(in: .whitespacesAndNewlines)
        XCTAssertEqual(mimeType, "image/png")

        let text = extraction.content
        XCTAssertGreaterThanOrEqual(text.count, 1)

        let candidates = ["Hello", "World", "hello", "world"]
        XCTAssertTrue(candidates.contains(where: { text.contains($0) }), "expected to contain at least one of the specified values")
    }

    /// Smoke fixture: DOCX with formatted text.
    func testSmokeDocxBasic() async throws {
        let extraction = try await Kreuzberg.extractFile("docx/fake.docx", "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "{}")

        let mimeType = extraction.mimeType.trimmingCharacters(in: .whitespacesAndNewlines)
        XCTAssertEqual(mimeType, "application/vnd.openxmlformats-officedocument.wordprocessingml.document")

        let text = extraction.content
        XCTAssertGreaterThanOrEqual(text.count, 20)

        let candidates = ["Lorem", "ipsum", "document", "text"]
        XCTAssertTrue(candidates.contains(where: { text.contains($0) }), "expected to contain at least one of the specified values")
    }

    /// Smoke fixture: HTML table extraction.
    func testSmokeHtmlBasic() async throws {
        let extraction = try await Kreuzberg.extractFile("html/simple_table.html", "text/html", "{}")

        let mimeType = extraction.mimeType.trimmingCharacters(in: .whitespacesAndNewlines)
        XCTAssertEqual(mimeType, "text/html")

        let text = extraction.content
        XCTAssertGreaterThanOrEqual(text.count, 10)

        let candidates = ["Sample Data Table", "Laptop", "Electronics", "Product"]
        XCTAssertTrue(candidates.contains(where: { text.contains($0) }), "expected to contain at least one of the specified values")
    }

    /// Smoke fixture: PNG image without OCR (metadata only).
    ///
    /// No MIME type hint is passed (`nil`); only the reported MIME type is checked.
    func testSmokeImagePng() async throws {
        let extraction = try await Kreuzberg.extractFile("images/sample.png", nil, "{\"disable_ocr\":true}")

        let mimeType = extraction.mimeType.trimmingCharacters(in: .whitespacesAndNewlines)
        XCTAssertEqual(mimeType, "image/png")
    }

    /// Smoke fixture: JSON file extraction.
    func testSmokeJsonBasic() async throws {
        let extraction = try await Kreuzberg.extractFile("json/simple.json", "application/json", "{}")

        let mimeType = extraction.mimeType.trimmingCharacters(in: .whitespacesAndNewlines)
        XCTAssertEqual(mimeType, "application/json")
        XCTAssertGreaterThanOrEqual(extraction.content.count, 5)
    }

    /// Smoke fixture: PDF with simple text extraction.
    func testSmokePdfBasic() async throws {
        let extraction = try await Kreuzberg.extractFile("pdf/fake_memo.pdf", "application/pdf", "{}")

        let mimeType = extraction.mimeType.trimmingCharacters(in: .whitespacesAndNewlines)
        XCTAssertEqual(mimeType, "application/pdf")

        let text = extraction.content
        XCTAssertGreaterThanOrEqual(text.count, 50)

        let candidates = ["May 5, 2023", "To Whom it May Concern"]
        XCTAssertTrue(candidates.contains(where: { text.contains($0) }), "expected to contain at least one of the specified values")
    }

    /// Smoke fixture: plain text file.
    func testSmokeTxtBasic() async throws {
        let extraction = try await Kreuzberg.extractFile("text/report.txt", "text/plain", "{}")

        let mimeType = extraction.mimeType.trimmingCharacters(in: .whitespacesAndNewlines)
        XCTAssertEqual(mimeType, "text/plain")
        XCTAssertGreaterThanOrEqual(extraction.content.count, 5)
    }

    /// Smoke fixture: XLSX with basic spreadsheet data including tables.
    ///
    /// Unlike the "any of" checks in the other tests, every listed value must
    /// appear in the extracted content.
    func testSmokeXlsxBasic() async throws {
        let extraction = try await Kreuzberg.extractFile("xlsx/stanley_cups.xlsx", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "{}")

        let mimeType = extraction.mimeType.trimmingCharacters(in: .whitespacesAndNewlines)
        XCTAssertEqual(mimeType, "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")

        let text = extraction.content
        XCTAssertGreaterThanOrEqual(text.count, 100)

        let requiredValues = [
            "Team", "Location", "Stanley Cups",
            "Blues", "Flyers", "Maple Leafs",
            "STL", "PHI", "TOR",
        ]
        for expected in requiredValues {
            XCTAssertTrue(text.contains(expected), "expected to contain: \(expected)")
        }
        // skipped: field 'tables' not available on result type
        // skipped: field 'metadata.format.excel.sheet_count' not available on result type
        // skipped: field 'metadata.format.excel.sheet_names' not available on result type
    }
}