Nomad changes
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s

This commit is contained in:
Henrik Jess Nielsen
2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions

19
e2e/swift_e2e/Package.swift generated Normal file
View File

@@ -0,0 +1,19 @@
// swift-tools-version: 6.0
import PackageDescription
// Manifest for the Swift end-to-end test package. It declares one test target
// and no library or executable products.
let package = Package(
name: "E2eSwift",
platforms: [
.macOS(.v13),
.iOS(.v16),
],
dependencies: [
// Local path dependency on the Swift package two directories above this manifest.
.package(path: "../../packages/swift"),
],
targets: [
.testTarget(
name: "KreuzbergE2ETests",
// NOTE(review): `package: "swift"` presumably matches the identity SwiftPM
// derives from the dependency's directory name; confirm if that directory is renamed.
dependencies: [.product(name: "Kreuzberg", package: "swift")]
),
]
)

View File

@@ -0,0 +1,59 @@
// This file is auto-generated by alef DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// swift-format-ignore-file
import XCTest
import Foundation
#if canImport(FoundationNetworking)
import FoundationNetworking
#endif
import Kreuzberg
import RustBridge
/// End-to-end tests for the `async` fixture category.
final class AsyncTests: XCTestCase {
    /// Changes the process working directory to the `test_documents` folder
    /// found five levels above this source file, when that folder exists.
    /// Presumably this is what lets the relative fixture paths below resolve.
    override class func setUp() {
        super.setUp()
        // Generator layout notes for the five levels climbed:
        // <Module>Tests/, Tests/, swift/, packages/, <repo root>.
        var fixturesURL = URL(fileURLWithPath: #filePath)
        for _ in 0..<5 {
            fixturesURL.deleteLastPathComponent()
        }
        fixturesURL.appendPathComponent("test_documents")
        let fixturesPath = fixturesURL.path
        // Best effort: leave the working directory untouched if the folder is absent.
        guard FileManager.default.fileExists(atPath: fixturesPath) else { return }
        FileManager.default.changeCurrentDirectoryPath(fixturesPath)
    }

    /// Async extract_bytes call on PDF document.
    func testAsyncExtractBytes() throws {
        let extraction = try Kreuzberg.extractBytes("pdf/fake_memo.pdf", "application/pdf", "{}")
        let mime = extraction.mimeType.trimmingCharacters(in: .whitespacesAndNewlines)
        XCTAssertEqual(mime, "application/pdf")
        XCTAssertGreaterThanOrEqual(extraction.content.count, 50)
    }

    /// extract_bytes empty MIME async: the call is expected to throw.
    func testAsyncExtractBytesEmptyMime() throws {
        XCTAssertThrowsError(
            try Kreuzberg.extractBytes("text/plain.txt", "", "{}"),
            "expected to throw"
        )
    }

    /// extract_bytes unsupported MIME async: the call is expected to throw.
    func testAsyncExtractBytesInvalidMime() throws {
        XCTAssertThrowsError(
            try Kreuzberg.extractBytes("text/plain.txt", "application/x-nonexistent", "{}"),
            "expected to throw"
        )
    }
}

View File

@@ -0,0 +1,93 @@
// This file is auto-generated by alef DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// swift-format-ignore-file
import XCTest
import Foundation
#if canImport(FoundationNetworking)
import FoundationNetworking
#endif
import Kreuzberg
import RustBridge
/// End-to-end tests for the `batch` fixture category.
///
/// Most cases only check that the batch call returns without throwing; the
/// returned value is discarded explicitly so the compiler does not warn about
/// unused bindings.
///
/// NOTE(review): this class has no `setUp` that changes the working directory,
/// yet some cases use relative fixture paths. Confirm whether that is intended.
final class BatchTests: XCTestCase {
    /// batch_extract_bytes_sync invalid MIME.
    func testBatchBytesInvalidMime() throws {
        let item = try Kreuzberg.batchBytesItemFromJson(
            #"{"content":[72,101,108,108,111],"mime_type":"application/x-nonexistent"}"#
        )
        let items = [item]
        _ = try Kreuzberg.batchExtractBytesSync(items: items)
    }

    /// batch_extract_bytes: happy path with mixed inputs.
    func testBatchExtractBytesHappy() async throws {
        let plainItem = try Kreuzberg.batchBytesItemFromJson(
            #"{"content":[72,101,108,108,111,44,32,119,111,114,108,100,33],"mime_type":"text/plain"}"#
        )
        let htmlItem = try Kreuzberg.batchBytesItemFromJson(
            #"{"content":[60,104,116,109,108,62,60,98,111,100,121,62,84,101,115,116,60,47,98,111,100,121,62,60,47,104,116,109,108,62],"mime_type":"text/html"}"#
        )
        let items = [plainItem, htmlItem]
        let results = try await Kreuzberg.batchExtractBytes(items: items)
        XCTAssertGreaterThanOrEqual(results.count, 1)
    }

    /// batch_extract_bytes: handles unsupported MIME gracefully.
    func testBatchExtractBytesMixedFormat() async throws {
        let item = try Kreuzberg.batchBytesItemFromJson(
            #"{"content":[80,68,70,32,112,108,97,99,101,104,111,108,100,101,114],"mime_type":"application/x-unknown"}"#
        )
        let items = [item]
        _ = try await Kreuzberg.batchExtractBytes(items: items)
    }

    /// batch_extract_bytes_sync: empty batch.
    func testBatchExtractBytesSyncEmptyList() throws {
        let results = try Kreuzberg.batchExtractBytesSync(items: [])
        XCTAssertEqual(results.count, 0)
    }

    /// batch_extract_bytes_sync: unsupported MIME.
    func testBatchExtractBytesSyncInvalidMime() throws {
        let item = try Kreuzberg.batchBytesItemFromJson(
            #"{"content":[100,97,116,97],"mime_type":"application/x-unknown"}"#
        )
        let items = [item]
        _ = try Kreuzberg.batchExtractBytesSync(items: items)
    }

    /// Extract text from multiple files asynchronously.
    func testBatchFileAsyncBasic() async throws {
        let memo = try Kreuzberg.batchFileItemFromJson(#"{"path":"pdf/fake_memo.pdf"}"#)
        let text = try Kreuzberg.batchFileItemFromJson(#"{"path":"text/fake_text.txt"}"#)
        let paths = [memo, text]
        _ = try await Kreuzberg.batchExtractFiles(paths: paths)
    }

    /// batch_extract_file async nonexistent.
    func testBatchFileAsyncNotFound() async throws {
        let missing = try Kreuzberg.batchFileItemFromJson(#"{"path":"/nonexistent/a.pdf"}"#)
        let paths = [missing]
        _ = try await Kreuzberg.batchExtractFiles(paths: paths)
    }

    /// batch_extract_file_sync nonexistent.
    func testBatchFileNotFound() throws {
        let missingPdf = try Kreuzberg.batchFileItemFromJson(#"{"path":"/nonexistent/a.pdf"}"#)
        let missingTxt = try Kreuzberg.batchFileItemFromJson(#"{"path":"/nonexistent/b.txt"}"#)
        let paths = [missingPdf, missingTxt]
        _ = try Kreuzberg.batchExtractFilesSync(paths: paths)
    }

    /// batch_extract_file_sync mixed.
    func testBatchFilePartial() throws {
        let present = try Kreuzberg.batchFileItemFromJson(#"{"path":"text/plain.txt"}"#)
        let missing = try Kreuzberg.batchFileItemFromJson(#"{"path":"/nonexistent/missing.pdf"}"#)
        let paths = [present, missing]
        _ = try Kreuzberg.batchExtractFilesSync(paths: paths)
    }

    /// Extract text from multiple files synchronously.
    func testBatchFileSyncBasic() throws {
        let memo = try Kreuzberg.batchFileItemFromJson(#"{"path":"pdf/fake_memo.pdf"}"#)
        let text = try Kreuzberg.batchFileItemFromJson(#"{"path":"text/fake_text.txt"}"#)
        let paths = [memo, text]
        _ = try Kreuzberg.batchExtractFilesSync(paths: paths)
    }
}

View File

@@ -0,0 +1,41 @@
// This file is auto-generated by alef DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// swift-format-ignore-file
import XCTest
import Foundation
#if canImport(FoundationNetworking)
import FoundationNetworking
#endif
import Kreuzberg
import RustBridge
/// End-to-end tests for the `code` fixture category.
final class CodeTests: XCTestCase {
    /// Changes the process working directory to the `test_documents` folder
    /// found five levels above this source file, when that folder exists.
    override class func setUp() {
        super.setUp()
        // Generator layout notes for the five levels climbed:
        // <Module>Tests/, Tests/, swift/, packages/, <repo root>.
        var fixturesURL = URL(fileURLWithPath: #filePath)
        for _ in 0..<5 {
            fixturesURL.deleteLastPathComponent()
        }
        fixturesURL.appendPathComponent("test_documents")
        let fixturesPath = fixturesURL.path
        // Best effort: leave the working directory untouched if the folder is absent.
        guard FileManager.default.fileExists(atPath: fixturesPath) else { return }
        FileManager.default.changeCurrentDirectoryPath(fixturesPath)
    }

    /// Test language detection from shebang line.
    ///
    /// NOTE(review): the fixture description says "via bytes input", but the
    /// call below is `extractFileSync`. Confirm which entry point is intended.
    func testCodeShebangDetection() throws {
        let extraction = try Kreuzberg.extractFileSync("code/script.sh", "text/x-source-code", "{}")
        let mime = extraction.mimeType.trimmingCharacters(in: .whitespacesAndNewlines)
        XCTAssertEqual(mime, "text/x-source-code")
        XCTAssertGreaterThanOrEqual(extraction.content.count, 10)
        XCTAssertTrue(extraction.content.contains("build"), "expected to contain: build")
        XCTAssertTrue(extraction.content.contains("clean"), "expected to contain: clean")
    }
}

View File

@@ -0,0 +1,169 @@
// This file is auto-generated by alef DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// swift-format-ignore-file
import XCTest
import Foundation
#if canImport(FoundationNetworking)
import FoundationNetworking
#endif
import Kreuzberg
import RustBridge
/// End-to-end tests for the `contract` fixture category.
///
/// NOTE(review): several test names and descriptions mention batch or bytes
/// APIs while the body calls `extractFile`. The calls are reproduced as
/// generated; confirm against the fixture definitions.
final class ContractTests: XCTestCase {
    /// Changes the process working directory to the `test_documents` folder
    /// found five levels above this source file, when that folder exists.
    override class func setUp() {
        super.setUp()
        // Generator layout notes for the five levels climbed:
        // <Module>Tests/, Tests/, swift/, packages/, <repo root>.
        var fixturesURL = URL(fileURLWithPath: #filePath)
        for _ in 0..<5 {
            fixturesURL.deleteLastPathComponent()
        }
        fixturesURL.appendPathComponent("test_documents")
        let fixturesPath = fixturesURL.path
        // Best effort: leave the working directory untouched if the folder is absent.
        guard FileManager.default.fileExists(atPath: fixturesPath) else { return }
        FileManager.default.changeCurrentDirectoryPath(fixturesPath)
    }

    /// Tests async batch bytes extraction API (batch_extract_bytes).
    func testApiBatchBytesAsync() throws {
        let extraction = try Kreuzberg.extractFile("pdf/fake_memo.pdf", nil, "{}")
        let mime = extraction.mimeType.trimmingCharacters(in: .whitespacesAndNewlines)
        XCTAssertEqual(mime, "application/pdf")
        XCTAssertGreaterThanOrEqual(extraction.content.count, 10)
        XCTAssertTrue(
            extraction.content.contains("May 5, 2023")
                || extraction.content.contains("Mallori"),
            "expected to contain at least one of the specified values"
        )
    }

    /// Tests async batch bytes extraction with per-file configs
    /// (batch_extract_bytes with file_configs parameter).
    func testApiBatchBytesWithConfigsAsync() throws {
        let extraction = try Kreuzberg.extractFile("pdf/fake_memo.pdf", nil, #"{"output_format":"markdown"}"#)
        let mime = extraction.mimeType.trimmingCharacters(in: .whitespacesAndNewlines)
        XCTAssertEqual(mime, "application/pdf")
        XCTAssertGreaterThanOrEqual(extraction.content.count, 10)
        // skipped: field 'metadata.output_format' not available on result type
    }

    /// Tests async batch file extraction API (batch_extract_file).
    func testApiBatchFileAsync() throws {
        let extraction = try Kreuzberg.extractFile("pdf/fake_memo.pdf", nil, "{}")
        let mime = extraction.mimeType.trimmingCharacters(in: .whitespacesAndNewlines)
        XCTAssertEqual(mime, "application/pdf")
        XCTAssertGreaterThanOrEqual(extraction.content.count, 10)
        XCTAssertTrue(
            extraction.content.contains("May 5, 2023")
                || extraction.content.contains("Mallori"),
            "expected to contain at least one of the specified values"
        )
    }

    /// Tests async batch file extraction with per-file configs
    /// (batch_extract_files with file_configs parameter).
    func testApiBatchFileWithConfigsAsync() throws {
        let extraction = try Kreuzberg.extractFile("pdf/fake_memo.pdf", nil, #"{"output_format":"markdown"}"#)
        let mime = extraction.mimeType.trimmingCharacters(in: .whitespacesAndNewlines)
        XCTAssertEqual(mime, "application/pdf")
        XCTAssertGreaterThanOrEqual(extraction.content.count, 10)
        // skipped: field 'metadata.output_format' not available on result type
    }

    /// Tests async bytes extraction API (extract_bytes).
    func testApiExtractBytesAsync() throws {
        let extraction = try Kreuzberg.extractFile("pdf/fake_memo.pdf", nil, "{}")
        let mime = extraction.mimeType.trimmingCharacters(in: .whitespacesAndNewlines)
        XCTAssertEqual(mime, "application/pdf")
        XCTAssertGreaterThanOrEqual(extraction.content.count, 10)
        XCTAssertTrue(
            extraction.content.contains("May 5, 2023")
                || extraction.content.contains("Mallori"),
            "expected to contain at least one of the specified values"
        )
    }

    /// Tests async file extraction API (extract_file).
    func testApiExtractFileAsync() async throws {
        let extraction = try await Kreuzberg.extractFile("pdf/fake_memo.pdf", nil, "{}")
        let mime = extraction.mimeType.trimmingCharacters(in: .whitespacesAndNewlines)
        XCTAssertEqual(mime, "application/pdf")
        XCTAssertGreaterThanOrEqual(extraction.content.count, 10)
        XCTAssertTrue(
            extraction.content.contains("May 5, 2023")
                || extraction.content.contains("Mallori"),
            "expected to contain at least one of the specified values"
        )
    }

    /// Tests markdown chunker prepends heading hierarchy to chunk content.
    func testConfigChunkingPrependHeadingContext() throws {
        let extraction = try Kreuzberg.extractFileSync(
            "markdown/extraction_test.md",
            nil,
            #"{"chunking":{"chunker_type":"markdown","max_chars":300,"max_overlap":50,"prepend_heading_context":true}}"#
        )
        XCTAssertGreaterThanOrEqual(extraction.content.count, 10)
        // skipped: field 'chunks' not available on result type
        // skipped: field 'chunks_have_content' not available on result type
        // skipped: field 'chunks_have_heading_context' not available on result type
        // skipped: field 'first_chunk_starts_with_heading' not available on result type
    }

    /// Tests document structure with DOCX heading-driven nesting.
    func testConfigDocumentStructureWithHeadings() throws {
        let extraction = try Kreuzberg.extractFileSync("docx/fake.docx", nil, #"{"include_document_structure":true}"#)
        let mime = extraction.mimeType.trimmingCharacters(in: .whitespacesAndNewlines)
        XCTAssertEqual(mime, "application/vnd.openxmlformats-officedocument.wordprocessingml.document")
        // skipped: field 'document' not available on result type
        // skipped: field 'document.nodes' not available on result type
    }

    /// Tests element-based result format with element type assertions on DOCX.
    func testConfigElementTypes() throws {
        let extraction = try Kreuzberg.extractFileSync("docx/unit_test_headers.docx", nil, #"{"result_format":"element_based"}"#)
        XCTAssertTrue(
            extraction.mimeType.contains("application/vnd.openxmlformats-officedocument.wordprocessingml.document"),
            "expected to contain at least one of the specified values"
        )
        // skipped: field 'elements' not available on result type
    }

    /// Tests that extraction_timeout_secs config field is accepted and does
    /// not affect fast extractions.
    func testConfigExtractionTimeout() throws {
        let extraction = try Kreuzberg.extractFileSync("pdf/fake_memo.pdf", nil, #"{"extraction_timeout_secs":300}"#)
        let mime = extraction.mimeType.trimmingCharacters(in: .whitespacesAndNewlines)
        XCTAssertEqual(mime, "application/pdf")
        XCTAssertGreaterThanOrEqual(extraction.content.count, 10)
    }

    /// Tests keyword extraction via YAKE algorithm.
    func testConfigKeywords() throws {
        let extraction = try Kreuzberg.extractFileSync(
            "pdf/fake_memo.pdf",
            nil,
            #"{"keywords":{"algorithm":"yake","max_keywords":10}}"#
        )
        let mime = extraction.mimeType.trimmingCharacters(in: .whitespacesAndNewlines)
        XCTAssertEqual(mime, "application/pdf")
        XCTAssertGreaterThanOrEqual(extraction.content.count, 10)
        // skipped: field 'keywords' not available on result type
        // skipped: field 'keywords' not available on result type
    }

    /// Tests page extraction and page marker configuration.
    func testConfigPages() throws {
        let extraction = try Kreuzberg.extractFileSync(
            "pdf/fake_memo.pdf",
            nil,
            #"{"pages":{"extract_pages":true,"insert_page_markers":true}}"#
        )
        let mime = extraction.mimeType.trimmingCharacters(in: .whitespacesAndNewlines)
        XCTAssertEqual(mime, "application/pdf")
        XCTAssertGreaterThanOrEqual(extraction.content.count, 10)
        XCTAssertTrue(
            extraction.content.contains("PAGE"),
            "expected to contain at least one of the specified values"
        )
    }

    /// Tests quality scoring produces a score value in [0.0, 1.0].
    func testConfigQualityEnabled() throws {
        let extraction = try Kreuzberg.extractFileSync("pdf/fake_memo.pdf", nil, #"{"enable_quality_processing":true}"#)
        let mime = extraction.mimeType.trimmingCharacters(in: .whitespacesAndNewlines)
        XCTAssertEqual(mime, "application/pdf")
        XCTAssertGreaterThanOrEqual(extraction.content.count, 10)
        // skipped: field 'quality_score' not available on result type
        // skipped: field 'quality_score' not available on result type
        // skipped: field 'quality_score' not available on result type
    }

    /// Tests archive extraction with custom security limits.
    func testConfigSecurityLimits() throws {
        let extraction = try Kreuzberg.extractFileSync(
            "archives/documents.zip",
            nil,
            #"{"security_limits":{"max_archive_size":104857600,"max_compression_ratio":50,"max_files_in_archive":100}}"#
        )
        XCTAssertTrue(
            extraction.mimeType.contains("application/zip")
                || extraction.mimeType.contains("application/x-zip-compressed"),
            "expected to contain at least one of the specified values"
        )
        XCTAssertGreaterThanOrEqual(extraction.content.count, 10)
    }

    /// Tests tree-sitter configuration round-trip.
    func testConfigTreeSitter() throws {
        let extraction = try Kreuzberg.extractFileSync(
            "code/hello.py",
            nil,
            #"{"tree_sitter":{"groups":["web"],"languages":["python","rust"],"process":{"comments":false,"diagnostics":false,"docstrings":false,"exports":true,"imports":true,"structure":true,"symbols":false}}}"#
        )
        let mime = extraction.mimeType.trimmingCharacters(in: .whitespacesAndNewlines)
        XCTAssertEqual(mime, "text/x-source-code")
        XCTAssertGreaterThanOrEqual(extraction.content.count, 5)
    }

    /// Tests markdown output format via bytes extraction API.
    func testOutputFormatBytesMarkdown() throws {
        let extraction = try Kreuzberg.extractBytesSync("pdf/fake_memo.pdf", "application/pdf", #"{"output_format":"markdown"}"#)
        let mime = extraction.mimeType.trimmingCharacters(in: .whitespacesAndNewlines)
        XCTAssertEqual(mime, "application/pdf")
        XCTAssertGreaterThanOrEqual(extraction.content.count, 10)
        // skipped: field 'metadata.output_format' not available on result type
    }

    /// Tests Markdown output format.
    func testOutputFormatMarkdown() throws {
        let extraction = try Kreuzberg.extractFileSync("pdf/fake_memo.pdf", nil, #"{"output_format":"markdown"}"#)
        let mime = extraction.mimeType.trimmingCharacters(in: .whitespacesAndNewlines)
        XCTAssertEqual(mime, "application/pdf")
        XCTAssertGreaterThanOrEqual(extraction.content.count, 10)
        // skipped: field 'metadata.output_format' not available on result type
    }
}

View File

@@ -0,0 +1,57 @@
// This file is auto-generated by alef DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// swift-format-ignore-file
import XCTest
import Foundation
#if canImport(FoundationNetworking)
import FoundationNetworking
#endif
import Kreuzberg
import RustBridge
/// End-to-end tests for the `detection` fixture category.
///
/// The three byte-detection cases make no assertion on the detected value;
/// they only check that the call does not throw, so the result is discarded
/// explicitly instead of being bound to an unused local.
final class DetectionTests: XCTestCase {
    /// Changes the process working directory to the `test_documents` folder
    /// found five levels above this source file, when that folder exists.
    override class func setUp() {
        super.setUp()
        // Generator layout notes for the five levels climbed:
        // <Module>Tests/, Tests/, swift/, packages/, <repo root>.
        var fixturesURL = URL(fileURLWithPath: #filePath)
        for _ in 0..<5 {
            fixturesURL.deleteLastPathComponent()
        }
        fixturesURL.appendPathComponent("test_documents")
        let fixturesPath = fixturesURL.path
        // Best effort: leave the working directory untouched if the folder is absent.
        guard FileManager.default.fileExists(atPath: fixturesPath) else { return }
        FileManager.default.changeCurrentDirectoryPath(fixturesPath)
    }

    /// Detect HTML MIME from bytes.
    func testDetectMimeBytesHtml() throws {
        _ = try Kreuzberg.detectMimeTypeFromBytes("html/html.html")
    }

    /// Detect PDF MIME type from bytes.
    func testDetectMimeBytesPdf() throws {
        _ = try Kreuzberg.detectMimeTypeFromBytes("pdf/fake_memo.pdf")
    }

    /// Detect PNG MIME type from bytes.
    func testDetectMimeBytesPng() throws {
        _ = try Kreuzberg.detectMimeTypeFromBytes("images/test_hello_world.png")
    }

    /// get_extensions unknown MIME: the lookup is expected to throw.
    func testGetExtensionsUnknownMime() throws {
        XCTAssertThrowsError(
            try Kreuzberg.getExtensionsForMime(mimeType: "application/x-totally-unknown"),
            "expected to throw"
        )
    }
}

View File

@@ -0,0 +1,28 @@
// This file is auto-generated by alef DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// swift-format-ignore-file
import XCTest
import Foundation
#if canImport(FoundationNetworking)
import FoundationNetworking
#endif
import Kreuzberg
import RustBridge
/// End-to-end tests for the `document_extractor_management` fixture category.
///
/// Both cases only check that the call does not throw; the return value is
/// discarded explicitly rather than bound to an unused local.
final class DocumentExtractorManagementTests: XCTestCase {
    /// Clear all document extractors.
    ///
    /// NOTE(review): the fixture description also says "verify list is empty",
    /// but no such check is generated here.
    func testDocumentExtractorsClear() throws {
        _ = try Kreuzberg.clearDocumentExtractors()
    }

    /// List all registered document extractors.
    func testExtractorsList() throws {
        _ = try Kreuzberg.listDocumentExtractors()
    }
}

View File

@@ -0,0 +1,24 @@
// This file is auto-generated by alef DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// swift-format-ignore-file
import XCTest
import Foundation
#if canImport(FoundationNetworking)
import FoundationNetworking
#endif
import Kreuzberg
import RustBridge
/// End-to-end tests for the `embed_extra` fixture category.
final class EmbedExtraTests: XCTestCase {
    /// Batch embed texts.
    ///
    /// Only checks that the call does not throw; the embeddings are discarded
    /// explicitly rather than bound to an unused local.
    func testEmbedTextsBatch() throws {
        let embeddingConfig = try Kreuzberg.embeddingConfigFromJson(
            #"{"model":{"name":"balanced","type":"preset"}}"#
        )
        _ = try Kreuzberg.embedTexts(texts: ["Hello", "World"], config: embeddingConfig)
    }
}

View File

@@ -0,0 +1,28 @@
// This file is auto-generated by alef DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// swift-format-ignore-file
import XCTest
import Foundation
#if canImport(FoundationNetworking)
import FoundationNetworking
#endif
import Kreuzberg
import RustBridge
/// End-to-end tests for the `embedding_backend_management` fixture category.
///
/// Both cases only check that the call does not throw; the return value is
/// discarded explicitly rather than bound to an unused local.
final class EmbeddingBackendManagementTests: XCTestCase {
    /// Clear all embedding backends.
    ///
    /// NOTE(review): the fixture description also says "verify list is empty",
    /// but no such check is generated here.
    func testEmbeddingBackendsClear() throws {
        _ = try Kreuzberg.clearEmbeddingBackends()
    }

    /// List all registered embedding backends.
    func testEmbeddingBackendsList() throws {
        _ = try Kreuzberg.listEmbeddingBackends()
    }
}

View File

@@ -0,0 +1,47 @@
// This file is auto-generated by alef DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// swift-format-ignore-file
import XCTest
import Foundation
#if canImport(FoundationNetworking)
import FoundationNetworking
#endif
import Kreuzberg
import RustBridge
/// End-to-end tests for the `embeddings` fixture category.
///
/// Cases without assertions discard the call's result explicitly so no unused
/// local is left behind.
final class EmbeddingsTests: XCTestCase {
    /// embed_texts: multilingual preset.
    func testEmbedTextsDifferentPreset() throws {
        let embeddingConfig = try Kreuzberg.embeddingConfigFromJson(
            #"{"model":{"name":"multilingual","type":"preset"}}"#
        )
        _ = try Kreuzberg.embedTexts(texts: ["Hello world", "Test"], config: embeddingConfig)
        // skipped: field 'embeddings' not available on result type
    }

    /// get_embedding_preset: known preset.
    func testGetEmbeddingPresetKnown() throws {
        _ = try Kreuzberg.getEmbeddingPreset(name: "balanced")
    }

    /// get_embedding_preset: nominal case.
    ///
    /// NOTE(review): this body is identical to `testGetEmbeddingPresetKnown`.
    func testGetEmbeddingPresetNominal() throws {
        _ = try Kreuzberg.getEmbeddingPreset(name: "balanced")
    }

    /// get_embedding_preset: an unknown preset name yields `nil`.
    func testGetEmbeddingPresetUnknown() throws {
        let preset = try Kreuzberg.getEmbeddingPreset(name: "nonexistent-xyz")
        XCTAssertNil(preset, "expected nil value")
    }

    /// list_embedding_presets: returns at least one.
    func testListEmbeddingPresetsSanity() throws {
        let presets = try Kreuzberg.listEmbeddingPresets()
        XCTAssertFalse(presets.isEmpty, "expected non-empty value")
    }
}

View File

@@ -0,0 +1,77 @@
// This file is auto-generated by alef DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// swift-format-ignore-file
import XCTest
import Foundation
#if canImport(FoundationNetworking)
import FoundationNetworking
#endif
import Kreuzberg
import RustBridge
/// End-to-end tests for the `error` fixture category.
final class ErrorTests: XCTestCase {
    /// Changes the process working directory to the `test_documents` folder
    /// found five levels above this source file, when that folder exists.
    override class func setUp() {
        super.setUp()
        // Generator layout notes for the five levels climbed:
        // <Module>Tests/, Tests/, swift/, packages/, <repo root>.
        var fixturesURL = URL(fileURLWithPath: #filePath)
        for _ in 0..<5 {
            fixturesURL.deleteLastPathComponent()
        }
        fixturesURL.appendPathComponent("test_documents")
        let fixturesPath = fixturesURL.path
        // Best effort: leave the working directory untouched if the folder is absent.
        guard FileManager.default.fileExists(atPath: fixturesPath) else { return }
        FileManager.default.changeCurrentDirectoryPath(fixturesPath)
    }

    /// Graceful handling of empty bytes (should not error).
    ///
    /// The extraction result is not inspected, so it is discarded explicitly
    /// rather than bound to an unused local.
    func testErrorEmptyBytes() throws {
        _ = try Kreuzberg.extractBytesSync("text/empty.txt", "text/plain", "{}")
    }

    /// Error when extracting with empty MIME type.
    func testErrorEmptyMime() throws {
        XCTAssertThrowsError(
            try Kreuzberg.extractBytesSync("text/plain.txt", "", "{}"),
            "expected to throw"
        )
    }

    /// extract_bytes force+disable OCR.
    func testErrorExtractBytesConflictingOcr() throws {
        XCTAssertThrowsError(
            try Kreuzberg.extractBytesSync(
                "text/fake_text.txt",
                "text/plain",
                #"{"disable_ocr":true,"force_ocr":true}"#
            ),
            "expected to throw"
        )
    }

    /// Error when extracting with invalid MIME type format.
    func testErrorInvalidMimeFormat() throws {
        XCTAssertThrowsError(
            try Kreuzberg.extractBytesSync("text/plain.txt", "not-a-mime", "{}"),
            "expected to throw"
        )
    }

    /// Error when extracting with unsupported MIME type.
    func testErrorUnsupportedMime() throws {
        XCTAssertThrowsError(
            try Kreuzberg.extractBytesSync("text/plain.txt", "application/x-nonexistent", "{}"),
            "expected to throw"
        )
    }
}

View File

@@ -0,0 +1,62 @@
// This file is auto-generated by alef DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// swift-format-ignore-file
import XCTest
import Foundation
#if canImport(FoundationNetworking)
import FoundationNetworking
#endif
import Kreuzberg
import RustBridge
/// End-to-end tests for the `format_specific` fixture category.
final class FormatSpecificTests: XCTestCase {
    /// Changes the process working directory to the `test_documents` folder
    /// found five levels above this source file, when that folder exists.
    override class func setUp() {
        super.setUp()
        // Generator layout notes for the five levels climbed:
        // <Module>Tests/, Tests/, swift/, packages/, <repo root>.
        var fixturesURL = URL(fileURLWithPath: #filePath)
        for _ in 0..<5 {
            fixturesURL.deleteLastPathComponent()
        }
        fixturesURL.appendPathComponent("test_documents")
        let fixturesPath = fixturesURL.path
        // Best effort: leave the working directory untouched if the folder is absent.
        guard FileManager.default.fileExists(atPath: fixturesPath) else { return }
        FileManager.default.changeCurrentDirectoryPath(fixturesPath)
    }

    /// Standalone DOCX extraction using extract_bytes_sync.
    func testFormatDocxStandalone() throws {
        let extraction = try Kreuzberg.extractBytesSync(
            "docx/fake.docx",
            "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
            "{}"
        )
        XCTAssertGreaterThanOrEqual(extraction.content.count, 20)
    }

    /// Standalone HWPX extraction using extract_bytes_sync.
    func testFormatHwpxStandalone() throws {
        let extraction = try Kreuzberg.extractBytesSync("hwpx/simple.hwpx", "application/haansofthwpx", "{}")
        XCTAssertGreaterThanOrEqual(extraction.content.count, 20)
        // The expected text is a non-empty literal, so the generated
        // `"…".isEmpty ||` escape hatch could never be taken and is omitted.
        XCTAssertTrue(
            extraction.content.contains("Hello from HWPX"),
            "expected to contain: Hello from HWPX"
        )
    }

    /// Standalone PDF text extraction using extract_bytes_sync.
    func testFormatPdfText() throws {
        let extraction = try Kreuzberg.extractBytesSync("pdf/fake_memo.pdf", "application/pdf", "{}")
        XCTAssertGreaterThanOrEqual(extraction.content.count, 50)
        XCTAssertTrue(
            extraction.content.contains("Mallori")
                || extraction.content.contains("May"),
            "expected to contain at least one of the specified values"
        )
    }

    /// PPTX presentation extraction using extract_file_sync.
    ///
    /// Only checks that extraction does not throw; the result is discarded.
    func testFormatPptx() throws {
        _ = try Kreuzberg.extractFileSync(
            "pptx/simple.pptx",
            "application/vnd.openxmlformats-officedocument.presentationml.presentation",
            "{}"
        )
    }

    /// XLSX spreadsheet extraction using extract_file_sync.
    ///
    /// Only checks that extraction does not throw; the result is discarded.
    func testFormatXlsx() throws {
        _ = try Kreuzberg.extractFileSync(
            "xlsx/stanley_cups.xlsx",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            "{}"
        )
    }
}

View File

@@ -0,0 +1,50 @@
// This file is auto-generated by alef DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// swift-format-ignore-file
import XCTest
import Foundation
#if canImport(FoundationNetworking)
import FoundationNetworking
#endif
import Kreuzberg
import RustBridge
/// End-to-end tests for the `mime_utilities` fixture category.
final class MimeUtilitiesTests: XCTestCase {
    /// Changes the process working directory to the `test_documents` folder
    /// found five levels above this source file, when that folder exists.
    override class func setUp() {
        super.setUp()
        // Generator layout notes for the five levels climbed:
        // <Module>Tests/, Tests/, swift/, packages/, <repo root>.
        var fixturesURL = URL(fileURLWithPath: #filePath)
        for _ in 0..<5 {
            fixturesURL.deleteLastPathComponent()
        }
        fixturesURL.appendPathComponent("test_documents")
        let fixturesPath = fixturesURL.path
        // Best effort: leave the working directory untouched if the folder is absent.
        guard FileManager.default.fileExists(atPath: fixturesPath) else { return }
        FileManager.default.changeCurrentDirectoryPath(fixturesPath)
    }

    /// Detect MIME type from file bytes.
    func testMimeDetectBytes() throws {
        let detected = try Kreuzberg.detectMimeTypeFromBytes("pdf/fake_memo.pdf")
        XCTAssertTrue(detected.contains("pdf"), "expected to contain: pdf")
    }

    /// Detect MIME type from PNG image bytes.
    func testMimeDetectImage() throws {
        let detected = try Kreuzberg.detectMimeTypeFromBytes("images/test_hello_world.png")
        XCTAssertTrue(detected.contains("png"), "expected to contain: png")
    }

    /// Get file extensions for a MIME type.
    func testMimeGetExtensions() throws {
        let extensions = try Kreuzberg.getExtensionsForMime(mimeType: "application/pdf")
        XCTAssertTrue(extensions.contains("pdf"), "expected to contain: pdf")
    }
}

View File

@@ -0,0 +1,33 @@
// This file is auto-generated by alef DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// swift-format-ignore-file
import XCTest
import Foundation
#if canImport(FoundationNetworking)
import FoundationNetworking
#endif
import Kreuzberg
import RustBridge
/// End-to-end tests for the `ocr_backend_management` fixture category.
///
/// Every case only checks that the call does not throw; the return value is
/// discarded explicitly rather than bound to an unused local.
final class OcrBackendManagementTests: XCTestCase {
    /// Clear all OCR backends.
    ///
    /// NOTE(review): the fixture description also says "verify list is empty",
    /// but no such check is generated here.
    func testOcrBackendsClear() throws {
        _ = try Kreuzberg.clearOcrBackends()
    }

    /// List all registered OCR backends.
    func testOcrBackendsList() throws {
        _ = try Kreuzberg.listOcrBackends()
    }

    /// Unregister nonexistent OCR backend gracefully.
    func testOcrBackendsUnregister() throws {
        _ = try Kreuzberg.unregisterOcrBackend(name: "nonexistent-backend-xyz")
    }
}

View File

@@ -0,0 +1,52 @@
// This file is auto-generated by alef DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// swift-format-ignore-file
import XCTest
import Foundation
#if canImport(FoundationNetworking)
import FoundationNetworking
#endif
import Kreuzberg
import RustBridge
/// End-to-end tests for the `pdf` fixture category.
final class PdfTests: XCTestCase {
    /// Changes the process working directory to the `test_documents` folder
    /// found five levels above this source file, when that folder exists.
    override class func setUp() {
        super.setUp()
        // Generator layout notes for the five levels climbed:
        // <Module>Tests/, Tests/, swift/, packages/, <repo root>.
        var fixturesURL = URL(fileURLWithPath: #filePath)
        for _ in 0..<5 {
            fixturesURL.deleteLastPathComponent()
        }
        fixturesURL.appendPathComponent("test_documents")
        let fixturesPath = fixturesURL.path
        // Best effort: leave the working directory untouched if the folder is absent.
        guard FileManager.default.fileExists(atPath: fixturesPath) else { return }
        FileManager.default.changeCurrentDirectoryPath(fixturesPath)
    }

    /// Reads a fixture file, resolved against the current working directory,
    /// into a byte array.
    ///
    /// - Parameter relativePath: Path of the fixture file.
    /// - Returns: The file's contents as bytes.
    /// - Throws: Whatever `Data(contentsOf:)` throws when the file cannot be read.
    private static func fixtureBytes(at relativePath: String) throws -> [UInt8] {
        let fileData = try Data(contentsOf: URL(fileURLWithPath: relativePath))
        return Array(fileData)
    }

    /// render_pdf_page_to_png: first page.
    func testRenderPdfPageFirst() throws {
        let pdfBytes = try Self.fixtureBytes(at: "pdf/fake_memo.pdf")
        let png = try Kreuzberg.renderPdfPageToPng(pdfBytes: pdfBytes, pageIndex: 0, dpi: nil, password: nil)
        XCTAssertGreaterThanOrEqual(png.count, 100)
    }

    /// render_pdf_page_to_png: page out of range.
    func testRenderPdfPageOutOfRange() throws {
        // Load the fixture outside the throwing assertion. If the read were
        // inside it, a missing or unreadable fixture would satisfy the
        // "expected to throw" check and the test would pass without ever
        // calling the renderer.
        let pdfBytes = try Self.fixtureBytes(at: "pdf/fake_memo.pdf")
        XCTAssertThrowsError(
            try Kreuzberg.renderPdfPageToPng(pdfBytes: pdfBytes, pageIndex: 999, dpi: nil, password: nil),
            "expected to throw"
        )
    }
}

View File

@@ -0,0 +1,114 @@
// This file is auto-generated by alef DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// swift-format-ignore-file
import XCTest
import Foundation
#if canImport(FoundationNetworking)
import FoundationNetworking
#endif
import Kreuzberg
import RustBridge
/// E2e tests for category: plugin_api.
///
/// The register tests define a minimal stub conforming to the relevant bridge
/// protocol, register it, and then attempt a best-effort unregister. The
/// unregister tests only check that the call does not throw. Return values
/// are deliberately discarded.
///
/// NOTE(review): the cleanup calls in the register tests pass the name
/// unlabeled, while the unregister tests use the `name:` label. Confirm that
/// both spellings exist in the Kreuzberg API.
final class PluginApiTests: XCTestCase {
    /// Registering a stub document extractor must not throw.
    func testRegisterDocumentExtractorTraitBridge() throws {
        // register_document_extractor: trait bridge
        class TestStubRegisterDocumentExtractorTraitBridge: SwiftDocumentExtractorBridge {
            var name: String { "register_document_extractor_trait_bridge" }
            func extractBytes(content: Data, mimeType: String, config: ExtractionConfig) async throws -> InternalDocument { InternalDocument() }
            func supportedMimeTypes() -> [String] { [] }
        }
        _ = try Kreuzberg.registerDocumentExtractor(TestStubRegisterDocumentExtractorTraitBridge())
        // Best-effort cleanup; a failure here is intentionally ignored.
        // NOTE(review): the name used here differs from the stub's `name` —
        // presumably the bridge registers under a fixed name; confirm.
        try? Kreuzberg.unregisterDocumentExtractor("swift-bridge-document_extractor")
    }

    /// Registering a stub embedding backend must not throw.
    func testRegisterEmbeddingBackendTraitBridge() throws {
        // register_embedding_backend: trait bridge
        class TestStubRegisterEmbeddingBackendTraitBridge: SwiftEmbeddingBackendBridge {
            var name: String { "register_embedding_backend_trait_bridge" }
            func dimensions() -> UInt { 0 }
            func embed(texts: [String]) async throws -> [[Float]] { [] }
        }
        _ = try Kreuzberg.registerEmbeddingBackend(TestStubRegisterEmbeddingBackendTraitBridge())
        // Best-effort cleanup; a failure here is intentionally ignored.
        try? Kreuzberg.unregisterEmbeddingBackend("swift-bridge-embedding_backend")
    }

    /// Registering a stub OCR backend must not throw.
    func testRegisterOcrBackendTraitBridge() throws {
        // register_ocr_backend: trait bridge
        class TestStubRegisterOcrBackendTraitBridge: SwiftOcrBackendBridge {
            var name: String { "register_ocr_backend_trait_bridge" }
            func processImage(imageBytes: Data, config: OcrConfig) async throws -> ExtractionResult { ExtractionResult() }
            func supportsLanguage(lang: String) -> Bool { false }
            func backendType() -> OcrBackendType { OcrBackendType() }
        }
        _ = try Kreuzberg.registerOcrBackend(TestStubRegisterOcrBackendTraitBridge())
        // Best-effort cleanup; a failure here is intentionally ignored.
        try? Kreuzberg.unregisterOcrBackend("swift-bridge-ocr_backend")
    }

    /// Registering a stub post-processor must not throw.
    func testRegisterPostProcessorTraitBridge() throws {
        // register_post_processor: trait bridge
        class TestStubRegisterPostProcessorTraitBridge: SwiftPostProcessorBridge {
            var name: String { "register_post_processor_trait_bridge" }
            func process(result: ExtractionResult, config: ExtractionConfig) async throws -> Void { () }
            func processingStage() -> ProcessingStage { ProcessingStage() }
        }
        _ = try Kreuzberg.registerPostProcessor(TestStubRegisterPostProcessorTraitBridge())
        // Best-effort cleanup; a failure here is intentionally ignored.
        try? Kreuzberg.unregisterPostProcessor("swift-bridge-post_processor")
    }

    /// Registering a stub renderer must not throw.
    func testRegisterRendererTraitBridge() throws {
        // register_renderer: trait bridge
        class TestStubRegisterRendererTraitBridge: SwiftRendererBridge {
            var name: String { "register_renderer_trait_bridge" }
            func render(doc: InternalDocument) -> String { "" }
        }
        _ = try Kreuzberg.registerRenderer(TestStubRegisterRendererTraitBridge())
        // Best-effort cleanup; a failure here is intentionally ignored.
        try? Kreuzberg.unregisterRenderer("swift-bridge-renderer")
    }

    /// Registering a stub validator must not throw.
    func testRegisterValidatorTraitBridge() throws {
        // register_validator: trait bridge
        class TestStubRegisterValidatorTraitBridge: SwiftValidatorBridge {
            var name: String { "register_validator_trait_bridge" }
            func validate(result: ExtractionResult, config: ExtractionConfig) async throws -> Void { () }
        }
        _ = try Kreuzberg.registerValidator(TestStubRegisterValidatorTraitBridge())
        // Best-effort cleanup; a failure here is intentionally ignored.
        try? Kreuzberg.unregisterValidator("swift-bridge-validator")
    }

    // NOTE(review): despite the "AfterRegister" suffix, none of the tests below
    // registers anything first; each only checks that unregistering the given
    // name does not throw.

    /// Unregistering the document extractor "test-extractor" must not throw.
    func testUnregisterDocumentExtractorAfterRegister() throws {
        // unregister_document_extractor
        _ = try Kreuzberg.unregisterDocumentExtractor(name: "test-extractor")
    }

    /// Unregistering the embedding backend "test-embedding-backend" must not throw.
    func testUnregisterEmbeddingBackendAfterRegister() throws {
        // unregister_embedding_backend
        _ = try Kreuzberg.unregisterEmbeddingBackend(name: "test-embedding-backend")
    }

    /// Unregistering the post-processor "test-processor" must not throw.
    func testUnregisterPostProcessorAfterRegister() throws {
        // unregister_post_processor
        _ = try Kreuzberg.unregisterPostProcessor(name: "test-processor")
    }

    /// Unregistering the renderer "test-renderer" must not throw.
    func testUnregisterRendererAfterRegister() throws {
        // unregister_renderer
        _ = try Kreuzberg.unregisterRenderer(name: "test-renderer")
    }

    /// Unregistering the validator "test-validator" must not throw.
    func testUnregisterValidatorAfterRegister() throws {
        // unregister_validator
        _ = try Kreuzberg.unregisterValidator(name: "test-validator")
    }
}

View File

@@ -0,0 +1,28 @@
// This file is auto-generated by alef DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// swift-format-ignore-file
import XCTest
import Foundation
#if canImport(FoundationNetworking)
import FoundationNetworking
#endif
import Kreuzberg
import RustBridge
/// E2e tests for category: post_processor_management.
///
/// Both tests are smoke checks: they pass as long as the call does not throw.
/// Return values are deliberately discarded.
final class PostProcessorManagementTests: XCTestCase {
    /// Clearing all post-processors must not throw.
    func testPostProcessorsClear() throws {
        // Clear all post-processors.
        // NOTE(review): the fixture description also says "verify list is empty",
        // but no such assertion is generated; only the call itself is exercised.
        _ = try Kreuzberg.clearPostProcessors()
    }

    /// Listing the registered post-processors must not throw.
    func testPostProcessorsList() throws {
        // List all registered post-processors
        _ = try Kreuzberg.listPostProcessors()
    }
}

View File

@@ -0,0 +1,33 @@
// This file is auto-generated by alef DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// swift-format-ignore-file
import XCTest
import Foundation
#if canImport(FoundationNetworking)
import FoundationNetworking
#endif
import Kreuzberg
import RustBridge
/// E2e tests for category: registry_operations.
///
/// Each test checks only that the extension lookup for a MIME type does not
/// throw. The returned extensions are not inspected.
final class RegistryOperationsTests: XCTestCase {
    /// Looking up extensions for the DOCX MIME type must not throw.
    func testExtensionsDocx() throws {
        // Get file extensions for DOCX MIME type
        _ = try Kreuzberg.getExtensionsForMime(mimeType: "application/vnd.openxmlformats-officedocument.wordprocessingml.document")
    }

    /// Looking up extensions for the HTML MIME type must not throw.
    func testExtensionsHtml() throws {
        // Get file extensions for HTML MIME type
        _ = try Kreuzberg.getExtensionsForMime(mimeType: "text/html")
    }

    /// Looking up extensions for the PDF MIME type must not throw.
    func testExtensionsPdf() throws {
        // Get file extensions for PDF MIME type
        _ = try Kreuzberg.getExtensionsForMime(mimeType: "application/pdf")
    }
}

View File

@@ -0,0 +1,48 @@
// This file is auto-generated by alef DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// swift-format-ignore-file
import XCTest
import Foundation
#if canImport(FoundationNetworking)
import FoundationNetworking
#endif
import Kreuzberg
import RustBridge
/// E2e tests for category: registry.
///
/// Each test checks only that the corresponding list call does not throw.
/// The returned lists are not inspected.
final class RegistryTests: XCTestCase {
    /// Listing document extractors must not throw.
    func testListDocumentExtractors() throws {
        // List document extractors
        _ = try Kreuzberg.listDocumentExtractors()
    }

    /// Listing embedding backends must not throw.
    func testListEmbeddingBackends() throws {
        // List embedding backends
        _ = try Kreuzberg.listEmbeddingBackends()
    }

    /// Listing OCR backends must not throw.
    func testListOcrBackends() throws {
        // List OCR backends
        _ = try Kreuzberg.listOcrBackends()
    }

    /// Listing post-processors must not throw.
    func testListPostProcessors() throws {
        // List post-processors
        _ = try Kreuzberg.listPostProcessors()
    }

    /// Listing renderers must not throw.
    func testListRenderers() throws {
        // List renderers
        _ = try Kreuzberg.listRenderers()
    }

    /// Listing validators must not throw.
    func testListValidators() throws {
        // List validators
        _ = try Kreuzberg.listValidators()
    }
}

View File

@@ -0,0 +1,28 @@
// This file is auto-generated by alef DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// swift-format-ignore-file
import XCTest
import Foundation
#if canImport(FoundationNetworking)
import FoundationNetworking
#endif
import Kreuzberg
import RustBridge
/// E2e tests for category: renderer_management.
///
/// Both tests are smoke checks: they pass as long as the call does not throw.
/// Return values are deliberately discarded.
final class RendererManagementTests: XCTestCase {
    /// Clearing all renderers must not throw.
    func testRenderersClear() throws {
        // Clear all renderers.
        // NOTE(review): the fixture description also says "verify list is empty",
        // but no such assertion is generated; only the call itself is exercised.
        _ = try Kreuzberg.clearRenderers()
    }

    /// Listing the registered renderers must not throw.
    func testRenderersList() throws {
        // List all registered renderers
        _ = try Kreuzberg.listRenderers()
    }
}

View File

@@ -0,0 +1,103 @@
// This file is auto-generated by alef DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// swift-format-ignore-file
import XCTest
import Foundation
#if canImport(FoundationNetworking)
import FoundationNetworking
#endif
import Kreuzberg
import RustBridge
/// E2e tests for category: smoke.
///
/// One extraction per supported format, asserting on the reported MIME type,
/// a minimum content length, and (where specified) expected substrings.
final class SmokeTests: XCTestCase {
    /// Makes `test_documents` the working directory (when it exists) so the
    /// relative fixture paths below resolve.
    override class func setUp() {
        super.setUp()
        // Strip the file name plus four enclosing directories from this
        // source file's path, then descend into test_documents.
        var base = URL(fileURLWithPath: #filePath)
        for _ in 0..<5 {
            base = base.deletingLastPathComponent()
        }
        let documents = base.appendingPathComponent("test_documents")
        guard FileManager.default.fileExists(atPath: documents.path) else { return }
        FileManager.default.changeCurrentDirectoryPath(documents.path)
    }

    func testOcrImagePng() throws {
        // OCR: PNG image extraction with OCR enabled. In WASM this exercises the Uint8Array bridge parameter and Promise await in the generated OcrBackend bridge.
        let extraction = try Kreuzberg.extractBytes("images/test_hello_world.png", "image/png", "{}")
        let reportedMime = extraction.mimeType.trimmingCharacters(in: .whitespacesAndNewlines)
        XCTAssertEqual(reportedMime, "image/png")
        XCTAssertGreaterThanOrEqual(extraction.content.count, 1)
        let anyOf = ["Hello", "World", "hello", "world"]
        XCTAssertTrue(
            anyOf.contains(where: { extraction.content.contains($0) }),
            "expected to contain at least one of the specified values"
        )
    }

    func testSmokeDocxBasic() async throws {
        // Smoke test: DOCX with formatted text
        let extraction = try await Kreuzberg.extractFile("docx/fake.docx", "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "{}")
        let reportedMime = extraction.mimeType.trimmingCharacters(in: .whitespacesAndNewlines)
        XCTAssertEqual(reportedMime, "application/vnd.openxmlformats-officedocument.wordprocessingml.document")
        XCTAssertGreaterThanOrEqual(extraction.content.count, 20)
        let anyOf = ["Lorem", "ipsum", "document", "text"]
        XCTAssertTrue(
            anyOf.contains(where: { extraction.content.contains($0) }),
            "expected to contain at least one of the specified values"
        )
    }

    func testSmokeHtmlBasic() async throws {
        // Smoke test: HTML table extraction
        let extraction = try await Kreuzberg.extractFile("html/simple_table.html", "text/html", "{}")
        let reportedMime = extraction.mimeType.trimmingCharacters(in: .whitespacesAndNewlines)
        XCTAssertEqual(reportedMime, "text/html")
        XCTAssertGreaterThanOrEqual(extraction.content.count, 10)
        let anyOf = ["Sample Data Table", "Laptop", "Electronics", "Product"]
        XCTAssertTrue(
            anyOf.contains(where: { extraction.content.contains($0) }),
            "expected to contain at least one of the specified values"
        )
    }

    func testSmokeImagePng() async throws {
        // Smoke test: PNG image (without OCR, metadata only)
        let extraction = try await Kreuzberg.extractFile("images/sample.png", nil, "{\"disable_ocr\":true}")
        let reportedMime = extraction.mimeType.trimmingCharacters(in: .whitespacesAndNewlines)
        XCTAssertEqual(reportedMime, "image/png")
    }

    func testSmokeJsonBasic() async throws {
        // Smoke test: JSON file extraction
        let extraction = try await Kreuzberg.extractFile("json/simple.json", "application/json", "{}")
        let reportedMime = extraction.mimeType.trimmingCharacters(in: .whitespacesAndNewlines)
        XCTAssertEqual(reportedMime, "application/json")
        XCTAssertGreaterThanOrEqual(extraction.content.count, 5)
    }

    func testSmokePdfBasic() async throws {
        // Smoke test: PDF with simple text extraction
        let extraction = try await Kreuzberg.extractFile("pdf/fake_memo.pdf", "application/pdf", "{}")
        let reportedMime = extraction.mimeType.trimmingCharacters(in: .whitespacesAndNewlines)
        XCTAssertEqual(reportedMime, "application/pdf")
        XCTAssertGreaterThanOrEqual(extraction.content.count, 50)
        let anyOf = ["May 5, 2023", "To Whom it May Concern"]
        XCTAssertTrue(
            anyOf.contains(where: { extraction.content.contains($0) }),
            "expected to contain at least one of the specified values"
        )
    }

    func testSmokeTxtBasic() async throws {
        // Smoke test: Plain text file
        let extraction = try await Kreuzberg.extractFile("text/report.txt", "text/plain", "{}")
        let reportedMime = extraction.mimeType.trimmingCharacters(in: .whitespacesAndNewlines)
        XCTAssertEqual(reportedMime, "text/plain")
        XCTAssertGreaterThanOrEqual(extraction.content.count, 5)
    }

    func testSmokeXlsxBasic() async throws {
        // Smoke test: XLSX with basic spreadsheet data including tables
        let extraction = try await Kreuzberg.extractFile("xlsx/stanley_cups.xlsx", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "{}")
        let reportedMime = extraction.mimeType.trimmingCharacters(in: .whitespacesAndNewlines)
        XCTAssertEqual(reportedMime, "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")
        XCTAssertGreaterThanOrEqual(extraction.content.count, 100)
        // Every one of these substrings must be present in the extracted text.
        let required = ["Team", "Location", "Stanley Cups", "Blues", "Flyers", "Maple Leafs", "STL", "PHI", "TOR"]
        for needle in required {
            XCTAssertTrue(extraction.content.contains(needle), "expected to contain: \(needle)")
        }
        // skipped: field 'tables' not available on result type
        // skipped: field 'metadata.format.excel.sheet_count' not available on result type
        // skipped: field 'metadata.format.excel.sheet_names' not available on result type
    }
}

View File

@@ -0,0 +1,127 @@
// This file is auto-generated by alef DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// swift-format-ignore-file
import Foundation
#if canImport(FoundationNetworking)
// URLSession, URLRequest, HTTPURLResponse, and URLSessionTaskDelegate live in
// the FoundationNetworking submodule on swift-corelibs-foundation (Linux). On
// Apple platforms these types remain in plain Foundation and this submodule
// does not exist; the canImport guard skips the import there.
import FoundationNetworking
#endif
import RustBridge
// Give `RustString` a readable `description` so XCTest failure output shows
// the actual message. Otherwise every error thrown from the swift-bridge layer
// is reported as `caught error: "RustBridge.RustString"`, with the text hidden
// inside the opaque class instance.
//
// `@retroactive` is needed because neither the protocol
// (`CustomStringConvertible`) nor the conforming type (`RustString`) is
// declared in this module; Swift 6 requires the keyword to silence the
// retroactive-conformance warning. swift-bridge does not define a
// `description` on `RustString` itself, so the conformance does not conflict.
extension RustString: @retroactive CustomStringConvertible {
    public var description: String {
        toString()
    }
}
// Spawns the alef mock-server once per test process and exposes its base URL.
// SwiftPM/XCTest has no global "before all tests" hook that can inject environment
// variables (the JVM-style listener trick used by the Java/Kotlin backends), so the
// server is started lazily on first access of `baseURL` and kept alive for the
// lifetime of the process. A pre-set `MOCK_SERVER_URL` (e.g. exported by CI) wins.
enum AlefE2EMockServer {
    /// Base URL of the mock server. Evaluated once, on first access: either the
    /// non-empty `MOCK_SERVER_URL` environment variable, or the URL announced by
    /// a freshly spawned mock-server child process.
    static let baseURL: String = AlefE2EMockServer.start()

    // Retain the child process so it is not reaped while tests run.
    nonisolated(unsafe) private static var process: Process?

    /// Prefix of the stdout line on which the mock server announces its URL.
    private static let urlLinePrefix = "MOCK_SERVER_URL="

    /// Resolves the base URL, spawning the mock server when no preset exists.
    ///
    /// Aborts via `fatalError` when the `fixtures/` directory or the
    /// mock-server binary cannot be found, when the process fails to launch,
    /// or when the server never announces its URL.
    private static func start() -> String {
        if let preset = ProcessInfo.processInfo.environment["MOCK_SERVER_URL"], !preset.isEmpty {
            return preset
        }

        // Look for a `fixtures/` directory in the working directory or one of
        // its ancestors (at most 16 levels are examined).
        let fileManager = FileManager.default
        var dir = URL(fileURLWithPath: fileManager.currentDirectoryPath)
        var fixturesDir: URL?
        for _ in 0..<16 {
            let candidate = dir.appendingPathComponent("fixtures")
            var isDir: ObjCBool = false
            if fileManager.fileExists(atPath: candidate.path, isDirectory: &isDir), isDir.boolValue {
                fixturesDir = candidate
                break
            }
            let parent = dir.deletingLastPathComponent()
            // Reached the filesystem root: nothing further to search.
            if parent.path == dir.path { break }
            dir = parent
        }
        guard let fixtures = fixturesDir else {
            fatalError("AlefE2EMockServer: could not locate fixtures/ above \(fileManager.currentDirectoryPath)")
        }

        // The directory containing fixtures/ is treated as the repository root.
        let repoRoot = fixtures.deletingLastPathComponent()
        let binary = repoRoot.appendingPathComponent("e2e/rust/target/release/mock-server")
        guard fileManager.fileExists(atPath: binary.path) else {
            fatalError("AlefE2EMockServer: mock-server binary not found at \(binary.path) — run: cargo build --manifest-path e2e/rust/Cargo.toml --bin mock-server --release")
        }

        let proc = Process()
        proc.executableURL = binary
        proc.arguments = [fixtures.path]
        let stdoutPipe = Pipe()
        proc.standardOutput = stdoutPipe
        // Keep stdin open so the server does not see EOF and exit immediately.
        proc.standardInput = Pipe()
        do {
            try proc.run()
        } catch {
            fatalError("AlefE2EMockServer: failed to start mock-server: \(error)")
        }
        process = proc

        // Read the server's stdout until it announces its URL. The number of
        // reads is bounded; an empty read means the server closed stdout.
        let handle = stdoutPipe.fileHandleForReading
        var buffer = Data()
        var resolved: String?
        for _ in 0..<500 {
            let chunk = handle.availableData
            let reachedEOF = chunk.isEmpty
            buffer.append(chunk)
            resolved = announcedURL(in: buffer, atEOF: reachedEOF)
            if resolved != nil || reachedEOF { break }
        }
        guard let url = resolved else {
            proc.terminate()
            fatalError("AlefE2EMockServer: mock-server did not emit MOCK_SERVER_URL")
        }

        // Drain remaining stdout in the background so a full pipe never blocks the server.
        DispatchQueue.global(qos: .background).async {
            while !handle.availableData.isEmpty {}
        }
        return url
    }

    /// Returns the URL from the first `MOCK_SERVER_URL=` line in `output`, or
    /// `nil` when no such line is available yet.
    ///
    /// Until `atEOF` is true, only newline-terminated lines are considered, so
    /// a read that ends in the middle of the announcement line cannot produce a
    /// truncated URL. At EOF the unterminated tail is considered as well,
    /// because no more data will arrive.
    /// NOTE(review): this assumes the server terminates the announcement line
    /// with a newline — confirm against the mock-server implementation.
    private static func announcedURL(in output: Data, atEOF: Bool) -> String? {
        let usable: Data
        if atEOF {
            usable = output
        } else if let lastNewline = output.lastIndex(of: UInt8(ascii: "\n")) {
            usable = output[..<lastNewline]
        } else {
            return nil
        }
        // Decoding can fail when the data ends inside a multi-byte character;
        // the caller simply retries after the next read.
        guard let text = String(data: usable, encoding: .utf8) else { return nil }
        guard let line = text.split(separator: "\n").first(where: { $0.hasPrefix(urlLinePrefix) }) else {
            return nil
        }
        return String(line.dropFirst(urlLinePrefix.count)).trimmingCharacters(in: .whitespacesAndNewlines)
    }
}
// Task delegate that refuses every HTTP redirect. A session using it hands the
// 3xx response back to the caller, so tests can assert on the status code and
// Location header instead of transparently chasing them to the final response.
final class AlefE2ENoRedirectDelegate: NSObject, URLSessionTaskDelegate {
    /// Answers every redirect proposal with `nil`, i.e. no follow-up request.
    func urlSession(
        _ session: URLSession,
        task: URLSessionTask,
        willPerformHTTPRedirection redirectResponse: HTTPURLResponse,
        newRequest proposedRequest: URLRequest,
        completionHandler: @escaping (URLRequest?) -> Void
    ) {
        let followUp: URLRequest? = nil
        completionHandler(followUp)
    }
}
/// Shared ephemeral session for the e2e tests, with `AlefE2ENoRedirectDelegate`
/// installed as its delegate and no explicit delegate queue.
let alefE2ESession: URLSession = {
    let redirectBlocker = AlefE2ENoRedirectDelegate()
    return URLSession(configuration: .ephemeral, delegate: redirectBlocker, delegateQueue: nil)
}()

View File

@@ -0,0 +1,28 @@
// This file is auto-generated by alef DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// swift-format-ignore-file
import XCTest
import Foundation
#if canImport(FoundationNetworking)
import FoundationNetworking
#endif
import Kreuzberg
import RustBridge
/// E2e tests for category: validator_management.
///
/// Both tests are smoke checks: they pass as long as the call does not throw.
/// Return values are deliberately discarded.
final class ValidatorManagementTests: XCTestCase {
    /// Clearing all validators must not throw.
    func testValidatorsClear() throws {
        // Clear all validators.
        // NOTE(review): the fixture description also says "verify list is empty",
        // but no such assertion is generated; only the call itself is exercised.
        _ = try Kreuzberg.clearValidators()
    }

    /// Listing the registered validators must not throw.
    func testValidatorsList() throws {
        // List all registered validators
        _ = try Kreuzberg.listValidators()
    }
}