// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// swift-format-ignore-file

import XCTest
import Foundation
#if canImport(FoundationNetworking)
import FoundationNetworking
#endif
import Kreuzberg
import RustBridge

/// E2e tests for category: contract.
///
/// Every test resolves its fixture through a relative path (for example
/// `pdf/fake_memo.pdf`), so `setUp()` switches the process working directory
/// to the `test_documents` folder before any test runs.
final class ContractTests: XCTestCase {
    // MARK: - Fixture location

    /// Points the process working directory at the shared `test_documents` folder.
    ///
    /// The folder is located by stripping five trailing path components from this
    /// source file's path and appending `test_documents`. When that folder does not
    /// exist the working directory is left untouched.
    override class func setUp() {
        super.setUp()
        // NOTE(review): the five-level walk presumably ends at the repository root
        // (file -> Tests/<target>/ -> Tests/ -> swift/ -> packages/ -> root) — confirm
        // against the actual package layout.
        let testDocuments = URL(fileURLWithPath: #filePath)
            .deletingLastPathComponent()
            .deletingLastPathComponent()
            .deletingLastPathComponent()
            .deletingLastPathComponent()
            .deletingLastPathComponent()
            .appendingPathComponent("test_documents")
        if FileManager.default.fileExists(atPath: testDocuments.path) {
            FileManager.default.changeCurrentDirectoryPath(testDocuments.path)
        }
    }

    // MARK: - Async API

    /// Tests async batch bytes extraction API (batch_extract_bytes).
    ///
    /// NOTE(review): the fixture describes the batch bytes API, but the call made here
    /// is `extractFile` — confirm this substitution is intended.
    func testApiBatchBytesAsync() async throws {
        // `extractFile` is awaited, matching `testApiExtractFileAsync`.
        let result = try await Kreuzberg.extractFile("pdf/fake_memo.pdf", nil, "{}")
        XCTAssertEqual(result.mimeType.trimmingCharacters(in: .whitespacesAndNewlines), "application/pdf")
        XCTAssertGreaterThanOrEqual(result.content.count, 10)
        XCTAssertTrue(
            result.content.contains("May 5, 2023") || result.content.contains("Mallori"),
            "expected to contain at least one of the specified values"
        )
    }

    /// Tests async batch bytes extraction with per-file configs
    /// (batch_extract_bytes with file_configs parameter).
    ///
    /// NOTE(review): the call made here is `extractFile` with a single config string,
    /// not a batch call — confirm this substitution is intended.
    func testApiBatchBytesWithConfigsAsync() async throws {
        let result = try await Kreuzberg.extractFile("pdf/fake_memo.pdf", nil, "{\"output_format\":\"markdown\"}")
        XCTAssertEqual(result.mimeType.trimmingCharacters(in: .whitespacesAndNewlines), "application/pdf")
        XCTAssertGreaterThanOrEqual(result.content.count, 10)
        // skipped: field 'metadata.output_format' not available on result type
    }

    /// Tests async batch file extraction API (batch_extract_file).
    ///
    /// NOTE(review): the call made here is `extractFile`, not a batch call — confirm.
    func testApiBatchFileAsync() async throws {
        let result = try await Kreuzberg.extractFile("pdf/fake_memo.pdf", nil, "{}")
        XCTAssertEqual(result.mimeType.trimmingCharacters(in: .whitespacesAndNewlines), "application/pdf")
        XCTAssertGreaterThanOrEqual(result.content.count, 10)
        XCTAssertTrue(
            result.content.contains("May 5, 2023") || result.content.contains("Mallori"),
            "expected to contain at least one of the specified values"
        )
    }

    /// Tests async batch file extraction with per-file configs
    /// (batch_extract_files with file_configs parameter).
    ///
    /// NOTE(review): the call made here is `extractFile`, not a batch call — confirm.
    func testApiBatchFileWithConfigsAsync() async throws {
        let result = try await Kreuzberg.extractFile("pdf/fake_memo.pdf", nil, "{\"output_format\":\"markdown\"}")
        XCTAssertEqual(result.mimeType.trimmingCharacters(in: .whitespacesAndNewlines), "application/pdf")
        XCTAssertGreaterThanOrEqual(result.content.count, 10)
        // skipped: field 'metadata.output_format' not available on result type
    }

    /// Tests async bytes extraction API (extract_bytes).
    ///
    /// NOTE(review): the call made here is `extractFile` with a path, not a bytes
    /// call — confirm this substitution is intended.
    func testApiExtractBytesAsync() async throws {
        let result = try await Kreuzberg.extractFile("pdf/fake_memo.pdf", nil, "{}")
        XCTAssertEqual(result.mimeType.trimmingCharacters(in: .whitespacesAndNewlines), "application/pdf")
        XCTAssertGreaterThanOrEqual(result.content.count, 10)
        XCTAssertTrue(
            result.content.contains("May 5, 2023") || result.content.contains("Mallori"),
            "expected to contain at least one of the specified values"
        )
    }

    /// Tests async file extraction API (extract_file).
    func testApiExtractFileAsync() async throws {
        let result = try await Kreuzberg.extractFile("pdf/fake_memo.pdf", nil, "{}")
        XCTAssertEqual(result.mimeType.trimmingCharacters(in: .whitespacesAndNewlines), "application/pdf")
        XCTAssertGreaterThanOrEqual(result.content.count, 10)
        XCTAssertTrue(
            result.content.contains("May 5, 2023") || result.content.contains("Mallori"),
            "expected to contain at least one of the specified values"
        )
    }

    // MARK: - Configuration

    /// Tests markdown chunker prepends heading hierarchy to chunk content.
    ///
    /// Only the content length is asserted; the chunk-level assertions are skipped.
    func testConfigChunkingPrependHeadingContext() throws {
        let result = try Kreuzberg.extractFileSync(
            "markdown/extraction_test.md",
            nil,
            "{\"chunking\":{\"chunker_type\":\"markdown\",\"max_chars\":300,\"max_overlap\":50,\"prepend_heading_context\":true}}"
        )
        XCTAssertGreaterThanOrEqual(result.content.count, 10)
        // skipped: field 'chunks' not available on result type
        // skipped: field 'chunks_have_content' not available on result type
        // skipped: field 'chunks_have_heading_context' not available on result type
        // skipped: field 'first_chunk_starts_with_heading' not available on result type
    }

    /// Tests document structure with DOCX heading-driven nesting.
    ///
    /// Only the MIME type is asserted; the document-structure assertions are skipped.
    func testConfigDocumentStructureWithHeadings() throws {
        let result = try Kreuzberg.extractFileSync("docx/fake.docx", nil, "{\"include_document_structure\":true}")
        XCTAssertEqual(
            result.mimeType.trimmingCharacters(in: .whitespacesAndNewlines),
            "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
        )
        // skipped: field 'document' not available on result type
        // skipped: field 'document.nodes' not available on result type
    }

    /// Tests element-based result format with element type assertions on DOCX.
    ///
    /// Only the MIME type is asserted; the element assertions are skipped.
    func testConfigElementTypes() throws {
        let result = try Kreuzberg.extractFileSync("docx/unit_test_headers.docx", nil, "{\"result_format\":\"element_based\"}")
        XCTAssertTrue(
            result.mimeType.contains("application/vnd.openxmlformats-officedocument.wordprocessingml.document"),
            "expected to contain at least one of the specified values"
        )
        // skipped: field 'elements' not available on result type
    }

    /// Tests that extraction_timeout_secs config field is accepted and does not
    /// affect fast extractions.
    func testConfigExtractionTimeout() throws {
        let result = try Kreuzberg.extractFileSync("pdf/fake_memo.pdf", nil, "{\"extraction_timeout_secs\":300}")
        XCTAssertEqual(result.mimeType.trimmingCharacters(in: .whitespacesAndNewlines), "application/pdf")
        XCTAssertGreaterThanOrEqual(result.content.count, 10)
    }

    /// Tests keyword extraction via YAKE algorithm.
    ///
    /// Only MIME type and content length are asserted; keyword assertions are skipped.
    func testConfigKeywords() throws {
        let result = try Kreuzberg.extractFileSync(
            "pdf/fake_memo.pdf",
            nil,
            "{\"keywords\":{\"algorithm\":\"yake\",\"max_keywords\":10}}"
        )
        XCTAssertEqual(result.mimeType.trimmingCharacters(in: .whitespacesAndNewlines), "application/pdf")
        XCTAssertGreaterThanOrEqual(result.content.count, 10)
        // skipped: field 'keywords' not available on result type
        // skipped: field 'keywords' not available on result type
    }

    /// Tests page extraction and page marker configuration.
    func testConfigPages() throws {
        let result = try Kreuzberg.extractFileSync(
            "pdf/fake_memo.pdf",
            nil,
            "{\"pages\":{\"extract_pages\":true,\"insert_page_markers\":true}}"
        )
        XCTAssertEqual(result.mimeType.trimmingCharacters(in: .whitespacesAndNewlines), "application/pdf")
        XCTAssertGreaterThanOrEqual(result.content.count, 10)
        XCTAssertTrue(result.content.contains("PAGE"), "expected to contain at least one of the specified values")
    }

    /// Tests quality scoring produces a score value in [0.0, 1.0].
    ///
    /// Only MIME type and content length are asserted; score assertions are skipped.
    func testConfigQualityEnabled() throws {
        let result = try Kreuzberg.extractFileSync("pdf/fake_memo.pdf", nil, "{\"enable_quality_processing\":true}")
        XCTAssertEqual(result.mimeType.trimmingCharacters(in: .whitespacesAndNewlines), "application/pdf")
        XCTAssertGreaterThanOrEqual(result.content.count, 10)
        // skipped: field 'quality_score' not available on result type
        // skipped: field 'quality_score' not available on result type
        // skipped: field 'quality_score' not available on result type
    }

    /// Tests archive extraction with custom security limits.
    func testConfigSecurityLimits() throws {
        let result = try Kreuzberg.extractFileSync(
            "archives/documents.zip",
            nil,
            "{\"security_limits\":{\"max_archive_size\":104857600,\"max_compression_ratio\":50,\"max_files_in_archive\":100}}"
        )
        XCTAssertTrue(
            result.mimeType.contains("application/zip") || result.mimeType.contains("application/x-zip-compressed"),
            "expected to contain at least one of the specified values"
        )
        XCTAssertGreaterThanOrEqual(result.content.count, 10)
    }

    /// Tests tree-sitter configuration round-trip.
    func testConfigTreeSitter() throws {
        let result = try Kreuzberg.extractFileSync(
            "code/hello.py",
            nil,
            "{\"tree_sitter\":{\"groups\":[\"web\"],\"languages\":[\"python\",\"rust\"],\"process\":{\"comments\":false,\"diagnostics\":false,\"docstrings\":false,\"exports\":true,\"imports\":true,\"structure\":true,\"symbols\":false}}}"
        )
        XCTAssertEqual(result.mimeType.trimmingCharacters(in: .whitespacesAndNewlines), "text/x-source-code")
        XCTAssertGreaterThanOrEqual(result.content.count, 5)
    }

    // MARK: - Output format

    /// Tests markdown output format via bytes extraction API.
    func testOutputFormatBytesMarkdown() throws {
        // NOTE(review): the first argument is a path string rather than the file's
        // bytes; whether `extractBytesSync` accepts this is not visible here — confirm
        // against the binding's signature.
        let result = try Kreuzberg.extractBytesSync("pdf/fake_memo.pdf", "application/pdf", "{\"output_format\":\"markdown\"}")
        XCTAssertEqual(result.mimeType.trimmingCharacters(in: .whitespacesAndNewlines), "application/pdf")
        XCTAssertGreaterThanOrEqual(result.content.count, 10)
        // skipped: field 'metadata.output_format' not available on result type
    }

    /// Tests Markdown output format.
    func testOutputFormatMarkdown() throws {
        let result = try Kreuzberg.extractFileSync("pdf/fake_memo.pdf", nil, "{\"output_format\":\"markdown\"}")
        XCTAssertEqual(result.mimeType.trimmingCharacters(in: .whitespacesAndNewlines), "application/pdf")
        XCTAssertGreaterThanOrEqual(result.content.count, 10)
        // skipped: field 'metadata.output_format' not available on result type
    }
}