170 lines
10 KiB
Swift
Generated
170 lines
10 KiB
Swift
Generated
// This file is auto-generated by alef — DO NOT EDIT.
|
|
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
|
// To regenerate: alef generate
|
|
// To verify freshness: alef verify --exit-code
|
|
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
|
// swift-format-ignore-file
|
|
|
|
import XCTest
|
|
import Foundation
|
|
#if canImport(FoundationNetworking)
|
|
import FoundationNetworking
|
|
#endif
|
|
import Kreuzberg
|
|
import RustBridge
|
|
|
|
/// E2e tests for category: contract.
///
/// Each test runs one extraction through the `Kreuzberg` module against a fixture
/// addressed by a relative path, then asserts on the `mimeType` and/or `content`
/// fields of the returned result. Because the fixture paths are relative,
/// `setUp()` first tries to make the `test_documents` directory the working directory.
///
/// Lines marked `// skipped:` record fixture assertions that were not emitted
/// because the named field is not available on the Swift result type.
///
/// NOTE(review): this file is generated (see the header). Any change made here
/// should also be applied in the generator/fixtures, otherwise regeneration
/// will presumably overwrite it.
final class ContractTests: XCTestCase {

    // MARK: - Suite setup

    /// Class-level setup: switches the process working directory to `test_documents`
    /// when that directory exists, so the relative fixture paths used below resolve.
    ///
    /// The directory is located by walking five levels up from this source file.
    /// When it is missing the working directory is left untouched (best effort).
    override class func setUp() {
        super.setUp()

        let testDocumentsURL = URL(fileURLWithPath: #filePath)
            .deletingLastPathComponent() // <Module>Tests/
            .deletingLastPathComponent() // Tests/
            .deletingLastPathComponent() // swift/
            .deletingLastPathComponent() // packages/
            .deletingLastPathComponent() // <repo root>
            .appendingPathComponent("test_documents")

        let fileManager = FileManager.default
        guard fileManager.fileExists(atPath: testDocumentsURL.path) else { return }
        fileManager.changeCurrentDirectoryPath(testDocumentsURL.path)
    }

    // MARK: - API tests (async)
    //
    // All tests in this section await `Kreuzberg.extractFile`, consistent with
    // `testApiExtractFileAsync`.
    // NOTE(review): the fixture descriptions mention batch/bytes APIs, but the
    // generated bodies all call `extractFile` — confirm this is the intended mapping.

    /// Fixture: async batch bytes extraction API (`batch_extract_bytes`).
    ///
    /// Extracts `pdf/fake_memo.pdf` with an empty config and checks the MIME type,
    /// a minimum `content.count`, and that one of two expected fragments is present.
    func testApiBatchBytesAsync() async throws {
        let result = try await Kreuzberg.extractFile("pdf/fake_memo.pdf", nil, "{}")

        XCTAssertEqual(result.mimeType.trimmingCharacters(in: .whitespacesAndNewlines), "application/pdf")
        XCTAssertGreaterThanOrEqual(result.content.count, 10)
        XCTAssertTrue(
            result.content.contains("May 5, 2023") || result.content.contains("Mallori"),
            "expected to contain at least one of the specified values"
        )
    }

    /// Fixture: async batch bytes extraction with per-file configs
    /// (`batch_extract_bytes` with `file_configs` parameter).
    ///
    /// Extracts `pdf/fake_memo.pdf` requesting markdown output and checks the MIME type
    /// and a minimum `content.count`.
    func testApiBatchBytesWithConfigsAsync() async throws {
        let result = try await Kreuzberg.extractFile("pdf/fake_memo.pdf", nil, "{\"output_format\":\"markdown\"}")

        XCTAssertEqual(result.mimeType.trimmingCharacters(in: .whitespacesAndNewlines), "application/pdf")
        XCTAssertGreaterThanOrEqual(result.content.count, 10)
        // skipped: field 'metadata.output_format' not available on result type
    }

    /// Fixture: async batch file extraction API (`batch_extract_file`).
    ///
    /// Extracts `pdf/fake_memo.pdf` with an empty config and checks the MIME type,
    /// a minimum `content.count`, and that one of two expected fragments is present.
    func testApiBatchFileAsync() async throws {
        let result = try await Kreuzberg.extractFile("pdf/fake_memo.pdf", nil, "{}")

        XCTAssertEqual(result.mimeType.trimmingCharacters(in: .whitespacesAndNewlines), "application/pdf")
        XCTAssertGreaterThanOrEqual(result.content.count, 10)
        XCTAssertTrue(
            result.content.contains("May 5, 2023") || result.content.contains("Mallori"),
            "expected to contain at least one of the specified values"
        )
    }

    /// Fixture: async batch file extraction with per-file configs
    /// (`batch_extract_files` with `file_configs` parameter).
    ///
    /// Extracts `pdf/fake_memo.pdf` requesting markdown output and checks the MIME type
    /// and a minimum `content.count`.
    func testApiBatchFileWithConfigsAsync() async throws {
        let result = try await Kreuzberg.extractFile("pdf/fake_memo.pdf", nil, "{\"output_format\":\"markdown\"}")

        XCTAssertEqual(result.mimeType.trimmingCharacters(in: .whitespacesAndNewlines), "application/pdf")
        XCTAssertGreaterThanOrEqual(result.content.count, 10)
        // skipped: field 'metadata.output_format' not available on result type
    }

    /// Fixture: async bytes extraction API (`extract_bytes`).
    ///
    /// Extracts `pdf/fake_memo.pdf` with an empty config and checks the MIME type,
    /// a minimum `content.count`, and that one of two expected fragments is present.
    func testApiExtractBytesAsync() async throws {
        let result = try await Kreuzberg.extractFile("pdf/fake_memo.pdf", nil, "{}")

        XCTAssertEqual(result.mimeType.trimmingCharacters(in: .whitespacesAndNewlines), "application/pdf")
        XCTAssertGreaterThanOrEqual(result.content.count, 10)
        XCTAssertTrue(
            result.content.contains("May 5, 2023") || result.content.contains("Mallori"),
            "expected to contain at least one of the specified values"
        )
    }

    /// Fixture: async file extraction API (`extract_file`).
    ///
    /// Extracts `pdf/fake_memo.pdf` with an empty config and checks the MIME type,
    /// a minimum `content.count`, and that one of two expected fragments is present.
    func testApiExtractFileAsync() async throws {
        let result = try await Kreuzberg.extractFile("pdf/fake_memo.pdf", nil, "{}")

        XCTAssertEqual(result.mimeType.trimmingCharacters(in: .whitespacesAndNewlines), "application/pdf")
        XCTAssertGreaterThanOrEqual(result.content.count, 10)
        XCTAssertTrue(
            result.content.contains("May 5, 2023") || result.content.contains("Mallori"),
            "expected to contain at least one of the specified values"
        )
    }

    // MARK: - Config tests (sync)

    /// Fixture: markdown chunker prepends heading hierarchy to chunk content.
    ///
    /// Extracts `markdown/extraction_test.md` with a markdown chunking config; only the
    /// minimum `content.count` is asserted because the chunk fields are skipped.
    func testConfigChunkingPrependHeadingContext() throws {
        let result = try Kreuzberg.extractFileSync("markdown/extraction_test.md", nil, "{\"chunking\":{\"chunker_type\":\"markdown\",\"max_chars\":300,\"max_overlap\":50,\"prepend_heading_context\":true}}")

        XCTAssertGreaterThanOrEqual(result.content.count, 10)
        // skipped: field 'chunks' not available on result type
        // skipped: field 'chunks_have_content' not available on result type
        // skipped: field 'chunks_have_heading_context' not available on result type
        // skipped: field 'first_chunk_starts_with_heading' not available on result type
    }

    /// Fixture: document structure with DOCX heading-driven nesting.
    ///
    /// Extracts `docx/fake.docx` with `include_document_structure` enabled; only the
    /// MIME type is asserted because the document fields are skipped.
    func testConfigDocumentStructureWithHeadings() throws {
        let result = try Kreuzberg.extractFileSync("docx/fake.docx", nil, "{\"include_document_structure\":true}")

        XCTAssertEqual(result.mimeType.trimmingCharacters(in: .whitespacesAndNewlines), "application/vnd.openxmlformats-officedocument.wordprocessingml.document")
        // skipped: field 'document' not available on result type
        // skipped: field 'document.nodes' not available on result type
    }

    /// Fixture: element-based result format with element type assertions on DOCX.
    ///
    /// Extracts `docx/unit_test_headers.docx` with `result_format` set to `element_based`
    /// and checks that the MIME type contains the DOCX media type.
    func testConfigElementTypes() throws {
        let result = try Kreuzberg.extractFileSync("docx/unit_test_headers.docx", nil, "{\"result_format\":\"element_based\"}")

        XCTAssertTrue(
            result.mimeType.contains("application/vnd.openxmlformats-officedocument.wordprocessingml.document"),
            "expected to contain at least one of the specified values"
        )
        // skipped: field 'elements' not available on result type
    }

    /// Fixture: the `extraction_timeout_secs` config field is accepted and does not
    /// affect fast extractions.
    ///
    /// Extracts `pdf/fake_memo.pdf` with a 300-second timeout value and checks the
    /// MIME type and a minimum `content.count`.
    func testConfigExtractionTimeout() throws {
        let result = try Kreuzberg.extractFileSync("pdf/fake_memo.pdf", nil, "{\"extraction_timeout_secs\":300}")

        XCTAssertEqual(result.mimeType.trimmingCharacters(in: .whitespacesAndNewlines), "application/pdf")
        XCTAssertGreaterThanOrEqual(result.content.count, 10)
    }

    /// Fixture: keyword extraction via YAKE algorithm.
    ///
    /// Extracts `pdf/fake_memo.pdf` with a `keywords` config and checks the MIME type
    /// and a minimum `content.count`; the keyword assertions are skipped.
    func testConfigKeywords() throws {
        let result = try Kreuzberg.extractFileSync("pdf/fake_memo.pdf", nil, "{\"keywords\":{\"algorithm\":\"yake\",\"max_keywords\":10}}")

        XCTAssertEqual(result.mimeType.trimmingCharacters(in: .whitespacesAndNewlines), "application/pdf")
        XCTAssertGreaterThanOrEqual(result.content.count, 10)
        // skipped (2 assertions): field 'keywords' not available on result type
    }

    /// Fixture: page extraction and page marker configuration.
    ///
    /// Extracts `pdf/fake_memo.pdf` with page extraction and page markers enabled, then
    /// checks the MIME type, a minimum `content.count`, and that the content contains "PAGE".
    func testConfigPages() throws {
        let result = try Kreuzberg.extractFileSync("pdf/fake_memo.pdf", nil, "{\"pages\":{\"extract_pages\":true,\"insert_page_markers\":true}}")

        XCTAssertEqual(result.mimeType.trimmingCharacters(in: .whitespacesAndNewlines), "application/pdf")
        XCTAssertGreaterThanOrEqual(result.content.count, 10)
        XCTAssertTrue(result.content.contains("PAGE"), "expected to contain at least one of the specified values")
    }

    /// Fixture: quality scoring produces a score value in [0.0, 1.0].
    ///
    /// Extracts `pdf/fake_memo.pdf` with `enable_quality_processing` on and checks the
    /// MIME type and a minimum `content.count`; the score assertions are skipped.
    func testConfigQualityEnabled() throws {
        let result = try Kreuzberg.extractFileSync("pdf/fake_memo.pdf", nil, "{\"enable_quality_processing\":true}")

        XCTAssertEqual(result.mimeType.trimmingCharacters(in: .whitespacesAndNewlines), "application/pdf")
        XCTAssertGreaterThanOrEqual(result.content.count, 10)
        // skipped (3 assertions): field 'quality_score' not available on result type
    }

    /// Fixture: archive extraction with custom security limits.
    ///
    /// Extracts `archives/documents.zip` with a `security_limits` config and checks that
    /// the MIME type contains one of two ZIP media types, plus a minimum `content.count`.
    func testConfigSecurityLimits() throws {
        let result = try Kreuzberg.extractFileSync("archives/documents.zip", nil, "{\"security_limits\":{\"max_archive_size\":104857600,\"max_compression_ratio\":50,\"max_files_in_archive\":100}}")

        XCTAssertTrue(
            result.mimeType.contains("application/zip") || result.mimeType.contains("application/x-zip-compressed"),
            "expected to contain at least one of the specified values"
        )
        XCTAssertGreaterThanOrEqual(result.content.count, 10)
    }

    /// Fixture: tree-sitter configuration round-trip.
    ///
    /// Extracts `code/hello.py` with a `tree_sitter` config and checks the MIME type
    /// and that `content.count` is at least 5.
    func testConfigTreeSitter() throws {
        let result = try Kreuzberg.extractFileSync("code/hello.py", nil, "{\"tree_sitter\":{\"groups\":[\"web\"],\"languages\":[\"python\",\"rust\"],\"process\":{\"comments\":false,\"diagnostics\":false,\"docstrings\":false,\"exports\":true,\"imports\":true,\"structure\":true,\"symbols\":false}}}")

        XCTAssertEqual(result.mimeType.trimmingCharacters(in: .whitespacesAndNewlines), "text/x-source-code")
        XCTAssertGreaterThanOrEqual(result.content.count, 5)
    }

    // MARK: - Output format tests (sync)

    /// Fixture: markdown output format via bytes extraction API.
    ///
    /// Calls `extractBytesSync` with an explicit `application/pdf` MIME type and a
    /// markdown output config, then checks the MIME type and a minimum `content.count`.
    func testOutputFormatBytesMarkdown() throws {
        // NOTE(review): the first argument is a fixture path string, not file contents —
        // confirm the binding resolves it to the file's bytes rather than extracting the literal text.
        let result = try Kreuzberg.extractBytesSync("pdf/fake_memo.pdf", "application/pdf", "{\"output_format\":\"markdown\"}")

        XCTAssertEqual(result.mimeType.trimmingCharacters(in: .whitespacesAndNewlines), "application/pdf")
        XCTAssertGreaterThanOrEqual(result.content.count, 10)
        // skipped: field 'metadata.output_format' not available on result type
    }

    /// Fixture: Markdown output format.
    ///
    /// Extracts `pdf/fake_memo.pdf` requesting markdown output and checks the MIME type
    /// and a minimum `content.count`.
    func testOutputFormatMarkdown() throws {
        let result = try Kreuzberg.extractFileSync("pdf/fake_memo.pdf", nil, "{\"output_format\":\"markdown\"}")

        XCTAssertEqual(result.mimeType.trimmingCharacters(in: .whitespacesAndNewlines), "application/pdf")
        XCTAssertGreaterThanOrEqual(result.content.count, 10)
        // skipped: field 'metadata.output_format' not available on result type
    }
}
|