Files
fil/packages/swift/Sources/RustBridge/Plugins.swift
Henrik Jess Nielsen b4c07d3693
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s
Nomad changes
2026-06-01 23:40:55 +02:00

495 lines
18 KiB
Swift
Generated
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
// Hand-authored Swift-side adapter classes for the inbound plugin trait bridge.
//
// The Rust crate `kreuzberg-swift` declares `extern "Swift" type Swift{Trait}Box` for each
// kreuzberg plugin trait — Rust calls into Swift via these handles whenever the host needs
// to drive a registered Swift plugin. swift-bridge looks up the Swift classes by name and
// uses `Unmanaged<T>.passRetained` to bridge ARC across the FFI boundary.
//
// This file is *not* alef-generated: alef emits the Rust side of the bridge plus the FFI
// shim signatures, but the user-facing Swift API (the protocols you adopt, plus the box
// classes that adapt those protocols to the FFI) lives here so users can iterate without
// needing to regenerate the bindings.
//
// Marshalling strategy mirrors the Rust side:
//
// - Primitives, `String`, `[UInt8]`, and `[String]` pass through directly.
// - Complex types (`OcrConfig`, `ExtractionConfig`, `ExtractionResult`, …) are exchanged
// as JSON-encoded strings and decoded via `Codable`.
// - Fallible methods return a JSON envelope (`{"ok": <value>}` / `{"err": "<message>"}`)
// because swift-bridge 0.1.59 cannot bridge `Result<RustString, RustString>` correctly.
//
// To register a Swift plugin:
//
// ```swift
// final class MyOcrBackend: OcrBackend { /* … */ }
// try Kreuzberg.registerOcrBackend(MyOcrBackend())
// ```
import Foundation
import RustBridge
// MARK: - JSON envelope helpers
/// Result envelope shared by every fallible Swift trait method.
///
/// A success is serialised as `{"ok": <serialised T>}` and a failure as
/// `{"err": "<message>"}`, mirroring the Rust `InboundEnvelope<T>` enum in the
/// alef-generated bridge.
private enum InboundEnvelope<T: Encodable>: Encodable {
  /// Successful outcome carrying the encodable payload.
  case ok(T)
  /// Failed outcome carrying a human-readable message.
  case err(String)

  /// JSON object keys; exactly one of them is written per envelope.
  enum CodingKeys: String, CodingKey {
    case ok
    case err
  }

  /// Writes a single-key JSON object whose key names the active case.
  func encode(to encoder: Encoder) throws {
    var keyed = encoder.container(keyedBy: CodingKeys.self)
    switch self {
    case let .ok(payload):
      try keyed.encode(payload, forKey: .ok)
    case let .err(reason):
      try keyed.encode(reason, forKey: .err)
    }
  }
}
/// Build the success envelope for a method that returns `()`: the literal `{"ok":null}`.
///
/// - Returns: The constant envelope text wrapped in a `RustString`.
private func encodeOkVoidEnvelope() -> RustString {
  RustString(#"{"ok":null}"#)
}
/// Wrap an encodable success value as `{"ok": <T>}`.
///
/// Encoding failures are converted into an error envelope rather than propagated, so the
/// Rust side never sees a panic.
///
/// - Parameter value: The payload to serialise under the `ok` key.
/// - Returns: The JSON envelope as a `RustString`.
private func encodeOkEnvelope<T: Encodable>(_ value: T) -> RustString {
  let encoded: Data
  do {
    encoded = try JSONEncoder().encode(InboundEnvelope.ok(value))
  } catch {
    return encodeErrEnvelope("swift: failed to encode ok envelope: \(error)")
  }
  // Fall back to a fixed error envelope if the encoded bytes cannot be read as UTF-8.
  guard let text = String(data: encoded, encoding: .utf8) else {
    return RustString("{\"err\":\"swift: invalid utf8 in envelope\"}")
  }
  return RustString(text)
}
/// Encode a failure as `{"err": "<message>"}`.
///
/// The message is serialised with `JSONEncoder` instead of being spliced into a string
/// literal, so quotes, backslashes, newlines and other control characters in `message`
/// are all escaped and the envelope is always valid JSON.
///
/// - Parameter message: Human-readable failure description; may contain any characters.
/// - Returns: The JSON error envelope as a `RustString`.
private func encodeErrEnvelope(_ message: String) -> RustString {
  guard let data = try? JSONEncoder().encode(["err": message]) else {
    // Encoding a `[String: String]` is not expected to fail; keep a constant, valid
    // envelope as a last resort so the caller still receives a decodable error.
    return RustString("{\"err\":\"swift: failed to encode error envelope\"}")
  }
  return RustString(String(decoding: data, as: UTF8.self))
}
/// Decode JSON text into a value of the requested `Decodable` type.
///
/// - Parameters:
///   - json: The JSON document as a Swift string.
///   - type: The type to decode into.
/// - Returns: The decoded value.
/// - Throws: Any error raised by `JSONDecoder` when the text does not match `type`.
private func decodeJson<T: Decodable>(_ json: String, as type: T.Type) throws -> T {
  let decoder = JSONDecoder()
  let utf8Bytes = Data(json.utf8)
  return try decoder.decode(type, from: utf8Bytes)
}
// MARK: - OcrBackend
/// Swift-side counterpart of the Rust `OcrBackend` plugin trait.
///
/// Adopters must be reference types (`AnyObject`) so the Rust side can hold a stable
/// retained reference. Structured parameters and return values travel as JSON strings
/// that are compatible with `Codable` types.
public protocol OcrBackend: AnyObject {
  /// Stable plugin name; used as the registry key.
  func name() -> String

  /// Plugin version as a semver-style string.
  func version() -> String

  /// Initialise the plugin. Throwing aborts registration.
  func initialize() throws

  /// Shutdown hook. Throwing logs a non-fatal cleanup error.
  func shutdown() throws

  /// Run OCR over a raw image buffer.
  ///
  /// - Parameters:
  ///   - image_bytes: The raw image data.
  ///   - config: A serialised `kreuzberg::OcrConfig`.
  /// - Returns: A serialised `kreuzberg::ExtractionResult`.
  func processImage(_ image_bytes: [UInt8], config: String) throws -> String

  /// Run OCR over the image file at `path`. `config` is a serialised `OcrConfig`.
  func processImageFile(path: String, config: String) throws -> String

  /// Whether the plugin supports the language `lang`.
  func supportsLanguage(_ lang: String) -> Bool

  /// JSON-encoded `OcrBackendType`.
  func backendTypeJson() -> String

  /// Languages supported by this backend.
  func supportedLanguages() -> [String]

  /// Whether the plugin can detect tables.
  func supportsTableDetection() -> Bool

  /// Whether the plugin can process whole documents (as opposed to single images).
  func supportsDocumentProcessing() -> Bool

  /// Process an entire document at `path`.
  ///
  /// `config` is a serialised `OcrConfig`; the return value must be a serialised
  /// `ExtractionResult`.
  func processDocument(path: String, config: String) throws -> String
}
/// FFI adapter that exposes an `OcrBackend` to Rust.
///
/// Rust finds `SwiftOcrBackendBox` by name via its `extern "Swift"` block and dispatches
/// calls through `Unmanaged<T>.fromOpaque(...)`. Each `alef_*` method forwards to the wrapped
/// plugin, converting bridge types to Swift types on the way in and back on the way out.
/// Throwing plugin methods report their outcome as a JSON envelope string.
public final class SwiftOcrBackendBox {
  /// The user-supplied plugin all calls are forwarded to.
  private let inner: OcrBackend

  /// Wrap `inner` so it can be handed across the FFI boundary.
  public init(_ inner: OcrBackend) {
    self.inner = inner
  }

  /// Plugin name as a `RustString`.
  public func alef_name() -> RustString {
    return RustString(inner.name())
  }

  /// Plugin version as a `RustString`.
  public func alef_version() -> RustString {
    return RustString(inner.version())
  }

  /// Run the plugin's `initialize`; returns `{"ok":null}` or an error envelope.
  public func alef_initialize() -> RustString {
    do {
      try inner.initialize()
    } catch {
      return encodeErrEnvelope("\(error)")
    }
    return encodeOkVoidEnvelope()
  }

  /// Run the plugin's `shutdown`; returns `{"ok":null}` or an error envelope.
  public func alef_shutdown() -> RustString {
    do {
      try inner.shutdown()
    } catch {
      return encodeErrEnvelope("\(error)")
    }
    return encodeOkVoidEnvelope()
  }

  /// Forward an in-memory image to the plugin.
  ///
  /// The plugin's return string is spliced verbatim under the `ok` key, so it must already
  /// be JSON text.
  public func alef_process_image(image_bytes: RustVec<UInt8>, config: RustString) -> RustString {
    let payload: String
    do {
      payload = try inner.processImage(Array(image_bytes), config: config.toString())
    } catch {
      return encodeErrEnvelope("\(error)")
    }
    return RustString("{\"ok\":\(payload)}")
  }

  /// Forward an image file path to the plugin; the returned JSON is spliced under `ok`.
  public func alef_process_image_file(path: RustString, config: RustString) -> RustString {
    let payload: String
    do {
      payload = try inner.processImageFile(path: path.toString(), config: config.toString())
    } catch {
      return encodeErrEnvelope("\(error)")
    }
    return RustString("{\"ok\":\(payload)}")
  }

  /// Whether the plugin supports `lang`.
  public func alef_supports_language(lang: RustString) -> Bool {
    let language = lang.toString()
    return inner.supportsLanguage(language)
  }

  /// The plugin's JSON-encoded backend type, passed through unchanged.
  public func alef_backend_type() -> RustString {
    let json = inner.backendTypeJson()
    return RustString(json)
  }

  /// Copy the plugin's supported languages into a fresh `RustVec<RustString>`.
  public func alef_supported_languages() -> RustVec<RustString> {
    let names = inner.supportedLanguages()
    let out = RustVec<RustString>()
    names.forEach { out.push(value: RustString($0)) }
    return out
  }

  /// Whether the plugin can detect tables.
  public func alef_supports_table_detection() -> Bool {
    return inner.supportsTableDetection()
  }

  /// Whether the plugin can process whole documents.
  public func alef_supports_document_processing() -> Bool {
    return inner.supportsDocumentProcessing()
  }

  /// Forward a document path to the plugin; the returned JSON is spliced under `ok`.
  public func alef_process_document(path: RustString, config: RustString) -> RustString {
    let payload: String
    do {
      payload = try inner.processDocument(path: path.toString(), config: config.toString())
    } catch {
      return encodeErrEnvelope("\(error)")
    }
    return RustString("{\"ok\":\(payload)}")
  }
}
// MARK: - PostProcessor
/// Swift-side counterpart of the Rust `PostProcessor` plugin trait.
public protocol PostProcessor: AnyObject {
  /// Plugin name.
  func name() -> String

  /// Plugin version string.
  func version() -> String

  /// Initialisation hook; may throw.
  func initialize() throws

  /// Shutdown hook; may throw.
  func shutdown() throws

  /// Post-process a serialised `ExtractionResult`.
  ///
  /// The result is mutable on the Rust side, but it is ferried in and out as JSON to avoid
  /// round-tripping references through the FFI.
  ///
  /// - Returns: The post-processed `ExtractionResult` as JSON.
  func processJson(result: String, config: String) throws -> String

  /// JSON-encoded `ProcessingStage`.
  func processingStageJson() -> String

  /// Whether this processor wants to handle the given result/config pair.
  func shouldProcess(result: String, config: String) -> Bool

  /// Estimated processing duration for `result`, in milliseconds.
  func estimatedDurationMs(result: String) -> UInt64

  /// Priority value reported by the processor.
  func priority() -> Int32
}
/// FFI adapter that exposes a `PostProcessor` to Rust.
///
/// Each `alef_*` method forwards to the wrapped plugin, converting bridge strings to Swift
/// strings on the way in. Throwing plugin methods report their outcome as a JSON envelope.
public final class SwiftPostProcessorBox {
  /// The user-supplied plugin all calls are forwarded to.
  private let inner: PostProcessor

  /// Wrap `inner` so it can be handed across the FFI boundary.
  public init(_ inner: PostProcessor) {
    self.inner = inner
  }

  /// Plugin name as a `RustString`.
  public func alef_name() -> RustString {
    return RustString(inner.name())
  }

  /// Plugin version as a `RustString`.
  public func alef_version() -> RustString {
    return RustString(inner.version())
  }

  /// Run the plugin's `initialize`; returns `{"ok":null}` or an error envelope.
  public func alef_initialize() -> RustString {
    do {
      try inner.initialize()
    } catch {
      return encodeErrEnvelope("\(error)")
    }
    return encodeOkVoidEnvelope()
  }

  /// Run the plugin's `shutdown`; returns `{"ok":null}` or an error envelope.
  public func alef_shutdown() -> RustString {
    do {
      try inner.shutdown()
    } catch {
      return encodeErrEnvelope("\(error)")
    }
    return encodeOkVoidEnvelope()
  }

  /// Forward a JSON `ExtractionResult` to the plugin.
  ///
  /// The plugin's return string is spliced verbatim under the `ok` key, so it must already
  /// be JSON text.
  public func alef_process(result: RustString, config: RustString) -> RustString {
    let processed: String
    do {
      processed = try inner.processJson(result: result.toString(), config: config.toString())
    } catch {
      return encodeErrEnvelope("\(error)")
    }
    return RustString("{\"ok\":\(processed)}")
  }

  /// The plugin's JSON-encoded processing stage, passed through unchanged.
  public func alef_processing_stage() -> RustString {
    return RustString(inner.processingStageJson())
  }

  /// Whether the plugin wants to process the given result/config pair.
  public func alef_should_process(result: RustString, config: RustString) -> Bool {
    return inner.shouldProcess(result: result.toString(), config: config.toString())
  }

  /// The plugin's duration estimate for `result`, in milliseconds.
  public func alef_estimated_duration_ms(result: RustString) -> UInt64 {
    return inner.estimatedDurationMs(result: result.toString())
  }

  /// The plugin's priority value.
  public func alef_priority() -> Int32 {
    return inner.priority()
  }
}
// MARK: - Validator
/// Swift-side counterpart of the Rust `Validator` plugin trait.
public protocol Validator: AnyObject {
  /// Plugin name.
  func name() -> String

  /// Plugin version string.
  func version() -> String

  /// Initialisation hook; may throw.
  func initialize() throws

  /// Shutdown hook; may throw.
  func shutdown() throws

  /// Validate an `ExtractionResult` passed as JSON. Throw to surface a validation error.
  func validate(result: String, config: String) throws

  /// Whether this validator wants to check the given result/config pair.
  func shouldValidate(result: String, config: String) -> Bool

  /// Priority value reported by the validator.
  func priority() -> Int32
}
/// FFI adapter that exposes a `Validator` to Rust.
///
/// Each `alef_*` method forwards to the wrapped plugin, converting bridge strings to Swift
/// strings on the way in. Throwing plugin methods report their outcome as a JSON envelope.
public final class SwiftValidatorBox {
  /// The user-supplied plugin all calls are forwarded to.
  private let inner: Validator

  /// Wrap `inner` so it can be handed across the FFI boundary.
  public init(_ inner: Validator) {
    self.inner = inner
  }

  /// Plugin name as a `RustString`.
  public func alef_name() -> RustString {
    return RustString(inner.name())
  }

  /// Plugin version as a `RustString`.
  public func alef_version() -> RustString {
    return RustString(inner.version())
  }

  /// Run the plugin's `initialize`; returns `{"ok":null}` or an error envelope.
  public func alef_initialize() -> RustString {
    do {
      try inner.initialize()
    } catch {
      return encodeErrEnvelope("\(error)")
    }
    return encodeOkVoidEnvelope()
  }

  /// Run the plugin's `shutdown`; returns `{"ok":null}` or an error envelope.
  public func alef_shutdown() -> RustString {
    do {
      try inner.shutdown()
    } catch {
      return encodeErrEnvelope("\(error)")
    }
    return encodeOkVoidEnvelope()
  }

  /// Run the plugin's validation; a thrown error becomes an error envelope, otherwise
  /// the result is `{"ok":null}`.
  public func alef_validate(result: RustString, config: RustString) -> RustString {
    do {
      try inner.validate(result: result.toString(), config: config.toString())
    } catch {
      return encodeErrEnvelope("\(error)")
    }
    return encodeOkVoidEnvelope()
  }

  /// Whether the plugin wants to validate the given result/config pair.
  public func alef_should_validate(result: RustString, config: RustString) -> Bool {
    return inner.shouldValidate(result: result.toString(), config: config.toString())
  }

  /// The plugin's priority value.
  public func alef_priority() -> Int32 {
    return inner.priority()
  }
}
// MARK: - EmbeddingBackend
/// Swift-side counterpart of the Rust `EmbeddingBackend` plugin trait.
public protocol EmbeddingBackend: AnyObject {
  /// Plugin name.
  func name() -> String

  /// Plugin version string.
  func version() -> String

  /// Initialisation hook; may throw.
  func initialize() throws

  /// Shutdown hook; may throw.
  func shutdown() throws

  /// Embedding dimensions reported by the backend.
  func dimensions() -> UInt

  /// Embed a batch of texts.
  ///
  /// - Returns: A JSON-encoded `Vec<Vec<f32>>`, where the outer vector has one entry per
  ///   batch item and each inner vector holds that item's embedding components.
  func embed(_ texts: [String]) throws -> String
}
/// FFI adapter that exposes an `EmbeddingBackend` to Rust.
///
/// Each `alef_*` method forwards to the wrapped plugin, converting bridge types to Swift
/// types on the way in. Throwing plugin methods report their outcome as a JSON envelope.
public final class SwiftEmbeddingBackendBox {
  /// The user-supplied plugin all calls are forwarded to.
  private let inner: EmbeddingBackend

  /// Wrap `inner` so it can be handed across the FFI boundary.
  public init(_ inner: EmbeddingBackend) { self.inner = inner }

  /// Plugin name as a `RustString`.
  public func alef_name() -> RustString { RustString(inner.name()) }

  /// Plugin version as a `RustString`.
  public func alef_version() -> RustString { RustString(inner.version()) }

  /// Run the plugin's `initialize`; returns `{"ok":null}` or an error envelope.
  public func alef_initialize() -> RustString {
    do {
      try inner.initialize()
      return encodeOkVoidEnvelope()
    } catch { return encodeErrEnvelope("\(error)") }
  }

  /// Run the plugin's `shutdown`; returns `{"ok":null}` or an error envelope.
  public func alef_shutdown() -> RustString {
    do {
      try inner.shutdown()
      return encodeOkVoidEnvelope()
    } catch { return encodeErrEnvelope("\(error)") }
  }

  /// Embedding dimensions reported by the plugin.
  public func alef_dimensions() -> UInt { inner.dimensions() }

  /// Embed a batch of texts.
  ///
  /// - Parameter texts: The batch to embed, one entry per text.
  /// - Returns: `{"ok": <plugin JSON>}` on success, where the plugin's return string is
  ///   spliced in verbatim, or an error envelope if the plugin throws.
  public func alef_embed(texts: RustVec<RustString>) -> RustString {
    // Iterating a `RustVec<RustString>` yields borrowed `RustStringRef` elements. Copy each
    // one into an owned Swift `String` via `as_str().toString()` before calling the plugin;
    // sequence iteration avoids the manual index bookkeeping and the force-unwrap on
    // `get(index:)`.
    let batch = texts.map { $0.as_str().toString() }
    do {
      let vectors = try inner.embed(batch)
      return RustString("{\"ok\":\(vectors)}")
    } catch { return encodeErrEnvelope("\(error)") }
  }
}
// MARK: - DocumentExtractor
/// Swift-side counterpart of the Rust `DocumentExtractor` plugin trait.
///
/// User-facing extraction surface. Adopters must implement `name`, `version`,
/// `extractBytes` (which returns a JSON-encoded `InternalDocument`) and
/// `supportedMimeTypes`. Every other requirement has a default implementation in the
/// protocol extension below, mirroring the Rust trait's defaults.
public protocol DocumentExtractor: AnyObject {
  /// Plugin name.
  func name() -> String

  /// Plugin version string.
  func version() -> String

  /// Initialisation hook; may throw. Defaults to a no-op.
  func initialize() throws

  /// Shutdown hook; may throw. Defaults to a no-op.
  func shutdown() throws

  /// Extract from raw bytes. Return a JSON-encoded `InternalDocument`.
  func extractBytes(content: [UInt8], mimeType: String, config: String) throws -> String

  /// Extract from a filesystem path. The default reads the file and forwards to
  /// `extractBytes`.
  func extractFile(path: String, mimeType: String, config: String) throws -> String

  /// MIME types this extractor claims to support.
  func supportedMimeTypes() -> [String]

  /// Priority for the registry's selection ordering (0–255, default 50).
  func priority() -> Int32

  /// Whether this extractor can handle the given path + MIME pair. Defaults to `true`.
  func canHandle(path: String, mimeType: String) -> Bool

  /// JSON-encoded handle to a synchronous extractor, if any. The default returns the JSON
  /// `null` sentinel (the Rust bridge does not currently dispatch sync paths).
  func asSyncExtractor() -> String
}

/// Default implementations for the optional parts of `DocumentExtractor`.
extension DocumentExtractor {
  /// No-op initialisation.
  public func initialize() throws {}

  /// No-op shutdown.
  public func shutdown() throws {}

  /// Load the whole file at `path` into memory and delegate to `extractBytes`.
  ///
  /// - Throws: The file-read error if the file cannot be loaded, or whatever
  ///   `extractBytes` throws.
  public func extractFile(path: String, mimeType: String, config: String) throws -> String {
    let fileURL = URL(fileURLWithPath: path)
    let contents = try Data(contentsOf: fileURL)
    return try extractBytes(content: Array(contents), mimeType: mimeType, config: config)
  }

  /// Default priority of 50.
  public func priority() -> Int32 {
    return 50
  }

  /// Accept every path/MIME pair by default.
  public func canHandle(path: String, mimeType: String) -> Bool {
    return true
  }

  /// The JSON `null` sentinel: no synchronous extractor.
  public func asSyncExtractor() -> String {
    return "null"
  }
}
/// FFI adapter that exposes a `DocumentExtractor` to Rust.
///
/// Each `alef_*` method forwards to the wrapped plugin, converting bridge types to Swift
/// types on the way in and back on the way out. Throwing plugin methods report their
/// outcome as a JSON envelope string.
public final class SwiftDocumentExtractorBox {
  /// The user-supplied plugin all calls are forwarded to.
  private let inner: DocumentExtractor

  /// Wrap `inner` so it can be handed across the FFI boundary.
  public init(_ inner: DocumentExtractor) {
    self.inner = inner
  }

  /// Plugin name as a `RustString`.
  public func alef_name() -> RustString {
    return RustString(inner.name())
  }

  /// Plugin version as a `RustString`.
  public func alef_version() -> RustString {
    return RustString(inner.version())
  }

  /// Run the plugin's `initialize`; returns `{"ok":null}` or an error envelope.
  public func alef_initialize() -> RustString {
    do {
      try inner.initialize()
    } catch {
      return encodeErrEnvelope("\(error)")
    }
    return encodeOkVoidEnvelope()
  }

  /// Run the plugin's `shutdown`; returns `{"ok":null}` or an error envelope.
  public func alef_shutdown() -> RustString {
    do {
      try inner.shutdown()
    } catch {
      return encodeErrEnvelope("\(error)")
    }
    return encodeOkVoidEnvelope()
  }

  /// Forward an in-memory document to the plugin.
  ///
  /// The plugin's return string is spliced verbatim under the `ok` key, so it must already
  /// be JSON text.
  public func alef_extract_bytes(content: RustVec<UInt8>, mime_type: RustString, config: RustString)
    -> RustString
  {
    let document: String
    do {
      document = try inner.extractBytes(
        content: Array(content), mimeType: mime_type.toString(), config: config.toString())
    } catch {
      return encodeErrEnvelope("\(error)")
    }
    return RustString("{\"ok\":\(document)}")
  }

  /// Forward a document path to the plugin; the returned JSON is spliced under `ok`.
  public func alef_extract_file(path: RustString, mime_type: RustString, config: RustString)
    -> RustString
  {
    let document: String
    do {
      document = try inner.extractFile(
        path: path.toString(), mimeType: mime_type.toString(), config: config.toString())
    } catch {
      return encodeErrEnvelope("\(error)")
    }
    return RustString("{\"ok\":\(document)}")
  }

  /// Copy the plugin's supported MIME types into a fresh `RustVec<RustString>`.
  public func alef_supported_mime_types() -> RustVec<RustString> {
    let mimeTypes = inner.supportedMimeTypes()
    let out = RustVec<RustString>()
    mimeTypes.forEach { out.push(value: RustString($0)) }
    return out
  }

  /// The plugin's priority value.
  public func alef_priority() -> Int32 {
    return inner.priority()
  }

  /// Whether the plugin can handle the given path + MIME pair.
  public func alef_can_handle(path: RustString, mime_type: RustString) -> Bool {
    return inner.canHandle(path: path.toString(), mimeType: mime_type.toString())
  }

  /// The plugin's JSON-encoded sync-extractor handle, passed through unchanged.
  public func alef_as_sync_extractor() -> RustString {
    return RustString(inner.asSyncExtractor())
  }
}
// MARK: - Renderer
/// Swift-side counterpart of the Rust `Renderer` plugin trait.
///
/// Implement `render` to turn a JSON-encoded `InternalDocument` into the renderer's target
/// output format. The Rust bridge encodes the document before it crosses the FFI boundary;
/// on success, return the rendered string.
public protocol Renderer: AnyObject {
  /// Plugin name.
  func name() -> String

  /// Plugin version string.
  func version() -> String

  /// Initialisation hook; may throw. Defaults to a no-op.
  func initialize() throws

  /// Shutdown hook; may throw. Defaults to a no-op.
  func shutdown() throws

  /// Render the document.
  ///
  /// - Parameter doc: A JSON-encoded `InternalDocument`.
  /// - Returns: The rendered output as a string.
  func render(doc: String) throws -> String
}

/// Default lifecycle hooks for `Renderer`.
extension Renderer {
  /// No-op initialisation.
  public func initialize() throws {
  }

  /// No-op shutdown.
  public func shutdown() throws {
  }
}
/// FFI adapter that exposes a `Renderer` to Rust.
///
/// Each `alef_*` method forwards to the wrapped plugin, converting bridge strings to Swift
/// strings on the way in. Throwing plugin methods report their outcome as a JSON envelope.
public final class SwiftRendererBox {
  /// The user-supplied plugin all calls are forwarded to.
  private let inner: Renderer

  /// Wrap `inner` so it can be handed across the FFI boundary.
  public init(_ inner: Renderer) {
    self.inner = inner
  }

  /// Plugin name as a `RustString`.
  public func alef_name() -> RustString { RustString(inner.name()) }

  /// Plugin version as a `RustString`.
  public func alef_version() -> RustString { RustString(inner.version()) }

  /// Run the plugin's `initialize`; returns `{"ok":null}` or an error envelope.
  public func alef_initialize() -> RustString {
    do {
      try inner.initialize()
      return encodeOkVoidEnvelope()
    } catch { return encodeErrEnvelope("\(error)") }
  }

  /// Run the plugin's `shutdown`; returns `{"ok":null}` or an error envelope.
  public func alef_shutdown() -> RustString {
    do {
      try inner.shutdown()
      return encodeOkVoidEnvelope()
    } catch { return encodeErrEnvelope("\(error)") }
  }

  /// Render a JSON-encoded document through the plugin.
  ///
  /// - Parameter doc: The document JSON handed over by the Rust bridge.
  /// - Returns: `{"ok": "<rendered text>"}` on success, or an error envelope if the plugin
  ///   throws or the envelope cannot be encoded.
  public func alef_render(doc: RustString) -> RustString {
    do {
      let rendered = try inner.render(doc: doc.toString())
      // `rendered` is an arbitrary string (markdown/html/etc.), so it must be JSON-escaped
      // rather than spliced in. The shared helper does that and reports an encoding failure
      // as an error envelope instead of a success with empty output.
      return encodeOkEnvelope(rendered)
    } catch { return encodeErrEnvelope("\(error)") }
  }
}