This commit is contained in:
13
docs/snippets/swift/plugins/clear_plugins.md
Normal file
13
docs/snippets/swift/plugins/clear_plugins.md
Normal file
@@ -0,0 +1,13 @@
|
||||
```swift title="Swift"
|
||||
import Kreuzberg
|
||||
|
||||
// Empty every plugin registry. The resets run in the order listed, and the
// first one that throws stops the sequence before the confirmation is printed.
let registryResets: [() throws -> Void] = [
    { try Kreuzberg.clearDocumentExtractors() },
    { try Kreuzberg.clearRenderers() },
    { try Kreuzberg.clearOcrBackends() },
    { try Kreuzberg.clearPostProcessors() },
    { try Kreuzberg.clearValidators() },
    { try Kreuzberg.clearEmbeddingBackends() },
]

for reset in registryResets {
    try reset()
}

print("All plugins cleared")
|
||||
```
|
||||
63
docs/snippets/swift/plugins/embedding_backend.md
Normal file
63
docs/snippets/swift/plugins/embedding_backend.md
Normal file
@@ -0,0 +1,63 @@
|
||||
```swift title="Swift"
|
||||
import Foundation
import Kreuzberg

// Wrap a custom embedder (e.g., CoreML, ONNX, API-based).
// The Swift class must implement the EmbeddingBackend protocol.
final class MyEmbedder: EmbeddingBackend {
    /// Width of every vector this backend produces. Shared by `dimensions()`
    /// and `embed(texts:)` so the two can never disagree.
    private static let vectorWidth: UInt = 768

    /// Model location supplied by the caller. The placeholder implementation
    /// below stores it but never reads it.
    private let modelUrl: URL

    init(modelUrl: URL) {
        self.modelUrl = modelUrl
    }

    // MARK: - Plugin hooks

    func name() -> String {
        "my-embedder"
    }

    func version() -> String {
        "1.0.0"
    }

    /// Returns a JSON-encoded Result: `{"ok": null}` on success.
    func initialize() -> String {
        // Warm-up logic goes here. When that logic can fail, wrap it in
        // do/catch and return Self.failure("Failed to initialize: \(error)").
        // No do/catch is written around the placeholder because nothing in it
        // throws, which would make the catch branch unreachable.
        "{\"ok\": null}"
    }

    /// Returns a JSON-encoded Result: `{"ok": null}` on success.
    func shutdown() -> String {
        "{\"ok\": null}"
    }

    // MARK: - EmbeddingBackend hooks

    /// Fixed dimensionality for this backend.
    func dimensions() -> UInt {
        Self.vectorWidth
    }

    /// Embeds `texts` and returns a JSON-encoded Result.
    ///
    /// - Parameter texts: Inputs to embed; one vector is produced per input.
    /// - Returns: `{"ok": [[Float]]}` on success, or `{"err": "<message>"}`
    ///   when the vectors cannot be encoded.
    func embed(texts: [String]) -> String {
        // Embed texts using your backend (e.g., CoreML inference).
        let width = Int(Self.vectorWidth)
        let embeddings: [[Float]] = texts.map { _ in
            Array(repeating: 0.5, count: width) // Placeholder
        }

        do {
            // Encoding the whole envelope (rather than splicing a fragment
            // into a string literal) guarantees the output is valid JSON.
            let payload = try JSONEncoder().encode(["ok": embeddings])
            return String(decoding: payload, as: UTF8.self)
        } catch {
            return Self.failure("Embedding failed: \(error)")
        }
    }

    // MARK: - Helpers

    /// Builds an `{"err": ...}` envelope with `message` properly JSON-escaped,
    /// so quotes or backslashes in an error description cannot corrupt it.
    private static func failure(_ message: String) -> String {
        guard let payload = try? JSONEncoder().encode(["err": message]) else {
            return "{\"err\": \"unknown error\"}"
        }
        return String(decoding: payload, as: UTF8.self)
    }
}
|
||||
|
||||
// One-time registration, performed during application startup.
let backend = MyEmbedder(modelUrl: URL(fileURLWithPath: "/path/to/model"))
try Kreuzberg.registerEmbeddingBackend(backend)

print("Embedding backend 'my-embedder' registered")
|
||||
// The registered backend can now be referenced by name in EmbeddingConfig
|
||||
// via the plugin selection mechanism once alef supports it
|
||||
```
|
||||
14
docs/snippets/swift/plugins/extractor_registration.md
Normal file
14
docs/snippets/swift/plugins/extractor_registration.md
Normal file
@@ -0,0 +1,14 @@
|
||||
<!-- snippet:skip reason="swift-bridge does not generate Swift-side protocol constructors for plugin registration. The Rust-side FFI defines SwiftDocumentExtractorBox as an opaque extern \"Swift\" type, but swift-bridge does not surface the protocol definition or constructor in the generated Swift package. Custom implementations must be written in Rust." -->
|
||||
```swift title="Swift"
|
||||
import Kreuzberg
|
||||
|
||||
// Custom DocumentExtractor registration is not available from Swift.
|
||||
//
|
||||
// The Rust FFI defines SwiftDocumentExtractorBox as an opaque extern "Swift" type
|
||||
// (packages/swift/rust/src/lib.rs, lines 2710-2722), but the swift-bridge code
|
||||
// generator does not emit a Swift-side protocol definition or factory to construct
|
||||
// and register instances.
|
||||
//
|
||||
// Workaround: Implement DocumentExtractor in Rust and register via a Rust FFI shim,
|
||||
// or use the built-in extractors (PDF, DOCX, HTML, etc.) which are pre-registered.
|
||||
```
|
||||
17
docs/snippets/swift/plugins/list_plugins.md
Normal file
17
docs/snippets/swift/plugins/list_plugins.md
Normal file
@@ -0,0 +1,17 @@
|
||||
```swift title="Swift"
|
||||
import Kreuzberg
|
||||
|
||||
// Query every registry first; any failing lookup throws before anything is printed.
let extractorNames = try Kreuzberg.listDocumentExtractors()
let rendererNames = try Kreuzberg.listRenderers()
let postProcessorNames = try Kreuzberg.listPostProcessors()
let ocrBackendNames = try Kreuzberg.listOcrBackends()
let validatorNames = try Kreuzberg.listValidators()
let embeddingBackendNames = try Kreuzberg.listEmbeddingBackends()

// Report what each registry currently holds.
print("Extractors: \(extractorNames)")
print("Renderers: \(rendererNames)")
print("Processors: \(postProcessorNames)")
print("OCR backends: \(ocrBackendNames)")
print("Validators: \(validatorNames)")
print("Embedding backends: \(embeddingBackendNames)")
|
||||
```
|
||||
48
docs/snippets/swift/plugins/min_length_validator.md
Normal file
48
docs/snippets/swift/plugins/min_length_validator.md
Normal file
@@ -0,0 +1,48 @@
|
||||
```swift title="Swift"
|
||||
import Kreuzberg
|
||||
|
||||
/// Validator that rejects extraction results whose content has fewer than
/// `minLength` characters.
final class MinLengthValidator: Validator {
    /// Smallest acceptable value of `result.content().count`.
    let minLength: Int

    init(minLength: Int = 100) {
        self.minLength = minLength
    }

    // MARK: - Identity and lifecycle

    func name() -> String { return "min_length_validator" }

    func version() -> String { return "1.0.0" }

    func initialize() -> String { return "{\"ok\": null}" }

    func shutdown() -> String { return "{\"ok\": null}" }

    // MARK: - Validator hooks

    func priority() -> Int32 { return 100 }

    /// This validator applies to every result.
    func shouldValidate(result: ExtractionResult, config: ExtractionConfig) -> Bool {
        return true
    }

    /// Returns JSON-encoded Result<(), String>.
    func validate(result: ExtractionResult, config: ExtractionConfig) -> String {
        let observed = result.content().count
        guard observed < minLength else {
            return "{\"ok\": null}"
        }
        return "{\"err\": \"Content too short: \(observed) < \(minLength)\"}"
    }
}
|
||||
|
||||
// Build the validator with an explicit threshold, then hand it to the registry.
let minLengthValidator = MinLengthValidator(minLength: 100)
try Kreuzberg.registerValidator(minLengthValidator)
|
||||
```
|
||||
13
docs/snippets/swift/plugins/pdf_metadata_extractor.md
Normal file
13
docs/snippets/swift/plugins/pdf_metadata_extractor.md
Normal file
@@ -0,0 +1,13 @@
|
||||
<!-- snippet:skip reason="swift-bridge 0.1.59 does not expose SwiftDocumentExtractorBox constructor or protocol definition in generated Swift code. Custom extractors must be implemented in Rust and registered via FFI shim." -->
|
||||
```swift title="Swift"
|
||||
import Kreuzberg
|
||||
|
||||
// Custom DocumentExtractor registration is not available from Swift.
|
||||
//
|
||||
// The FFI defines SwiftDocumentExtractorBox opaque type (packages/swift/rust/src/lib.rs),
|
||||
// but swift-bridge's Swift code generator does not emit the protocol definition or
|
||||
// factory required to construct and register instances from Swift.
|
||||
//
|
||||
// Workaround: Augment PDF extraction results by implementing a PostProcessor in Rust,
|
||||
// or post-process ExtractionResult.metadata in Swift after extraction.
|
||||
```
|
||||
46
docs/snippets/swift/plugins/pdf_only_processor.md
Normal file
46
docs/snippets/swift/plugins/pdf_only_processor.md
Normal file
@@ -0,0 +1,46 @@
|
||||
```swift title="Swift"
|
||||
import Kreuzberg
|
||||
|
||||
/// Post-processor that opts in only for PDF results and then does nothing to them.
final class PdfOnlyProcessor: PostProcessor {
    // MARK: - Identity and lifecycle

    func name() -> String { return "pdf-only-processor" }

    func version() -> String { return "1.0.0" }

    func initialize() -> String { return "{\"ok\": null}" }

    func shutdown() -> String { return "{\"ok\": null}" }

    // MARK: - Scheduling

    /// ProcessingStage enum name.
    func processingStage() -> String { return "middle" }

    /// Default priority.
    func priority() -> Int32 { return 50 }

    /// No processing overhead.
    func estimatedDurationMs(result: ExtractionResult) -> UInt64 { return 0 }

    // MARK: - Processing

    /// Accepts a result only when its MIME type is exactly "application/pdf".
    func shouldProcess(result: ExtractionResult, config: ExtractionConfig) -> Bool {
        let mime = result.mimeType()
        return mime == "application/pdf"
    }

    /// Returns JSON-encoded Result<(), String>; this processor is a no-op.
    func process(result: ExtractionResult, config: ExtractionConfig) -> String {
        return "{\"ok\": null}"
    }
}
|
||||
|
||||
// Create the processor and add it to the post-processor registry.
let pdfOnlyProcessor = PdfOnlyProcessor()
try Kreuzberg.registerPostProcessor(pdfOnlyProcessor)
|
||||
```
|
||||
13
docs/snippets/swift/plugins/plugin_extractor.md
Normal file
13
docs/snippets/swift/plugins/plugin_extractor.md
Normal file
@@ -0,0 +1,13 @@
|
||||
<!-- snippet:skip reason="swift-bridge 0.1.59 does not expose SwiftDocumentExtractorBox constructor or protocol definition in generated Swift code. Custom extractors must be implemented in Rust and registered via FFI shim." -->
|
||||
```swift title="Swift"
|
||||
import Kreuzberg
|
||||
|
||||
// Custom DocumentExtractor registration is not available from Swift.
|
||||
//
|
||||
// The Rust FFI (packages/swift/rust/src/lib.rs) accepts SwiftDocumentExtractorBox,
|
||||
// but swift-bridge does not generate the Swift-side protocol definition or
|
||||
// constructor required to implement and register instances.
|
||||
//
|
||||
// Solution: Implement DocumentExtractor in Rust and wrap it in a Rust FFI shim
|
||||
// that links both `kreuzberg` and the `kreuzberg-swift` package.
|
||||
```
|
||||
56
docs/snippets/swift/plugins/plugin_logging.md
Normal file
56
docs/snippets/swift/plugins/plugin_logging.md
Normal file
@@ -0,0 +1,56 @@
|
||||
```swift title="Swift"
|
||||
import Kreuzberg
|
||||
import os.log
|
||||
|
||||
let logger = Logger(subsystem: "com.example.plugins", category: "MyPlugin")
|
||||
|
||||
/// Post-processor that demonstrates structured logging from plugin hooks.
/// It never modifies the extraction result.
final class MyPlugin: PostProcessor {
    // MARK: - Identity

    func name() -> String {
        "my-plugin"
    }

    func version() -> String {
        "1.0.0"
    }

    // MARK: - Lifecycle

    /// Logs the start-up event and returns a JSON-encoded Result.
    func initialize() -> String {
        logger.info("Initializing plugin: my-plugin")
        return "{\"ok\": null}"
    }

    /// Logs the shutdown event and returns a JSON-encoded Result.
    func shutdown() -> String {
        logger.info("Shutting down plugin: my-plugin")
        return "{\"ok\": null}"
    }

    // MARK: - Processing

    /// Logs the MIME type and size of the result, warns on empty content, and
    /// returns a JSON-encoded Result.
    func process(result: ExtractionResult, config: ExtractionConfig) -> String {
        let content = result.content()
        let mimeType = result.mimeType()

        // `count` measures characters, so the UTF-8 view is used to make the
        // "bytes" label in the message accurate.
        let byteCount = content.utf8.count

        // Logger redacts interpolated dynamic strings by default; the MIME
        // type is not sensitive, so mark it public to keep the log readable.
        logger.info("Processing \(mimeType, privacy: .public) (\(byteCount) bytes)")

        if content.isEmpty {
            logger.warning("Processing resulted in empty content")
        }

        return "{\"ok\": null}"
    }

    /// This plugin applies to every result.
    func shouldProcess(result: ExtractionResult, config: ExtractionConfig) -> Bool {
        true
    }

    // MARK: - Scheduling

    func processingStage() -> String {
        "early"
    }

    func priority() -> Int32 {
        50
    }

    func estimatedDurationMs(result: ExtractionResult) -> UInt64 {
        10
    }
}
|
||||
|
||||
// Create the logging plugin and add it to the post-processor registry.
let loggingPlugin = MyPlugin()
try Kreuzberg.registerPostProcessor(loggingPlugin)
|
||||
```
|
||||
46
docs/snippets/swift/plugins/plugin_testing.md
Normal file
46
docs/snippets/swift/plugins/plugin_testing.md
Normal file
@@ -0,0 +1,46 @@
|
||||
```swift title="Swift"
|
||||
import Kreuzberg
|
||||
import Testing
|
||||
|
||||
// Validator implementation exercised by the test code that follows.
final class MinLengthValidator: Validator {
    /// Smallest acceptable value of `result.content().count`.
    let minLength: Int

    init(minLength: Int = 100) {
        self.minLength = minLength
    }

    // MARK: - Identity and lifecycle

    func name() -> String {
        return "test-validator"
    }

    func version() -> String {
        return "1.0.0"
    }

    func initialize() -> String {
        return "{\"ok\": null}"
    }

    func shutdown() -> String {
        return "{\"ok\": null}"
    }

    // MARK: - Validator hooks

    func priority() -> Int32 {
        return 50
    }

    /// This validator applies to every result.
    func shouldValidate(result: ExtractionResult, config: ExtractionConfig) -> Bool {
        return true
    }

    /// Returns a JSON-encoded Result: an error when the content is shorter
    /// than `minLength`, success otherwise.
    func validate(result: ExtractionResult, config: ExtractionConfig) -> String {
        let observed = result.content().count
        guard observed < minLength else {
            return "{\"ok\": null}"
        }
        return "{\"err\": \"Content too short: \(observed) < \(minLength)\"}"
    }
}
|
||||
|
||||
// Integration-style check: validators are exercised in-pipeline during
// extraction, so the validator must be registered before extracting.

let validator = MinLengthValidator(minLength: 100)
try Kreuzberg.registerValidator(validator)

// Create the extraction config via the binding
let configJson = "{\"use_cache\": false}"
let config = try extractionConfigFromJson(configJson)

// Extract a document; the registered validator runs during extraction.
// If validate() rejects the result, the extraction throws an error.
let result = try extractFile(path: "test.txt", mimeType: "text/plain", config: config)

// Remove the validator again so it does not leak into other tests.
// (Not reached when the extraction above throws.)
try Kreuzberg.unregisterValidator(validator.name())
|
||||
```
|
||||
55
docs/snippets/swift/plugins/plugin_validator.md
Normal file
55
docs/snippets/swift/plugins/plugin_validator.md
Normal file
@@ -0,0 +1,55 @@
|
||||
```swift title="Swift"
|
||||
import Kreuzberg
|
||||
|
||||
/// Validator that rejects extraction results whose content has fewer than
/// `minLength` characters.
final class MinLengthValidator: Validator {
    /// Smallest acceptable value of `result.content().count`.
    let minLength: Int

    /// - Parameter minLength: Minimum number of characters required.
    ///   Defaults to 50, the threshold this validator previously hard-coded.
    init(minLength: Int = 50) {
        self.minLength = minLength
    }

    func name() -> String {
        "min_length"
    }

    func version() -> String {
        "1.0.0"
    }

    func priority() -> Int32 {
        50
    }

    /// Returns a JSON-encoded Result: `{"err": ...}` when the content is
    /// shorter than `minLength`, `{"ok": null}` otherwise.
    func validate(result: ExtractionResult, config: ExtractionConfig) -> String {
        let contentLength = result.content().count
        guard contentLength >= minLength else {
            // The message contains only digits and fixed text, so it needs no
            // JSON escaping.
            let message = "Content too short: \(contentLength)"
            return "{\"err\": \"\(message)\"}"
        }
        return "{\"ok\": null}"
    }

    /// This validator applies to every result.
    func shouldValidate(result: ExtractionResult, config: ExtractionConfig) -> Bool {
        true
    }

    func initialize() -> String {
        "{\"ok\": null}"
    }

    func shutdown() -> String {
        "{\"ok\": null}"
    }
}
|
||||
|
||||
// Register the validator before running any extraction.
let minLengthValidator = MinLengthValidator()
try Kreuzberg.registerValidator(minLengthValidator)

// Extract a file; the validator runs in-pipeline during extraction
let extractionConfig = ExtractionConfig(
    useCache: false,
    enableQualityProcessing: false,
    resultFormat: .unified,
    outputFormat: .markdown
)
let extraction = try extractFileSync(path: "document.pdf", mimeType: nil, config: extractionConfig)
let extractedLength = extraction.content().count
print("Content length: \(extractedLength)")
|
||||
```
|
||||
54
docs/snippets/swift/plugins/quality_score_validator.md
Normal file
54
docs/snippets/swift/plugins/quality_score_validator.md
Normal file
@@ -0,0 +1,54 @@
|
||||
```swift title="Swift"
|
||||
import Foundation
import Kreuzberg

/// Validator that rejects extraction results whose quality score is below a
/// configurable threshold.
final class QualityValidator: Validator {
    /// Minimum acceptable quality score.
    let threshold: Double

    /// - Parameter threshold: Scores below this value are rejected.
    ///   Defaults to 0.5, the value this validator previously hard-coded.
    init(threshold: Double = 0.5) {
        self.threshold = threshold
    }

    func name() -> String {
        "quality-validator"
    }

    func version() -> String {
        "1.0.0"
    }

    func priority() -> Int32 {
        75
    }

    /// Returns a JSON-encoded Result: `{"err": ...}` when the score is below
    /// `threshold`, `{"ok": null}` otherwise.
    func validate(result: ExtractionResult, config: ExtractionConfig) -> String {
        // Parse metadata to extract quality score.
        // NOTE(review): assumes the score is stored as a string under
        // "quality_score" — confirm against the metadata type.
        let metadata = result.metadata()
        let parsedScore = (metadata["quality_score"] as? String).flatMap { Double($0) }

        // A missing or unparseable score counts as 0.0.
        let qualityScore = parsedScore ?? 0.0

        guard qualityScore >= threshold else {
            // String(format:) comes from Foundation, hence the import above.
            let message = "Quality score too low: \(String(format: "%.2f", qualityScore))"
            return "{\"err\": \"\(message)\"}"
        }
        return "{\"ok\": null}"
    }

    func shouldValidate(result: ExtractionResult, config: ExtractionConfig) -> Bool {
        // Only validate if quality processing was enabled
        config.enableQualityProcessing()
    }

    func initialize() -> String {
        "{\"ok\": null}"
    }

    func shutdown() -> String {
        "{\"ok\": null}"
    }
}
|
||||
|
||||
// Create the validator and add it to the validator registry.
let qualityValidator = QualityValidator()
try Kreuzberg.registerValidator(qualityValidator)
|
||||
```
|
||||
66
docs/snippets/swift/plugins/stateful_plugin.md
Normal file
66
docs/snippets/swift/plugins/stateful_plugin.md
Normal file
@@ -0,0 +1,66 @@
|
||||
```swift title="Swift"
|
||||
import Foundation
import Kreuzberg
import os.lock

/// Post-processor that keeps mutable state (a call counter and a small
/// string cache) and guards every access to it with a lock.
final class StatefulPlugin: PostProcessor {
    // NSLock is declared in Foundation (hence the import above). The lock
    // itself is never reassigned, so it is a constant.
    private let lock = NSLock()
    private var callCount: Int = 0
    private var cache: [String: String] = [:]

    // MARK: - Identity

    func name() -> String {
        "stateful-plugin"
    }

    func version() -> String {
        "1.0.0"
    }

    // MARK: - Scheduling

    func processingStage() -> String {
        "middle"
    }

    func priority() -> Int32 {
        50
    }

    func estimatedDurationMs(result: ExtractionResult) -> UInt64 {
        1 // Minimal overhead
    }

    // MARK: - Processing

    /// Counts the call and records the latest MIME type and running total in
    /// the cache. Returns a JSON-encoded Result.
    func process(result: ExtractionResult, config: ExtractionConfig) -> String {
        lock.lock()
        defer { lock.unlock() }

        callCount += 1
        cache["last_mime"] = result.mimeType()
        cache["call_count"] = String(callCount)

        return "{\"ok\": null}"
    }

    /// This plugin applies to every result.
    func shouldProcess(result: ExtractionResult, config: ExtractionConfig) -> Bool {
        true
    }

    // MARK: - Lifecycle

    /// Resets the counter and the cache. Returns a JSON-encoded Result.
    func initialize() -> String {
        lock.lock()
        defer { lock.unlock() }

        callCount = 0
        cache.removeAll()
        return "{\"ok\": null}"
    }

    /// Clears the cache and prints how many extractions were processed.
    /// Returns a JSON-encoded Result.
    func shutdown() -> String {
        lock.lock()
        defer { lock.unlock() }

        let finalCount = callCount
        cache.removeAll()
        let message = "Processed \(finalCount) extractions"
        print(message)
        return "{\"ok\": null}"
    }
}
|
||||
|
||||
// Create the stateful plugin and add it to the post-processor registry.
let statefulPlugin = StatefulPlugin()
try Kreuzberg.registerPostProcessor(statefulPlugin)
|
||||
```
|
||||
17
docs/snippets/swift/plugins/unregister_plugins.md
Normal file
17
docs/snippets/swift/plugins/unregister_plugins.md
Normal file
@@ -0,0 +1,17 @@
|
||||
```swift title="Swift"
|
||||
import Kreuzberg
|
||||
|
||||
// Names the plugins were registered under, one per registry.
let extractorName = "custom-json-extractor"
let postProcessorName = "word_count"
let ocrBackendName = "cloud-ocr"
let validatorName = "min_length_validator"

// Remove each plugin from its registry; the first failure aborts the rest.
try Kreuzberg.unregisterDocumentExtractor(extractorName)
try Kreuzberg.unregisterPostProcessor(postProcessorName)
try Kreuzberg.unregisterOcrBackend(ocrBackendName)
try Kreuzberg.unregisterValidator(validatorName)

print("Plugins unregistered")
|
||||
```
|
||||
49
docs/snippets/swift/plugins/word_count_processor.md
Normal file
49
docs/snippets/swift/plugins/word_count_processor.md
Normal file
@@ -0,0 +1,49 @@
|
||||
```swift title="Swift"
|
||||
import Kreuzberg
|
||||
|
||||
/// Post-processor that counts the words in the extracted content.
final class WordCountProcessor: PostProcessor {
    func name() -> String {
        "word_count"
    }

    func version() -> String {
        "1.0.0"
    }

    func processingStage() -> String {
        "early"
    }

    func priority() -> Int32 {
        50
    }

    /// Counts the words in the content, reports the count, and returns a
    /// JSON-encoded Result.
    func process(result: ExtractionResult, config: ExtractionConfig) -> String {
        // Split on any whitespace (spaces, tabs, newlines), not just the
        // space character, so multi-line documents are counted correctly.
        let wordCount = result.content().split(whereSeparator: \.isWhitespace).count

        // Metadata is not directly mutable via the FFI, so the count is
        // written to standard output instead of being stored on the result.
        print("word_count: \(wordCount)")

        return "{\"ok\": null}"
    }

    /// Skips results with no content.
    func shouldProcess(result: ExtractionResult, config: ExtractionConfig) -> Bool {
        !result.content().isEmpty
    }

    func estimatedDurationMs(result: ExtractionResult) -> UInt64 {
        5
    }

    func initialize() -> String {
        "{\"ok\": null}"
    }

    func shutdown() -> String {
        "{\"ok\": null}"
    }
}
|
||||
|
||||
// Create the word-count processor and add it to the post-processor registry.
let wordCountProcessor = WordCountProcessor()
try Kreuzberg.registerPostProcessor(wordCountProcessor)
|
||||
```
|
||||
Reference in New Issue
Block a user