This commit is contained in:
8
packages/swift/.editorconfig
generated
Normal file
8
packages/swift/.editorconfig
generated
Normal file
@@ -0,0 +1,8 @@
|
||||
[*]
|
||||
charset = utf-8
|
||||
end_of_line = lf
|
||||
insert_final_newline = true
|
||||
|
||||
[*.swift]
|
||||
indent_style = space
|
||||
indent_size = 2
|
||||
6
packages/swift/.gitignore
generated
vendored
Normal file
6
packages/swift/.gitignore
generated
vendored
Normal file
@@ -0,0 +1,6 @@
|
||||
.build/
|
||||
Packages/
|
||||
xcuserdata/
|
||||
DerivedData/
|
||||
.swiftpm/
|
||||
*.xcodeproj
|
||||
3
packages/swift/.swiftformat
generated
Normal file
3
packages/swift/.swiftformat
generated
Normal file
@@ -0,0 +1,3 @@
|
||||
lineLength = 120
|
||||
indent = 2
|
||||
usesTabs = false
|
||||
78
packages/swift/BUILDING.md
generated
Normal file
78
packages/swift/BUILDING.md
generated
Normal file
@@ -0,0 +1,78 @@
|
||||
# Building Kreuzberg
|
||||
|
||||
The Swift package wraps a Rust library via [swift-bridge](https://github.com/chinedufn/swift-bridge).
|
||||
SwiftPM cannot invoke Cargo directly, so you must run the cargo build step first.
|
||||
|
||||
## Workflow
|
||||
|
||||
### 1. Build the Rust binding crate
|
||||
|
||||
From the **repository root**:
|
||||
|
||||
```sh
|
||||
cargo build -p kreuzberg-swift
|
||||
```
|
||||
|
||||
This compiles `target/debug/libkreuzberg_swift.a` and runs
|
||||
`swift-bridge-build` in `build.rs`, which writes generated Swift and C sources
|
||||
into `target/debug/build/kreuzberg-swift-*/out/`.
|
||||
|
||||
### 2. Copy generated sources into the SwiftPM targets
|
||||
|
||||
The package uses two internal targets:
|
||||
|
||||
- `Sources/RustBridgeC/` — pure C target with the combined C header
|
||||
- `Sources/RustBridge/` — Swift bridge files that `import RustBridgeC`
|
||||
|
||||
```sh
|
||||
OUT=$(ls -dt target/debug/build/kreuzberg-swift-*/out 2>/dev/null | head -1)
|
||||
|
||||
# Combine C headers into the RustBridgeC target
|
||||
cat "$OUT/SwiftBridgeCore.h" "$OUT/kreuzberg-swift/kreuzberg-swift.h" \
|
||||
> packages/swift/Sources/RustBridgeC/RustBridgeC.h
|
||||
|
||||
# Copy Swift bridge files, prepending "import RustBridgeC" so they see the C types.
|
||||
# Use `{ echo ...; cat ...; }` rather than `printf "...$(cat)..."` because printf
|
||||
# interprets `%` and `\` sequences in its format string, which would corrupt the
|
||||
# generated Swift sources.
|
||||
{ echo "import RustBridgeC"; cat "$OUT/SwiftBridgeCore.swift"; } \
|
||||
> packages/swift/Sources/RustBridge/SwiftBridgeCore.swift
|
||||
{ echo "import RustBridgeC"; cat "$OUT/kreuzberg-swift/kreuzberg-swift.swift"; } \
|
||||
> packages/swift/Sources/RustBridge/kreuzberg-swift.swift
|
||||
```
|
||||
|
||||
If the glob `kreuzberg-swift-*/out` matches multiple directories, `ls -dt ... | head -1`
|
||||
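picks the most recently modified one. If it matches none (for example, step 1 has not been run for this profile), `OUT` is empty because the `ls` error is suppressed, and the copy commands fail — rerun the cargo build first.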
|
||||
|
||||
### 3. Build and test the Swift package
|
||||
|
||||
```sh
|
||||
swift build --package-path packages/swift
|
||||
swift test --package-path packages/swift
|
||||
```
|
||||
|
||||
## Release builds
|
||||
|
||||
Replace `target/debug` with `target/release` and pass
|
||||
`--configuration release` to `swift build`:
|
||||
|
||||
```sh
|
||||
cargo build --release -p kreuzberg-swift
|
||||
OUT=$(ls -dt target/release/build/kreuzberg-swift-*/out 2>/dev/null | head -1)
|
||||
|
||||
cat "$OUT/SwiftBridgeCore.h" "$OUT/kreuzberg-swift/kreuzberg-swift.h" \
|
||||
> packages/swift/Sources/RustBridgeC/RustBridgeC.h
|
||||
{ echo "import RustBridgeC"; cat "$OUT/SwiftBridgeCore.swift"; } \
|
||||
> packages/swift/Sources/RustBridge/SwiftBridgeCore.swift
|
||||
{ echo "import RustBridgeC"; cat "$OUT/kreuzberg-swift/kreuzberg-swift.swift"; } \
|
||||
> packages/swift/Sources/RustBridge/kreuzberg-swift.swift
|
||||
|
||||
swift build --package-path packages/swift --configuration release
|
||||
```
|
||||
|
||||
## Notes
|
||||
|
||||
- Files in `Sources/RustBridgeC/` and the generated Swift files in
|
||||
`Sources/RustBridge/` are **generated artifacts** — overwritten by the copy step.
|
||||
- `Sources/RustBridge/RustBridge.swift` is a placeholder and is overwritten.
|
||||
- `target/` is in `.gitignore`; regenerate after every `cargo clean`.
|
||||
9
packages/swift/Examples/Demo/main.swift
generated
Normal file
9
packages/swift/Examples/Demo/main.swift
generated
Normal file
@@ -0,0 +1,9 @@
|
||||
import Kreuzberg
|
||||
|
||||
/// Minimal smoke-test executable: confirms that the `Kreuzberg` module links and loads.
///
/// `@main` is intentionally not used here. This file is named `main.swift`, which
/// Swift always treats as top-level code, and the compiler rejects `@main` in a
/// module that contains top-level code. The entry point is invoked explicitly below.
struct Demo {
  /// Prints a confirmation message to standard output.
  static func main() {
    print("Demo: Kreuzberg loaded successfully")
    // Add your API calls here after code generation
  }
}

// Explicit program entry point for `main.swift`.
Demo.main()
|
||||
93
packages/swift/LICENSE
generated
Normal file
93
packages/swift/LICENSE
generated
Normal file
@@ -0,0 +1,93 @@
|
||||
Elastic License 2.0 (ELv2)
|
||||
|
||||
Copyright 2025-2026 Kreuzberg, Inc.
|
||||
|
||||
Acceptance
|
||||
|
||||
By using the software, you agree to all of the terms and conditions below.
|
||||
|
||||
Copyright License
|
||||
|
||||
The licensor grants you a non-exclusive, royalty-free, worldwide,
|
||||
non-sublicensable, non-transferable license to use, copy, distribute, make
|
||||
available, and prepare derivative works of the software, in each case subject to
|
||||
the limitations and conditions below.
|
||||
|
||||
Limitations
|
||||
|
||||
You may not provide the software to third parties as a hosted or managed
|
||||
service, where the service provides users with access to any substantial set of
|
||||
the features or functionality of the software.
|
||||
|
||||
You may not move, change, disable, or circumvent the license key functionality
|
||||
in the software, and you may not remove or obscure any functionality in the
|
||||
software that is protected by the license key.
|
||||
|
||||
You may not alter, remove, or obscure any licensing, copyright, or other notices
|
||||
of the licensor in the software. Any use of the licensor's trademarks is subject
|
||||
to applicable law.
|
||||
|
||||
Patents
|
||||
|
||||
The licensor grants you a license, under any patent claims the licensor can
|
||||
license, or becomes able to license, to make, have made, use, sell, offer for
|
||||
sale, import and have imported the software, in each case subject to the
|
||||
limitations and conditions in this license. This license does not cover any
|
||||
patent claims that you cause to be infringed by modifications or additions to the
|
||||
software. If you or your company make any written claim that the software
|
||||
infringes or contributes to infringement of any patent, your patent license for
|
||||
the software granted under these terms ends immediately. If your company makes
|
||||
such a claim, your patent license ends immediately for work on behalf of your
|
||||
company.
|
||||
|
||||
Notices
|
||||
|
||||
You must ensure that anyone who gets a copy of any part of the software from you
|
||||
also gets a copy of these terms.
|
||||
|
||||
If you modify the software, you must include in any modified copies of the
|
||||
software prominent notices stating that you have modified the software.
|
||||
|
||||
No Other Rights
|
||||
|
||||
These terms do not imply any licenses other than those expressly granted in
|
||||
these terms.
|
||||
|
||||
Termination
|
||||
|
||||
If you use the software in violation of these terms, such use is not licensed,
|
||||
and your licenses will automatically terminate. If the licensor provides you with
|
||||
a notice of your violation, and you cease all violation of this license no later
|
||||
than 30 days after you receive that notice, your licenses will be reinstated
|
||||
retroactively. However, if you violate these terms after such reinstatement, any
|
||||
additional violation of these terms will cause your licenses to terminate
|
||||
automatically and permanently.
|
||||
|
||||
No Liability
|
||||
|
||||
As far as the law allows, the software comes as is, without any warranty or
|
||||
condition, and the licensor will not be liable to you for any damages arising out
|
||||
of these terms or the use or nature of the software, under any kind of legal
|
||||
claim.
|
||||
|
||||
Definitions
|
||||
|
||||
The licensor is the entity offering these terms, and the software is the
|
||||
software the licensor makes available under these terms, including any portion
|
||||
of it.
|
||||
|
||||
you refers to the individual or entity agreeing to these terms.
|
||||
|
||||
your company is any legal entity, sole proprietorship, or other kind of
|
||||
organization that you work for, plus all organizations that have control over,
|
||||
are under the control of, or are under common control with that organization.
|
||||
control means ownership of substantially all the assets of an entity, or the
|
||||
power to direct its management and policies by vote, contract, or otherwise.
|
||||
Control can be direct or indirect.
|
||||
|
||||
your licenses are all the licenses granted to you for the software under these
|
||||
terms.
|
||||
|
||||
use means anything you do with the software requiring one of your licenses.
|
||||
|
||||
trademark means trademarks, service marks, and similar rights.
|
||||
54
packages/swift/Package.swift
generated
Normal file
54
packages/swift/Package.swift
generated
Normal file
@@ -0,0 +1,54 @@
|
||||
// swift-tools-version: 6.0
|
||||
import PackageDescription
|
||||
|
||||
// NOTE: Run `cargo build -p kreuzberg-swift` and then rerun `alef generate`
|
||||
// before `swift build`. Alef materializes the swift-bridge Swift/C outputs into
|
||||
// Sources/RustBridge and Sources/RustBridgeC when the Cargo build output exists.
|
||||
// See BUILDING.md for the full workflow.
|
||||
// Absolute path of the Cargo `target/` directory at the repository root, two
// levels above this manifest. It is derived from the manifest's own directory so
// the linker search paths below do not depend on the working directory the
// build happens to be launched from (relative `-L` paths are resolved by the
// linker against its current directory, not against Package.swift).
let cargoTargetDirectory = "\(Context.packageDirectory)/../../target"

let package = Package(
  name: "Kreuzberg",
  platforms: [
    .macOS(.v13),
    .iOS(.v16),
  ],
  products: [
    .library(name: "Kreuzberg", targets: ["Kreuzberg"])
  ],
  targets: [
    // RustBridgeC: pure C/headers target. Swift files in RustBridge import this
    // to access C types (RustStr, etc.) produced by swift-bridge.
    // publicHeadersPath: "." exposes RustBridgeC.h to dependents.
    .target(
      name: "RustBridgeC",
      path: "Sources/RustBridgeC",
      publicHeadersPath: "."
    ),
    // RustBridge: Swift wrapper around the Rust static library.
    // Depends on RustBridgeC so the generated Swift files can use the C types.
    // linkerSettings wire the Rust staticlib (libkreuzberg_swift.a) produced by
    // `cargo build -p kreuzberg-swift` so `swift build` / `swift test` can resolve
    // the `__swift_bridge__$*` C symbols. Both target/release and target/debug are
    // searched so either cargo profile works; release is listed first, so it wins
    // when both profiles have been built.
    .target(
      name: "RustBridge",
      dependencies: ["RustBridgeC"],
      path: "Sources/RustBridge",
      linkerSettings: [
        .unsafeFlags([
          "-L\(cargoTargetDirectory)/release",
          "-L\(cargoTargetDirectory)/debug",
        ]),
        .linkedLibrary("kreuzberg_swift"),
        .linkedFramework("Security", .when(platforms: [.macOS, .iOS])),
        .linkedFramework("CoreFoundation", .when(platforms: [.macOS, .iOS])),
        .linkedFramework("SystemConfiguration", .when(platforms: [.macOS])),
      ]
    ),
    // Kreuzberg: the public Swift API, layered on top of RustBridge.
    .target(
      name: "Kreuzberg", dependencies: ["RustBridge"],
      path: "Sources/Kreuzberg"),
    .testTarget(
      name: "KreuzbergTests", dependencies: ["Kreuzberg"],
      path: "Tests/KreuzbergTests"),
  ]
)
|
||||
449
packages/swift/README.md
generated
Normal file
449
packages/swift/README.md
generated
Normal file
@@ -0,0 +1,449 @@
|
||||
# Swift
|
||||
|
||||
<div align="center" style="display: flex; flex-wrap: wrap; gap: 8px; justify-content: center; margin: 20px 0;">
|
||||
<a href="https://github.com/kreuzberg-dev/alef">
|
||||
<img src="https://img.shields.io/badge/Bindings-alef%20%D7%90-007ec6" alt="Bindings">
|
||||
</a>
|
||||
<!-- Language Bindings -->
|
||||
<a href="https://crates.io/crates/kreuzberg">
|
||||
<img src="https://img.shields.io/crates/v/kreuzberg?label=Rust&color=007ec6" alt="Rust">
|
||||
</a>
|
||||
<a href="https://pypi.org/project/kreuzberg/">
|
||||
<img src="https://img.shields.io/pypi/v/kreuzberg?label=Python&color=007ec6" alt="Python">
|
||||
</a>
|
||||
<a href="https://www.npmjs.com/package/@kreuzberg/node">
|
||||
<img src="https://img.shields.io/npm/v/@kreuzberg/node?label=Node.js&color=007ec6" alt="Node.js">
|
||||
</a>
|
||||
<a href="https://www.npmjs.com/package/@kreuzberg/wasm">
|
||||
<img src="https://img.shields.io/npm/v/@kreuzberg/wasm?label=WASM&color=007ec6" alt="WASM">
|
||||
</a>
|
||||
<a href="https://central.sonatype.com/artifact/dev.kreuzberg/kreuzberg">
|
||||
<img src="https://img.shields.io/maven-central/v/dev.kreuzberg/kreuzberg?label=Java&color=007ec6" alt="Java">
|
||||
</a>
|
||||
<a href="https://github.com/kreuzberg-dev/kreuzberg/tree/main/packages/go/v5">
|
||||
<img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v5*" alt="Go">
|
||||
</a>
|
||||
<a href="https://www.nuget.org/packages/Kreuzberg/">
|
||||
<img src="https://img.shields.io/nuget/v/Kreuzberg?label=C%23&color=007ec6" alt="C#">
|
||||
</a>
|
||||
<a href="https://packagist.org/packages/kreuzberg/kreuzberg">
|
||||
<img src="https://img.shields.io/packagist/v/kreuzberg/kreuzberg?label=PHP&color=007ec6" alt="PHP">
|
||||
</a>
|
||||
<a href="https://rubygems.org/gems/kreuzberg">
|
||||
<img src="https://img.shields.io/gem/v/kreuzberg?label=Ruby&color=007ec6" alt="Ruby">
|
||||
</a>
|
||||
<a href="https://hex.pm/packages/kreuzberg">
|
||||
<img src="https://img.shields.io/hexpm/v/kreuzberg?label=Elixir&color=007ec6" alt="Elixir">
|
||||
</a>
|
||||
<a href="https://kreuzberg-dev.r-universe.dev/kreuzberg">
|
||||
<img src="https://img.shields.io/badge/R-kreuzberg-007ec6" alt="R">
|
||||
</a>
|
||||
<a href="https://pub.dev/packages/kreuzberg">
|
||||
<img src="https://img.shields.io/pub/v/kreuzberg?label=Dart&color=007ec6" alt="Dart">
|
||||
</a>
|
||||
<a href="https://central.sonatype.com/artifact/dev.kreuzberg/kreuzberg-android">
|
||||
<img src="https://img.shields.io/maven-central/v/dev.kreuzberg/kreuzberg-android?label=Kotlin&color=007ec6" alt="Kotlin">
|
||||
</a>
|
||||
<a href="https://github.com/kreuzberg-dev/kreuzberg/tree/main/packages/swift">
|
||||
<img src="https://img.shields.io/badge/Swift-SPM-007ec6" alt="Swift">
|
||||
</a>
|
||||
<a href="https://github.com/kreuzberg-dev/kreuzberg/tree/main/packages/zig">
|
||||
<img src="https://img.shields.io/badge/Zig-package-007ec6" alt="Zig">
|
||||
</a>
|
||||
<a href="https://github.com/kreuzberg-dev/kreuzberg/releases">
|
||||
<img src="https://img.shields.io/badge/C-FFI-007ec6" alt="C FFI">
|
||||
</a>
|
||||
<a href="https://github.com/kreuzberg-dev/kreuzberg/pkgs/container/kreuzberg">
|
||||
<img src="https://img.shields.io/badge/Docker-ghcr.io-007ec6?logo=docker&logoColor=white" alt="Docker">
|
||||
</a>
|
||||
<a href="https://github.com/kreuzberg-dev/kreuzberg/pkgs/container/charts%2Fkreuzberg">
|
||||
<img src="https://img.shields.io/badge/Helm-ghcr.io-007ec6?logo=helm&logoColor=white" alt="Helm">
|
||||
</a>
|
||||
|
||||
<!-- Project Info -->
|
||||
<a href="https://github.com/kreuzberg-dev/kreuzberg/blob/main/LICENSE">
|
||||
<img src="https://img.shields.io/badge/License-Elastic--2.0-007ec6" alt="License">
|
||||
</a>
|
||||
<a href="https://docs.kreuzberg.dev">
|
||||
<img src="https://img.shields.io/badge/Docs-kreuzberg-007ec6" alt="Documentation">
|
||||
</a>
|
||||
<a href="https://huggingface.co/Kreuzberg">
|
||||
<img src="https://img.shields.io/badge/Hugging%20Face-Kreuzberg-007ec6" alt="Hugging Face">
|
||||
</a>
|
||||
</div>
|
||||
|
||||
<div align="center" style="margin: 24px 0 0;">
|
||||
<a href="https://kreuzberg.dev">
|
||||
<img alt="Kreuzberg" src="https://github.com/user-attachments/assets/419fc06c-8313-4324-b159-4b4d3cfce5c0" />
|
||||
</a>
|
||||
</div>
|
||||
|
||||
<div align="center" style="display: flex; flex-wrap: wrap; gap: 12px; justify-content: center; margin: 28px 0 24px;">
|
||||
<a href="https://discord.gg/xt9WY3GnKR">
|
||||
<img height="22" src="https://img.shields.io/badge/Discord-Chat-007ec6?logo=discord&logoColor=white" alt="Join Discord">
|
||||
</a>
|
||||
<a href="https://docs.kreuzberg.dev/demo.html">
|
||||
<img height="22" src="https://img.shields.io/badge/Live%20Demo-Open-007ec6?logo=webassembly&logoColor=white" alt="Live Demo">
|
||||
</a>
|
||||
</div>
|
||||
|
||||
Extract text, tables, images, and metadata from 90+ file formats — including PDF, Office documents, and images — and from source code in 300+ programming languages. Swift bindings via swift-bridge for macOS, iOS, and Linux, with native Swift types and async/await support.
|
||||
|
||||
## What This Package Provides
|
||||
|
||||
- **Document intelligence core** — extract text, tables, images, metadata, entities, keywords, and code intelligence from one API.
|
||||
- **Format coverage** — PDF, Office, images, HTML/XML, email, archives, notebooks, citations, scientific formats, and plain text.
|
||||
- **OCR choices** — Tesseract, PaddleOCR, EasyOCR where supported, VLM OCR through liter-llm, and plugin hooks for custom backends.
|
||||
- **Same engine as every binding** — Rust, Python, Node.js, Go, Java, PHP, Ruby, .NET, Elixir, R, WASM, Kotlin Android, Swift, Dart, Zig, and C FFI share the same Rust implementation.
|
||||
- **SwiftPM package** — Swift Concurrency API for Apple targets.
|
||||
|
||||
## Installation
|
||||
|
||||
### Package Installation
|
||||
|
||||
Add to your `Package.swift` dependencies:
|
||||
|
||||
```swift
|
||||
.package(url: "https://github.com/kreuzberg-dev/kreuzberg.git", from: "5.0.0-rc.3"),
|
||||
```
|
||||
|
||||
Then add the product to the relevant target:
|
||||
|
||||
```swift
|
||||
.target(
|
||||
name: "YourTarget",
|
||||
dependencies: [
|
||||
.product(name: "Kreuzberg", package: "kreuzberg"),
|
||||
]
|
||||
),
|
||||
```
|
||||
|
||||
### System Requirements
|
||||
- **Swift 6.0+** (`swift-tools-version: 6.0`) on macOS 13+ or iOS 16+
|
||||
- Native runtime delivered through the C FFI surface from `kreuzberg-ffi`; published artifacts ship as a binary target
|
||||
- Optional: [ONNX Runtime](https://github.com/microsoft/onnxruntime/releases) version 1.22.x for embeddings support
|
||||
- Optional: [Tesseract OCR](https://github.com/tesseract-ocr/tesseract) for OCR functionality
|
||||
|
||||
## Quick Start
|
||||
|
||||
### Basic Extraction
|
||||
|
||||
Extract text, metadata, and structure from any supported document format:
|
||||
|
||||
```swift title="Swift"
|
||||
import Foundation
|
||||
import Kreuzberg
|
||||
import RustBridge
|
||||
|
||||
let config = try extractionConfigFromJson("{}")
|
||||
let result = try extractFileSync("document.pdf", nil, config)
|
||||
|
||||
print(result.content().toString())
|
||||
print("MIME type: \(result.mime_type().toString())")
|
||||
print("Tables: \(result.tables().count)")
|
||||
```
|
||||
|
||||
### Common Use Cases
|
||||
|
||||
#### Extract with Custom Configuration
|
||||
|
||||
Most use cases benefit from configuration to control extraction behavior:
|
||||
|
||||
**With OCR (for scanned documents):**
|
||||
|
||||
```swift title="Swift"
|
||||
import Foundation
|
||||
import Kreuzberg
|
||||
import RustBridge
|
||||
|
||||
let configJson = """
|
||||
{
|
||||
"ocr": {
|
||||
"backend": "tesseract",
|
||||
"language": "eng"
|
||||
}
|
||||
}
|
||||
"""
|
||||
|
||||
let config = try extractionConfigFromJson(configJson)
|
||||
let result = try extractFileSync("scanned.pdf", nil, config)
|
||||
|
||||
print(result.content().toString())
|
||||
```
|
||||
|
||||
#### Table Extraction
|
||||
|
||||
See [Configuration Guide](https://docs.kreuzberg.dev/guides/configuration/) for table extraction options.
|
||||
|
||||
#### Processing Multiple Files
|
||||
|
||||
```swift title="Swift"
|
||||
import Foundation
|
||||
import Kreuzberg
|
||||
import RustBridge
|
||||
|
||||
// `BatchFileItem` is an opaque swift-bridge class with no public Swift
|
||||
// constructor — build items from JSON via `batchFileItemFromJson`.
|
||||
let items = RustVec<BatchFileItem>()
|
||||
for path in ["doc1.pdf", "doc2.docx", "report.pdf"] {
|
||||
let json = "{\"path\": \"\(path)\"}"
|
||||
items.push(value: try batchFileItemFromJson(json))
|
||||
}
|
||||
|
||||
let config = try extractionConfigFromJson("{}")
|
||||
let results = try batchExtractFilesSync(items, config)
|
||||
|
||||
for (index, result) in results.enumerated() {
|
||||
print("File \(index): \(result.content().toString().count) chars")
|
||||
}
|
||||
```
|
||||
|
||||
#### Async Processing
|
||||
|
||||
For non-blocking document processing:
|
||||
|
||||
```swift title="Swift"
|
||||
import Foundation
|
||||
import Kreuzberg
|
||||
import RustBridge
|
||||
|
||||
@main
|
||||
struct App {
|
||||
static func main() async throws {
|
||||
let config = try extractionConfigFromJson("{}")
|
||||
// The Swift binding exposes async-compatible entrypoints; even though
|
||||
// the bridge calls are synchronous internally, callers may `await` them
|
||||
// to integrate with Swift Concurrency.
|
||||
let result = try await extractFile("document.pdf", nil, config)
|
||||
|
||||
print(result.content().toString())
|
||||
print("MIME type: \(result.mime_type().toString())")
|
||||
print("Tables: \(result.tables().count)")
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Next Steps
|
||||
|
||||
- **[Installation Guide](https://docs.kreuzberg.dev/getting-started/installation/)** - Platform-specific setup
|
||||
- **[API Documentation](https://docs.kreuzberg.dev/reference/api-python/)** - Complete API reference
|
||||
- **[Examples & Guides](https://docs.kreuzberg.dev/)** - Full code examples and usage guides
|
||||
- **[Configuration Guide](https://docs.kreuzberg.dev/guides/configuration/)** - Advanced configuration options
|
||||
|
||||
## Features
|
||||
|
||||
### Supported File Formats (90+)
|
||||
|
||||
90+ file formats across 8 major categories with intelligent format detection and comprehensive metadata extraction.
|
||||
|
||||
#### Office Documents
|
||||
|
||||
| Category | Formats | Capabilities |
|
||||
|----------|---------|--------------|
|
||||
| **Word Processing** | `.docx`, `.docm`, `.dotx`, `.dotm`, `.dot`, `.odt` | Full text, tables, images, metadata, styles |
|
||||
| **Spreadsheets** | `.xlsx`, `.xlsm`, `.xlsb`, `.xls`, `.xla`, `.xlam`, `.xltm`, `.xltx`, `.xlt`, `.ods` | Sheet data, formulas, cell metadata, charts |
|
||||
| **Presentations** | `.pptx`, `.pptm`, `.ppsx`, `.potx`, `.potm`, `.pot`, `.ppt` | Slides, speaker notes, images, metadata |
|
||||
| **PDF** | `.pdf` | Text, tables, images, metadata, OCR support |
|
||||
| **eBooks** | `.epub`, `.fb2` | Chapters, metadata, embedded resources |
|
||||
| **Database** | `.dbf` | Table data extraction, field type support |
|
||||
| **Hangul** | `.hwp`, `.hwpx` | Korean document format, text extraction |
|
||||
|
||||
#### Images (OCR-Enabled)
|
||||
|
||||
| Category | Formats | Features |
|
||||
|----------|---------|----------|
|
||||
| **Raster** | `.png`, `.jpg`, `.jpeg`, `.gif`, `.webp`, `.bmp`, `.tiff`, `.tif` | OCR, table detection, EXIF metadata, dimensions, color space |
|
||||
| **Advanced** | `.jp2`, `.jpx`, `.jpm`, `.mj2`, `.jbig2`, `.jb2`, `.pnm`, `.pbm`, `.pgm`, `.ppm` | OCR via hayro-jpeg2000 (pure Rust decoder), JBIG2 support, table detection, format-specific metadata |
|
||||
| **Vector** | `.svg` | DOM parsing, embedded text, graphics metadata |
|
||||
|
||||
#### Web & Data
|
||||
|
||||
| Category | Formats | Features |
|
||||
|----------|---------|----------|
|
||||
| **Markup** | `.html`, `.htm`, `.xhtml`, `.xml`, `.svg` | DOM parsing, metadata (Open Graph, Twitter Card), link extraction |
|
||||
| **Structured Data** | `.json`, `.yaml`, `.yml`, `.toml`, `.csv`, `.tsv` | Schema detection, nested structures, validation |
|
||||
| **Text & Markdown** | `.txt`, `.md`, `.markdown`, `.djot`, `.rst`, `.org`, `.rtf` | CommonMark, GFM, Djot, reStructuredText, Org Mode |
|
||||
|
||||
#### Email & Archives
|
||||
|
||||
| Category | Formats | Features |
|
||||
|----------|---------|----------|
|
||||
| **Email** | `.eml`, `.msg` | Headers, body (HTML/plain), attachments, threading |
|
||||
| **Archives** | `.zip`, `.tar`, `.tgz`, `.gz`, `.7z` | File listing, nested archives, metadata |
|
||||
|
||||
#### Academic & Scientific
|
||||
|
||||
| Category | Formats | Features |
|
||||
|----------|---------|----------|
|
||||
| **Citations** | `.bib`, `.biblatex`, `.ris`, `.nbib`, `.enw`, `.csl` | Structured parsing: RIS (structured), PubMed/MEDLINE, EndNote XML (structured), BibTeX, CSL JSON |
|
||||
| **Scientific** | `.tex`, `.latex`, `.typst`, `.jats`, `.ipynb`, `.docbook` | LaTeX, Jupyter notebooks, PubMed JATS |
|
||||
| **Documentation** | `.opml`, `.pod`, `.mdoc`, `.troff` | Technical documentation formats |
|
||||
|
||||
#### Code Intelligence (300+ Languages)
|
||||
|
||||
| Feature | Description |
|
||||
|---------|-------------|
|
||||
| **Structure Extraction** | Functions, classes, methods, structs, interfaces, enums |
|
||||
| **Import/Export Analysis** | Module dependencies, re-exports, wildcard imports |
|
||||
| **Symbol Extraction** | Variables, constants, type aliases, properties |
|
||||
| **Docstring Parsing** | Google, NumPy, Sphinx, JSDoc, RustDoc, and 10+ formats |
|
||||
| **Diagnostics** | Parse errors with line/column positions |
|
||||
| **Syntax-Aware Chunking** | Split code by semantic boundaries, not arbitrary byte offsets |
|
||||
|
||||
Powered by [tree-sitter-language-pack](https://github.com/kreuzberg-dev/tree-sitter-language-pack) — [documentation](https://docs.tree-sitter-language-pack.kreuzberg.dev).
|
||||
|
||||
**[Complete Format Reference](https://docs.kreuzberg.dev/reference/formats/)**
|
||||
|
||||
### Key Capabilities
|
||||
|
||||
- **Text Extraction** - Extract all text content with position and formatting information
|
||||
- **Metadata Extraction** - Retrieve document properties, creation date, author, etc.
|
||||
- **Table Extraction** - Parse tables with structure and cell content preservation
|
||||
- **Image Extraction** - Extract embedded images and render page previews
|
||||
- **OCR Support** - Integrate multiple OCR backends for scanned documents
|
||||
- **Async/Await** - Non-blocking document processing with concurrent operations
|
||||
- **Plugin System** - Extensible post-processing for custom text transformation
|
||||
- **Embeddings** - Generate vector embeddings using ONNX Runtime models
|
||||
- **Batch Processing** - Efficiently process multiple documents in parallel
|
||||
- **Memory Efficient** - Stream large files without loading entirely into memory
|
||||
- **Language Detection** - Detect and support multiple languages in documents
|
||||
- **Code Intelligence** - Extract structure, imports, exports, symbols, and docstrings from [300+ programming languages](https://docs.tree-sitter-language-pack.kreuzberg.dev) via tree-sitter
|
||||
- **Configuration** - Fine-grained control over extraction behavior
|
||||
|
||||
### Performance Characteristics
|
||||
|
||||
| Format | Speed | Memory | Notes |
|
||||
|--------|-------|--------|-------|
|
||||
| **PDF (text)** | 10-100 MB/s | ~50MB per doc | Fastest extraction |
|
||||
| **Office docs** | 20-200 MB/s | ~100MB per doc | DOCX, XLSX, PPTX |
|
||||
| **Images (OCR)** | 1-5 MB/s | Variable | Depends on OCR backend |
|
||||
| **Archives** | 5-50 MB/s | ~200MB per doc | ZIP, TAR, etc. |
|
||||
| **Web formats** | 50-200 MB/s | Streaming | HTML, XML, JSON |
|
||||
|
||||
## OCR Support
|
||||
|
||||
Kreuzberg supports multiple OCR backends for extracting text from scanned documents and images:
|
||||
|
||||
- **Tesseract**
|
||||
|
||||
- **PaddleOCR**
|
||||
|
||||
### OCR Configuration Example
|
||||
|
||||
```swift title="Swift"
|
||||
import Foundation
|
||||
import Kreuzberg
|
||||
import RustBridge
|
||||
|
||||
let configJson = """
|
||||
{
|
||||
"ocr": {
|
||||
"backend": "tesseract",
|
||||
"language": "eng"
|
||||
}
|
||||
}
|
||||
"""
|
||||
|
||||
let config = try extractionConfigFromJson(configJson)
|
||||
let result = try extractFileSync("scanned.pdf", nil, config)
|
||||
|
||||
print(result.content().toString())
|
||||
```
|
||||
|
||||
## Async Support
|
||||
|
||||
This binding provides full async/await support for non-blocking document processing:
|
||||
|
||||
```swift title="Swift"
|
||||
import Foundation
|
||||
import Kreuzberg
|
||||
import RustBridge
|
||||
|
||||
@main
|
||||
struct App {
|
||||
static func main() async throws {
|
||||
let config = try extractionConfigFromJson("{}")
|
||||
// The Swift binding exposes async-compatible entrypoints; even though
|
||||
// the bridge calls are synchronous internally, callers may `await` them
|
||||
// to integrate with Swift Concurrency.
|
||||
let result = try await extractFile("document.pdf", nil, config)
|
||||
|
||||
print(result.content().toString())
|
||||
print("MIME type: \(result.mime_type().toString())")
|
||||
print("Tables: \(result.tables().count)")
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Plugin System
|
||||
|
||||
Kreuzberg supports extensible post-processing plugins for custom text transformation and filtering.
|
||||
|
||||
For detailed plugin documentation, visit [Plugin System Guide](https://docs.kreuzberg.dev/guides/plugins/).
|
||||
|
||||
## Embeddings Support
|
||||
|
||||
Generate vector embeddings for extracted text using the built-in ONNX Runtime support. Requires ONNX Runtime installation.
|
||||
|
||||
**[Embeddings Guide](https://docs.kreuzberg.dev/features/#embeddings)**
|
||||
|
||||
## Batch Processing
|
||||
|
||||
Process multiple documents efficiently:
|
||||
|
||||
```swift title="Swift"
|
||||
import Foundation
|
||||
import Kreuzberg
|
||||
import RustBridge
|
||||
|
||||
// `BatchFileItem` is an opaque swift-bridge class with no public Swift
|
||||
// constructor — build items from JSON via `batchFileItemFromJson`.
|
||||
let items = RustVec<BatchFileItem>()
|
||||
for path in ["doc1.pdf", "doc2.docx", "report.pdf"] {
|
||||
let json = "{\"path\": \"\(path)\"}"
|
||||
items.push(value: try batchFileItemFromJson(json))
|
||||
}
|
||||
|
||||
let config = try extractionConfigFromJson("{}")
|
||||
let results = try batchExtractFilesSync(items, config)
|
||||
|
||||
for (index, result) in results.enumerated() {
|
||||
print("File \(index): \(result.content().toString().count) chars")
|
||||
}
|
||||
```
|
||||
|
||||
## Configuration
|
||||
|
||||
For advanced configuration options including language detection, table extraction, OCR settings, and more:
|
||||
|
||||
**[Configuration Guide](https://docs.kreuzberg.dev/guides/configuration/)**
|
||||
|
||||
## Documentation
|
||||
|
||||
- **[Official Documentation](https://docs.kreuzberg.dev/)**
|
||||
- **[API Reference](https://docs.kreuzberg.dev/reference/api-python/)**
|
||||
- **[Examples & Guides](https://docs.kreuzberg.dev/)**
|
||||
|
||||
## Contributing
|
||||
|
||||
Contributions are welcome! See [Contributing Guide](https://github.com/kreuzberg-dev/kreuzberg/blob/main/CONTRIBUTING.md).
|
||||
|
||||
## Part of Kreuzberg.dev
|
||||
|
||||
- [Kreuzberg Cloud](https://github.com/kreuzberg-dev/kreuzberg-cloud) — managed extraction API with SDKs, dashboards, and observability.
|
||||
- [kreuzcrawl](https://github.com/kreuzberg-dev/kreuzcrawl) — web crawling and scraping with HTML→Markdown and headless-Chrome fallback.
|
||||
- [html-to-markdown](https://github.com/kreuzberg-dev/html-to-markdown) — fast, lossless HTML→Markdown engine.
|
||||
- [liter-llm](https://github.com/kreuzberg-dev/liter-llm) — universal LLM API client with native bindings for 14 languages and 143 providers.
|
||||
- [tree-sitter-language-pack](https://github.com/kreuzberg-dev/tree-sitter-language-pack) — tree-sitter grammars and code-intelligence primitives.
|
||||
- [alef](https://github.com/kreuzberg-dev/alef) — the polyglot binding generator that produces this README and all per-language bindings.
|
||||
- [Discord](https://discord.gg/xt9WY3GnKR) — community, roadmap, announcements.
|
||||
|
||||
## License
|
||||
|
||||
Elastic-2.0 License — see [LICENSE](../../LICENSE) for details.
|
||||
|
||||
## Support
|
||||
|
||||
- **Discord Community**: [Join our Discord](https://discord.gg/xt9WY3GnKR)
|
||||
- **GitHub Issues**: [Report bugs](https://github.com/kreuzberg-dev/kreuzberg/issues)
|
||||
- **Discussions**: [Ask questions](https://github.com/kreuzberg-dev/kreuzberg/discussions)
|
||||
58
packages/swift/Sources/Kreuzberg/BridgeRegistrationOverloads.swift
generated
Normal file
58
packages/swift/Sources/Kreuzberg/BridgeRegistrationOverloads.swift
generated
Normal file
@@ -0,0 +1,58 @@
|
||||
// Generated by alef. Do not edit by hand.
|
||||
// swift-format-ignore-file
|
||||
// This file contains convenience overloads matching the alef e2e generator's call shapes.
|
||||
|
||||
import Foundation
|
||||
import RustBridge
|
||||
|
||||
// MARK: - Unregister name: label overloads
|
||||
|
||||
/// Labelled-argument overload that forwards `name` to `RustBridge.unregisterOcrBackend(_:)`.
///
/// - Parameter name: Passed through unchanged to the bridge function.
/// - Throws: Whatever the bridge function throws.
public func unregisterOcrBackend(name: String) throws {
  try RustBridge.unregisterOcrBackend(name)
}
|
||||
|
||||
/// Labelled-argument overload that forwards `name` to `RustBridge.unregisterPostProcessor(_:)`.
///
/// - Parameter name: Passed through unchanged to the bridge function.
/// - Throws: Whatever the bridge function throws.
public func unregisterPostProcessor(name: String) throws {
  try RustBridge.unregisterPostProcessor(name)
}
|
||||
|
||||
/// Labelled-argument overload that forwards `name` to `RustBridge.unregisterValidator(_:)`.
///
/// - Parameter name: Passed through unchanged to the bridge function.
/// - Throws: Whatever the bridge function throws.
public func unregisterValidator(name: String) throws {
  try RustBridge.unregisterValidator(name)
}
|
||||
|
||||
/// Labelled-argument overload that forwards `name` to `RustBridge.unregisterEmbeddingBackend(_:)`.
///
/// - Parameter name: Passed through unchanged to the bridge function.
/// - Throws: Whatever the bridge function throws.
public func unregisterEmbeddingBackend(name: String) throws {
  try RustBridge.unregisterEmbeddingBackend(name)
}
|
||||
|
||||
/// Labelled-argument overload that forwards `name` to `RustBridge.unregisterDocumentExtractor(_:)`.
///
/// - Parameter name: Passed through unchanged to the bridge function.
/// - Throws: Whatever the bridge function throws.
public func unregisterDocumentExtractor(name: String) throws {
  try RustBridge.unregisterDocumentExtractor(name)
}
|
||||
|
||||
/// Labelled-argument overload that forwards `name` to `RustBridge.unregisterRenderer(_:)`.
///
/// - Parameter name: Passed through unchanged to the bridge function.
/// - Throws: Whatever the bridge function throws.
public func unregisterRenderer(name: String) throws {
  try RustBridge.unregisterRenderer(name)
}
|
||||
|
||||
// MARK: - Bridge → Box register overloads
|
||||
|
||||
/// Convenience overload: wraps `bridge` in a `SwiftOcrBackendBox` and registers the box.
///
/// - Parameter bridge: Conformer handed to `SwiftOcrBackendBox.init`.
/// - Throws: Whatever the box-taking `registerOcrBackend` overload throws.
public func registerOcrBackend(_ bridge: any SwiftOcrBackendBridge) throws {
  let box = SwiftOcrBackendBox(bridge)
  try registerOcrBackend(box)
}
|
||||
|
||||
/// Convenience overload: wraps `bridge` in a `SwiftPostProcessorBox` and registers the box.
///
/// - Parameter bridge: Conformer handed to `SwiftPostProcessorBox.init`.
/// - Throws: Whatever the box-taking `registerPostProcessor` overload throws.
public func registerPostProcessor(_ bridge: any SwiftPostProcessorBridge) throws {
  let box = SwiftPostProcessorBox(bridge)
  try registerPostProcessor(box)
}
|
||||
|
||||
/// Convenience overload: wraps `bridge` in a `SwiftValidatorBox` and registers the box.
///
/// - Parameter bridge: Conformer handed to `SwiftValidatorBox.init`.
/// - Throws: Whatever the box-taking `registerValidator` overload throws.
public func registerValidator(_ bridge: any SwiftValidatorBridge) throws {
  let box = SwiftValidatorBox(bridge)
  try registerValidator(box)
}
|
||||
|
||||
/// Convenience overload: wraps `bridge` in a `SwiftEmbeddingBackendBox` and registers the box.
///
/// - Parameter bridge: Conformer handed to `SwiftEmbeddingBackendBox.init`.
/// - Throws: Whatever the box-taking `registerEmbeddingBackend` overload throws.
public func registerEmbeddingBackend(_ bridge: any SwiftEmbeddingBackendBridge) throws {
  let box = SwiftEmbeddingBackendBox(bridge)
  try registerEmbeddingBackend(box)
}
|
||||
|
||||
/// Convenience overload: wraps `bridge` in a `SwiftDocumentExtractorBox` and registers the box.
///
/// - Parameter bridge: Conformer handed to `SwiftDocumentExtractorBox.init`.
/// - Throws: Whatever the box-taking `registerDocumentExtractor` overload throws.
public func registerDocumentExtractor(_ bridge: any SwiftDocumentExtractorBridge) throws {
  let box = SwiftDocumentExtractorBox(bridge)
  try registerDocumentExtractor(box)
}
|
||||
|
||||
/// Convenience overload: wraps `bridge` in a `SwiftRendererBox` and registers the box.
///
/// - Parameter bridge: Conformer handed to `SwiftRendererBox.init`.
/// - Throws: Whatever the box-taking `registerRenderer` overload throws.
public func registerRenderer(_ bridge: any SwiftRendererBridge) throws {
  let box = SwiftRendererBox(bridge)
  try registerRenderer(box)
}
|
||||
20
packages/swift/Sources/Kreuzberg/ExtractionResultExtensions.swift
generated
Normal file
20
packages/swift/Sources/Kreuzberg/ExtractionResultExtensions.swift
generated
Normal file
@@ -0,0 +1,20 @@
|
||||
import RustBridge
|
||||
|
||||
// MARK: - Property-access ergonomics for e2e tests
|
||||
//
|
||||
// This file provides computed-property aliases for methods on swift-bridge-generated types,
|
||||
// allowing callers to write `result.mimeType` rather than `result.mimeType()`.
|
||||
// These extensions are especially useful in e2e test assertions where the alef
|
||||
// fixture generator emits property-access syntax.
|
||||
//
|
||||
// Although these are primarily for test convenience, they are part of the public API
|
||||
// and can be used in production code for more ergonomic access to extraction results.
|
||||
|
||||
extension RustBridge.ServerConfigRef {
  /// Property-style accessor for the `listen_addr()` method.
  ///
  /// Calls the method and converts its result to a Swift `String` via `toString()`.
  public var listen_addr: String {
    let address = self.listen_addr()
    return address.toString()
  }
}
|
||||
|
||||
// ServerConfigRefMut and ServerConfig inherit the extensions automatically
|
||||
8477
packages/swift/Sources/Kreuzberg/Kreuzberg.swift
generated
Normal file
8477
packages/swift/Sources/Kreuzberg/Kreuzberg.swift
generated
Normal file
File diff suppressed because it is too large
Load Diff
93
packages/swift/Sources/Kreuzberg/LICENSE
generated
Normal file
93
packages/swift/Sources/Kreuzberg/LICENSE
generated
Normal file
@@ -0,0 +1,93 @@
|
||||
Elastic License 2.0 (ELv2)
|
||||
|
||||
Copyright 2025-2026 Kreuzberg, Inc.
|
||||
|
||||
Acceptance
|
||||
|
||||
By using the software, you agree to all of the terms and conditions below.
|
||||
|
||||
Copyright License
|
||||
|
||||
The licensor grants you a non-exclusive, royalty-free, worldwide,
|
||||
non-sublicensable, non-transferable license to use, copy, distribute, make
|
||||
available, and prepare derivative works of the software, in each case subject to
|
||||
the limitations and conditions below.
|
||||
|
||||
Limitations
|
||||
|
||||
You may not provide the software to third parties as a hosted or managed
|
||||
service, where the service provides users with access to any substantial set of
|
||||
the features or functionality of the software.
|
||||
|
||||
You may not move, change, disable, or circumvent the license key functionality
|
||||
in the software, and you may not remove or obscure any functionality in the
|
||||
software that is protected by the license key.
|
||||
|
||||
You may not alter, remove, or obscure any licensing, copyright, or other notices
|
||||
of the licensor in the software. Any use of the licensor's trademarks is subject
|
||||
to applicable law.
|
||||
|
||||
Patents
|
||||
|
||||
The licensor grants you a license, under any patent claims the licensor can
|
||||
license, or becomes able to license, to make, have made, use, sell, offer for
|
||||
sale, import and have imported the software, in each case subject to the
|
||||
limitations and conditions in this license. This license does not cover any
|
||||
patent claims that you cause to be infringed by modifications or additions to the
|
||||
software. If you or your company make any written claim that the software
|
||||
infringes or contributes to infringement of any patent, your patent license for
|
||||
the software granted under these terms ends immediately. If your company makes
|
||||
such a claim, your patent license ends immediately for work on behalf of your
|
||||
company.
|
||||
|
||||
Notices
|
||||
|
||||
You must ensure that anyone who gets a copy of any part of the software from you
|
||||
also gets a copy of these terms.
|
||||
|
||||
If you modify the software, you must include in any modified copies of the
|
||||
software prominent notices stating that you have modified the software.
|
||||
|
||||
No Other Rights
|
||||
|
||||
These terms do not imply any licenses other than those expressly granted in
|
||||
these terms.
|
||||
|
||||
Termination
|
||||
|
||||
If you use the software in violation of these terms, such use is not licensed,
|
||||
and your licenses will automatically terminate. If the licensor provides you with
|
||||
a notice of your violation, and you cease all violation of this license no later
|
||||
than 30 days after you receive that notice, your licenses will be reinstated
|
||||
retroactively. However, if you violate these terms after such reinstatement, any
|
||||
additional violation of these terms will cause your licenses to terminate
|
||||
automatically and permanently.
|
||||
|
||||
No Liability
|
||||
|
||||
As far as the law allows, the software comes as is, without any warranty or
|
||||
condition, and the licensor will not be liable to you for any damages arising out
|
||||
of these terms or the use or nature of the software, under any kind of legal
|
||||
claim.
|
||||
|
||||
Definitions
|
||||
|
||||
The licensor is the entity offering these terms, and the software is the
|
||||
software the licensor makes available under these terms, including any portion
|
||||
of it.
|
||||
|
||||
you refers to the individual or entity agreeing to these terms.
|
||||
|
||||
your company is any legal entity, sole proprietorship, or other kind of
|
||||
organization that you work for, plus all organizations that have control over,
|
||||
are under the control of, or are under common control with that organization.
|
||||
control means ownership of substantially all the assets of an entity, or the
|
||||
power to direct its management and policies by vote, contract, or otherwise.
|
||||
Control can be direct or indirect.
|
||||
|
||||
your licenses are all the licenses granted to you for the software under these
|
||||
terms.
|
||||
|
||||
use means anything you do with the software requiring one of your licenses.
|
||||
|
||||
trademark means trademarks, service marks, and similar rights.
|
||||
72
packages/swift/Sources/Kreuzberg/SwiftDocumentExtractorBridge.swift
generated
Normal file
72
packages/swift/Sources/Kreuzberg/SwiftDocumentExtractorBridge.swift
generated
Normal file
@@ -0,0 +1,72 @@
|
||||
// Generated by alef. Do not edit by hand.
|
||||
// swift-format-ignore-file
|
||||
// This file contains generated FFI glue for trait bridge registration.
|
||||
|
||||
import Foundation
|
||||
import RustBridge
|
||||
|
||||
/// Protocol for outbound `DocumentExtractor` implementations.
///
/// Adopt this protocol to implement the Rust trait from the host side. The protocol is
/// class-constrained (`AnyObject`), so only reference types can conform.
public protocol SwiftDocumentExtractorBridge: AnyObject {
  /// Handles `content` of the given `mime_type` using `config`.
  ///
  /// - Returns: A string that the adapter JSON-encodes before placing it in the ok envelope.
  /// - Throws: Any error; the adapter turns it into an error envelope.
  func extractBytes(content: Data, mime_type: String, config: ExtractionConfig) throws -> String

  /// Returns the MIME types reported by this implementation.
  func supportedMimeTypes() -> [String]
}
|
||||
|
||||
/// Internal adapter around a `SwiftDocumentExtractorBridge` conformer.
///
/// Forwards each trait call to the wrapped conformer; the fallible call's outcome is
/// returned as a JSON envelope string instead of a thrown error.
final class SwiftDocumentExtractorAdapter {
  private let bridge: any SwiftDocumentExtractorBridge

  init(bridge: any SwiftDocumentExtractorBridge) {
    self.bridge = bridge
  }

  /// Invokes `extractBytes` on the conformer and wraps the outcome in an envelope.
  ///
  /// - Returns: `{"ok": <json>}` on success, `{"ok": null}` when the encoded bytes cannot be
  ///   read as UTF-8, or the string built by `marshal_error_result` when the call or the
  ///   encoding throws.
  /// - Note: Declared `throws`, but every error raised inside is caught and enveloped.
  func extractBytesCall(content: Data, mime_type: String, config: ExtractionConfig) throws -> String {
    do {
      let extracted = try bridge.extractBytes(content: content, mime_type: mime_type, config: config)
      let payload = try marshal_encode_excluded(extracted)
      guard let json = String(data: payload, encoding: .utf8) else {
        return "{\"ok\": null}"
      }
      return "{\"ok\": \(json)}"
    } catch {
      return marshal_error_result(error)
    }
  }

  /// Passes through the conformer's `supportedMimeTypes()` result unchanged.
  func supportedMimeTypesCall() -> [String] {
    bridge.supportedMimeTypes()
  }
}
|
||||
|
||||
// MARK: - Marshalling helpers
|
||||
|
||||
/// Field-less `Codable` placeholder; `JSONEncoder` writes it as `{}`.
private struct Empty: Codable {}
|
||||
|
||||
/// Builds the success envelope `{"ok": <json>}` for `value`.
///
/// - Returns: The envelope containing `value`'s JSON encoding, or `{"ok": null}` when
///   encoding fails or the encoded bytes cannot be read as UTF-8.
private func marshal_ok_result<T: Encodable>(_ value: T) -> String {
  guard
    let encoded = try? JSONEncoder().encode(value),
    let payload = String(data: encoded, encoding: .utf8)
  else {
    return "{\"ok\": null}"
  }
  return "{\"ok\": \(payload)}"
}
|
||||
|
||||
/// JSON-encodes `value` with a default-configured `JSONEncoder`.
///
/// - Throws: The encoder's error when `value` cannot be encoded.
private func marshal_encode_excluded<T: Encodable>(_ value: T) throws -> Data {
  try JSONEncoder().encode(value)
}
|
||||
|
||||
/// Builds the failure envelope `{"err": "<description>"}` for `error`.
///
/// The message is `String(describing: error)`, JSON-encoded so that it is correctly quoted
/// and escaped. Falls back to a fixed "unknown error" envelope if that encoding fails.
private func marshal_error_result(_ error: any Error) -> String {
  let description = String(describing: error)
  guard
    let encoded = try? JSONEncoder().encode(description),
    let quoted = String(data: encoded, encoding: .utf8)
  else {
    return "{\"err\": \"unknown error\"}"
  }
  return "{\"err\": \(quoted)}"
}
|
||||
68
packages/swift/Sources/Kreuzberg/SwiftEmbeddingBackendBridge.swift
generated
Normal file
68
packages/swift/Sources/Kreuzberg/SwiftEmbeddingBackendBridge.swift
generated
Normal file
@@ -0,0 +1,68 @@
|
||||
// Generated by alef. Do not edit by hand.
|
||||
// swift-format-ignore-file
|
||||
// This file contains generated FFI glue for trait bridge registration.
|
||||
|
||||
import Foundation
|
||||
import RustBridge
|
||||
|
||||
/// Protocol for outbound `EmbeddingBackend` implementations.
///
/// Adopt this protocol to implement the Rust trait from the host side. The protocol is
/// class-constrained (`AnyObject`), so only reference types can conform.
public protocol SwiftEmbeddingBackendBridge: AnyObject {
  /// Returns the dimension count reported by this backend.
  func dimensions() -> Int

  /// Produces float vectors for `texts`.
  ///
  /// - Throws: Any error; the adapter turns it into an error envelope.
  func embed(texts: [String]) throws -> [[Float]]
}
|
||||
|
||||
/// Internal adapter around a `SwiftEmbeddingBackendBridge` conformer.
///
/// Forwards each trait call to the wrapped conformer; the fallible call's outcome is
/// returned as a JSON envelope string instead of a thrown error.
final class SwiftEmbeddingBackendAdapter {
  private let bridge: any SwiftEmbeddingBackendBridge

  init(bridge: any SwiftEmbeddingBackendBridge) {
    self.bridge = bridge
  }

  /// Passes through the conformer's `dimensions()` result unchanged.
  func dimensionsCall() -> Int {
    bridge.dimensions()
  }

  /// Invokes `embed` on the conformer and wraps the vectors in an envelope.
  ///
  /// - Returns: `{"ok": <nested JSON array>}` on success, `{"ok": null}` when the encoded
  ///   bytes cannot be read as UTF-8, or the string built by `marshal_error_result` when the
  ///   call or the encoding throws.
  /// - Note: Declared `throws`, but every error raised inside is caught and enveloped.
  func embedCall(texts: [String]) throws -> String {
    do {
      let vectors = try bridge.embed(texts: texts)
      // Interpolate the encoded JSON text directly. Passing the encoded `Data` through
      // `marshal_ok_result` would encode it a second time, and `JSONEncoder` writes `Data`
      // as a base64 string by default, so the payload would no longer be an array.
      let encoded = try JSONEncoder().encode(vectors)
      guard let json = String(data: encoded, encoding: .utf8) else {
        return "{\"ok\": null}"
      }
      return "{\"ok\": \(json)}"
    } catch {
      return marshal_error_result(error)
    }
  }
}
|
||||
|
||||
// MARK: - Marshalling helpers
|
||||
|
||||
/// Field-less `Codable` placeholder; `JSONEncoder` writes it as `{}`.
private struct Empty: Codable {}
|
||||
|
||||
/// Builds the success envelope `{"ok": <json>}` for `value`.
///
/// - Returns: The envelope containing `value`'s JSON encoding, or `{"ok": null}` when
///   encoding fails or the encoded bytes cannot be read as UTF-8.
private func marshal_ok_result<T: Encodable>(_ value: T) -> String {
  guard
    let encoded = try? JSONEncoder().encode(value),
    let payload = String(data: encoded, encoding: .utf8)
  else {
    return "{\"ok\": null}"
  }
  return "{\"ok\": \(payload)}"
}
|
||||
|
||||
/// JSON-encodes `value` with a default-configured `JSONEncoder`.
///
/// - Throws: The encoder's error when `value` cannot be encoded.
private func marshal_encode_excluded<T: Encodable>(_ value: T) throws -> Data {
  try JSONEncoder().encode(value)
}
|
||||
|
||||
/// Builds the failure envelope `{"err": "<description>"}` for `error`.
///
/// The message is `String(describing: error)`, JSON-encoded so that it is correctly quoted
/// and escaped. Falls back to a fixed "unknown error" envelope if that encoding fails.
private func marshal_error_result(_ error: any Error) -> String {
  let description = String(describing: error)
  guard
    let encoded = try? JSONEncoder().encode(description),
    let quoted = String(data: encoded, encoding: .utf8)
  else {
    return "{\"err\": \"unknown error\"}"
  }
  return "{\"err\": \(quoted)}"
}
|
||||
78
packages/swift/Sources/Kreuzberg/SwiftOcrBackendBridge.swift
generated
Normal file
78
packages/swift/Sources/Kreuzberg/SwiftOcrBackendBridge.swift
generated
Normal file
@@ -0,0 +1,78 @@
|
||||
// Generated by alef. Do not edit by hand.
|
||||
// swift-format-ignore-file
|
||||
// This file contains generated FFI glue for trait bridge registration.
|
||||
|
||||
import Foundation
|
||||
import RustBridge
|
||||
|
||||
/// Protocol for outbound `OcrBackend` implementations.
///
/// Adopt this protocol to implement the Rust trait from the host side. The protocol is
/// class-constrained (`AnyObject`), so only reference types can conform.
public protocol SwiftOcrBackendBridge: AnyObject {
  /// Handles `image_bytes` using `config`.
  ///
  /// - Returns: A string that the adapter JSON-encodes before placing it in the ok envelope.
  /// - Throws: Any error; the adapter turns it into an error envelope.
  func processImage(image_bytes: Data, config: OcrConfig) throws -> String

  /// Reports whether `lang` is supported.
  func supportsLanguage(lang: String) -> Bool

  /// Returns this backend's `OcrBackendType`.
  func backendType() -> OcrBackendType
}
|
||||
|
||||
/// Internal adapter around a `SwiftOcrBackendBridge` conformer.
///
/// Forwards each trait call to the wrapped conformer; the fallible call's outcome is
/// returned as a JSON envelope string instead of a thrown error.
final class SwiftOcrBackendAdapter {
  private let bridge: any SwiftOcrBackendBridge

  init(bridge: any SwiftOcrBackendBridge) {
    self.bridge = bridge
  }

  /// Invokes `processImage` on the conformer and wraps the outcome in an envelope.
  ///
  /// - Returns: `{"ok": <json>}` on success, `{"ok": null}` when the encoded bytes cannot be
  ///   read as UTF-8, or the string built by `marshal_error_result` when the call or the
  ///   encoding throws.
  /// - Note: Declared `throws`, but every error raised inside is caught and enveloped.
  func processImageCall(image_bytes: Data, config: OcrConfig) throws -> String {
    do {
      let recognised = try bridge.processImage(image_bytes: image_bytes, config: config)
      let payload = try marshal_encode_excluded(recognised)
      guard let json = String(data: payload, encoding: .utf8) else {
        return "{\"ok\": null}"
      }
      return "{\"ok\": \(json)}"
    } catch {
      return marshal_error_result(error)
    }
  }

  /// Passes through the conformer's `supportsLanguage(lang:)` answer unchanged.
  func supportsLanguageCall(lang: String) -> Bool {
    bridge.supportsLanguage(lang: lang)
  }

  /// Passes through the conformer's `backendType()` result unchanged.
  func backendTypeCall() -> OcrBackendType {
    bridge.backendType()
  }
}
|
||||
|
||||
// MARK: - Marshalling helpers
|
||||
|
||||
/// Field-less `Codable` placeholder; `JSONEncoder` writes it as `{}`.
private struct Empty: Codable {}
|
||||
|
||||
/// Builds the success envelope `{"ok": <json>}` for `value`.
///
/// - Returns: The envelope containing `value`'s JSON encoding, or `{"ok": null}` when
///   encoding fails or the encoded bytes cannot be read as UTF-8.
private func marshal_ok_result<T: Encodable>(_ value: T) -> String {
  guard
    let encoded = try? JSONEncoder().encode(value),
    let payload = String(data: encoded, encoding: .utf8)
  else {
    return "{\"ok\": null}"
  }
  return "{\"ok\": \(payload)}"
}
|
||||
|
||||
/// JSON-encodes `value` with a default-configured `JSONEncoder`.
///
/// - Throws: The encoder's error when `value` cannot be encoded.
private func marshal_encode_excluded<T: Encodable>(_ value: T) throws -> Data {
  try JSONEncoder().encode(value)
}
|
||||
|
||||
/// Builds the failure envelope `{"err": "<description>"}` for `error`.
///
/// The message is `String(describing: error)`, JSON-encoded so that it is correctly quoted
/// and escaped. Falls back to a fixed "unknown error" envelope if that encoding fails.
private func marshal_error_result(_ error: any Error) -> String {
  let description = String(describing: error)
  guard
    let encoded = try? JSONEncoder().encode(description),
    let quoted = String(data: encoded, encoding: .utf8)
  else {
    return "{\"err\": \"unknown error\"}"
  }
  return "{\"err\": \(quoted)}"
}
|
||||
68
packages/swift/Sources/Kreuzberg/SwiftPostProcessorBridge.swift
generated
Normal file
68
packages/swift/Sources/Kreuzberg/SwiftPostProcessorBridge.swift
generated
Normal file
@@ -0,0 +1,68 @@
|
||||
// Generated by alef. Do not edit by hand.
|
||||
// swift-format-ignore-file
|
||||
// This file contains generated FFI glue for trait bridge registration.
|
||||
|
||||
import Foundation
|
||||
import RustBridge
|
||||
|
||||
/// Protocol for outbound `PostProcessor` implementations.
///
/// Adopt this protocol to implement the Rust trait from the host side. The protocol is
/// class-constrained (`AnyObject`), so only reference types can conform.
public protocol SwiftPostProcessorBridge: AnyObject {
  /// Processes `result` using `config`; produces no value.
  ///
  /// - Throws: Any error; the adapter turns it into an error envelope.
  func process(result: String, config: ExtractionConfig) throws

  /// Returns this processor's `ProcessingStage`.
  func processingStage() -> ProcessingStage
}
|
||||
|
||||
/// Internal adapter around a `SwiftPostProcessorBridge` conformer.
///
/// Forwards each trait call to the wrapped conformer; the fallible call's outcome is
/// returned as a JSON envelope string instead of a thrown error.
final class SwiftPostProcessorAdapter {
  private let bridge: any SwiftPostProcessorBridge

  init(bridge: any SwiftPostProcessorBridge) {
    self.bridge = bridge
  }

  /// Invokes `process` on the conformer and reports the outcome as an envelope.
  ///
  /// - Returns: `marshal_ok_result(Empty())` on success, or the string built by
  ///   `marshal_error_result` when the conformer throws.
  /// - Note: Declared `throws`, but every error raised inside is caught and enveloped.
  func processCall(result: String, config: ExtractionConfig) throws -> String {
    do {
      // `process` returns `Void`; binding it to a local only shadowed the `result`
      // parameter and produced unused-value warnings.
      try bridge.process(result: result, config: config)
      return marshal_ok_result(Empty())
    } catch {
      return marshal_error_result(error)
    }
  }

  /// Passes through the conformer's `processingStage()` result unchanged.
  func processingStageCall() -> ProcessingStage {
    bridge.processingStage()
  }
}
|
||||
|
||||
// MARK: - Marshalling helpers
|
||||
|
||||
/// Field-less `Codable` placeholder; `JSONEncoder` writes it as `{}`.
private struct Empty: Codable {}
|
||||
|
||||
/// Builds the success envelope `{"ok": <json>}` for `value`.
///
/// - Returns: The envelope containing `value`'s JSON encoding, or `{"ok": null}` when
///   encoding fails or the encoded bytes cannot be read as UTF-8.
private func marshal_ok_result<T: Encodable>(_ value: T) -> String {
  guard
    let encoded = try? JSONEncoder().encode(value),
    let payload = String(data: encoded, encoding: .utf8)
  else {
    return "{\"ok\": null}"
  }
  return "{\"ok\": \(payload)}"
}
|
||||
|
||||
/// JSON-encodes `value` with a default-configured `JSONEncoder`.
///
/// - Throws: The encoder's error when `value` cannot be encoded.
private func marshal_encode_excluded<T: Encodable>(_ value: T) throws -> Data {
  try JSONEncoder().encode(value)
}
|
||||
|
||||
/// Builds the failure envelope `{"err": "<description>"}` for `error`.
///
/// The message is `String(describing: error)`, JSON-encoded so that it is correctly quoted
/// and escaped. Falls back to a fixed "unknown error" envelope if that encoding fails.
private func marshal_error_result(_ error: any Error) -> String {
  let description = String(describing: error)
  guard
    let encoded = try? JSONEncoder().encode(description),
    let quoted = String(data: encoded, encoding: .utf8)
  else {
    return "{\"err\": \"unknown error\"}"
  }
  return "{\"err\": \(quoted)}"
}
|
||||
62
packages/swift/Sources/Kreuzberg/SwiftRendererBridge.swift
generated
Normal file
62
packages/swift/Sources/Kreuzberg/SwiftRendererBridge.swift
generated
Normal file
@@ -0,0 +1,62 @@
|
||||
// Generated by alef. Do not edit by hand.
|
||||
// swift-format-ignore-file
|
||||
// This file contains generated FFI glue for trait bridge registration.
|
||||
|
||||
import Foundation
|
||||
import RustBridge
|
||||
|
||||
/// Protocol for outbound `Renderer` implementations.
///
/// Adopt this protocol to implement the Rust trait from the host side. The protocol is
/// class-constrained (`AnyObject`), so only reference types can conform.
public protocol SwiftRendererBridge: AnyObject {
  /// Renders `doc` and returns the output as a string.
  ///
  /// - Throws: Any error; the adapter turns it into an error envelope.
  func render(doc: String) throws -> String
}
|
||||
|
||||
/// Internal adapter around a `SwiftRendererBridge` conformer.
///
/// Forwards the trait call to the wrapped conformer and returns its outcome as a JSON
/// envelope string instead of a thrown error.
final class SwiftRendererAdapter {
  private let bridge: any SwiftRendererBridge

  init(bridge: any SwiftRendererBridge) {
    self.bridge = bridge
  }

  /// Invokes `render` on the conformer and wraps the outcome in an envelope.
  ///
  /// - Returns: `marshal_ok_result` applied to the rendered string, or the string built by
  ///   `marshal_error_result` when the conformer throws.
  /// - Note: Declared `throws`, but every error raised inside is caught and enveloped.
  func renderCall(doc: String) throws -> String {
    let rendered: String
    do {
      rendered = try bridge.render(doc: doc)
    } catch {
      return marshal_error_result(error)
    }
    return marshal_ok_result(rendered)
  }
}
|
||||
|
||||
// MARK: - Marshalling helpers
|
||||
|
||||
/// Field-less `Codable` placeholder; `JSONEncoder` writes it as `{}`.
private struct Empty: Codable {}
|
||||
|
||||
/// Builds the success envelope `{"ok": <json>}` for `value`.
///
/// - Returns: The envelope containing `value`'s JSON encoding, or `{"ok": null}` when
///   encoding fails or the encoded bytes cannot be read as UTF-8.
private func marshal_ok_result<T: Encodable>(_ value: T) -> String {
  guard
    let encoded = try? JSONEncoder().encode(value),
    let payload = String(data: encoded, encoding: .utf8)
  else {
    return "{\"ok\": null}"
  }
  return "{\"ok\": \(payload)}"
}
|
||||
|
||||
/// JSON-encodes `value` with a default-configured `JSONEncoder`.
///
/// - Throws: The encoder's error when `value` cannot be encoded.
private func marshal_encode_excluded<T: Encodable>(_ value: T) throws -> Data {
  try JSONEncoder().encode(value)
}
|
||||
|
||||
/// Builds the failure envelope `{"err": "<description>"}` for `error`.
///
/// The message is `String(describing: error)`, JSON-encoded so that it is correctly quoted
/// and escaped. Falls back to a fixed "unknown error" envelope if that encoding fails.
private func marshal_error_result(_ error: any Error) -> String {
  let description = String(describing: error)
  guard
    let encoded = try? JSONEncoder().encode(description),
    let quoted = String(data: encoded, encoding: .utf8)
  else {
    return "{\"err\": \"unknown error\"}"
  }
  return "{\"err\": \(quoted)}"
}
|
||||
62
packages/swift/Sources/Kreuzberg/SwiftValidatorBridge.swift
generated
Normal file
62
packages/swift/Sources/Kreuzberg/SwiftValidatorBridge.swift
generated
Normal file
@@ -0,0 +1,62 @@
|
||||
// Generated by alef. Do not edit by hand.
|
||||
// swift-format-ignore-file
|
||||
// This file contains generated FFI glue for trait bridge registration.
|
||||
|
||||
import Foundation
|
||||
import RustBridge
|
||||
|
||||
/// Protocol for outbound `Validator` implementations.
///
/// Adopt this protocol to implement the Rust trait from the host side. The protocol is
/// class-constrained (`AnyObject`), so only reference types can conform.
public protocol SwiftValidatorBridge: AnyObject {
  /// Validates `result` using `config`; produces no value.
  ///
  /// - Throws: Any error; the adapter turns it into an error envelope.
  func validate(result: String, config: ExtractionConfig) throws
}
|
||||
|
||||
/// Internal adapter around a `SwiftValidatorBridge` conformer.
///
/// Forwards the trait call to the wrapped conformer and returns its outcome as a JSON
/// envelope string instead of a thrown error.
final class SwiftValidatorAdapter {
  private let bridge: any SwiftValidatorBridge

  init(bridge: any SwiftValidatorBridge) {
    self.bridge = bridge
  }

  /// Invokes `validate` on the conformer and reports the outcome as an envelope.
  ///
  /// - Returns: `marshal_ok_result(Empty())` on success, or the string built by
  ///   `marshal_error_result` when the conformer throws.
  /// - Note: Declared `throws`, but every error raised inside is caught and enveloped.
  func validateCall(result: String, config: ExtractionConfig) throws -> String {
    do {
      // `validate` returns `Void`; binding it to a local only shadowed the `result`
      // parameter and produced unused-value warnings.
      try bridge.validate(result: result, config: config)
      return marshal_ok_result(Empty())
    } catch {
      return marshal_error_result(error)
    }
  }
}
|
||||
|
||||
// MARK: - Marshalling helpers
|
||||
|
||||
/// Field-less `Codable` placeholder; `JSONEncoder` writes it as `{}`.
private struct Empty: Codable {}
|
||||
|
||||
/// Builds the success envelope `{"ok": <json>}` for `value`.
///
/// - Returns: The envelope containing `value`'s JSON encoding, or `{"ok": null}` when
///   encoding fails or the encoded bytes cannot be read as UTF-8.
private func marshal_ok_result<T: Encodable>(_ value: T) -> String {
  guard
    let encoded = try? JSONEncoder().encode(value),
    let payload = String(data: encoded, encoding: .utf8)
  else {
    return "{\"ok\": null}"
  }
  return "{\"ok\": \(payload)}"
}
|
||||
|
||||
/// JSON-encodes `value` with a default-configured `JSONEncoder`.
///
/// - Throws: The encoder's error when `value` cannot be encoded.
private func marshal_encode_excluded<T: Encodable>(_ value: T) throws -> Data {
  try JSONEncoder().encode(value)
}
|
||||
|
||||
/// Builds the failure envelope `{"err": "<description>"}` for `error`.
///
/// The message is `String(describing: error)`, JSON-encoded so that it is correctly quoted
/// and escaped. Falls back to a fixed "unknown error" envelope if that encoding fails.
private func marshal_error_result(_ error: any Error) -> String {
  let description = String(describing: error)
  guard
    let encoded = try? JSONEncoder().encode(description),
    let quoted = String(data: encoded, encoding: .utf8)
  else {
    return "{\"err\": \"unknown error\"}"
  }
  return "{\"err\": \(quoted)}"
}
|
||||
494
packages/swift/Sources/RustBridge/Plugins.swift
generated
Normal file
494
packages/swift/Sources/RustBridge/Plugins.swift
generated
Normal file
@@ -0,0 +1,494 @@
|
||||
// Hand-authored Swift-side adapter classes for the inbound plugin trait bridge.
|
||||
//
|
||||
// The Rust crate `kreuzberg-swift` declares `extern "Swift" type Swift{Trait}Box` for each
|
||||
// kreuzberg plugin trait — Rust calls into Swift via these handles whenever the host needs
|
||||
// to drive a registered Swift plugin. swift-bridge looks up the Swift classes by name and
|
||||
// uses `Unmanaged<T>.passRetained` to bridge ARC across the FFI boundary.
|
||||
//
|
||||
// This file is *not* alef-generated: alef emits the Rust side of the bridge plus the FFI
|
||||
// shim signatures, but the user-facing Swift API (the protocols you adopt, plus the box
|
||||
// classes that adapt those protocols to the FFI) lives here so users can iterate without
|
||||
// needing to regenerate the bindings.
|
||||
//
|
||||
// Marshalling strategy mirrors the Rust side:
|
||||
//
|
||||
// - Primitives, `String`, `[UInt8]`, and `[String]` pass through directly.
|
||||
// - Complex types (`OcrConfig`, `ExtractionConfig`, `ExtractionResult`, …) are exchanged
|
||||
// as JSON-encoded strings and decoded via `Codable`.
|
||||
// - Fallible methods return a JSON envelope (`{"ok": <value>}` / `{"err": "<message>"}`)
|
||||
// because swift-bridge 0.1.59 cannot bridge `Result<RustString, RustString>` correctly.
|
||||
//
|
||||
// To register a Swift plugin:
|
||||
//
|
||||
// ```swift
|
||||
// final class MyOcrBackend: OcrBackend { /* … */ }
|
||||
// try Kreuzberg.registerOcrBackend(MyOcrBackend())
|
||||
// ```
|
||||
|
||||
import Foundation
|
||||
import RustBridge
|
||||
|
||||
// MARK: - JSON envelope helpers
|
||||
|
||||
/// Result envelope shared by every fallible Swift trait method.
///
/// A success is written as `{"ok": <serialised T>}` and a failure as `{"err": "<message>"}`,
/// mirroring the Rust `InboundEnvelope<T>` enum in the alef-generated bridge.
private enum InboundEnvelope<T: Encodable>: Encodable {
  case ok(T)
  case err(String)

  enum CodingKeys: String, CodingKey {
    case ok
    case err
  }

  /// Writes a single-key JSON object whose key names the active case.
  func encode(to encoder: Encoder) throws {
    var keyed = encoder.container(keyedBy: CodingKeys.self)
    switch self {
    case let .ok(payload):
      try keyed.encode(payload, forKey: .ok)
    case let .err(reason):
      try keyed.encode(reason, forKey: .err)
    }
  }
}
|
||||
|
||||
/// Returns the fixed envelope `{"ok":null}` that represents a successful `()` result.
private func encodeOkVoidEnvelope() -> RustString {
  RustString("{\"ok\":null}")
}
|
||||
|
||||
/// Wraps `value` in a success envelope, `{"ok": <T>}`.
///
/// Nothing is thrown to the caller: an encoding failure is reported through
/// `encodeErrEnvelope`, and bytes that cannot be read as UTF-8 yield a fixed error
/// envelope, so the Rust side always receives an envelope string.
private func encodeOkEnvelope<T: Encodable>(_ value: T) -> RustString {
  let encoded: Data
  do {
    encoded = try JSONEncoder().encode(InboundEnvelope.ok(value))
  } catch {
    return encodeErrEnvelope("swift: failed to encode ok envelope: \(error)")
  }
  guard let json = String(data: encoded, encoding: .utf8) else {
    return RustString("{\"err\":\"swift: invalid utf8 in envelope\"}")
  }
  return RustString(json)
}
|
||||
|
||||
/// Wraps `message` in a failure envelope, `{"err": "<message>"}`.
///
/// The envelope is produced by `JSONEncoder`, so quotes, backslashes, newlines and other
/// control characters in `message` are escaped and the result is always valid JSON.
/// (Escaping only `\` and `"` by hand left raw control characters in the string.)
///
/// - Parameter message: Human-readable failure description; may contain any characters.
/// - Returns: The envelope, or a fixed fallback envelope if encoding fails.
private func encodeErrEnvelope(_ message: String) -> RustString {
  // A one-entry dictionary encodes to exactly `{"err":"…"}`.
  guard
    let encoded = try? JSONEncoder().encode(["err": message]),
    let json = String(data: encoded, encoding: .utf8)
  else {
    return RustString("{\"err\":\"swift: failed to encode error envelope\"}")
  }
  return RustString(json)
}
|
||||
|
||||
/// Decodes the JSON text `json` into a value of `type`.
///
/// - Throws: The `JSONDecoder` error when `json` is not valid JSON for `type`.
private func decodeJson<T: Decodable>(_ json: String, as type: T.Type) throws -> T {
  let bytes = Data(json.utf8)
  let decoder = JSONDecoder()
  return try decoder.decode(type, from: bytes)
}
|
||||
|
||||
// MARK: - OcrBackend
|
||||
|
||||
/// Swift-native mirror of the Rust `OcrBackend` plugin trait.
///
/// The protocol is class-constrained (`AnyObject`) so that the Rust side can hold a stable
/// retained reference to the conformer. Complex parameters and return values travel as
/// JSON strings of `Codable`-compatible types.
public protocol OcrBackend: AnyObject {
  /// Stable plugin name; used as the registry key.
  func name() -> String

  /// Plugin version as a semver-style string.
  func version() -> String

  /// Initialises the plugin. Throwing aborts registration.
  func initialize() throws

  /// Shutdown hook. Throwing logs a non-fatal cleanup error.
  func shutdown() throws

  /// Processes a raw image buffer.
  ///
  /// - Parameter config: A serialised `kreuzberg::OcrConfig`.
  /// - Returns: A serialised `kreuzberg::ExtractionResult`.
  func processImage(_ image_bytes: [UInt8], config: String) throws -> String

  /// Processes the image file at `path`.
  ///
  /// - Parameter config: A serialised `OcrConfig`.
  func processImageFile(path: String, config: String) throws -> String

  /// Reports whether the plugin supports `lang`.
  func supportsLanguage(_ lang: String) -> Bool

  /// Returns the JSON-encoded `OcrBackendType`.
  func backendTypeJson() -> String

  /// Lists the languages supported by this backend.
  func supportedLanguages() -> [String]

  /// Reports whether the plugin can detect tables.
  func supportsTableDetection() -> Bool

  /// Reports whether the plugin can process whole documents rather than single images.
  func supportsDocumentProcessing() -> Bool

  /// Processes an entire document.
  ///
  /// - Parameter config: A serialised `OcrConfig`.
  /// - Returns: A serialised `ExtractionResult`.
  func processDocument(path: String, config: String) throws -> String
}
|
||||
|
||||
/// FFI adapter class for `OcrBackend`.
///
/// Rust looks up `SwiftOcrBackendBox` by name from the `extern "Swift"` block and dispatches
/// calls through `Unmanaged<T>.fromOpaque(...)`. Each `alef_*` method converts its bridge
/// arguments to Swift values, forwards to the wrapped conformer, and converts the result
/// back. Fallible calls never throw across the bridge: they return `{"ok":…}` or
/// `{"err":…}` envelopes.
public final class SwiftOcrBackendBox {
  private let inner: OcrBackend

  public init(_ inner: OcrBackend) {
    self.inner = inner
  }

  // MARK: Identity

  public func alef_name() -> RustString {
    return RustString(inner.name())
  }

  public func alef_version() -> RustString {
    return RustString(inner.version())
  }

  // MARK: Lifecycle

  /// Runs the conformer's `initialize()`; a thrown error becomes an error envelope.
  public func alef_initialize() -> RustString {
    do {
      try inner.initialize()
    } catch {
      return encodeErrEnvelope("\(error)")
    }
    return encodeOkVoidEnvelope()
  }

  /// Runs the conformer's `shutdown()`; a thrown error becomes an error envelope.
  public func alef_shutdown() -> RustString {
    do {
      try inner.shutdown()
    } catch {
      return encodeErrEnvelope("\(error)")
    }
    return encodeOkVoidEnvelope()
  }

  // MARK: Processing

  /// Copies `image_bytes` into a Swift array and forwards to `processImage`.
  ///
  /// The conformer's returned string is spliced verbatim into the ok envelope.
  public func alef_process_image(image_bytes: RustVec<UInt8>, config: RustString) -> RustString {
    do {
      let json = try inner.processImage(Array(image_bytes), config: config.toString())
      return RustString("{\"ok\":\(json)}")
    } catch {
      return encodeErrEnvelope("\(error)")
    }
  }

  /// Forwards to `processImageFile`; the returned string is spliced verbatim into the ok envelope.
  public func alef_process_image_file(path: RustString, config: RustString) -> RustString {
    do {
      let json = try inner.processImageFile(path: path.toString(), config: config.toString())
      return RustString("{\"ok\":\(json)}")
    } catch {
      return encodeErrEnvelope("\(error)")
    }
  }

  /// Forwards to `processDocument`; the returned string is spliced verbatim into the ok envelope.
  public func alef_process_document(path: RustString, config: RustString) -> RustString {
    do {
      let json = try inner.processDocument(path: path.toString(), config: config.toString())
      return RustString("{\"ok\":\(json)}")
    } catch {
      return encodeErrEnvelope("\(error)")
    }
  }

  // MARK: Capabilities

  public func alef_supports_language(lang: RustString) -> Bool {
    inner.supportsLanguage(lang.toString())
  }

  public func alef_backend_type() -> RustString {
    RustString(inner.backendTypeJson())
  }

  /// Copies the conformer's language list into a `RustVec<RustString>`, preserving order.
  public func alef_supported_languages() -> RustVec<RustString> {
    let out = RustVec<RustString>()
    inner.supportedLanguages().forEach { out.push(value: RustString($0)) }
    return out
  }

  public func alef_supports_table_detection() -> Bool {
    return inner.supportsTableDetection()
  }

  public func alef_supports_document_processing() -> Bool {
    return inner.supportsDocumentProcessing()
  }
}
|
||||
|
||||
// MARK: - PostProcessor
|
||||
|
||||
/// Swift-side counterpart of the Rust `PostProcessor` plugin trait.
///
/// Conforming classes are wrapped in `SwiftPostProcessorBox`, which forwards every
/// requirement across the FFI boundary. Extraction results travel as JSON strings in
/// both directions rather than as references.
public protocol PostProcessor: AnyObject {
  /// Plugin name reported to Rust.
  func name() -> String

  /// Plugin version reported to Rust.
  func version() -> String

  /// Set-up hook. A thrown error is handed to Rust as an error envelope.
  func initialize() throws

  /// Tear-down hook. A thrown error is handed to Rust as an error envelope.
  func shutdown() throws

  /// Post-process a serialised `ExtractionResult`.
  ///
  /// The result is mutable on the Rust side, but it is ferried as JSON in and out to
  /// avoid round-tripping references through the FFI.
  ///
  /// - Parameters:
  ///   - result: The `ExtractionResult` as JSON.
  ///   - config: Configuration string supplied by the Rust side.
  /// - Returns: The post-processed `ExtractionResult` as JSON. The adapter embeds this
  ///   string verbatim in its `{"ok":...}` envelope, so it must be valid JSON.
  func processJson(result: String, config: String) throws -> String

  /// JSON-encoded `ProcessingStage`.
  func processingStageJson() -> String

  /// Whether this processor wants to run for the given result and configuration.
  func shouldProcess(result: String, config: String) -> Bool

  /// Estimated processing time, in milliseconds, for the given result.
  func estimatedDurationMs(result: String) -> UInt64

  /// Priority value, passed to Rust unchanged.
  func priority() -> Int32
}
|
||||
|
||||
/// FFI adapter that exposes a Swift `PostProcessor` to Rust.
///
/// Each `alef_*` method converts its Rust string arguments to Swift strings, forwards to
/// the wrapped processor, and converts the result back. Throwing calls return a JSON
/// envelope string instead of propagating the Swift error.
public final class SwiftPostProcessorBox {
  private let inner: PostProcessor

  public init(_ inner: PostProcessor) {
    self.inner = inner
  }

  /// Runs `action` and reports the outcome as an ok-void or error envelope.
  private func voidEnvelope(_ action: () throws -> Void) -> RustString {
    do {
      try action()
      return encodeOkVoidEnvelope()
    } catch {
      return encodeErrEnvelope("\(error)")
    }
  }

  public func alef_name() -> RustString {
    return RustString(inner.name())
  }

  public func alef_version() -> RustString {
    return RustString(inner.version())
  }

  public func alef_initialize() -> RustString {
    return voidEnvelope { try inner.initialize() }
  }

  public func alef_shutdown() -> RustString {
    return voidEnvelope { try inner.shutdown() }
  }

  /// Forwards to `processJson` and splices the returned JSON, verbatim, into an
  /// `{"ok":...}` envelope.
  public func alef_process(result: RustString, config: RustString) -> RustString {
    do {
      let processed = try inner.processJson(result: result.toString(), config: config.toString())
      return RustString("{\"ok\":\(processed)}")
    } catch {
      return encodeErrEnvelope("\(error)")
    }
  }

  /// Returns the string from `processingStageJson()` unchanged (no envelope).
  public func alef_processing_stage() -> RustString {
    return RustString(inner.processingStageJson())
  }

  public func alef_should_process(result: RustString, config: RustString) -> Bool {
    return inner.shouldProcess(result: result.toString(), config: config.toString())
  }

  public func alef_estimated_duration_ms(result: RustString) -> UInt64 {
    return inner.estimatedDurationMs(result: result.toString())
  }

  public func alef_priority() -> Int32 {
    return inner.priority()
  }
}
|
||||
|
||||
// MARK: - Validator
|
||||
|
||||
/// Swift-side counterpart of the Rust `Validator` plugin trait.
///
/// Conforming classes are wrapped in `SwiftValidatorBox`, which forwards every
/// requirement across the FFI boundary.
public protocol Validator: AnyObject {
  /// Plugin name reported to Rust.
  func name() -> String

  /// Plugin version reported to Rust.
  func version() -> String

  /// Set-up hook. A thrown error is handed to Rust as an error envelope.
  func initialize() throws

  /// Tear-down hook. A thrown error is handed to Rust as an error envelope.
  func shutdown() throws

  /// Validate an `ExtractionResult`.
  ///
  /// - Parameters:
  ///   - result: The `ExtractionResult` as JSON.
  ///   - config: Configuration string supplied by the Rust side.
  /// - Throws: Any error, to surface a validation failure. Returning normally means
  ///   the result passed.
  func validate(result: String, config: String) throws

  /// Whether this validator wants to run for the given result and configuration.
  func shouldValidate(result: String, config: String) -> Bool

  /// Priority value, passed to Rust unchanged.
  func priority() -> Int32
}
|
||||
|
||||
/// FFI adapter that exposes a Swift `Validator` to Rust.
///
/// Throwing calls (`initialize`, `shutdown`, `validate`) are reported as JSON envelope
/// strings: the ok-void envelope when the call returns normally, otherwise the error
/// envelope built from the thrown error's description.
public final class SwiftValidatorBox {
  private let inner: Validator

  public init(_ inner: Validator) {
    self.inner = inner
  }

  /// Runs `action` and reports the outcome as an ok-void or error envelope.
  private func voidEnvelope(_ action: () throws -> Void) -> RustString {
    do {
      try action()
      return encodeOkVoidEnvelope()
    } catch {
      return encodeErrEnvelope("\(error)")
    }
  }

  public func alef_name() -> RustString {
    return RustString(inner.name())
  }

  public func alef_version() -> RustString {
    return RustString(inner.version())
  }

  public func alef_initialize() -> RustString {
    return voidEnvelope { try inner.initialize() }
  }

  public func alef_shutdown() -> RustString {
    return voidEnvelope { try inner.shutdown() }
  }

  /// Runs the validator; a thrown error becomes the error envelope.
  public func alef_validate(result: RustString, config: RustString) -> RustString {
    return voidEnvelope {
      try inner.validate(result: result.toString(), config: config.toString())
    }
  }

  public func alef_should_validate(result: RustString, config: RustString) -> Bool {
    return inner.shouldValidate(result: result.toString(), config: config.toString())
  }

  public func alef_priority() -> Int32 {
    return inner.priority()
  }
}
|
||||
|
||||
// MARK: - EmbeddingBackend
|
||||
|
||||
/// Swift-side counterpart of the Rust `EmbeddingBackend` plugin trait.
///
/// Conforming classes are wrapped in `SwiftEmbeddingBackendBox`, which forwards every
/// requirement across the FFI boundary.
public protocol EmbeddingBackend: AnyObject {
  /// Plugin name reported to Rust.
  func name() -> String

  /// Plugin version reported to Rust.
  func version() -> String

  /// Set-up hook. A thrown error is handed to Rust as an error envelope.
  func initialize() throws

  /// Tear-down hook. A thrown error is handed to Rust as an error envelope.
  func shutdown() throws

  /// Embedding dimensions reported by the backend.
  func dimensions() -> UInt

  /// Embed a batch of texts.
  ///
  /// - Parameter texts: The batch to embed, one entry per item.
  /// - Returns: A JSON-encoded `Vec<Vec<f32>>`: the outer array has one entry per batch
  ///   item and each inner array holds that item's embedding components. The adapter
  ///   embeds this string verbatim in its `{"ok":...}` envelope.
  func embed(_ texts: [String]) throws -> String
}
|
||||
|
||||
/// FFI adapter that exposes a Swift `EmbeddingBackend` to Rust.
///
/// Throwing calls are reported as JSON envelope strings rather than propagating the
/// Swift error across the boundary.
public final class SwiftEmbeddingBackendBox {
  private let inner: EmbeddingBackend

  public init(_ inner: EmbeddingBackend) { self.inner = inner }

  public func alef_name() -> RustString { RustString(inner.name()) }

  public func alef_version() -> RustString { RustString(inner.version()) }

  /// Runs the backend's set-up hook; returns the ok-void or error envelope.
  public func alef_initialize() -> RustString {
    do {
      try inner.initialize()
      return encodeOkVoidEnvelope()
    } catch { return encodeErrEnvelope("\(error)") }
  }

  /// Runs the backend's tear-down hook; returns the ok-void or error envelope.
  public func alef_shutdown() -> RustString {
    do {
      try inner.shutdown()
      return encodeOkVoidEnvelope()
    } catch { return encodeErrEnvelope("\(error)") }
  }

  public func alef_dimensions() -> UInt { inner.dimensions() }

  /// Embeds a batch of texts and wraps the backend's JSON in an `{"ok":...}` envelope.
  ///
  /// - Parameter texts: Batch received from Rust.
  /// - Returns: `{"ok":<json>}` where `<json>` is the string returned by
  ///   `EmbeddingBackend.embed(_:)`, inserted verbatim; or the error envelope if the
  ///   backend throws.
  public func alef_embed(texts: RustVec<RustString>) -> RustString {
    do {
      // Iterating a RustVec<RustString> yields borrowed string references; copy each one
      // into an owned Swift String before handing the batch to the backend. Sequence
      // iteration replaces the manual index loop and its force-unwrap.
      let batch = texts.map { $0.as_str().toString() }
      let result = try inner.embed(batch)
      return RustString("{\"ok\":\(result)}")
    } catch { return encodeErrEnvelope("\(error)") }
  }
}
|
||||
|
||||
// MARK: - DocumentExtractor
|
||||
|
||||
/// Swift-native protocol mirroring the Rust `DocumentExtractor` plugin trait.
///
/// Conformers must implement `name`, `version`, `extractBytes` (returns a JSON-encoded
/// `InternalDocument`) and `supportedMimeTypes`. The other requirements (`initialize`,
/// `shutdown`, `extractFile`, `priority`, `canHandle`, `asSyncExtractor`) have default
/// implementations in the `DocumentExtractor` protocol extension.
public protocol DocumentExtractor: AnyObject {
  /// Plugin name reported to Rust. No default implementation.
  func name() -> String

  /// Plugin version reported to Rust. No default implementation.
  func version() -> String

  /// Set-up hook. Default does nothing.
  func initialize() throws

  /// Tear-down hook. Default does nothing.
  func shutdown() throws

  /// Extract from raw bytes.
  ///
  /// - Parameters:
  ///   - content: The document bytes.
  ///   - mimeType: MIME type of `content`.
  ///   - config: Configuration string supplied by the Rust side.
  /// - Returns: A JSON-encoded `InternalDocument`. The adapter embeds this string
  ///   verbatim in its `{"ok":...}` envelope, so it must be valid JSON.
  func extractBytes(content: [UInt8], mimeType: String, config: String) throws -> String

  /// Extract from a filesystem path. Default reads the file and forwards to `extractBytes`.
  func extractFile(path: String, mimeType: String, config: String) throws -> String

  /// MIME types this extractor claims to support.
  func supportedMimeTypes() -> [String]

  /// Priority for the registry's selection ordering. Default is 50. The documented range
  /// is 0–255; the `Int32` type does not enforce it.
  func priority() -> Int32

  /// Whether this extractor can handle the given path + MIME pair. Default returns `true`.
  func canHandle(path: String, mimeType: String) -> Bool

  /// JSON-encoded handle to a synchronous extractor, if any. Default returns the
  /// JSON `null` sentinel (the Rust bridge does not currently dispatch sync paths).
  func asSyncExtractor() -> String
}
|
||||
|
||||
/// Default implementations for the optional parts of `DocumentExtractor`.
extension DocumentExtractor {
  /// Default set-up hook: does nothing.
  public func initialize() throws {
  }

  /// Default tear-down hook: does nothing.
  public func shutdown() throws {
  }

  /// Default file extraction: loads the whole file at `path` into memory and hands the
  /// bytes to `extractBytes`. Read failures are thrown to the caller.
  public func extractFile(path: String, mimeType: String, config: String) throws -> String {
    let fileURL = URL(fileURLWithPath: path)
    let contents = try Data(contentsOf: fileURL)
    return try extractBytes(content: Array(contents), mimeType: mimeType, config: config)
  }

  /// Default priority.
  public func priority() -> Int32 {
    return 50
  }

  /// Default: accept every path/MIME pair.
  public func canHandle(path: String, mimeType: String) -> Bool {
    return true
  }

  /// Default: the JSON `null` sentinel.
  public func asSyncExtractor() -> String {
    return "null"
  }
}
|
||||
|
||||
/// FFI adapter that exposes a Swift `DocumentExtractor` to Rust.
///
/// Each `alef_*` method converts its Rust arguments to Swift values, forwards to the
/// wrapped extractor, and converts the result back. Throwing calls return a JSON
/// envelope string instead of propagating the Swift error.
public final class SwiftDocumentExtractorBox {
  private let inner: DocumentExtractor

  public init(_ inner: DocumentExtractor) {
    self.inner = inner
  }

  // MARK: Envelope helpers

  /// Runs `action` and reports the outcome as an ok-void or error envelope.
  private func voidEnvelope(_ action: () throws -> Void) -> RustString {
    do {
      try action()
      return encodeOkVoidEnvelope()
    } catch {
      return encodeErrEnvelope("\(error)")
    }
  }

  /// Runs `produce` and splices the string it returns, verbatim, into an `{"ok":...}`
  /// envelope. The string is not re-encoded, so it must already be JSON.
  private func jsonEnvelope(_ produce: () throws -> String) -> RustString {
    do {
      let payload = try produce()
      return RustString("{\"ok\":\(payload)}")
    } catch {
      return encodeErrEnvelope("\(error)")
    }
  }

  // MARK: Plugin identity and lifecycle

  public func alef_name() -> RustString {
    return RustString(inner.name())
  }

  public func alef_version() -> RustString {
    return RustString(inner.version())
  }

  public func alef_initialize() -> RustString {
    return voidEnvelope { try inner.initialize() }
  }

  public func alef_shutdown() -> RustString {
    return voidEnvelope { try inner.shutdown() }
  }

  // MARK: Extraction

  /// Copies the Rust byte vector into a Swift array and forwards it to `extractBytes`.
  public func alef_extract_bytes(
    content: RustVec<UInt8>, mime_type: RustString, config: RustString
  ) -> RustString {
    return jsonEnvelope {
      try inner.extractBytes(
        content: Array(content), mimeType: mime_type.toString(), config: config.toString())
    }
  }

  public func alef_extract_file(
    path: RustString, mime_type: RustString, config: RustString
  ) -> RustString {
    return jsonEnvelope {
      try inner.extractFile(
        path: path.toString(), mimeType: mime_type.toString(), config: config.toString())
    }
  }

  // MARK: Registry metadata

  /// Converts the extractor's MIME type list into a Rust-owned vector of strings.
  public func alef_supported_mime_types() -> RustVec<RustString> {
    let out = RustVec<RustString>()
    inner.supportedMimeTypes().forEach { out.push(value: RustString($0)) }
    return out
  }

  public func alef_priority() -> Int32 {
    return inner.priority()
  }

  public func alef_can_handle(path: RustString, mime_type: RustString) -> Bool {
    return inner.canHandle(path: path.toString(), mimeType: mime_type.toString())
  }

  /// Returns the string from `asSyncExtractor()` unchanged (no envelope).
  public func alef_as_sync_extractor() -> RustString {
    return RustString(inner.asSyncExtractor())
  }
}
|
||||
|
||||
// MARK: - Renderer
|
||||
|
||||
/// Swift-side counterpart of the Rust `Renderer` plugin trait.
///
/// A renderer turns a JSON-encoded `InternalDocument` into its target output format.
/// The Rust bridge encodes the document before it crosses the FFI boundary; the
/// renderer returns the finished output as a plain string.
public protocol Renderer: AnyObject {
  /// Plugin name reported to Rust.
  func name() -> String

  /// Plugin version reported to Rust.
  func version() -> String

  /// Set-up hook.
  func initialize() throws

  /// Tear-down hook.
  func shutdown() throws

  /// Render the document.
  ///
  /// - Parameter doc: A JSON-encoded `InternalDocument`.
  /// - Returns: The rendered output. It may be any text; the adapter JSON-escapes it
  ///   when building the reply envelope.
  func render(doc: String) throws -> String
}
|
||||
|
||||
/// Default lifecycle hooks for `Renderer`, so conformers only override the ones they need.
extension Renderer {
  /// Default set-up hook: does nothing.
  public func initialize() throws {
  }

  /// Default tear-down hook: does nothing.
  public func shutdown() throws {
  }
}
|
||||
|
||||
/// FFI adapter that exposes a Swift `Renderer` to Rust.
///
/// Throwing calls are reported as JSON envelope strings rather than propagating the
/// Swift error across the boundary.
public final class SwiftRendererBox {
  private let inner: Renderer

  public init(_ inner: Renderer) {
    self.inner = inner
  }

  public func alef_name() -> RustString { RustString(inner.name()) }

  public func alef_version() -> RustString { RustString(inner.version()) }

  /// Runs the renderer's set-up hook; returns the ok-void or error envelope.
  public func alef_initialize() -> RustString {
    do {
      try inner.initialize()
      return encodeOkVoidEnvelope()
    } catch { return encodeErrEnvelope("\(error)") }
  }

  /// Runs the renderer's tear-down hook; returns the ok-void or error envelope.
  public func alef_shutdown() -> RustString {
    do {
      try inner.shutdown()
      return encodeOkVoidEnvelope()
    } catch { return encodeErrEnvelope("\(error)") }
  }

  /// Renders `doc` and returns the output inside an `{"ok":"..."}` envelope.
  ///
  /// - Parameter doc: A JSON-encoded `InternalDocument`, passed to `Renderer.render(doc:)`.
  /// - Returns: The JSON envelope with the rendered text as a JSON string value, or the
  ///   error envelope if rendering or serialisation throws.
  public func alef_render(doc: RustString) -> RustString {
    do {
      let rendered = try inner.render(doc: doc.toString())
      // `rendered` is arbitrary text (markdown/html/etc.), so let JSONSerialization do
      // the escaping instead of interpolating it into the envelope by hand.
      let payload: [String: Any] = ["ok": rendered]
      let data = try JSONSerialization.data(withJSONObject: payload, options: [])
      // Non-failable decode: the previous optional decode fell back to `{"ok":""}`,
      // which would have reported a decoding failure as an empty successful render.
      return RustString(String(decoding: data, as: UTF8.self))
    } catch { return encodeErrEnvelope("\(error)") }
  }
}
|
||||
7
packages/swift/Sources/RustBridge/RustBridge.swift
generated
Normal file
7
packages/swift/Sources/RustBridge/RustBridge.swift
generated
Normal file
@@ -0,0 +1,7 @@
|
||||
// Placeholder Swift source for the RustBridge target.
|
||||
// Run `cargo build -p kreuzberg-swift` and then rerun `alef generate` to replace
|
||||
// this file with swift-bridge output. See README.md for instructions.
|
||||
//
|
||||
// This file is intentionally minimal so SwiftPM accepts the target before
|
||||
// the cargo build step has been run.
|
||||
/// Empty placeholder type. It exists only so the RustBridge target has a Swift source
/// for SwiftPM to accept before the cargo build step has produced the real bridge code.
public enum RustBridgePlaceholder {}
|
||||
1350
packages/swift/Sources/RustBridge/SwiftBridgeCore.swift
generated
Normal file
1350
packages/swift/Sources/RustBridge/SwiftBridgeCore.swift
generated
Normal file
File diff suppressed because it is too large
Load Diff
18885
packages/swift/Sources/RustBridge/kreuzberg-swift.swift
generated
Normal file
18885
packages/swift/Sources/RustBridge/kreuzberg-swift.swift
generated
Normal file
File diff suppressed because it is too large
Load Diff
2
packages/swift/Sources/RustBridge/module.modulemap
generated
Normal file
2
packages/swift/Sources/RustBridge/module.modulemap
generated
Normal file
@@ -0,0 +1,2 @@
|
||||
// This modulemap is unused — the RustBridgeC target provides the C types.
|
||||
// SwiftPM discovers RustBridgeC.h via the publicHeadersPath setting.
|
||||
3577
packages/swift/Sources/RustBridgeC/RustBridgeC.h
generated
Normal file
3577
packages/swift/Sources/RustBridgeC/RustBridgeC.h
generated
Normal file
File diff suppressed because it is too large
Load Diff
11
packages/swift/Tests/KreuzbergTests/KreuzbergTests.swift
generated
Normal file
11
packages/swift/Tests/KreuzbergTests/KreuzbergTests.swift
generated
Normal file
@@ -0,0 +1,11 @@
|
||||
import XCTest
|
||||
|
||||
@testable import Kreuzberg
|
||||
|
||||
/// Minimal test case that keeps the `KreuzbergTests` target non-empty.
final class KreuzbergTests: XCTestCase {
  /// Always passes. It exists so `swift test` has something to run; replace or extend it
  /// with real tests against the Kreuzberg module.
  func testPlaceholder() throws {
    XCTAssert(true)
  }
}
|
||||
42
packages/swift/rust/Cargo.toml
generated
Normal file
42
packages/swift/rust/Cargo.toml
generated
Normal file
@@ -0,0 +1,42 @@
|
||||
# Generated by alef. Do not edit by hand.
|
||||
[package]
|
||||
name = "kreuzberg-swift"
|
||||
version = "5.0.0-rc.3"
|
||||
edition = "2024"
|
||||
license = "Elastic-2.0"
|
||||
|
||||
# `ahash`, `async-trait`, `libc`, `serde`, `serde_json`, and `tokio` are all
|
||||
# conditionally referenced by alef-emitted code: `ahash` only when the
|
||||
# umbrella crate exposes `AHashMap<Cow<str>, _>` parameters (the conditional
|
||||
# `__*_ahash` shim rebuilds), `async-trait` and `tokio` only when the API
|
||||
# surface includes async streaming adapters and runtime spawn, `libc` only
|
||||
# when service API C callback functions are emitted, `serde` and
|
||||
# `serde_json` only when JSON DTO conversions are emitted. They are listed
|
||||
# unconditionally in `[dependencies]` so the manifest is stable across
|
||||
# regens, and ignored here so cargo-machete does not flag downstream crates
|
||||
# whose API surface does not trigger those paths as unused.
|
||||
[package.metadata.cargo-machete]
|
||||
ignored = ["ahash", "async-trait", "libc", "serde", "serde_json", "tokio"]
|
||||
|
||||
[lib]
|
||||
crate-type = ["cdylib", "staticlib"]
|
||||
# The `extern "Swift"` block emits linker references that are only resolvable
|
||||
# when the crate is linked into a Swift target. `cargo test --workspace` on
|
||||
# pure-Rust runners (e.g. windows-latest) would otherwise fail with
|
||||
# undefined `__swift_bridge__$*$alef_visit_*` symbols.
|
||||
test = false
|
||||
doctest = false
|
||||
bench = false
|
||||
|
||||
[dependencies]
|
||||
ahash = "0.8"
|
||||
async-trait = "0.1"
|
||||
kreuzberg = { version = "5.0.0-rc.3", path = "../../../crates/kreuzberg", features = ["full", "ocr-wasm"] }
|
||||
libc = "0.2"
|
||||
serde = { version = "1", features = ["derive"] }
|
||||
serde_json = "1"
|
||||
swift-bridge = "0.1.59"
|
||||
tokio = { version = "1", features = ["rt", "rt-multi-thread", "macros"] }
|
||||
|
||||
[build-dependencies]
|
||||
swift-bridge-build = "0.1.59"
|
||||
10
packages/swift/rust/build.rs
generated
Normal file
10
packages/swift/rust/build.rs
generated
Normal file
@@ -0,0 +1,10 @@
|
||||
// Generated by alef. Do not edit by hand.
|
||||
use std::path::PathBuf;
|
||||
|
||||
fn main() {
|
||||
let out_dir = PathBuf::from(std::env::var("OUT_DIR").expect("OUT_DIR unset"));
|
||||
let crate_name = std::env::var("CARGO_PKG_NAME").expect("CARGO_PKG_NAME unset");
|
||||
let bridges = vec!["src/lib.rs"];
|
||||
swift_bridge_build::parse_bridges(bridges).write_all_concatenated(out_dir, &crate_name);
|
||||
println!("cargo:rerun-if-changed=src/lib.rs");
|
||||
}
|
||||
13779
packages/swift/rust/src/lib.rs
generated
Normal file
13779
packages/swift/rust/src/lib.rs
generated
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user