Nomad changes
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s

This commit is contained in:
Henrik Jess Nielsen
2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions

View File

@@ -0,0 +1,31 @@
```swift title="Swift"
import Foundation
import Kreuzberg
import RustBridge
// `BatchBytesItem` is an opaque swift-bridge class with no public Swift
// constructor — build items from JSON via `batchBytesItemFromJson`.
// `content` must be encoded as a JSON byte array.
/// Renders `bytes` as a compact JSON array of decimal numbers.
///
/// - Parameter bytes: The raw bytes to encode.
/// - Returns: A string such as `[72,105]`; an empty input yields `[]`.
func encodeBytesAsJsonArray(_ bytes: [UInt8]) -> String {
    let decimals = bytes.map(\.description).joined(separator: ",")
    return "[\(decimals)]"
}
// Documents to extract, each paired with the MIME type sent alongside it.
let inputs: [(text: String, mimeType: String)] = [
    ("Hello, world!", "text/plain"),
    ("# Heading\n\nParagraph text.", "text/markdown"),
]

// Turn every input into a `BatchBytesItem` by way of its JSON representation.
let items = RustVec<BatchBytesItem>()
for input in inputs {
    let encodedContent = encodeBytesAsJsonArray(Array(input.text.utf8))
    let itemJson = "{\"content\": \(encodedContent), \"mime_type\": \"\(input.mimeType)\"}"
    let item = try batchBytesItemFromJson(itemJson)
    items.push(value: item)
}

// Run the whole batch with an empty (`{}`) config and report each result's length.
let config = try extractionConfigFromJson("{}")
let results = try batchExtractBytesSync(items, config)
for (position, extracted) in results.enumerated() {
    let length = extracted.content().toString().count
    print("Item \(position): \(length) chars")
}
```

View File

@@ -0,0 +1,20 @@
```swift title="Swift"
import Foundation
import Kreuzberg
import RustBridge
// `BatchFileItem` is an opaque swift-bridge class with no public Swift
// constructor — build items from JSON via `batchFileItemFromJson`.
// Files to extract in a single batch call.
let paths = ["doc1.pdf", "doc2.docx", "report.pdf"]

// Build one `BatchFileItem` per path. The JSON is produced by `JSONSerialization`
// rather than by string interpolation so that a path containing quotes,
// backslashes, or control characters is escaped correctly instead of yielding
// invalid (or silently altered) JSON.
let items = RustVec<BatchFileItem>()
for path in paths {
    let jsonData = try JSONSerialization.data(withJSONObject: ["path": path])
    let json = String(decoding: jsonData, as: UTF8.self)
    items.push(value: try batchFileItemFromJson(json))
}

// Extract every file with an empty (`{}`) config and print each result's length.
let config = try extractionConfigFromJson("{}")
let results = try batchExtractFilesSync(items, config)
for (index, result) in results.enumerated() {
    print("File \(index): \(result.content().toString().count) chars")
}
```

View File

@@ -0,0 +1,44 @@
```swift title="Swift"
import Foundation
#if canImport(FoundationNetworking)
import FoundationNetworking
#endif
/// Posts a JSON chunking request to `http://localhost:8000/chunk` and prints
/// the reported chunk count followed by a 50-character preview of each chunk.
@main
struct App {
    static func main() async throws {
        // Settings sent under the request's "config" key.
        let chunkerConfig: [String: Any] = [
            "max_characters": 1000,
            "overlap": 50,
            "trim": true,
        ]
        let requestBody: [String: Any] = [
            "text": "Your long text content here...",
            "chunker_type": "text",
            "config": chunkerConfig,
        ]

        // The URL is a compile-time constant, so the force-unwrap cannot fail at runtime.
        let endpoint = URL(string: "http://localhost:8000/chunk")!
        var request = URLRequest(url: endpoint)
        request.httpMethod = "POST"
        request.setValue("application/json", forHTTPHeaderField: "Content-Type")
        request.httpBody = try JSONSerialization.data(withJSONObject: requestBody)

        let (responseData, response) = try await URLSession.shared.data(for: request)

        // Anything other than an HTTP 2xx response is treated as a failure.
        guard let status = (response as? HTTPURLResponse)?.statusCode,
              200..<300 ~= status
        else {
            throw NSError(domain: "kreuzberg", code: 1)
        }

        // A body that is not a JSON object is handled like an empty object.
        let decoded = try JSONSerialization.jsonObject(with: responseData)
        let result = decoded as? [String: Any] ?? [:]

        let total = result["chunk_count"] as? Int ?? 0
        print("Created \(total) chunks")

        guard let chunks = result["chunks"] as? [[String: Any]] else { return }
        for entry in chunks {
            let text = entry["content"] as? String ?? ""
            let position = entry["chunk_index"] as? Int ?? 0
            print("Chunk \(position): \(String(text.prefix(50)))...")
        }
    }
}
```

View File

@@ -0,0 +1,42 @@
```swift title="Swift"
import Foundation
#if canImport(FoundationNetworking)
import FoundationNetworking
#endif
/// Uploads `document.pdf` as a `multipart/form-data` request to
/// `http://localhost:8000/extract` and prints the `"content"` field of the
/// JSON response.
@main
struct App {
    static func main() async throws {
        let fileURL = URL(fileURLWithPath: "document.pdf")
        let fileData = try Data(contentsOf: fileURL)
        let fileName = fileURL.lastPathComponent

        // A fresh UUID keeps the part delimiter unique for this request.
        let boundary = "Boundary-\(UUID().uuidString)"

        // The URL is a compile-time constant, so the force-unwrap cannot fail at runtime.
        var request = URLRequest(url: URL(string: "http://localhost:8000/extract")!)
        request.httpMethod = "POST"
        request.setValue(
            "multipart/form-data; boundary=\(boundary)",
            forHTTPHeaderField: "Content-Type"
        )

        // Assemble the single-part body. `Data(string.utf8)` is non-optional,
        // unlike `string.data(using: .utf8)!`, so no force-unwraps are needed.
        var body = Data()
        body.append(Data("--\(boundary)\r\n".utf8))
        body.append(
            Data("Content-Disposition: form-data; name=\"file\"; filename=\"\(fileName)\"\r\n".utf8)
        )
        body.append(Data("Content-Type: application/pdf\r\n\r\n".utf8))
        body.append(fileData)
        body.append(Data("\r\n--\(boundary)--\r\n".utf8))
        request.httpBody = body

        let (data, response) = try await URLSession.shared.data(for: request)

        // Anything other than an HTTP 2xx response is treated as a failure.
        guard let http = response as? HTTPURLResponse, (200..<300).contains(http.statusCode) else {
            throw NSError(domain: "kreuzberg", code: 1)
        }

        // A body that is not a JSON object is handled like an empty object.
        let json = try JSONSerialization.jsonObject(with: data) as? [String: Any] ?? [:]
        print(json["content"] as? String ?? "")
    }
}
```

View File

@@ -0,0 +1,53 @@
```swift title="Swift"
import Foundation
import Kreuzberg
import RustBridge
// Build a fully-featured `ExtractionConfig` via JSON. The opaque swift-bridge
// initializer takes 30+ positional parameters, so JSON is the ergonomic path
// for non-trivial configs.
// Extraction settings, written as JSON and parsed by `extractionConfigFromJson` below.
let configJson = """
{
"use_cache": true,
"enable_quality_processing": true,
"ocr": {
"backend": "tesseract",
"language": "eng"
},
"force_ocr": false,
"chunking": {
"max_characters": 800,
"overlap": 100,
"chunker_type": "markdown",
"prepend_heading_context": true
},
"images": {
"extract_images": true
},
"output_format": "markdown",
"include_document_structure": true
}
"""

let config = try extractionConfigFromJson(configJson)
let extraction = try extractFileSync("report.pdf", nil, config)

// Print the total length followed by the first 200 characters of the text.
let text = extraction.content().toString()
print("Content (\(text.count) chars):")
print(String(text.prefix(200)))

// Chunks, detected languages and extraction method are optional on the result,
// so each one is only reported when present.
if let chunkList = extraction.chunks() {
    print("\nChunks: \(chunkList.count)")
}

print("Tables: \(extraction.tables().count)")

if let detected = extraction.detected_languages() {
    let langs = detected.map { language in language.toString() }
    print("Languages: \(langs)")
}

if let usedMethod = extraction.extraction_method() {
    print("Extraction method: \(usedMethod)")
}
```

View File

@@ -0,0 +1,31 @@
```swift title="Swift"
import Foundation
import Kreuzberg
import RustBridge
// The Swift binding throws `RustString` (not `KreuzbergError`) for every
// failure surfaced from the Rust core. The string preserves the original
// error variant name and message (e.g. "UnsupportedFormat: ...",
// "MissingDependency: ...", "Parsing: ...") so callers can pattern-match
// on the prefix or simply print the message.
/// Maps an error message to the line printed for it.
///
/// The checks are substring matches evaluated in order, so the first marker
/// found in `message` decides which description is used.
func describeFailure(_ message: String) -> String {
    if message.contains("UnsupportedFormat") {
        return "Unsupported format: \(message)"
    }
    if message.contains("MissingDependency") {
        return "Install the required dependency: \(message)"
    }
    if message.contains("Parsing") {
        return "Corrupt or invalid document: \(message)"
    }
    if message.contains("Io") {
        return "File error: \(message)"
    }
    return "Extraction failed: \(message)"
}

do {
    let config = try extractionConfigFromJson("{}")
    let extraction = try extractFileSync("document.pdf", nil, config)
    print(extraction.content().toString())
} catch let failure as RustString {
    // Errors thrown as `RustString` are classified by their message text.
    print(describeFailure(failure.toString()))
} catch {
    // Any other error type is reported as-is.
    print("Unexpected error: \(error)")
}
```

View File

@@ -0,0 +1,32 @@
```swift title="Swift"
import Foundation
import Kreuzberg
import RustBridge
/// Extracts the text content of an in-memory document using an empty (`{}`) config.
///
/// - Parameters:
///   - bytes: The raw document bytes.
///   - mimeType: The MIME type passed to `extractBytesSync` along with the bytes.
/// - Returns: The extracted content as a Swift `String`.
/// - Throws: Whatever `extractionConfigFromJson` or `extractBytesSync` throws.
func extractText(bytes: [UInt8], mimeType: String) throws -> String {
    let config = try extractionConfigFromJson("{}")

    // Copy the Swift array into the `RustVec<UInt8>` that `extractBytesSync` is given.
    let buffer = RustVec<UInt8>()
    bytes.forEach { buffer.push(value: $0) }

    let extraction = try extractBytesSync(buffer, mimeType, config)
    return extraction.content().toString()
}
do {
    // Read the file inside the `do` block so that an unreadable or missing file
    // is reported by the generic `catch` below. Falling back to empty `Data`
    // would hide the real cause and run extraction on zero bytes.
    let data = try Data(contentsOf: URL(fileURLWithPath: "document.pdf"))
    let text = try extractText(bytes: Array(data), mimeType: "application/pdf")
    print("Extracted \(text.count) chars")
} catch let error as RustString {
    // Errors thrown as `RustString` are classified by their message text.
    let message = error.toString()
    if message.contains("UnsupportedFormat") {
        print("Format not supported: \(message)")
    } else if message.contains("Ocr") {
        print("OCR failed: \(message)")
    } else {
        print("Error: \(message)")
    }
} catch {
    // Any other error, including a failed file read, ends up here.
    print("Unexpected error: \(error)")
}
```

View File

@@ -0,0 +1,20 @@
```swift title="Swift"
import Foundation
import Kreuzberg
import RustBridge
/// Reads `document.pdf` into memory, extracts it with the async `extractBytes`
/// entry point, and prints the text followed by the table count.
@main
struct App {
    static func main() async throws {
        let pdfURL = URL(fileURLWithPath: "document.pdf")
        let pdfData = try Data(contentsOf: pdfURL)

        // Copy the file bytes into the `RustVec<UInt8>` that `extractBytes` is given.
        let buffer = RustVec<UInt8>()
        pdfData.forEach { buffer.push(value: $0) }

        let config = try extractionConfigFromJson("{}")
        let extraction = try await extractBytes(buffer, "application/pdf", config)

        print(extraction.content().toString())
        print("Tables: \(extraction.tables().count)")
    }
}
```

View File

@@ -0,0 +1,15 @@
```swift title="Swift"
import Foundation
import Kreuzberg
import RustBridge
// Load the PDF from disk.
let pdfURL = URL(fileURLWithPath: "document.pdf")
let pdfData = try Data(contentsOf: pdfURL)

// Copy the file bytes into the `RustVec<UInt8>` that `extractBytesSync` is given.
let buffer = RustVec<UInt8>()
pdfData.forEach { buffer.push(value: $0) }

// Extract with an empty (`{}`) config, then print the text and the table count.
let config = try extractionConfigFromJson("{}")
let extraction = try extractBytesSync(buffer, "application/pdf", config)
print(extraction.content().toString())
print("Tables: \(extraction.tables().count)")
```

View File

@@ -0,0 +1,20 @@
```swift title="Swift"
import Foundation
import Kreuzberg
import RustBridge
/// Extracts `document.pdf` with the async `extractFile` entry point and prints
/// its text, MIME type, and table count.
@main
struct App {
    static func main() async throws {
        let defaults = try extractionConfigFromJson("{}")

        // `extractFile` is awaited so the call composes with Swift Concurrency.
        // The bridge work underneath is reportedly synchronous.
        let extraction = try await extractFile("document.pdf", nil, defaults)

        let text = extraction.content().toString()
        print(text)

        let mimeType = extraction.mime_type().toString()
        print("MIME type: \(mimeType)")

        let tableCount = extraction.tables().count
        print("Tables: \(tableCount)")
    }
}
```

View File

@@ -0,0 +1,12 @@
```swift title="Swift"
import Foundation
import Kreuzberg
import RustBridge
// Extract `document.pdf` synchronously with an empty (`{}`) config.
let defaults = try extractionConfigFromJson("{}")
let extraction = try extractFileSync("document.pdf", nil, defaults)

// Report the extracted text, the MIME type, and the table count.
let text = extraction.content().toString()
print(text)

let mimeType = extraction.mime_type().toString()
print("MIME type: \(mimeType)")

let tableCount = extraction.tables().count
print("Tables: \(tableCount)")
```