Nomad changes
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s

This commit is contained in:
Henrik Jess Nielsen
2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions

View File

@@ -0,0 +1,25 @@
```swift title="Swift"
import Foundation
import Kreuzberg
import RustBridge
// Configuration: language detection enabled, `min_confidence` 0.8, `detect_multiple` off.
let detectionSettings = """
{
"language_detection": {
"enabled": true,
"min_confidence": 0.8,
"detect_multiple": false
}
}
"""

// Both calls are marked `try`; a failure propagates out of the top-level script.
let extractionConfig = try extractionConfigFromJson(detectionSettings)
let extraction = try extractFileSync("document.pdf", nil, extractionConfig)

// `detected_languages()` is optional; when present, convert each entry with `toString()`.
let languageNames = extraction.detected_languages()?.map { $0.toString() }
if let languageNames = languageNames {
    print("Detected languages: \(languageNames)")
} else {
    print("No languages detected")
}
```

View File

@@ -0,0 +1,23 @@
```swift title="Swift"
import Foundation
import Kreuzberg
import RustBridge
// Configuration: language detection enabled, `min_confidence` 0.8, `detect_multiple` on.
let multiLanguageSettings = """
{
"language_detection": {
"enabled": true,
"min_confidence": 0.8,
"detect_multiple": true
}
}
"""

// Both calls are marked `try`; a failure propagates out of the top-level script.
let extractionConfig = try extractionConfigFromJson(multiLanguageSettings)
let extraction = try extractFileSync("multilingual_document.pdf", nil, extractionConfig)

// Nothing is printed when `detected_languages()` returns nil.
if let languageNames = extraction.detected_languages()?.map({ $0.toString() }) {
    print("Detected languages: \(languageNames)")
}
```

View File

@@ -0,0 +1,43 @@
```swift title="Swift"
import Foundation
import Kreuzberg
import RustBridge
/// Prints `label: value` when `value` is present; prints nothing for `nil`.
func printIfPresent<Value>(_ label: String, _ value: Value?) {
    if let present = value {
        print("\(label): \(present)")
    }
}

// Extract with an empty ("{}") configuration and read the metadata accessor once.
let extractionConfig = try extractionConfigFromJson("{}")
let extraction = try extractFileSync("document.pdf", nil, extractionConfig)
let documentInfo = extraction.metadata()

// Single-valued fields: each accessor is optional and converted with `toString()`.
printIfPresent("Title", documentInfo.title()?.toString())
printIfPresent("Subject", documentInfo.subject()?.toString())
printIfPresent("Language", documentInfo.language()?.toString())
printIfPresent("Created at", documentInfo.created_at()?.toString())
printIfPresent("Modified at", documentInfo.modified_at()?.toString())
printIfPresent("Created by", documentInfo.created_by()?.toString())

// List-valued fields are converted to arrays of Swift strings before printing.
printIfPresent("Authors", documentInfo.authors()?.map { $0.toString() })
printIfPresent("Keywords", documentInfo.keywords()?.map { $0.toString() })

printIfPresent("Extraction duration (ms)", documentInfo.extraction_duration_ms())

if let pageInfo = documentInfo.pages() {
    print("Page count: \(pageInfo.total_count())")
}
```

View File

@@ -0,0 +1,35 @@
```swift title="Swift"
import Foundation
import Kreuzberg
import RustBridge
// Extracts a document with an empty ("{}") configuration and prints a short
// preview of the first few pages, locating each page in the extracted content
// through the byte offsets reported by its page boundary.
let config = try extractionConfigFromJson("{}")
let result = try extractFileSync("document.pdf", nil, config)

// The boundaries expose `byte_start()` / `byte_end()`, so the content is indexed
// as UTF-8 bytes rather than as Characters.
let content = result.content().toString()
let utf8 = Array(content.utf8)

// Both the page structure and its boundaries are optional; stop early without them.
guard let pageStructure = result.metadata().pages() else {
    print("No page structure available")
    exit(0)
}
guard let boundaries = pageStructure.boundaries() else {
    print("No page boundaries available")
    exit(0)
}

let maxPreviewedPages = 3
let maxPreviewCharacters = 100

for boundary in boundaries.prefix(maxPreviewedPages) {
    // NOTE(review): the bridge may expose these offsets as an unsigned integer;
    // `Int(...)` normalizes them for array slicing either way — confirm the type.
    let byteStart = Int(boundary.byte_start())
    let byteEnd = Int(boundary.byte_end())

    // Clamp to the buffer so an out-of-range or inverted boundary yields an
    // empty/shortened preview instead of trapping on the slice.
    let lowerBound = min(max(byteStart, 0), utf8.count)
    let upperBound = min(max(byteEnd, lowerBound), utf8.count)

    // Decoding the slice directly avoids an intermediate copy; malformed bytes
    // are repaired with U+FFFD rather than collapsing the page text to "".
    let pageText = String(decoding: utf8[lowerBound..<upperBound], as: UTF8.self)

    // `prefix` already stops at the end of shorter strings.
    let preview = String(pageText.prefix(maxPreviewCharacters))

    print("Page \(boundary.page_number()):")
    print(" Byte range: \(byteStart)-\(byteEnd)")
    print(" Preview: \(preview)...")
}
```

View File

@@ -0,0 +1,28 @@
```swift title="Swift"
import Foundation
import Kreuzberg
import RustBridge
// Configuration: `pages.extract_pages` set to true.
let pageExtractionSettings = """
{
"pages": {
"extract_pages": true
}
}
"""

// Both calls are marked `try`; a failure propagates out of the top-level script.
let extractionConfig = try extractionConfigFromJson(pageExtractionSettings)
let extraction = try extractFileSync("document.pdf", nil, extractionConfig)

// `pages()` is optional; report its absence instead of iterating.
if let extractedPages = extraction.pages() {
    for page in extractedPages {
        // Gather the three per-page figures first, then print the summary.
        let characterCount = page.content().toString().count
        let tableCount = page.tables().count
        let imageCount = page.images().count

        print("Page \(page.page_number()):")
        print(" Content: \(characterCount) chars")
        print(" Tables: \(tableCount)")
        print(" Images: \(imageCount)")
    }
} else {
    print("No per-page content available")
}
```

View File

@@ -0,0 +1,20 @@
```swift title="Swift"
import Foundation
import Kreuzberg
import RustBridge
// Extract with an empty ("{}") configuration and list every table found.
let extractionConfig = try extractionConfigFromJson("{}")
let extraction = try extractFileSync("document.pdf", nil, extractionConfig)

let extractedTables = extraction.tables()
print("Tables: \(extractedTables.count)")

for (position, table) in extractedTables.enumerated() {
    let pageNumber = table.page_number()
    print("Table \(position) on page \(pageNumber)")
    print(table.markdown().toString())

    // The bounding box is optional; skip the extra line when it is absent.
    guard let box = table.bounding_box() else { continue }
    print(" Bounding box: \(box.toString())")
}
```

View File

@@ -0,0 +1,55 @@
```swift title="Swift"
import Foundation
import Kreuzberg
import RustBridge
/// One row destined for a vector store: a chunk's text, its embedding, and
/// string-keyed metadata describing where the chunk came from.
struct VectorRecord {
    /// Identifier for this record.
    let id: String

    /// Text content of the chunk.
    let content: String

    /// Embedding vector associated with `content`.
    let embedding: [Float]

    /// Additional string key/value pairs stored alongside the record.
    let metadata: [String: String]
}
// Configuration: chunking with `max_characters` 512 and `overlap` 50, plus an
// embedding section using the "balanced" preset, `batch_size` 32, `normalize` true.
let chunkingSettings = """
{
"chunking": {
"max_characters": 512,
"overlap": 50,
"embedding": {
"model": {"preset": {"name": "balanced"}},
"batch_size": 32,
"normalize": true
}
}
}
"""
let sourceDocumentID = "doc_001"

// Both calls are marked `try`; a failure propagates out of the top-level script.
let extractionConfig = try extractionConfigFromJson(chunkingSettings)
let extraction = try extractFileSync("document.pdf", nil, extractionConfig)

// Build one record per chunk that carries an embedding. The index comes from the
// position in the full chunk list, so skipped chunks still consume an index.
var records: [VectorRecord] = []
if let chunks = extraction.chunks() {
    records = chunks.enumerated().compactMap { entry -> VectorRecord? in
        guard let embedding = entry.element.embedding() else { return nil }

        let chunkIndex = entry.offset
        let chunkText = entry.element.content().toString()

        return VectorRecord(
            id: "\(sourceDocumentID)_chunk_\(chunkIndex)",
            content: chunkText,
            embedding: Array(embedding),
            metadata: [
                "document_id": sourceDocumentID,
                "chunk_index": String(chunkIndex),
                "content_length": String(chunkText.count),
            ]
        )
    }
}

print("Generated \(records.count) vector records")
```