Nomad changes
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s

This commit is contained in:
Henrik Jess Nielsen
2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions

View File

@@ -0,0 +1,59 @@
```swift title="Swift"
import Foundation
import Kreuzberg
import RustBridge
// `ExtractionConfig` has 30+ fields, so a non-trivial configuration is most
// conveniently written as one JSON document and parsed in a single call.
let rawConfig = """
{
"use_cache": true,
"enable_quality_processing": true,
"ocr": {
"backend": "tesseract",
"language": "eng"
},
"chunking": {
"max_characters": 1000,
"overlap": 200,
"embedding": {
"model": {"preset": {"name": "balanced"}},
"batch_size": 32,
"normalize": true,
"show_download_progress": false
}
},
"language_detection": {
"enabled": true,
"min_confidence": 0.8,
"detect_multiple": false
},
"keywords": {
"algorithm": "yake",
"max_keywords": 10,
"min_score": 0.1,
"ngram_range": [1, 3],
"language": "en"
},
"token_reduction": {
"mode": "moderate",
"preserve_important_words": true
},
"postprocessor": {
"enabled": true
}
}
"""

// Parse the JSON into a config, then extract synchronously with it.
let extractionConfig = try extractionConfigFromJson(rawConfig)
let extraction = try extractFileSync("document.pdf", nil, extractionConfig)

let text = extraction.content().toString()
print("Content: \(text)")

// Detected languages are optional; report them only when present.
if let detected = extraction.detected_languages() {
    let langs = detected.map { language in language.toString() }
    print("Languages: \(langs)")
}

// Chunks are optional as well; report how many were returned.
if let pieces = extraction.chunks() {
    print("Chunks: \(pieces.count)")
}
```

View File

@@ -0,0 +1,27 @@
```swift title="Swift"
import Foundation
import Kreuzberg
import RustBridge
// Chunking config using the `markdown` chunker type with
// `prepend_heading_context` turned on.
let rawConfig = """
{
"chunking": {
"max_characters": 1000,
"overlap": 100,
"chunker_type": "markdown",
"prepend_heading_context": true
}
}
"""

let extractionConfig = try extractionConfigFromJson(rawConfig)
let extraction = try extractFileSync("document.md", nil, extractionConfig)

// Report the chunk count, then the character length of each chunk's content.
if let pieces = extraction.chunks() {
    print("Chunks: \(pieces.count)")
    pieces.forEach { piece in
        let length = piece.content().toString().count
        print("Length: \(length)")
    }
}
```

View File

@@ -0,0 +1,17 @@
```swift title="Swift"
import Foundation
import Kreuzberg
import RustBridge
// Minimal config: only `use_cache` and `enable_quality_processing` are set.
let rawConfig = """
{
"use_cache": true,
"enable_quality_processing": true
}
"""

// Parse the config inline and extract in one statement, then print the text.
let extraction = try extractFileSync("document.pdf", nil, extractionConfigFromJson(rawConfig))
let text = extraction.content().toString()
print(text)
```

View File

@@ -0,0 +1,21 @@
```swift title="Swift"
import Foundation
import Kreuzberg
import RustBridge
// The Swift bindings construct configs from JSON. To apply an on-disk
// `kreuzberg.{toml,yaml,json}`, read the file and hand its JSON form to
// `extractionConfigFromJson`; other formats can be converted to JSON by the
// caller first. This sample reads `kreuzberg.json` only.
let configURL = URL(fileURLWithPath: "kreuzberg.json")

// If the file cannot be read or is not valid UTF-8, fall back to an empty
// JSON object.
let loadedText = (try? Data(contentsOf: configURL)).flatMap { bytes in
    String(data: bytes, encoding: .utf8)
}
let rawConfig = loadedText ?? "{}"

let extractionConfig = try extractionConfigFromJson(rawConfig)
let extraction = try extractFileSync("document.pdf", nil, extractionConfig)
print(extraction.content().toString())
```

View File

@@ -0,0 +1,20 @@
```swift title="Swift"
import Foundation
import Kreuzberg
import RustBridge
// OCR config: `tesseract` backend with language `eng`.
let rawConfig = """
{
"ocr": {
"backend": "tesseract",
"language": "eng"
}
}
"""

let extractionConfig = try extractionConfigFromJson(rawConfig)
let extraction = try extractFileSync("scanned.pdf", nil, extractionConfig)

// Report the size of the extracted text and the number of tables returned.
let text = extraction.content().toString()
print("Content length: \(text.count)")
let tableCount = extraction.tables().count
print("Tables detected: \(tableCount)")
```

View File

@@ -0,0 +1,28 @@
```swift title="Swift"
import Foundation
import Kreuzberg
import RustBridge
// Combined config: caching, quality processing, OCR (with a nested
// `tesseract_config`) and chunking in a single JSON document.
let rawConfig = """
{
"use_cache": true,
"enable_quality_processing": true,
"ocr": {
"backend": "tesseract",
"language": "eng+deu",
"tesseract_config": {
"psm": 6
}
},
"chunking": {
"max_characters": 1000,
"overlap": 200
}
}
"""

let extractionConfig = try extractionConfigFromJson(rawConfig)
let extraction = try extractFileSync("document.pdf", nil, extractionConfig)

let text = extraction.content().toString()
print("Content length: \(text.count)")
```

View File

@@ -0,0 +1,18 @@
```swift title="Swift"
import Foundation
import Kreuzberg
import RustBridge
// Request the document structure via `include_document_structure`.
let rawConfig = """
{
"include_document_structure": true
}
"""

let extractionConfig = try extractionConfigFromJson(rawConfig)
let extraction = try extractFileSync("document.pdf", nil, extractionConfig)

// The document is optional; report its node count only when it is present.
if let tree = extraction.document() {
    let nodeCount = tree.nodes().count
    print("Document nodes: \(nodeCount)")
}
```

View File

@@ -0,0 +1,22 @@
```swift title="Swift"
import Foundation
import Kreuzberg
import RustBridge
// Select the `element_based` result format.
let rawConfig = """
{
"result_format": "element_based"
}
"""

let extractionConfig = try extractionConfigFromJson(rawConfig)
let extraction = try extractFileSync("document.pdf", nil, extractionConfig)

// Elements are optional; when present, print each element's type followed by
// the first 100 characters of its text.
if let items = extraction.elements() {
    print("Elements: \(items.count)")
    items.forEach { item in
        let kind = item.element_type().toString()
        print("Type: \(kind)")
        let preview = item.text().toString().prefix(100)
        print("Text: \(preview)")
    }
}
```

View File

@@ -0,0 +1,27 @@
```swift title="Swift"
import Foundation
import Kreuzberg
import RustBridge
// Chunking config with a nested `embedding` section using the `balanced`
// model preset.
let rawConfig = """
{
"chunking": {
"max_characters": 1000,
"overlap": 200,
"embedding": {
"model": {"preset": {"name": "balanced"}},
"batch_size": 16,
"normalize": true,
"show_download_progress": true
}
}
}
"""

let extractionConfig = try extractionConfigFromJson(rawConfig)
let extraction = try extractFileSync("document.pdf", nil, extractionConfig)

// Chunks are optional; report how many came back.
if let pieces = extraction.chunks() {
    let total = pieces.count
    print("Chunks with embeddings: \(total)")
}
```

View File

@@ -0,0 +1,19 @@
```swift title="Swift"
import Foundation
import Kreuzberg
import RustBridge
// Ask for HTML output and pick the `github` theme under `html_output`.
let rawConfig = """
{
"output_format": "html",
"html_output": {
"theme": "github"
}
}
"""

let extractionConfig = try extractionConfigFromJson(rawConfig)
let extraction = try extractFileSync("document.pdf", nil, extractionConfig)

// The content is HTML with kb-* classes.
let html = extraction.content().toString()
print(html)
```

View File

@@ -0,0 +1,23 @@
```swift title="Swift"
import Foundation
import Kreuzberg
import RustBridge
// Keyword extraction config: `yake` algorithm, at most 10 keywords.
let rawConfig = """
{
"keywords": {
"algorithm": "yake",
"max_keywords": 10,
"min_score": 0.1,
"ngram_range": [1, 3],
"language": "en"
}
}
"""

let extractionConfig = try extractionConfigFromJson(rawConfig)
let extraction = try extractFileSync("document.pdf", nil, extractionConfig)

// NOTE(review): this sample reports only the content length; it never reads
// keyword output from the result. Confirm the accessor and print it here.
print("Keywords extracted from document")
let text = extraction.content().toString()
print("Content length: \(text.count)")
```

View File

@@ -0,0 +1,23 @@
```swift title="Swift"
import Foundation
import Kreuzberg
import RustBridge
// Language detection config with `detect_multiple` turned on.
let rawConfig = """
{
"language_detection": {
"enabled": true,
"min_confidence": 0.8,
"detect_multiple": true
}
}
"""

let extractionConfig = try extractionConfigFromJson(rawConfig)
let extraction = try extractFileSync("document.pdf", nil, extractionConfig)

// Detected languages are optional; convert each to a Swift string and print.
if let detected = extraction.detected_languages() {
    let langs = detected.map { language in language.toString() }
    print("Detected languages: \(langs)")
}
```

View File

@@ -0,0 +1,23 @@
```swift title="Swift"
import Foundation
import Kreuzberg
import RustBridge
// Image extraction config: DPI and maximum-dimension settings under `images`.
let rawConfig = """
{
"images": {
"extract_images": true,
"target_dpi": 300,
"max_image_dimension": 4096,
"auto_adjust_dpi": true,
"min_dpi": 150,
"max_dpi": 600
}
}
"""

let extractionConfig = try extractionConfigFromJson(rawConfig)
let extraction = try extractFileSync("document.pdf", nil, extractionConfig)

let text = extraction.content().toString()
print("Content length: \(text.count)")
```

View File

@@ -0,0 +1,20 @@
```swift title="Swift"
import Foundation
import Kreuzberg
import RustBridge
// PDF options: image and metadata extraction plus a list of `passwords`.
let rawConfig = """
{
"pdf_options": {
"extract_images": true,
"passwords": ["password123"],
"extract_metadata": true
}
}
"""

let extractionConfig = try extractionConfigFromJson(rawConfig)
let extraction = try extractFileSync("encrypted.pdf", nil, extractionConfig)

let text = extraction.content().toString()
print("Content length: \(text.count)")
```

View File

@@ -0,0 +1,24 @@
```swift title="Swift"
import Foundation
import Kreuzberg
import RustBridge
// PDF options with a nested `hierarchy` section (thresholds and level bounds).
let rawConfig = """
{
"pdf_options": {
"hierarchy": {
"enabled": true,
"detection_threshold": 0.75,
"ocr_coverage_threshold": 0.8,
"min_level": 1,
"max_level": 5
}
}
}
"""

let extractionConfig = try extractionConfigFromJson(rawConfig)
let extraction = try extractFileSync("document.pdf", nil, extractionConfig)

let text = extraction.content().toString()
print("Content length: \(text.count)")
```

View File

@@ -0,0 +1,19 @@
```swift title="Swift"
import Foundation
import Kreuzberg
import RustBridge
// Postprocessor config listing two entries under `enabled_processors`.
let rawConfig = """
{
"postprocessor": {
"enabled": true,
"enabled_processors": ["whitespace_normalizer", "unicode_normalizer"]
}
}
"""

let extractionConfig = try extractionConfigFromJson(rawConfig)
let extraction = try extractFileSync("document.pdf", nil, extractionConfig)

let processed = extraction.content().toString()
print("Processed content: \(processed)")
```

View File

@@ -0,0 +1,18 @@
```swift title="Swift"
import Foundation
import Kreuzberg
import RustBridge
// Turn on `enable_quality_processing` together with `use_cache`.
let rawConfig = """
{
"enable_quality_processing": true,
"use_cache": true
}
"""

let extractionConfig = try extractionConfigFromJson(rawConfig)
let extraction = try extractFileSync("document.pdf", nil, extractionConfig)

// Report the size of the extracted text and the number of tables returned.
let text = extraction.content().toString()
print("Content length: \(text.count)")
let tableCount = extraction.tables().count
print("Tables: \(tableCount)")
print("Tables: \(result.tables().count)")
```

View File

@@ -0,0 +1,23 @@
```swift title="Swift"
import Foundation
import Kreuzberg
import RustBridge
// OCR config: `tesseract` backend, language `eng+deu`, and a nested
// `tesseract_config` setting both `psm` and `oem`.
let rawConfig = """
{
"ocr": {
"backend": "tesseract",
"language": "eng+deu",
"tesseract_config": {
"psm": 6,
"oem": 3
}
}
}
"""

let extractionConfig = try extractionConfigFromJson(rawConfig)
let extraction = try extractFileSync("scanned.pdf", nil, extractionConfig)

let recognized = extraction.content().toString()
print("OCR text: \(recognized)")
```

View File

@@ -0,0 +1,19 @@
```swift title="Swift"
import Foundation
import Kreuzberg
import RustBridge
// Token reduction config: `moderate` mode with `preserve_important_words` on.
let rawConfig = """
{
"token_reduction": {
"mode": "moderate",
"preserve_important_words": true
}
}
"""

let extractionConfig = try extractionConfigFromJson(rawConfig)
let extraction = try extractFileSync("document.pdf", nil, extractionConfig)

let reduced = extraction.content().toString()
print("Reduced content length: \(reduced.count)")
```