Nomad changes
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s

This commit is contained in:
Henrik Jess Nielsen
2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions

View File

@@ -0,0 +1,34 @@
```swift title="Swift"
import Foundation
import Kreuzberg
import RustBridge
// Configuration: chunking with max_characters 500 / overlap 50, and
// pages.extract_pages switched on.
let configJson = """
{
"chunking": {
"max_characters": 500,
"overlap": 50
},
"pages": {
"extract_pages": true
}
}
"""
let config = try extractionConfigFromJson(configJson)
let result = try extractFileSync("document.pdf", nil, config)

// `chunks()` is optional; nothing is printed when it returns nil.
if let chunks = result.chunks() {
    for chunk in chunks {
        let pageInfo = chunk.metadata()
        let text = chunk.content().toString()
        // Only the first 50 characters of the chunk are shown.
        let preview = String(text.prefix(50))

        if let firstPage = pageInfo.first_page(), let lastPage = pageInfo.last_page() {
            // A chunk whose first and last page match is labelled with one page.
            let pageRange: String
            if firstPage == lastPage {
                pageRange = "Page \(firstPage)"
            } else {
                pageRange = "Pages \(firstPage)-\(lastPage)"
            }
            print("Chunk: \(preview)... (\(pageRange))")
        } else {
            // Either page bound is missing from the metadata.
            print("Chunk: \(preview)... (no page info)")
        }
    }
}
```

View File

@@ -0,0 +1,25 @@
```swift title="Swift"
import Foundation
import Kreuzberg
import RustBridge
// Configuration: chunking with max_characters 1000 / overlap 200.
let configJson = """
{
"chunking": {
"max_characters": 1000,
"overlap": 200
}
}
"""
let config = try extractionConfigFromJson(configJson)
let result = try extractFileSync("document.pdf", nil, config)

// `chunks()` is optional; nothing is printed when it returns nil.
if let chunks = result.chunks() {
    print("Chunks: \(chunks.count)")
    for chunk in chunks {
        let info = chunk.metadata()
        // chunk_index() is shown with +1 so the printed position starts at 1
        // for an index of 0.
        let position = info.chunk_index() + 1
        let total = info.total_chunks()
        print("Chunk \(position)/\(total)")
    }
}
```

View File

@@ -0,0 +1,35 @@
```swift title="Swift"
import Foundation
import Kreuzberg
import RustBridge
// Configuration: chunking (max_characters 500, overlap 50) with an
// "embedding" section that selects the "balanced" preset and sets
// "normalize" to true.
let configJson = """
{
"chunking": {
"max_characters": 500,
"overlap": 50,
"embedding": {
"model": {"preset": {"name": "balanced"}},
"normalize": true
}
}
}
"""
let config = try extractionConfigFromJson(configJson)
let result = try extractFileSync("research_paper.pdf", nil, config)

// `chunks()` is optional; nothing is printed when it returns nil.
if let chunks = result.chunks() {
    for chunk in chunks {
        let info = chunk.metadata()
        let text = chunk.content().toString()
        // Only the first 100 characters of the chunk are shown.
        let preview = String(text.prefix(100))

        let position = info.chunk_index() + 1
        print("Chunk \(position)/\(info.total_chunks())")

        let startOffset = info.byte_start()
        let endOffset = info.byte_end()
        print("Position: \(startOffset)-\(endOffset)")

        print("Content: \(preview)...")

        // The embedding is optional per chunk; its line is skipped when absent.
        if let vector = chunk.embedding() {
            print("Embedding: \(vector.count) dimensions")
        }
    }
}
```

View File

@@ -0,0 +1,32 @@
```swift title="Swift"
import Foundation
import Kreuzberg
import RustBridge
// Configuration: chunking (max_characters 1024, overlap 100) with an
// "embedding" section: "balanced" preset, normalize true, batch_size 32,
// show_download_progress false.
let configJson = """
{
"chunking": {
"max_characters": 1024,
"overlap": 100,
"embedding": {
"model": {"preset": {"name": "balanced"}},
"normalize": true,
"batch_size": 32,
"show_download_progress": false
}
}
}
"""
let config = try extractionConfigFromJson(configJson)
let result = try extractFileSync("document.pdf", nil, config)

// `chunks()` is optional; nothing is printed when it returns nil.
if let chunks = result.chunks() {
    print("Generated \(chunks.count) chunks")
    for chunk in chunks {
        // Chunks without an embedding produce no output line.
        guard let vector = chunk.embedding() else { continue }
        // The index is printed exactly as chunk_index() reports it.
        let index = chunk.metadata().chunk_index()
        print("Chunk \(index) -> \(vector.count)-dim embedding")
    }
}
```

View File

@@ -0,0 +1,24 @@
```swift title="Swift"
import Foundation
import Kreuzberg
import RustBridge
// Configuration: a "keywords" section with algorithm "yake",
// max_keywords 10, min_score 0.3, ngram_range [1, 3] and language "en".
let configJson = """
{
"keywords": {
"algorithm": "yake",
"max_keywords": 10,
"min_score": 0.3,
"ngram_range": [1, 3],
"language": "en"
}
}
"""
let config = try extractionConfigFromJson(configJson)
let result = try extractFileSync("document.pdf", nil, config)

// `extracted_keywords()` is optional; nothing is printed when it is nil.
if let keywords = result.extracted_keywords() {
    let keywordCount = keywords.count
    print("Extracted \(keywordCount) keywords")
}
```

View File

@@ -0,0 +1,26 @@
```swift title="Swift"
import Foundation
import Kreuzberg
import RustBridge
// Configuration: a "keywords" section with algorithm "yake",
// max_keywords 10 and min_score 0.3.
let configJson = """
{
"keywords": {
"algorithm": "yake",
"max_keywords": 10,
"min_score": 0.3
}
}
"""
let config = try extractionConfigFromJson(configJson)
let result = try extractFileSync("research_paper.pdf", nil, config)

// `extracted_keywords()` is optional; nothing is printed when it is nil.
if let keywords = result.extracted_keywords() {
    // One line per keyword: its text followed by its score.
    for entry in keywords {
        print("\(entry.text().toString()) (score: \(entry.score()))")
    }
}
```

View File

@@ -0,0 +1,23 @@
```swift title="Swift"
import Foundation
import Kreuzberg
import RustBridge
// Configuration: "language_detection" enabled with min_confidence 0.8 and
// detect_multiple set to false.
let configJson = """
{
"language_detection": {
"enabled": true,
"min_confidence": 0.8,
"detect_multiple": false
}
}
"""
let config = try extractionConfigFromJson(configJson)
let result = try extractFileSync("document.pdf", nil, config)

// `detected_languages()` is optional; nothing is printed when it is nil.
if let languages = result.detected_languages() {
    // Convert each entry to a Swift String before printing the list.
    let langs = languages.map { language in
        language.toString()
    }
    print("Detected: \(langs)")
}
```

View File

@@ -0,0 +1,23 @@
```swift title="Swift"
import Foundation
import Kreuzberg
import RustBridge
// Configuration: "language_detection" enabled with min_confidence 0.8 and
// detect_multiple set to true.
let configJson = """
{
"language_detection": {
"enabled": true,
"min_confidence": 0.8,
"detect_multiple": true
}
}
"""
let config = try extractionConfigFromJson(configJson)
let result = try extractFileSync("multilingual_document.pdf", nil, config)

// `detected_languages()` is optional; nothing is printed when it is nil.
if let languages = result.detected_languages() {
    // Collect each entry as a Swift String, preserving iteration order.
    var langs: [String] = []
    for language in languages {
        langs.append(language.toString())
    }
    print("Detected languages: \(langs)")
}
```

View File

@@ -0,0 +1,18 @@
```swift title="Swift"
import Foundation
import Kreuzberg
import RustBridge
// Configuration: only "enable_quality_processing" is set (to true).
let configJson = """
{
"enable_quality_processing": true
}
"""
let config = try extractionConfigFromJson(configJson)
let result = try extractFileSync("document.pdf", nil, config)

// `quality_score()` is optional; nothing is printed when it is nil.
if let score = result.quality_score() {
    // Two decimal places via the Foundation format-string initializer.
    let message = String(format: "Quality score: %.2f", score)
    print(message)
}
```

View File

@@ -0,0 +1,22 @@
```swift title="Swift"
import Foundation
import Kreuzberg
import RustBridge
// Configuration: only "enable_quality_processing" is set (to true).
let configJson = """
{
"enable_quality_processing": true
}
"""
let config = try extractionConfigFromJson(configJson)
let result = try extractFileSync("scanned_document.pdf", nil, config)

// `quality_score()` is optional; nothing is printed when it is nil.
if let score = result.quality_score() {
    // Scores strictly below 0.5 get the warning wording; everything else
    // gets the plain wording. Both show the score with two decimals.
    let template = score < 0.5
        ? "Warning: Low quality extraction (%.2f)"
        : "Quality score: %.2f"
    print(String(format: template, score))
}
```

View File

@@ -0,0 +1,21 @@
```swift title="Swift"
import Foundation
import Kreuzberg
import RustBridge
// Configuration: "token_reduction" with mode "moderate", preserve_markdown
// and preserve_code both true, and language_hint "eng".
let configJson = """
{
"token_reduction": {
"mode": "moderate",
"preserve_markdown": true,
"preserve_code": true,
"language_hint": "eng"
}
}
"""
let config = try extractionConfigFromJson(configJson)
let result = try extractFileSync("document.pdf", nil, config)

// Report the character count of the extracted content string.
let reducedText = result.content().toString()
print("Reduced content length: \(reducedText.count)")
```

View File

@@ -0,0 +1,23 @@
```swift title="Swift"
import Foundation
import Kreuzberg
import RustBridge
// Configuration: "token_reduction" with mode "moderate" and
// preserve_markdown true.
let configJson = """
{
"token_reduction": {
"mode": "moderate",
"preserve_markdown": true
}
}
"""
let config = try extractionConfigFromJson(configJson)
let result = try extractFileSync("verbose_document.pdf", nil, config)

// Report the character count of the extracted content string.
let content = result.content().toString()
print("Reduced content length: \(content.count)")

// Print every processing warning as "[source]: message".
for warning in result.processing_warnings() {
    let origin = warning.source().toString()
    let detail = warning.message().toString()
    print("Warning [\(origin)]: \(detail)")
}
```

View File

@@ -0,0 +1,51 @@
```swift title="Swift"
import Foundation
import Kreuzberg
import RustBridge
/// One chunk of a document together with its embedding vector and a small
/// bag of string metadata.
///
/// Property order is significant: it determines the argument order of the
/// synthesized memberwise initializer.
struct VectorRecord {
    /// Identifier for this record.
    let id: String
    /// Text content of the chunk.
    let content: String
    /// Embedding vector for `content`.
    let embedding: [Float]
    /// String key/value pairs stored alongside the record.
    let metadata: [String: String]
}
/// Extracts a document with chunking and embeddings enabled and turns every
/// chunk that has an embedding into a `VectorRecord`.
///
/// - Parameters:
///   - documentPath: Path passed to `extractFileSync`.
///   - documentId: Identifier used as the prefix of each record id and stored
///     under the `"document_id"` metadata key.
/// - Returns: One record per chunk that carries an embedding, in chunk order.
///   The result is empty when the extraction reports no chunks.
/// - Throws: Rethrows whatever `extractionConfigFromJson` or
///   `extractFileSync` throws.
func extractAndVectorize(documentPath: String, documentId: String) throws -> [VectorRecord] {
    // The literal's content and closing delimiter share the same indentation,
    // so the resulting string carries no leading whitespace.
    let configJson = """
    {
    "chunking": {
    "max_characters": 512,
    "overlap": 50,
    "embedding": {
    "model": {"preset": {"name": "balanced"}},
    "normalize": true,
    "batch_size": 32
    }
    }
    }
    """
    let config = try extractionConfigFromJson(configJson)
    let result = try extractFileSync(documentPath, nil, config)

    guard let chunks = result.chunks() else {
        return []
    }

    // `offset` is the position in the full chunk list, so chunks skipped for
    // lacking an embedding still consume an index.
    return chunks.enumerated().compactMap { entry -> VectorRecord? in
        guard let vector = entry.element.embedding() else {
            return nil
        }
        let text = entry.element.content().toString()
        let index = entry.offset
        return VectorRecord(
            id: "\(documentId)_chunk_\(index)",
            content: text,
            // Copy the embedding's elements into a plain Swift array.
            embedding: vector.map { $0 },
            metadata: [
                "document_id": documentId,
                "chunk_index": String(index),
                "content_length": String(text.count),
            ]
        )
    }
}
```