Nomad changes
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s

This commit is contained in:
Henrik Jess Nielsen
2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions

View File

@@ -0,0 +1,21 @@
```zig title="Zig"
const std = @import("std");
const kreuzberg = @import("kreuzberg");
// Extracts "document.pdf" using a chunking configuration and writes the
// string returned by kreuzberg to stdout, followed by a newline.
pub fn main() !void {
    // Extraction options are handed to kreuzberg as a JSON document.
    const chunking_options =
        \\{
        \\ "chunking": {
        \\ "max_characters": 1500,
        \\ "overlap": 200
        \\ }
        \\}
    ;

    const extraction = try kreuzberg.extract_file_sync("document.pdf", null, chunking_options);
    // The returned buffer is handed back to the C allocator on scope exit.
    defer std.heap.c_allocator.free(extraction);

    try std.io.getStdOut().writer().print("{s}\n", .{extraction});
}
```

View File

@@ -0,0 +1,27 @@
```zig title="Zig"
const std = @import("std");
const kreuzberg = @import("kreuzberg");
// Combining chunking with embeddings yields RAG-ready output: every chunk
// in the returned JSON has `content`, position metadata, and an
// `embedding` vector whenever an embedding preset is configured.
pub fn main() !void {
    // Chunking options with an embedding preset, expressed as JSON.
    const rag_options =
        \\{
        \\ "chunking": {
        \\ "max_characters": 500,
        \\ "overlap": 50,
        \\ "embedding": {
        \\ "preset": "balanced"
        \\ }
        \\ }
        \\}
    ;

    const extraction = try kreuzberg.extract_file_sync("research_paper.pdf", null, rag_options);
    // The returned buffer is handed back to the C allocator on scope exit.
    defer std.heap.c_allocator.free(extraction);

    try std.io.getStdOut().writer().print("{s}\n", .{extraction});
}
```

View File

@@ -0,0 +1,24 @@
```zig title="Zig"
const std = @import("std");
const kreuzberg = @import("kreuzberg");
// Extracts "document.pdf" with chunking plus an embedding preset and
// writes the string returned by kreuzberg to stdout, followed by a newline.
pub fn main() !void {
    // Chunk size, overlap and embedding preset, expressed as JSON.
    const embedding_options =
        \\{
        \\ "chunking": {
        \\ "max_characters": 1024,
        \\ "overlap": 100,
        \\ "embedding": {
        \\ "preset": "balanced"
        \\ }
        \\ }
        \\}
    ;

    const extraction = try kreuzberg.extract_file_sync("document.pdf", null, embedding_options);
    // The returned buffer is handed back to the C allocator on scope exit.
    defer std.heap.c_allocator.free(extraction);

    try std.io.getStdOut().writer().print("{s}\n", .{extraction});
}
```

View File

@@ -0,0 +1,22 @@
```zig title="Zig"
const std = @import("std");
const kreuzberg = @import("kreuzberg");
// Extracts "research_paper.pdf" with a keyword configuration and writes
// the string returned by kreuzberg to stdout, followed by a newline.
pub fn main() !void {
    // Keyword settings (algorithm, count limit, score threshold) as JSON.
    const keyword_options =
        \\{
        \\ "keywords": {
        \\ "algorithm": "yake",
        \\ "max_keywords": 10,
        \\ "min_score": 0.3
        \\ }
        \\}
    ;

    const extraction = try kreuzberg.extract_file_sync("research_paper.pdf", null, keyword_options);
    // The returned buffer is handed back to the C allocator on scope exit.
    defer std.heap.c_allocator.free(extraction);

    try std.io.getStdOut().writer().print("{s}\n", .{extraction});
}
```

View File

@@ -0,0 +1,18 @@
```zig title="Zig"
const std = @import("std");
const kreuzberg = @import("kreuzberg");
// Extracts "scanned_document.pdf" with `enable_quality_processing` set and
// writes the string returned by kreuzberg to stdout, followed by a newline.
pub fn main() !void {
    // A single-flag configuration, expressed as JSON.
    const quality_options =
        \\{
        \\ "enable_quality_processing": true
        \\}
    ;

    const extraction = try kreuzberg.extract_file_sync("scanned_document.pdf", null, quality_options);
    // The returned buffer is handed back to the C allocator on scope exit.
    defer std.heap.c_allocator.free(extraction);

    try std.io.getStdOut().writer().print("{s}\n", .{extraction});
}
```

View File

@@ -0,0 +1,26 @@
```zig title="Zig"
const std = @import("std");
const kreuzberg = @import("kreuzberg");
// Inputs to `embed_texts` cross the FFI boundary as JSON:
// - `texts` is a JSON array of strings
// - `config` is a JSON-encoded `EmbeddingConfig`
// The result is a JSON-encoded 2D float array with one row per input text.
pub fn main() !void {
    // The texts to embed, as a JSON array of strings.
    const inputs =
        \\["Hello, world!", "Kreuzberg is fast"]
    ;
    // Embedding configuration: preset model plus normalization flag.
    const embedding_options =
        \\{
        \\ "model": {"type": "preset", "name": "balanced"},
        \\ "normalize": true
        \\}
    ;

    const vectors = try kreuzberg.embed_texts(inputs, embedding_options);
    // The returned buffer is handed back to the C allocator on scope exit.
    defer std.heap.c_allocator.free(vectors);

    try std.io.getStdOut().writer().print("{s}\n", .{vectors});
}
```

View File

@@ -0,0 +1,21 @@
```zig title="Zig"
const std = @import("std");
const kreuzberg = @import("kreuzberg");
// Extracts "document.pdf" with a token-reduction configuration and writes
// the string returned by kreuzberg to stdout, followed by a newline.
pub fn main() !void {
    // Token-reduction mode and word-preservation flag, expressed as JSON.
    const reduction_options =
        \\{
        \\ "token_reduction": {
        \\ "mode": "moderate",
        \\ "preserve_important_words": true
        \\ }
        \\}
    ;

    const extraction = try kreuzberg.extract_file_sync("document.pdf", null, reduction_options);
    // The returned buffer is handed back to the C allocator on scope exit.
    defer std.heap.c_allocator.free(extraction);

    try std.io.getStdOut().writer().print("{s}\n", .{extraction});
}
```

View File

@@ -0,0 +1,21 @@
```zig title="Zig"
const std = @import("std");
const kreuzberg = @import("kreuzberg");
// Extracts "verbose_document.pdf" with a token-reduction configuration and
// writes the string returned by kreuzberg to stdout, followed by a newline.
pub fn main() !void {
    // Token-reduction mode and word-preservation flag, expressed as JSON.
    const reduction_options =
        \\{
        \\ "token_reduction": {
        \\ "mode": "moderate",
        \\ "preserve_important_words": true
        \\ }
        \\}
    ;

    const extraction = try kreuzberg.extract_file_sync("verbose_document.pdf", null, reduction_options);
    // The returned buffer is handed back to the C allocator on scope exit.
    defer std.heap.c_allocator.free(extraction);

    try std.io.getStdOut().writer().print("{s}\n", .{extraction});
}
```

View File

@@ -0,0 +1,28 @@
```zig title="Zig"
const std = @import("std");
const kreuzberg = @import("kreuzberg");
// Chunking is configured together with embeddings, so the resulting JSON
// contains a `chunks` array whose entries each carry `content` and
// `embedding`. Those entries can be inserted into a vector store (Qdrant,
// pgvector, Pinecone, etc.) straight from the parsed JSON.
pub fn main() !void {
    // Chunk size, overlap and embedding preset, expressed as JSON.
    const vector_store_options =
        \\{
        \\ "chunking": {
        \\ "max_characters": 512,
        \\ "overlap": 50,
        \\ "embedding": {
        \\ "preset": "balanced"
        \\ }
        \\ }
        \\}
    ;

    const extraction = try kreuzberg.extract_file_sync("document.pdf", null, vector_store_options);
    // The returned buffer is handed back to the C allocator on scope exit.
    defer std.heap.c_allocator.free(extraction);

    try std.io.getStdOut().writer().print("{s}\n", .{extraction});
}
```