Nomad changes
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s

This commit is contained in:
Henrik Jess Nielsen
2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions

View File

@@ -0,0 +1,22 @@
```zig title="Zig"
const std = @import("std");
const kreuzberg = @import("kreuzberg");
pub fn main() !void {
    // The whole batch crosses the FFI boundary as one JSON-encoded array.
    // Each item's `content` is base64 text, per the FFI schema for BatchBytesItem.
    const batch_payload =
        \\[
        \\ {"content": "SGVsbG8sIHdvcmxkIQ==", "mime_type": "text/plain", "config": null},
        \\ {"content": "IyBIZWFkaW5nCgpQYXJhZ3JhcGggdGV4dC4=", "mime_type": "text/markdown", "config": null}
        \\]
    ;

    // An empty JSON object is passed as the batch-level configuration document.
    const extraction_json = try kreuzberg.batch_extract_bytes_sync(batch_payload, "{}");
    // The returned buffer is released through the C allocator once printing is done.
    defer std.heap.c_allocator.free(extraction_json);

    try std.io.getStdOut().writer().print("{s}\n", .{extraction_json});
}
```

View File

@@ -0,0 +1,22 @@
```zig title="Zig"
const std = @import("std");
const kreuzberg = @import("kreuzberg");
pub fn main() !void {
    // The list of files crosses the FFI boundary as one JSON-encoded array;
    // every entry names a path and carries a null per-item config.
    const file_batch =
        \\[
        \\ {"path": "doc1.pdf", "config": null},
        \\ {"path": "doc2.docx", "config": null},
        \\ {"path": "report.pdf", "config": null}
        \\]
    ;

    // An empty JSON object is passed as the batch-level configuration document.
    const extraction_json = try kreuzberg.batch_extract_files_sync(file_batch, "{}");
    // The returned buffer is released through the C allocator once printing is done.
    defer std.heap.c_allocator.free(extraction_json);

    try std.io.getStdOut().writer().print("{s}\n", .{extraction_json});
}
```

View File

@@ -0,0 +1,53 @@
```zig title="Zig"
const std = @import("std");
/// Uploads `document.pdf` to a local `/extract` endpoint as multipart/form-data,
/// together with a `chunking` form field, and prints the response body to stdout.
pub fn main() !void {
    var gpa_state = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa_state.deinit();
    const gpa = gpa_state.allocator();

    // Read the document up front (capped at 64 MiB).
    const pdf_bytes = try std.fs.cwd().readFileAlloc(gpa, "document.pdf", 64 * 1024 * 1024);
    defer gpa.free(pdf_bytes);

    const boundary = "----kreuzberg-zig-boundary";

    // The boundary is comptime-known, so the fixed pieces of the multipart
    // body can be assembled at compile time rather than formatted at runtime.
    const file_part_header = "--" ++ boundary ++
        "\r\nContent-Disposition: form-data; name=\"file\"; filename=\"document.pdf\"\r\n" ++
        "Content-Type: application/pdf\r\n\r\n";
    const chunking_part_and_footer = "\r\n--" ++ boundary ++
        "\r\nContent-Disposition: form-data; name=\"chunking\"\r\n\r\n" ++
        "{\"max_characters\":800,\"overlap\":100}\r\n--" ++ boundary ++ "--\r\n";

    var payload = std.ArrayList(u8).init(gpa);
    defer payload.deinit();
    try payload.appendSlice(file_part_header);
    try payload.appendSlice(pdf_bytes);
    try payload.appendSlice(chunking_part_and_footer);

    var http_client = std.http.Client{ .allocator = gpa };
    defer http_client.deinit();

    const endpoint = try std.Uri.parse("http://localhost:8000/extract");
    var response_header_storage: [4096]u8 = undefined;
    var request = try http_client.open(.POST, endpoint, .{
        .server_header_buffer = &response_header_storage,
        .extra_headers = &.{
            .{ .name = "content-type", .value = "multipart/form-data; boundary=" ++ boundary },
        },
    });
    defer request.deinit();

    // The full body is already in memory, so its exact length is declared.
    request.transfer_encoding = .{ .content_length = payload.items.len };
    try request.send();
    try request.writeAll(payload.items);
    try request.finish();
    try request.wait();

    // Read the whole response body (capped at 16 MiB) and echo it.
    const reply = try request.reader().readAllAlloc(gpa, 16 * 1024 * 1024);
    defer gpa.free(reply);

    try std.io.getStdOut().writer().print("{s}\n", .{reply});
}
```

View File

@@ -0,0 +1,49 @@
```zig title="Zig"
const std = @import("std");
/// Uploads `document.pdf` to a local `/extract` endpoint as a single-part
/// multipart/form-data request and prints the response body to stdout.
pub fn main() !void {
    var gpa_state = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa_state.deinit();
    const gpa = gpa_state.allocator();

    // Read the document up front (capped at 64 MiB).
    const pdf_bytes = try std.fs.cwd().readFileAlloc(gpa, "document.pdf", 64 * 1024 * 1024);
    defer gpa.free(pdf_bytes);

    const boundary = "----kreuzberg-zig-boundary";

    // The boundary is comptime-known, so the framing around the file bytes
    // can be assembled at compile time rather than formatted at runtime.
    const file_part_header = "--" ++ boundary ++
        "\r\nContent-Disposition: form-data; name=\"file\"; filename=\"document.pdf\"\r\n" ++
        "Content-Type: application/pdf\r\n\r\n";
    const closing_delimiter = "\r\n--" ++ boundary ++ "--\r\n";

    var payload = std.ArrayList(u8).init(gpa);
    defer payload.deinit();
    try payload.appendSlice(file_part_header);
    try payload.appendSlice(pdf_bytes);
    try payload.appendSlice(closing_delimiter);

    var http_client = std.http.Client{ .allocator = gpa };
    defer http_client.deinit();

    const endpoint = try std.Uri.parse("http://localhost:8000/extract");
    var response_header_storage: [4096]u8 = undefined;
    var request = try http_client.open(.POST, endpoint, .{
        .server_header_buffer = &response_header_storage,
        .extra_headers = &.{
            .{ .name = "content-type", .value = "multipart/form-data; boundary=" ++ boundary },
        },
    });
    defer request.deinit();

    // The full body is already in memory, so its exact length is declared.
    request.transfer_encoding = .{ .content_length = payload.items.len };
    try request.send();
    try request.writeAll(payload.items);
    try request.finish();
    try request.wait();

    // Read the whole response body (capped at 16 MiB) and echo it.
    const reply = try request.reader().readAllAlloc(gpa, 16 * 1024 * 1024);
    defer gpa.free(reply);

    try std.io.getStdOut().writer().print("{s}\n", .{reply});
}
```

View File

@@ -0,0 +1,37 @@
```zig title="Zig"
const std = @import("std");
const kreuzberg = @import("kreuzberg");
pub fn main() !void {
    // The extraction configuration travels across the FFI as one JSON document.
    // This one sets caching, quality processing, OCR, chunking, image
    // extraction, and the output format together.
    const combined_config =
        \\{
        \\ "use_cache": true,
        \\ "enable_quality_processing": true,
        \\ "force_ocr": false,
        \\ "ocr": {
        \\ "backend": "tesseract",
        \\ "language": "eng"
        \\ },
        \\ "chunking": {
        \\ "max_characters": 800,
        \\ "overlap": 100,
        \\ "chunker_type": "markdown",
        \\ "prepend_heading_context": true
        \\ },
        \\ "images": {
        \\ "extract_images": true
        \\ },
        \\ "output_format": "markdown",
        \\ "include_document_structure": true
        \\}
    ;

    // The second argument is passed as null here.
    const extraction_json = try kreuzberg.extract_file_sync("report.pdf", null, combined_config);
    // The returned buffer is released through the C allocator once printing is done.
    defer std.heap.c_allocator.free(extraction_json);

    const byte_count = extraction_json.len;
    try std.io.getStdOut().writer().print(
        "Result ({d} bytes of JSON):\n{s}\n",
        .{ byte_count, extraction_json },
    );
}
```

View File

@@ -0,0 +1,28 @@
```zig title="Zig"
const std = @import("std");
const kreuzberg = @import("kreuzberg");
/// Extracts `document.pdf` and prints the resulting JSON; on failure, writes a
/// message for the error category to stderr, followed by any extra context
/// the binding reports, and returns without printing a result.
pub fn main() !void {
    const extraction_json = kreuzberg.extract_file_sync("document.pdf", null, "{}") catch |err| {
        const err_out = std.io.getStdErr().writer();

        // Map the recognised error categories to fixed messages; anything
        // else falls through to a generic line carrying the error name.
        const known_message: ?[]const u8 = switch (err) {
            error.Io => "File error\n",
            error.UnsupportedFormat => "Unsupported format\n",
            error.Parsing => "Corrupt or invalid document\n",
            error.MissingDependency => "Missing dependency — install required backend\n",
            error.Ocr => "OCR processing failed\n",
            error.OutOfMemory => "Out of memory\n",
            else => null,
        };
        if (known_message) |message| {
            try err_out.writeAll(message);
        } else {
            try err_out.print("Extraction failed: {s}\n", .{@errorName(err)});
        }

        // Print additional context when the binding has some to offer.
        if (kreuzberg._last_error()) |detail| {
            try err_out.print(" context: {s}\n", .{detail});
        }
        return;
    };
    // The returned buffer is released through the C allocator once printing is done.
    defer std.heap.c_allocator.free(extraction_json);

    try std.io.getStdOut().writer().print("{s}\n", .{extraction_json});
}
```

View File

@@ -0,0 +1,33 @@
```zig title="Zig"
const std = @import("std");
const kreuzberg = @import("kreuzberg");
/// Runs a synchronous extraction over `bytes`, declared as `mime_type`, by
/// forwarding to `kreuzberg.extract_bytes_sync` with an empty JSON object as
/// the configuration. The binding's result or error is returned unchanged.
fn extract_text(bytes: []const u8, mime_type: []const u8) ![]u8 {
    return kreuzberg.extract_bytes_sync(bytes, mime_type, "{}");
}
/// Reads `document.pdf`, extracts it via `extract_text`, and prints the size of
/// the resulting JSON. Failures at either step are reported on stderr and end
/// the program without printing a result.
pub fn main() !void {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa.deinit();
    const allocator = gpa.allocator();
    const stderr = std.io.getStdErr().writer();

    // Report a failed read explicitly. Falling back to an empty buffer would
    // hide the real cause (missing file, permissions, out of memory) behind
    // whatever error the extractor raises for empty input.
    const bytes = std.fs.cwd().readFileAlloc(allocator, "document.pdf", 64 * 1024 * 1024) catch |err| {
        try stderr.print("Failed to read document.pdf: {s}\n", .{@errorName(err)});
        return;
    };
    defer allocator.free(bytes);

    const result_json = extract_text(bytes, "application/pdf") catch |err| {
        // Give the common extraction failures a dedicated message; everything
        // else is reported by error name.
        switch (err) {
            error.UnsupportedFormat => try stderr.print("Format not supported\n", .{}),
            error.Ocr => try stderr.print("OCR failed\n", .{}),
            error.Validation => try stderr.print("Invalid input or configuration\n", .{}),
            else => try stderr.print("Error: {s}\n", .{@errorName(err)}),
        }
        return;
    };
    // The extraction result is released through the C allocator, not `allocator`.
    defer std.heap.c_allocator.free(result_json);

    const stdout = std.io.getStdOut().writer();
    try stdout.print("Extracted {d} bytes of JSON\n", .{result_json.len});
}
```

View File

@@ -0,0 +1,25 @@
<!-- snippet:syntax-only -->
```zig title="Zig"
const std = @import("std");
const kreuzberg = @import("kreuzberg");
// Note: the Zig binding is sync-only. There is no `extract_bytes` async variant —
// the FFI surface exposes blocking entry points that internally drive the global
// Tokio runtime. Use `extract_bytes_sync` from any thread.
pub fn main() !void {
    var gpa_state = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa_state.deinit();
    const gpa = gpa_state.allocator();

    // Load the document into memory (capped at 64 MiB).
    const pdf_bytes = try std.fs.cwd().readFileAlloc(gpa, "document.pdf", 64 * 1024 * 1024);
    defer gpa.free(pdf_bytes);

    // Blocking extraction call; an empty JSON object is passed as the config.
    const extraction_json = try kreuzberg.extract_bytes_sync(pdf_bytes, "application/pdf", "{}");
    // The result is released through the C allocator, not `gpa`.
    defer std.heap.c_allocator.free(extraction_json);

    try std.io.getStdOut().writer().print("{s}\n", .{extraction_json});
}
```

View File

@@ -0,0 +1,20 @@
```zig title="Zig"
const std = @import("std");
const kreuzberg = @import("kreuzberg");
/// Reads `document.pdf` into memory, extracts it from the in-memory bytes, and
/// prints the resulting JSON to stdout.
pub fn main() !void {
    var gpa_state = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa_state.deinit();
    const gpa = gpa_state.allocator();

    // Load the document into memory (capped at 64 MiB).
    const pdf_bytes = try std.fs.cwd().readFileAlloc(gpa, "document.pdf", 64 * 1024 * 1024);
    defer gpa.free(pdf_bytes);

    // An empty JSON object is passed as the configuration document.
    const extraction_json = try kreuzberg.extract_bytes_sync(pdf_bytes, "application/pdf", "{}");
    // The result is released through the C allocator, not `gpa`.
    defer std.heap.c_allocator.free(extraction_json);

    try std.io.getStdOut().writer().print("{s}\n", .{extraction_json});
}
```

View File

@@ -0,0 +1,18 @@
<!-- snippet:syntax-only -->
```zig title="Zig"
const std = @import("std");
const kreuzberg = @import("kreuzberg");
// Note: the Zig binding is sync-only. There is no `extract_file` async variant —
// the FFI surface exposes blocking entry points that internally drive the global
// Tokio runtime. Use `extract_file_sync` from any thread.
pub fn main() !void {
    // Blocking extraction straight from a path; the second argument is passed
    // as null and an empty JSON object is passed as the configuration.
    const extraction_json = try kreuzberg.extract_file_sync("document.pdf", null, "{}");
    // The returned buffer is released through the C allocator once printing is done.
    defer std.heap.c_allocator.free(extraction_json);

    try std.io.getStdOut().writer().print("{s}\n", .{extraction_json});
}
```

View File

@@ -0,0 +1,20 @@
```zig title="Zig"
const std = @import("std");
const kreuzberg = @import("kreuzberg");
/// Extracts `document.pdf`, copies the resulting JSON into memory owned by the
/// program's own allocator, and prints that copy to stdout.
pub fn main() !void {
    var gpa_state = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa_state.deinit();
    const gpa = gpa_state.allocator();

    // An empty JSON object is passed as the configuration document.
    const ffi_json = try kreuzberg.extract_file_sync("document.pdf", null, "{}");
    // The binding's buffer is released through the C allocator.
    defer std.heap.c_allocator.free(ffi_json);

    // Duplicate the result so the printed data lives in `gpa`-owned memory.
    const local_copy = try gpa.dupe(u8, ffi_json);
    defer gpa.free(local_copy);

    try std.io.getStdOut().writer().print("{s}\n", .{local_copy});
}
```