Nomad changes
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s

This commit is contained in:
Henrik Jess Nielsen
2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions

View File

@@ -0,0 +1,13 @@
```zig title="Zig"
const std = @import("std");
const kreuzberg = @import("kreuzberg");
/// Calls the three `kreuzberg.clear_*` routines in order and then prints a
/// confirmation line to stdout. Any error from a clear call or from the
/// write is propagated out of `main`.
pub fn main() !void {
    try kreuzberg.clear_ocr_backends();
    try kreuzberg.clear_post_processors();
    try kreuzberg.clear_validators();

    // Only reached when all three clear calls succeeded.
    try std.io.getStdOut().writer().print("All plugins cleared\n", .{});
}
```

View File

@@ -0,0 +1,41 @@
```zig title="Zig"
const std = @import("std");
const kreuzberg = @import("kreuzberg");
/// Skeleton embedding backend: reports a fixed vector width and leaves the
/// actual embedding step unimplemented.
const MyEmbedder = struct {
    /// Width of each embedding vector this backend claims to produce.
    pub fn dimensions(_: *MyEmbedder) usize {
        return 768;
    }

    /// `texts` is a JSON-encoded array of strings. A real implementation
    /// returns a JSON-encoded 2D float array of shape [n_texts, dimensions];
    /// the dispatcher validates the shape on the Rust side.
    ///
    /// This placeholder never produces a payload and always fails with
    /// `error.Plugin`.
    pub fn embed(_: *MyEmbedder, _: [*c]const u8) ![]u8 {
        return error.Plugin;
    }
};
/// Builds a vtable for `MyEmbedder`, overrides its name/version thunks, and
/// registers it under "my-embedder".
///
/// Returns `error.RegistrationFailed` (after logging the reason) when the
/// registration call reports a non-zero status, instead of silently
/// discarding the status and error message.
pub fn main() !void {
    var instance = MyEmbedder{};
    var vtable = kreuzberg.make_embedding_backend_vtable(MyEmbedder, &instance);

    // NOTE(review): both thunks hand out pointers to string literals via
    // @constCast; confirm the library neither frees nor mutates them.
    vtable.name_fn = struct {
        fn thunk(user_data: ?*anyopaque, out_name: ?*?[*c]u8) callconv(.C) void {
            _ = user_data;
            if (out_name) |ptr| ptr.* = @constCast("my-embedder");
        }
    }.thunk;
    vtable.version_fn = struct {
        fn thunk(user_data: ?*anyopaque, out_version: ?*?[*c]u8) callconv(.C) void {
            _ = user_data;
            if (out_version) |ptr| ptr.* = @constCast("1.0.0");
        }
    }.thunk;

    var out_error: ?[*c]u8 = null;
    const rc = kreuzberg.register_embedding_backend("my-embedder", vtable, &instance, &out_error);
    if (rc != 0) {
        // NOTE(review): ownership of the message buffer is not visible here;
        // release it with the library's string-free routine if required.
        if (out_error) |msg| {
            std.log.err("registering my-embedder failed: {s}", .{std.mem.sliceTo(msg, 0)});
        } else {
            std.log.err("registering my-embedder failed (rc={d})", .{rc});
        }
        return error.RegistrationFailed;
    }
}
```

View File

@@ -0,0 +1,175 @@
```zig title="Zig"
const std = @import("std");
const kreuzberg = @import("kreuzberg");
/// VTable struct for DocumentExtractor; mirrors KreuzbergDocumentExtractorVTable.
///
/// This is an `extern struct`, so field order and field types define the
/// C layout and must stay exactly as listed. Every slot is an optional
/// C-calling-convention function pointer that receives the opaque
/// `user_data` pointer as its first argument.
const DocumentExtractorVTable = extern struct {
    /// Shape shared by `name_fn` and `version_fn`: writes a string pointer
    /// through `out` and returns nothing.
    const StringFn = ?*const fn (user_data: ?*anyopaque, out: ?*?[*c]u8) callconv(.C) void;
    /// Shape shared by `initialize_fn` and `shutdown_fn`: returns an `i32`
    /// status and may write an error string through `out_error`.
    const LifecycleFn = ?*const fn (user_data: ?*anyopaque, out_error: ?*?[*c]u8) callconv(.C) i32;

    name_fn: StringFn,
    version_fn: StringFn,
    initialize_fn: LifecycleFn,
    shutdown_fn: LifecycleFn,
    /// Extracts from an in-memory buffer of `content_len` bytes.
    extract_bytes: ?*const fn (
        user_data: ?*anyopaque,
        content: [*c]const u8,
        content_len: usize,
        mime_type: [*c]const u8,
        config: [*c]const u8,
        out_result: ?*?[*c]u8,
        out_error: ?*?[*c]u8,
    ) callconv(.C) i32,
    /// Extracts from a file identified by `path`.
    extract_file: ?*const fn (
        user_data: ?*anyopaque,
        path: [*c]const u8,
        mime_type: [*c]const u8,
        config: [*c]const u8,
        out_result: ?*?[*c]u8,
        out_error: ?*?[*c]u8,
    ) callconv(.C) i32,
    supported_mime_types: ?*const fn (user_data: ?*anyopaque, out_result: ?*?[*c]u8) callconv(.C) i32,
    priority: ?*const fn (user_data: ?*anyopaque) callconv(.C) i32,
    can_handle: ?*const fn (
        user_data: ?*anyopaque,
        path: [*c]const u8,
        mime_type: [*c]const u8,
    ) callconv(.C) i32,
    as_sync_extractor: ?*const fn (user_data: ?*anyopaque) callconv(.C) i32,
    free_user_data: ?*const fn (user_data: ?*anyopaque) callconv(.C) void,
};
/// Foreign function imported from the `kreuzberg_ffi` library: registers the
/// extractor described by `vtable` under `name`, passing `user_data` along.
/// Returns an `i32` status (this file treats 0 as success) and may write an
/// error string through `out_error`.
extern "kreuzberg_ffi" fn kreuzberg_register_document_extractor(
name: [*c]const u8,
vtable: DocumentExtractorVTable,
user_data: ?*anyopaque,
out_error: ?*?[*c]u8,
) i32;
/// Foreign function imported from `kreuzberg_ffi`: unregisters the extractor
/// registered under `name`. Same status / `out_error` convention as above.
extern "kreuzberg_ffi" fn kreuzberg_unregister_document_extractor(
name: [*c]const u8,
out_error: ?*?[*c]u8,
) i32;
/// Foreign function imported from `kreuzberg_ffi`; `main` uses it to release
/// the error string the library wrote through `out_error`.
extern "kreuzberg_ffi" fn kreuzberg_free_string(ptr: [*c]u8) void;
// Implement callback functions for the extractor.

/// `extract_bytes` callback: accepts only the "application/json" MIME type.
///
/// On a match it writes a heap copy (C allocator, NUL-terminated) of a fixed
/// JSON payload through `out_result` and returns 0. For any other MIME type,
/// including a null `mime_type` pointer, it writes "Unsupported MIME type"
/// through `out_error` (when provided) and returns 1. Allocation failure also
/// returns 1.
///
/// Nothing is allocated when the corresponding out-pointer is null, so no
/// buffer can be orphaned.
fn extract_bytes_fn(
    user_data: ?*anyopaque,
    content: [*c]const u8,
    content_len: usize,
    mime_type: [*c]const u8,
    config: [*c]const u8,
    out_result: ?*?[*c]u8,
    out_error: ?*?[*c]u8,
) callconv(.C) i32 {
    _ = user_data;
    _ = content;
    _ = content_len;
    _ = config;

    // A null C pointer must not reach sliceTo, which would dereference it.
    const is_json = mime_type != null and
        std.mem.eql(u8, std.mem.sliceTo(mime_type, 0), "application/json");

    if (is_json) {
        // Same status as before when the caller supplies no result slot,
        // but without allocating a buffer nobody can free.
        const slot = out_result orelse return 0;
        const payload = std.heap.c_allocator.dupeZ(u8, "{\"content\": \"Extracted from JSON\"}") catch return 1;
        slot.* = payload.ptr;
        return 0;
    }

    if (out_error) |slot| {
        const message = std.heap.c_allocator.dupeZ(u8, "Unsupported MIME type") catch return 1;
        slot.* = message.ptr;
    }
    return 1;
}
/// `supported_mime_types` callback: writes a heap copy (C allocator,
/// NUL-terminated) of the JSON array `["application/json"]` through
/// `out_result`.
///
/// Returns 0 on success and 1 when the allocation fails. When `out_result`
/// is null nothing is allocated (previously the copy was made and then
/// orphaned) and 0 is still returned.
fn supported_mime_types_fn(user_data: ?*anyopaque, out_result: ?*?[*c]u8) callconv(.C) i32 {
    _ = user_data;
    const slot = out_result orelse return 0;
    const copy = std.heap.c_allocator.dupeZ(u8, "[\"application/json\"]") catch return 1;
    slot.* = copy.ptr;
    return 0;
}
/// `name_fn` callback: writes a heap copy (C allocator, NUL-terminated) of
/// "zig-json-extractor" through `out_name`.
///
/// Does nothing when `out_name` is null or the allocation fails. The
/// allocation result is unwrapped with `catch`, because `if` on an error
/// union without an `else |err|` branch does not compile.
fn name_fn(user_data: ?*anyopaque, out_name: ?*?[*c]u8) callconv(.C) void {
    _ = user_data;
    // Check the destination first so a copy is never made and then orphaned.
    const slot = out_name orelse return;
    const copy = std.heap.c_allocator.dupeZ(u8, "zig-json-extractor") catch return;
    slot.* = copy.ptr;
}
/// `version_fn` callback: writes a heap copy (C allocator, NUL-terminated) of
/// "0.1.0" through `out_version`.
///
/// Does nothing when `out_version` is null or the allocation fails. The
/// allocation result is unwrapped with `catch`, because `if` on an error
/// union without an `else |err|` branch does not compile.
fn version_fn(user_data: ?*anyopaque, out_version: ?*?[*c]u8) callconv(.C) void {
    _ = user_data;
    // Check the destination first so a copy is never made and then orphaned.
    const slot = out_version orelse return;
    const copy = std.heap.c_allocator.dupeZ(u8, "0.1.0") catch return;
    slot.* = copy.ptr;
}
/// `initialize_fn` callback: this extractor has nothing to set up, so both
/// arguments are ignored and 0 (success) is always returned.
fn initialize_fn(_: ?*anyopaque, _: ?*?[*c]u8) callconv(.C) i32 {
    return 0;
}
/// `shutdown_fn` callback: there is nothing to tear down, so both arguments
/// are ignored and 0 (success) is always returned.
fn shutdown_fn(_: ?*anyopaque, _: ?*?[*c]u8) callconv(.C) i32 {
    return 0;
}
/// `priority` callback: reports the constant 50, which this example treats
/// as the default priority.
fn priority_fn(_: ?*anyopaque) callconv(.C) i32 {
    return 50;
}
/// Registers the stateless "zig-json-extractor", reports the outcome on
/// stdout, and unregisters it again.
///
/// Both the register and the unregister step now report failure, with the
/// library's message when one was written and a fallback line otherwise.
/// Any error string left in `out_error` is released with
/// `kreuzberg_free_string` when `main` exits.
pub fn main() !void {
    const stdout = std.io.getStdOut().writer();

    var out_error: ?[*c]u8 = null;
    // Evaluated at scope exit, so it sees whatever the last FFI call stored.
    defer if (out_error) |ptr| kreuzberg_free_string(ptr);

    // Build the vtable. Slots left null are not implemented by this example.
    const vtable = DocumentExtractorVTable{
        .name_fn = name_fn,
        .version_fn = version_fn,
        .initialize_fn = initialize_fn,
        .shutdown_fn = shutdown_fn,
        .extract_bytes = extract_bytes_fn,
        .extract_file = null,
        .supported_mime_types = supported_mime_types_fn,
        .priority = priority_fn,
        .can_handle = null,
        .as_sync_extractor = null,
        .free_user_data = null,
    };

    // Register the extractor with null user_data (no state).
    const register_rc = kreuzberg_register_document_extractor(
        "zig-json-extractor",
        vtable,
        null,
        &out_error,
    );
    if (register_rc != 0) {
        if (out_error) |err_ptr| {
            try stdout.print("Registration failed: {s}\n", .{std.mem.sliceTo(err_ptr, 0)});
        } else {
            try stdout.print("Registration failed (no error message)\n", .{});
        }
        return;
    }
    try stdout.print("Successfully registered zig-json-extractor\n", .{});

    // Start the unregister call from a clean slot; release anything the
    // register call may have left behind instead of overwriting it.
    if (out_error) |stale| {
        kreuzberg_free_string(stale);
        out_error = null;
    }

    // Unregister the extractor when done.
    const unregister_rc = kreuzberg_unregister_document_extractor("zig-json-extractor", &out_error);
    if (unregister_rc == 0) {
        try stdout.print("Successfully unregistered zig-json-extractor\n", .{});
    } else if (out_error) |err_ptr| {
        try stdout.print("Unregistration failed: {s}\n", .{std.mem.sliceTo(err_ptr, 0)});
    } else {
        try stdout.print("Unregistration failed (no error message)\n", .{});
    }
}
```

View File

@@ -0,0 +1,20 @@
```zig title="Zig"
const std = @import("std");
const kreuzberg = @import("kreuzberg");
/// Fetches the three plugin listings from `kreuzberg` and prints each one on
/// its own line. Every listing is released with the C allocator when `main`
/// exits.
pub fn main() !void {
    const c_alloc = std.heap.c_allocator;

    const ocr_listing = try kreuzberg.list_ocr_backends();
    defer c_alloc.free(ocr_listing);

    const post_processor_listing = try kreuzberg.list_post_processors();
    defer c_alloc.free(post_processor_listing);

    const validator_listing = try kreuzberg.list_validators();
    defer c_alloc.free(validator_listing);

    // Printing starts only after all three listings were obtained.
    const out = std.io.getStdOut().writer();
    try out.print("OCR backends: {s}\n", .{ocr_listing});
    try out.print("Post-processors: {s}\n", .{post_processor_listing});
    try out.print("Validators: {s}\n", .{validator_listing});
}
```

View File

@@ -0,0 +1,47 @@
```zig title="Zig"
const std = @import("std");
const kreuzberg = @import("kreuzberg");
/// Validator that rejects results whose NUL-terminated payload is shorter
/// than `min_length` bytes.
const MinLengthValidator = struct {
    /// Minimum accepted payload length, in bytes.
    min_length: usize,

    /// Fails with `error.Validation` when `result` (read up to its NUL
    /// terminator) is shorter than `min_length`. `config` is ignored.
    pub fn validate(self: *MinLengthValidator, result: [*c]const u8, _: [*c]const u8) !void {
        const payload = std.mem.sliceTo(result, 0);
        if (payload.len >= self.min_length) return;
        return error.Validation;
    }

    /// Always 1: this validator applies to every result.
    pub fn should_validate(_: *MinLengthValidator, _: [*c]const u8, _: [*c]const u8) i32 {
        return 1;
    }

    /// Fixed priority value of 100.
    pub fn priority(_: *MinLengthValidator) i32 {
        return 100;
    }
};
/// Builds a vtable for a `MinLengthValidator` with a 50-byte minimum,
/// overrides its name/version thunks, and registers it as
/// "min-length-validator".
///
/// Returns `error.RegistrationFailed` (after logging the reason) when the
/// registration call reports a non-zero status, instead of silently
/// discarding the status and error message.
pub fn main() !void {
    var instance = MinLengthValidator{ .min_length = 50 };
    var vtable = kreuzberg.make_validator_vtable(MinLengthValidator, &instance);

    // NOTE(review): both thunks hand out pointers to string literals via
    // @constCast; confirm the library neither frees nor mutates them.
    vtable.name_fn = struct {
        fn thunk(user_data: ?*anyopaque, out_name: ?*?[*c]u8) callconv(.C) void {
            _ = user_data;
            if (out_name) |ptr| ptr.* = @constCast("min-length-validator");
        }
    }.thunk;
    vtable.version_fn = struct {
        fn thunk(user_data: ?*anyopaque, out_version: ?*?[*c]u8) callconv(.C) void {
            _ = user_data;
            if (out_version) |ptr| ptr.* = @constCast("1.0.0");
        }
    }.thunk;

    var out_error: ?[*c]u8 = null;
    const rc = kreuzberg.register_validator("min-length-validator", vtable, &instance, &out_error);
    if (rc != 0) {
        // NOTE(review): ownership of the message buffer is not visible here;
        // release it with the library's string-free routine if required.
        if (out_error) |msg| {
            std.log.err("registering min-length-validator failed: {s}", .{std.mem.sliceTo(msg, 0)});
        } else {
            std.log.err("registering min-length-validator failed (rc={d})", .{rc});
        }
        return error.RegistrationFailed;
    }
}
```

View File

@@ -0,0 +1,60 @@
```zig title="Zig"
const std = @import("std");
const kreuzberg = @import("kreuzberg");
/// Post-processor skeleton that counts its invocations and opts in only for
/// results whose serialized form contains the PDF MIME-type marker.
const PdfMetadataExtractor = struct {
    /// Number of times `process` has run. Incremented without
    /// synchronization.
    processed_count: u64 = 0,

    /// Bumps `processed_count`; both arguments are ignored.
    /// Intended extension point: parse the incoming JSON result, append
    /// PDF-specific metadata fields, and forward the enriched payload onward.
    pub fn process(self: *PdfMetadataExtractor, _: [*c]const u8, _: [*c]const u8) !void {
        self.processed_count += 1;
    }

    /// Stage label reported for this processor.
    pub fn processing_stage(_: *PdfMetadataExtractor) [*c]const u8 {
        return "Early";
    }

    /// Returns 1 when the NUL-terminated `result` contains the exact
    /// substring `"mime_type":"application/pdf"` (no whitespace after the
    /// colon), 0 otherwise. `config` is ignored.
    pub fn should_process(_: *PdfMetadataExtractor, result: [*c]const u8, _: [*c]const u8) i32 {
        const payload = std.mem.sliceTo(result, 0);
        const marker_at = std.mem.indexOf(u8, payload, "\"mime_type\":\"application/pdf\"");
        return @intFromBool(marker_at != null);
    }

    /// Constant duration estimate of 2 ms.
    pub fn estimated_duration_ms(_: *PdfMetadataExtractor, _: [*c]const u8) u64 {
        return 2;
    }

    /// Fixed priority value of 80.
    pub fn priority(_: *PdfMetadataExtractor) i32 {
        return 80;
    }
};
/// Builds a vtable for `PdfMetadataExtractor`, overrides its name/version
/// thunks, and registers it as "pdf-metadata-extractor".
///
/// Returns `error.RegistrationFailed` (after logging the reason) when the
/// registration call reports a non-zero status, instead of silently
/// discarding the status and error message.
pub fn main() !void {
    var instance = PdfMetadataExtractor{};
    var vtable = kreuzberg.make_post_processor_vtable(PdfMetadataExtractor, &instance);

    // NOTE(review): both thunks hand out pointers to string literals via
    // @constCast; confirm the library neither frees nor mutates them.
    vtable.name_fn = struct {
        fn thunk(user_data: ?*anyopaque, out_name: ?*?[*c]u8) callconv(.C) void {
            _ = user_data;
            if (out_name) |ptr| ptr.* = @constCast("pdf-metadata-extractor");
        }
    }.thunk;
    vtable.version_fn = struct {
        fn thunk(user_data: ?*anyopaque, out_version: ?*?[*c]u8) callconv(.C) void {
            _ = user_data;
            if (out_version) |ptr| ptr.* = @constCast("1.0.0");
        }
    }.thunk;

    var out_error: ?[*c]u8 = null;
    const rc = kreuzberg.register_post_processor("pdf-metadata-extractor", vtable, &instance, &out_error);
    if (rc != 0) {
        // NOTE(review): ownership of the message buffer is not visible here;
        // release it with the library's string-free routine if required.
        if (out_error) |msg| {
            std.log.err("registering pdf-metadata-extractor failed: {s}", .{std.mem.sliceTo(msg, 0)});
        } else {
            std.log.err("registering pdf-metadata-extractor failed (rc={d})", .{rc});
        }
        return error.RegistrationFailed;
    }
}
```

View File

@@ -0,0 +1,58 @@
```zig title="Zig"
const std = @import("std");
const kreuzberg = @import("kreuzberg");
/// Post-processor skeleton that opts in only for results whose serialized
/// form contains the PDF MIME-type marker.
const PdfOnlyProcessor = struct {
    /// No-op placeholder; all arguments are ignored.
    /// PDF-specific transforms go here: parse the JSON result, mutate
    /// metadata/content, and forward through the pipeline.
    pub fn process(_: *PdfOnlyProcessor, _: [*c]const u8, _: [*c]const u8) !void {}

    /// Stage label reported for this processor.
    pub fn processing_stage(_: *PdfOnlyProcessor) [*c]const u8 {
        return "Middle";
    }

    /// Returns 1 when the NUL-terminated `result` contains the exact
    /// substring `"mime_type":"application/pdf"`, 0 otherwise. `config` is
    /// ignored.
    pub fn should_process(_: *PdfOnlyProcessor, result: [*c]const u8, _: [*c]const u8) i32 {
        const needle = "\"mime_type\":\"application/pdf\"";
        if (std.mem.indexOf(u8, std.mem.sliceTo(result, 0), needle)) |_| {
            return 1;
        }
        return 0;
    }

    /// Constant duration estimate of 5 ms.
    pub fn estimated_duration_ms(_: *PdfOnlyProcessor, _: [*c]const u8) u64 {
        return 5;
    }

    /// Fixed priority value of 70.
    pub fn priority(_: *PdfOnlyProcessor) i32 {
        return 70;
    }
};
/// Builds a vtable for `PdfOnlyProcessor`, overrides its name/version
/// thunks, and registers it as "pdf-only-processor".
///
/// Returns `error.RegistrationFailed` (after logging the reason) when the
/// registration call reports a non-zero status, instead of silently
/// discarding the status and error message.
pub fn main() !void {
    var instance = PdfOnlyProcessor{};
    var vtable = kreuzberg.make_post_processor_vtable(PdfOnlyProcessor, &instance);

    // NOTE(review): both thunks hand out pointers to string literals via
    // @constCast; confirm the library neither frees nor mutates them.
    vtable.name_fn = struct {
        fn thunk(user_data: ?*anyopaque, out_name: ?*?[*c]u8) callconv(.C) void {
            _ = user_data;
            if (out_name) |ptr| ptr.* = @constCast("pdf-only-processor");
        }
    }.thunk;
    vtable.version_fn = struct {
        fn thunk(user_data: ?*anyopaque, out_version: ?*?[*c]u8) callconv(.C) void {
            _ = user_data;
            if (out_version) |ptr| ptr.* = @constCast("1.0.0");
        }
    }.thunk;

    var out_error: ?[*c]u8 = null;
    const rc = kreuzberg.register_post_processor("pdf-only-processor", vtable, &instance, &out_error);
    if (rc != 0) {
        // NOTE(review): ownership of the message buffer is not visible here;
        // release it with the library's string-free routine if required.
        if (out_error) |msg| {
            std.log.err("registering pdf-only-processor failed: {s}", .{std.mem.sliceTo(msg, 0)});
        } else {
            std.log.err("registering pdf-only-processor failed (rc={d})", .{rc});
        }
        return error.RegistrationFailed;
    }
}
```

View File

@@ -0,0 +1,197 @@
```zig title="Zig"
const std = @import("std");
const kreuzberg = @import("kreuzberg");
/// Mirrors KreuzbergDocumentExtractorVTable from the C FFI.
///
/// This is an `extern struct`, so field order and field types define the
/// C layout and must stay exactly as listed. Every slot is an optional
/// C-calling-convention function pointer that receives the opaque
/// `user_data` pointer as its first argument.
const DocumentExtractorVTable = extern struct {
    /// Shape shared by `name_fn` and `version_fn`: writes a string pointer
    /// through `out` and returns nothing.
    const StringFn = ?*const fn (user_data: ?*anyopaque, out: ?*?[*c]u8) callconv(.C) void;
    /// Shape shared by `initialize_fn` and `shutdown_fn`: returns an `i32`
    /// status and may write an error string through `out_error`.
    const LifecycleFn = ?*const fn (user_data: ?*anyopaque, out_error: ?*?[*c]u8) callconv(.C) i32;

    name_fn: StringFn,
    version_fn: StringFn,
    initialize_fn: LifecycleFn,
    shutdown_fn: LifecycleFn,
    /// Extracts from an in-memory buffer of `content_len` bytes.
    extract_bytes: ?*const fn (
        user_data: ?*anyopaque,
        content: [*c]const u8,
        content_len: usize,
        mime_type: [*c]const u8,
        config: [*c]const u8,
        out_result: ?*?[*c]u8,
        out_error: ?*?[*c]u8,
    ) callconv(.C) i32,
    /// Extracts from a file identified by `path`.
    extract_file: ?*const fn (
        user_data: ?*anyopaque,
        path: [*c]const u8,
        mime_type: [*c]const u8,
        config: [*c]const u8,
        out_result: ?*?[*c]u8,
        out_error: ?*?[*c]u8,
    ) callconv(.C) i32,
    supported_mime_types: ?*const fn (user_data: ?*anyopaque, out_result: ?*?[*c]u8) callconv(.C) i32,
    priority: ?*const fn (user_data: ?*anyopaque) callconv(.C) i32,
    can_handle: ?*const fn (
        user_data: ?*anyopaque,
        path: [*c]const u8,
        mime_type: [*c]const u8,
    ) callconv(.C) i32,
    as_sync_extractor: ?*const fn (user_data: ?*anyopaque) callconv(.C) i32,
    /// Slot this example fills with its state-cleanup callback.
    free_user_data: ?*const fn (user_data: ?*anyopaque) callconv(.C) void,
};
/// Simple state struct for the extractor instance. A pointer to one of these
/// travels through the FFI as the opaque `user_data` argument.
const SimpleExtractorState = struct {
    /// NUL-terminated format label; `main` stores a heap copy of "custom".
    source_format: [:0]const u8,
    /// NUL-terminated JSON array of MIME types, handed back verbatim by the
    /// supported-MIME-types callback.
    supported_mimes: [:0]const u8,
};
/// Foreign function imported from the `kreuzberg_ffi` library: registers the
/// extractor described by `vtable` under `name`, passing `user_data` along.
/// Returns an `i32` status (`main` treats 0 as success) and may write an
/// error string through `out_error`.
extern "kreuzberg_ffi" fn kreuzberg_register_document_extractor(
name: [*c]const u8,
vtable: DocumentExtractorVTable,
user_data: ?*anyopaque,
out_error: ?*?[*c]u8,
) i32;
/// Foreign function imported from `kreuzberg_ffi`; `main` uses it to release
/// the error string the library wrote through `out_error`.
extern "kreuzberg_ffi" fn kreuzberg_free_string(ptr: [*c]u8) void;
// Callbacks for the custom extractor.

/// Escapes `bytes` for use inside a JSON string literal and returns the
/// escaped length. With `dest == null` it only measures; otherwise it writes
/// into `dest`, which must hold at least the measured length.
/// Bytes >= 0x80 are copied verbatim (assumes the content is already UTF-8).
fn jsonEscape(bytes: []const u8, dest: ?[]u8) usize {
    const hex = "0123456789abcdef";
    var used: usize = 0;
    for (bytes) |byte| {
        const short: ?u8 = switch (byte) {
            '"' => '"',
            '\\' => '\\',
            '\n' => 'n',
            '\r' => 'r',
            '\t' => 't',
            else => null,
        };
        var buf: [6]u8 = undefined;
        const piece: []const u8 = if (short) |c| blk: {
            buf[0] = '\\';
            buf[1] = c;
            break :blk buf[0..2];
        } else if (byte < 0x20) blk: {
            // Remaining control characters need the \u00XX form.
            buf = .{ '\\', 'u', '0', '0', hex[byte >> 4], hex[byte & 0x0f] };
            break :blk buf[0..6];
        } else blk: {
            buf[0] = byte;
            break :blk buf[0..1];
        };
        if (dest) |out| @memcpy(out[used..][0..piece.len], piece);
        used += piece.len;
    }
    return used;
}

/// `extract_bytes` callback: wraps the input bytes in a JSON object of the
/// form `{"content": "<content>", "mime_type": "application/octet-stream"}`.
///
/// The content is JSON-escaped, so quotes, backslashes and control
/// characters no longer produce a malformed document. The payload is built
/// directly in a single NUL-terminated C-allocator buffer that is written
/// through `out_result`.
///
/// Returns 0 on success. On allocation failure returns 1 and, when possible,
/// writes "OOM allocating result" through `out_error`. A null `content`
/// pointer is treated as empty input. `user_data`, the MIME type and the
/// config are not used.
fn extract_bytes_impl(
    user_data: ?*anyopaque,
    content: [*c]const u8,
    content_len: usize,
    _: [*c]const u8,
    _: [*c]const u8,
    out_result: ?*?[*c]u8,
    out_error: ?*?[*c]u8,
) callconv(.C) i32 {
    // The state is not needed here; skipping the cast also avoids a trap
    // when the host passes a null user_data pointer.
    _ = user_data;

    // Nothing to deliver into: keep the success status without allocating.
    const slot = out_result orelse return 0;

    const prefix = "{\"content\": \"";
    const suffix = "\", \"mime_type\": \"application/octet-stream\"}";
    const body: []const u8 = if (content != null) content[0..content_len] else "";

    const escaped_len = jsonEscape(body, null);
    const total = prefix.len + escaped_len + suffix.len;
    const json = std.heap.c_allocator.allocSentinel(u8, total, 0) catch {
        if (out_error) |err_slot| {
            const message = std.heap.c_allocator.dupeZ(u8, "OOM allocating result") catch return 1;
            err_slot.* = message.ptr;
        }
        return 1;
    };

    @memcpy(json[0..prefix.len], prefix);
    _ = jsonEscape(body, json[prefix.len..][0..escaped_len]);
    @memcpy(json[prefix.len + escaped_len ..][0..suffix.len], suffix);

    slot.* = json.ptr;
    return 0;
}
/// `supported_mime_types` callback: writes a heap copy (C allocator,
/// NUL-terminated) of the state's `supported_mimes` string through
/// `out_result`.
///
/// Returns 0 on success, and 1 when `user_data` is null or the allocation
/// fails. When `out_result` is null nothing is allocated (previously the
/// copy was made and then orphaned) and 0 is still returned.
fn supported_mimes_impl(user_data: ?*anyopaque, out_result: ?*?[*c]u8) callconv(.C) i32 {
    // Without state there is no list to report; do not cast a null pointer.
    const raw = user_data orelse return 1;
    const slot = out_result orelse return 0;

    const state: *SimpleExtractorState = @ptrCast(@alignCast(raw));
    const copy = std.heap.c_allocator.dupeZ(u8, state.supported_mimes) catch return 1;
    slot.* = copy.ptr;
    return 0;
}
/// `name_fn` callback: writes a heap copy (C allocator, NUL-terminated) of
/// "zig-simple-extractor" through `out_name`.
///
/// Does nothing when `out_name` is null or the allocation fails. The
/// allocation result is unwrapped with `catch`, because `if` on an error
/// union without an `else |err|` branch does not compile.
fn name_impl(user_data: ?*anyopaque, out_name: ?*?[*c]u8) callconv(.C) void {
    _ = user_data;
    // Check the destination first so a copy is never made and then orphaned.
    const slot = out_name orelse return;
    const copy = std.heap.c_allocator.dupeZ(u8, "zig-simple-extractor") catch return;
    slot.* = copy.ptr;
}
/// `version_fn` callback: writes a heap copy (C allocator, NUL-terminated) of
/// "0.1.0" through `out_version`.
///
/// Does nothing when `out_version` is null or the allocation fails. The
/// allocation result is unwrapped with `catch`, because `if` on an error
/// union without an `else |err|` branch does not compile.
fn version_impl(user_data: ?*anyopaque, out_version: ?*?[*c]u8) callconv(.C) void {
    _ = user_data;
    // Check the destination first so a copy is never made and then orphaned.
    const slot = out_version orelse return;
    const copy = std.heap.c_allocator.dupeZ(u8, "0.1.0") catch return;
    slot.* = copy.ptr;
}
/// `initialize_fn` callback: no setup is required, so both arguments are
/// ignored and 0 is always returned.
fn init_impl(_: ?*anyopaque, _: ?*?[*c]u8) callconv(.C) i32 {
    return 0;
}
/// `shutdown_fn` callback: no teardown is required, so both arguments are
/// ignored and 0 is always returned.
fn shutdown_impl(_: ?*anyopaque, _: ?*?[*c]u8) callconv(.C) i32 {
    return 0;
}
/// `priority` callback: reports the constant 60, which this example
/// describes as higher than the default.
fn priority_impl(_: ?*anyopaque) callconv(.C) i32 {
    return 60;
}
/// `free_user_data` callback: releases a `SimpleExtractorState` that was
/// created with the C allocator, including both strings it owns.
///
/// `source_format` is now freed as well (it is heap-allocated alongside
/// `supported_mimes` in `main` and was previously leaked). A null
/// `user_data` is ignored rather than cast.
fn cleanup_state(user_data: ?*anyopaque) callconv(.C) void {
    const raw = user_data orelse return;
    const state: *SimpleExtractorState = @ptrCast(@alignCast(raw));

    const allocator = std.heap.c_allocator;
    allocator.free(state.source_format);
    allocator.free(state.supported_mimes);
    allocator.destroy(state);
}
/// Allocates a fully initialised `SimpleExtractorState` with the C
/// allocator. If any allocation fails, everything allocated so far is
/// released before the error is returned. On success the caller owns the
/// state and its two strings.
fn createExtractorState() !*SimpleExtractorState {
    const allocator = std.heap.c_allocator;

    const state = try allocator.create(SimpleExtractorState);
    errdefer allocator.destroy(state);

    const source_format = try allocator.dupeZ(u8, "custom");
    errdefer allocator.free(source_format);

    const supported_mimes = try allocator.dupeZ(u8, "[\"application/octet-stream\"]");

    state.* = .{ .source_format = source_format, .supported_mimes = supported_mimes };
    return state;
}

/// Creates the heap state, registers "zig-simple-extractor" with
/// `cleanup_state` as its `free_user_data` callback, and prints the outcome
/// on stdout.
pub fn main() !void {
    // Create extractor state on the heap. The errdefers live in the helper
    // so they cannot fire after the pointer has been handed to the library.
    const state = try createExtractorState();
    // NOTE(review): whether the library invokes free_user_data when
    // registration fails is not visible here; confirm before freeing
    // `state` on the failure path.

    var out_error: ?[*c]u8 = null;
    defer if (out_error) |ptr| kreuzberg_free_string(ptr);

    // Build and register the vtable.
    const vtable = DocumentExtractorVTable{
        .name_fn = name_impl,
        .version_fn = version_impl,
        .initialize_fn = init_impl,
        .shutdown_fn = shutdown_impl,
        .extract_bytes = extract_bytes_impl,
        .extract_file = null,
        .supported_mime_types = supported_mimes_impl,
        .priority = priority_impl,
        .can_handle = null,
        .as_sync_extractor = null,
        .free_user_data = cleanup_state,
    };
    const rc = kreuzberg_register_document_extractor(
        "zig-simple-extractor",
        vtable,
        state,
        &out_error,
    );

    const stdout = std.io.getStdOut().writer();
    if (rc == 0) {
        try stdout.print("Registered zig-simple-extractor with custom state\n", .{});
    } else if (out_error) |err_ptr| {
        try stdout.print("Registration failed: {s}\n", .{std.mem.sliceTo(err_ptr, 0)});
    } else {
        try stdout.print("Registration failed (no error message)\n", .{});
    }
}
```

View File

@@ -0,0 +1,73 @@
```zig title="Zig"
const std = @import("std");
const kreuzberg = @import("kreuzberg");
/// Post-processor that only logs the size of each payload it is handed.
const LoggingProcessor = struct {
    /// Logs the byte length of the NUL-terminated `result` at info level.
    /// `config` is ignored.
    pub fn process(_: *LoggingProcessor, result: [*c]const u8, _: [*c]const u8) !void {
        const payload_len = std.mem.sliceTo(result, 0).len;
        std.log.info("post-processor invoked, payload bytes={d}", .{payload_len});
    }

    /// Stage label reported for this processor.
    pub fn processing_stage(_: *LoggingProcessor) [*c]const u8 {
        return "Late";
    }

    /// Always 1: every result is logged.
    pub fn should_process(_: *LoggingProcessor, _: [*c]const u8, _: [*c]const u8) i32 {
        return 1;
    }

    /// Constant duration estimate of 0 ms.
    pub fn estimated_duration_ms(_: *LoggingProcessor, _: [*c]const u8) u64 {
        return 0;
    }

    /// Fixed priority value of 10.
    pub fn priority(_: *LoggingProcessor) i32 {
        return 10;
    }
};
/// Builds a vtable for `LoggingProcessor`, overrides its name, version,
/// initialize and shutdown thunks, and registers it as "logging-processor".
///
/// Returns `error.RegistrationFailed` (after logging the reason) when the
/// registration call reports a non-zero status, instead of silently
/// discarding the status and error message.
pub fn main() !void {
    var instance = LoggingProcessor{};
    var vtable = kreuzberg.make_post_processor_vtable(LoggingProcessor, &instance);

    // NOTE(review): the name/version thunks hand out pointers to string
    // literals via @constCast; confirm the library neither frees nor
    // mutates them.
    vtable.name_fn = struct {
        fn thunk(user_data: ?*anyopaque, out_name: ?*?[*c]u8) callconv(.C) void {
            _ = user_data;
            if (out_name) |ptr| ptr.* = @constCast("logging-processor");
        }
    }.thunk;
    vtable.version_fn = struct {
        fn thunk(user_data: ?*anyopaque, out_version: ?*?[*c]u8) callconv(.C) void {
            _ = user_data;
            if (out_version) |ptr| ptr.* = @constCast("1.0.0");
        }
    }.thunk;
    // Lifecycle hooks only log and always return 0.
    vtable.initialize_fn = struct {
        fn thunk(user_data: ?*anyopaque, out_error: ?*?[*c]u8) callconv(.C) i32 {
            _ = user_data;
            _ = out_error;
            std.log.info("logging-processor initialised", .{});
            return 0;
        }
    }.thunk;
    vtable.shutdown_fn = struct {
        fn thunk(user_data: ?*anyopaque, out_error: ?*?[*c]u8) callconv(.C) i32 {
            _ = user_data;
            _ = out_error;
            std.log.info("logging-processor shut down", .{});
            return 0;
        }
    }.thunk;

    var out_error: ?[*c]u8 = null;
    const rc = kreuzberg.register_post_processor("logging-processor", vtable, &instance, &out_error);
    if (rc != 0) {
        // NOTE(review): ownership of the message buffer is not visible here;
        // release it with the library's string-free routine if required.
        if (out_error) |msg| {
            std.log.err("registering logging-processor failed: {s}", .{std.mem.sliceTo(msg, 0)});
        } else {
            std.log.err("registering logging-processor failed (rc={d})", .{rc});
        }
        return error.RegistrationFailed;
    }
}
```

View File

@@ -0,0 +1,52 @@
```zig title="Zig"
const std = @import("std");
const testing = std.testing;
const kreuzberg = @import("kreuzberg");
/// Validator that accepts everything; used as a fixture for the
/// register/unregister round-trip test.
const NoopValidator = struct {
    /// Accepts any result; all arguments are ignored.
    pub fn validate(_: *NoopValidator, _: [*c]const u8, _: [*c]const u8) !void {}

    /// Always 1.
    pub fn should_validate(_: *NoopValidator, _: [*c]const u8, _: [*c]const u8) i32 {
        return 1;
    }

    /// Fixed priority value of 50.
    pub fn priority(_: *NoopValidator) i32 {
        return 50;
    }
};
// Round trip: register the no-op validator, confirm its name shows up in the
// validator listing, then unregister it. Each FFI call must return 0.
test "register and unregister validator" {
    // Identity thunks kept together so the test body reads top to bottom.
    const Thunks = struct {
        fn name(_: ?*anyopaque, out_name: ?*?[*c]u8) callconv(.C) void {
            const slot = out_name orelse return;
            slot.* = @constCast("noop-validator");
        }

        fn version(_: ?*anyopaque, out_version: ?*?[*c]u8) callconv(.C) void {
            const slot = out_version orelse return;
            slot.* = @constCast("1.0.0");
        }
    };

    var instance = NoopValidator{};
    var vtable = kreuzberg.make_validator_vtable(NoopValidator, &instance);
    vtable.name_fn = Thunks.name;
    vtable.version_fn = Thunks.version;

    var out_error: ?[*c]u8 = null;
    const register_rc = kreuzberg.register_validator("noop-validator", vtable, &instance, &out_error);
    try testing.expectEqual(@as(i32, 0), register_rc);

    const listing = try kreuzberg.list_validators();
    defer std.heap.c_allocator.free(listing);
    const listed = std.mem.indexOf(u8, listing, "noop-validator") != null;
    try testing.expect(listed);

    const unregister_rc = kreuzberg.unregister_validator("noop-validator", &out_error);
    try testing.expectEqual(@as(i32, 0), unregister_rc);
}
```

View File

@@ -0,0 +1,46 @@
```zig title="Zig"
const std = @import("std");
const kreuzberg = @import("kreuzberg");
/// Validator that rejects results whose NUL-terminated payload is empty.
const ContentValidator = struct {
    /// Fails with `error.Validation` when `result` has zero bytes before its
    /// NUL terminator. `config` is ignored.
    pub fn validate(_: *ContentValidator, result: [*c]const u8, _: [*c]const u8) !void {
        const payload = std.mem.sliceTo(result, 0);
        if (payload.len != 0) return;
        return error.Validation;
    }

    /// Always 1: this validator applies to every result.
    pub fn should_validate(_: *ContentValidator, _: [*c]const u8, _: [*c]const u8) i32 {
        return 1;
    }

    /// Fixed priority value of 50.
    pub fn priority(_: *ContentValidator) i32 {
        return 50;
    }
};
/// Builds a vtable for `ContentValidator`, overrides its name/version
/// thunks, and registers it as "content-validator".
///
/// Returns `error.RegistrationFailed` (after logging the reason) when the
/// registration call reports a non-zero status, instead of silently
/// discarding the status and error message.
pub fn main() !void {
    var instance = ContentValidator{};
    var vtable = kreuzberg.make_validator_vtable(ContentValidator, &instance);

    // NOTE(review): both thunks hand out pointers to string literals via
    // @constCast; confirm the library neither frees nor mutates them.
    vtable.name_fn = struct {
        fn thunk(user_data: ?*anyopaque, out_name: ?*?[*c]u8) callconv(.C) void {
            _ = user_data;
            if (out_name) |ptr| ptr.* = @constCast("content-validator");
        }
    }.thunk;
    vtable.version_fn = struct {
        fn thunk(user_data: ?*anyopaque, out_version: ?*?[*c]u8) callconv(.C) void {
            _ = user_data;
            if (out_version) |ptr| ptr.* = @constCast("1.0.0");
        }
    }.thunk;

    var out_error: ?[*c]u8 = null;
    const rc = kreuzberg.register_validator("content-validator", vtable, &instance, &out_error);
    if (rc != 0) {
        // NOTE(review): ownership of the message buffer is not visible here;
        // release it with the library's string-free routine if required.
        if (out_error) |msg| {
            std.log.err("registering content-validator failed: {s}", .{std.mem.sliceTo(msg, 0)});
        } else {
            std.log.err("registering content-validator failed (rc={d})", .{rc});
        }
        return error.RegistrationFailed;
    }
}
```

View File

@@ -0,0 +1,49 @@
```zig title="Zig"
const std = @import("std");
const kreuzberg = @import("kreuzberg");
/// Validator skeleton that carries a quality threshold; the check itself is
/// left unimplemented.
const QualityScoreValidator = struct {
    /// Score threshold. Stored but not read by the code below.
    threshold: f64,

    /// Placeholder that accepts every result; all arguments are ignored.
    /// Intended implementation: parse `result` JSON, look up
    /// `metadata.additional.quality_score`, and return error.Validation if
    /// it falls below `self.threshold`.
    pub fn validate(_: *QualityScoreValidator, _: [*c]const u8, _: [*c]const u8) !void {}

    /// Always 1.
    pub fn should_validate(_: *QualityScoreValidator, _: [*c]const u8, _: [*c]const u8) i32 {
        return 1;
    }

    /// Fixed priority value of 75.
    pub fn priority(_: *QualityScoreValidator) i32 {
        return 75;
    }
};
/// Builds a vtable for a `QualityScoreValidator` with threshold 0.5,
/// overrides its name/version thunks, and registers it as
/// "quality-score-validator".
///
/// Returns `error.RegistrationFailed` (after logging the reason) when the
/// registration call reports a non-zero status, instead of silently
/// discarding the status and error message.
pub fn main() !void {
    var instance = QualityScoreValidator{ .threshold = 0.5 };
    var vtable = kreuzberg.make_validator_vtable(QualityScoreValidator, &instance);

    // NOTE(review): both thunks hand out pointers to string literals via
    // @constCast; confirm the library neither frees nor mutates them.
    vtable.name_fn = struct {
        fn thunk(user_data: ?*anyopaque, out_name: ?*?[*c]u8) callconv(.C) void {
            _ = user_data;
            if (out_name) |ptr| ptr.* = @constCast("quality-score-validator");
        }
    }.thunk;
    vtable.version_fn = struct {
        fn thunk(user_data: ?*anyopaque, out_version: ?*?[*c]u8) callconv(.C) void {
            _ = user_data;
            if (out_version) |ptr| ptr.* = @constCast("1.0.0");
        }
    }.thunk;

    var out_error: ?[*c]u8 = null;
    const rc = kreuzberg.register_validator("quality-score-validator", vtable, &instance, &out_error);
    if (rc != 0) {
        // NOTE(review): ownership of the message buffer is not visible here;
        // release it with the library's string-free routine if required.
        if (out_error) |msg| {
            std.log.err("registering quality-score-validator failed: {s}", .{std.mem.sliceTo(msg, 0)});
        } else {
            std.log.err("registering quality-score-validator failed (rc={d})", .{rc});
        }
        return error.RegistrationFailed;
    }
}
```

View File

@@ -0,0 +1,67 @@
```zig title="Zig"
const std = @import("std");
const kreuzberg = @import("kreuzberg");
/// Post-processor that keeps an atomic count of how many times it ran.
const StatefulProcessor = struct {
    /// Invocation counter, updated atomically by `process`.
    call_count: std.atomic.Value(usize) = std.atomic.Value(usize).init(0),

    /// Atomically increments `call_count`; both arguments are ignored.
    pub fn process(self: *StatefulProcessor, _: [*c]const u8, _: [*c]const u8) !void {
        _ = self.call_count.fetchAdd(1, .acq_rel);
    }

    /// Stage label reported for this processor.
    pub fn processing_stage(_: *StatefulProcessor) [*c]const u8 {
        return "Middle";
    }

    /// Always 1: every result is counted.
    pub fn should_process(_: *StatefulProcessor, _: [*c]const u8, _: [*c]const u8) i32 {
        return 1;
    }

    /// Constant duration estimate of 1 ms.
    pub fn estimated_duration_ms(_: *StatefulProcessor, _: [*c]const u8) u64 {
        return 1;
    }

    /// Fixed priority value of 50.
    pub fn priority(_: *StatefulProcessor) i32 {
        return 50;
    }
};
/// Builds a vtable for `StatefulProcessor`, overrides its name, version and
/// shutdown thunks, and registers it as "stateful-processor".
///
/// Returns `error.RegistrationFailed` (after logging the reason) when the
/// registration call reports a non-zero status, instead of silently
/// discarding the status and error message.
pub fn main() !void {
    var instance = StatefulProcessor{};
    var vtable = kreuzberg.make_post_processor_vtable(StatefulProcessor, &instance);

    // NOTE(review): the name/version thunks hand out pointers to string
    // literals via @constCast; confirm the library neither frees nor
    // mutates them.
    vtable.name_fn = struct {
        fn thunk(user_data: ?*anyopaque, out_name: ?*?[*c]u8) callconv(.C) void {
            _ = user_data;
            if (out_name) |ptr| ptr.* = @constCast("stateful-processor");
        }
    }.thunk;
    vtable.version_fn = struct {
        fn thunk(user_data: ?*anyopaque, out_version: ?*?[*c]u8) callconv(.C) void {
            _ = user_data;
            if (out_version) |ptr| ptr.* = @constCast("1.0.0");
        }
    }.thunk;
    // The shutdown hook recovers the instance from user_data and logs the
    // final invocation count.
    vtable.shutdown_fn = struct {
        fn thunk(user_data: ?*anyopaque, out_error: ?*?[*c]u8) callconv(.C) i32 {
            _ = out_error;
            const self: *StatefulProcessor = @ptrCast(@alignCast(user_data));
            const count = self.call_count.load(.acquire);
            std.log.info("stateful-processor invoked {d} times", .{count});
            return 0;
        }
    }.thunk;

    var out_error: ?[*c]u8 = null;
    const rc = kreuzberg.register_post_processor("stateful-processor", vtable, &instance, &out_error);
    if (rc != 0) {
        // NOTE(review): ownership of the message buffer is not visible here;
        // release it with the library's string-free routine if required.
        if (out_error) |msg| {
            std.log.err("registering stateful-processor failed: {s}", .{std.mem.sliceTo(msg, 0)});
        } else {
            std.log.err("registering stateful-processor failed (rc={d})", .{rc});
        }
        return error.RegistrationFailed;
    }
}
```

View File

@@ -0,0 +1,13 @@
```zig title="Zig"
const std = @import("std");
const kreuzberg = @import("kreuzberg");
/// Logs a warning when an unregister call reported a non-zero status, using
/// the message in `out_error` when one was written, then clears the slot so
/// the next call starts from null.
/// NOTE(review): ownership of the message buffer is not visible here;
/// release it with the library's string-free routine if required.
fn reportUnregister(what: []const u8, rc: i32, out_error: *?[*c]u8) void {
    if (rc != 0) {
        if (out_error.*) |msg| {
            std.log.warn("could not unregister {s}: {s}", .{ what, std.mem.sliceTo(msg, 0) });
        } else {
            std.log.warn("could not unregister {s} (rc={d})", .{ what, rc });
        }
    }
    out_error.* = null;
}

/// Unregisters one plugin of each kind by name. The calls stay best-effort:
/// a failure is logged and the remaining plugins are still attempted,
/// instead of every status and message being discarded.
pub fn main() !void {
    var out_error: ?[*c]u8 = null;

    reportUnregister(
        "post-processor word-count",
        kreuzberg.unregister_post_processor("word-count", &out_error),
        &out_error,
    );
    reportUnregister(
        "validator min-length-validator",
        kreuzberg.unregister_validator("min-length-validator", &out_error),
        &out_error,
    );
    reportUnregister(
        "OCR backend custom-ocr",
        kreuzberg.unregister_ocr_backend("custom-ocr", &out_error),
        &out_error,
    );
    reportUnregister(
        "embedding backend my-embedder",
        kreuzberg.unregister_embedding_backend("my-embedder", &out_error),
        &out_error,
    );
}
```

View File

@@ -0,0 +1,58 @@
```zig title="Zig"
const std = @import("std");
const kreuzberg = @import("kreuzberg");
/// Post-processor skeleton registered under the name "word-count"; the
/// processing step itself is left as a placeholder.
const WordCountProcessor = struct {
    /// No-op placeholder; all arguments are ignored.
    /// The serialized result/config arrive as JSON strings; modify and emit
    /// an updated payload through your own pipeline as needed.
    pub fn process(_: *WordCountProcessor, _: [*c]const u8, _: [*c]const u8) !void {}

    /// Stage label reported for this processor.
    pub fn processing_stage(_: *WordCountProcessor) [*c]const u8 {
        return "Early";
    }

    /// Always 1.
    pub fn should_process(_: *WordCountProcessor, _: [*c]const u8, _: [*c]const u8) i32 {
        return 1;
    }

    /// Constant duration estimate of 1 ms.
    pub fn estimated_duration_ms(_: *WordCountProcessor, _: [*c]const u8) u64 {
        return 1;
    }

    /// Fixed priority value of 50.
    pub fn priority(_: *WordCountProcessor) i32 {
        return 50;
    }
};
/// Builds a vtable for `WordCountProcessor`, overrides its name/version
/// thunks, and registers it as "word-count".
///
/// Returns `error.RegistrationFailed` (after logging the reason) when the
/// registration call reports a non-zero status, instead of silently
/// discarding the status and error message.
pub fn main() !void {
    var instance = WordCountProcessor{};
    var vtable = kreuzberg.make_post_processor_vtable(WordCountProcessor, &instance);

    // NOTE(review): both thunks hand out pointers to string literals via
    // @constCast; confirm the library neither frees nor mutates them.
    vtable.name_fn = struct {
        fn thunk(user_data: ?*anyopaque, out_name: ?*?[*c]u8) callconv(.C) void {
            _ = user_data;
            if (out_name) |ptr| ptr.* = @constCast("word-count");
        }
    }.thunk;
    vtable.version_fn = struct {
        fn thunk(user_data: ?*anyopaque, out_version: ?*?[*c]u8) callconv(.C) void {
            _ = user_data;
            if (out_version) |ptr| ptr.* = @constCast("1.0.0");
        }
    }.thunk;

    var out_error: ?[*c]u8 = null;
    const rc = kreuzberg.register_post_processor("word-count", vtable, &instance, &out_error);
    if (rc != 0) {
        // NOTE(review): ownership of the message buffer is not visible here;
        // release it with the library's string-free routine if required.
        if (out_error) |msg| {
            std.log.err("registering word-count failed: {s}", .{std.mem.sliceTo(msg, 0)});
        } else {
            std.log.err("registering word-count failed (rc={d})", .{rc});
        }
        return error.RegistrationFailed;
    }
}
```