Files
fil/docs/snippets/zig/plugins/extractor_registration.md
Henrik Jess Nielsen b4c07d3693
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s
Nomad changes
2026-06-01 23:40:55 +02:00

5.7 KiB

const std = @import("std");
const kreuzberg = @import("kreuzberg");

/// Slot through which a callback or FFI call hands back a NUL-terminated
/// string (the Zig spelling of a C `char **` out-parameter).
///
/// The pointee is a bare C pointer on purpose. `?[*c]u8` is NOT pointer-sized
/// in Zig: a C pointer already admits address zero, so wrapping it in an
/// optional adds a separate null flag next to the address. Using that as the
/// pointee would make Zig read and write a flag beside the pointer-sized slot
/// the C side is expected to provide.
const OutString = ?*[*c]u8;

/// Name under which the example extractor is registered and which it reports
/// from `name_fn`.
const extractor_name = "zig-json-extractor";

/// VTable struct for DocumentExtractor; mirrors KreuzbergDocumentExtractorVTable.
///
/// `extern struct` gives the fields C layout in declaration order, so the
/// order below must not change. Every entry is an optional C-calling-convention
/// function pointer; `main` leaves the entries this example does not implement
/// as null.
const DocumentExtractorVTable = extern struct {
    /// Writes the extractor name into `out_name`.
    name_fn: ?*const fn (user_data: ?*anyopaque, out_name: OutString) callconv(.C) void,
    /// Writes the extractor version into `out_version`.
    version_fn: ?*const fn (user_data: ?*anyopaque, out_version: OutString) callconv(.C) void,
    /// Lifecycle hook; this example returns 0 for success.
    initialize_fn: ?*const fn (user_data: ?*anyopaque, out_error: OutString) callconv(.C) i32,
    /// Lifecycle hook; this example returns 0 for success.
    shutdown_fn: ?*const fn (user_data: ?*anyopaque, out_error: OutString) callconv(.C) i32,
    /// Extracts from an in-memory buffer of `content_len` bytes.
    extract_bytes: ?*const fn (
        user_data: ?*anyopaque,
        content: [*c]const u8,
        content_len: usize,
        mime_type: [*c]const u8,
        config: [*c]const u8,
        out_result: OutString,
        out_error: OutString,
    ) callconv(.C) i32,
    /// Path-based variant; not implemented by this example (left null).
    extract_file: ?*const fn (
        user_data: ?*anyopaque,
        path: [*c]const u8,
        mime_type: [*c]const u8,
        config: [*c]const u8,
        out_result: OutString,
        out_error: OutString,
    ) callconv(.C) i32,
    /// Writes the supported MIME types into `out_result`.
    supported_mime_types: ?*const fn (user_data: ?*anyopaque, out_result: OutString) callconv(.C) i32,
    /// Returns the extractor priority.
    priority: ?*const fn (user_data: ?*anyopaque) callconv(.C) i32,
    /// Not implemented by this example (left null).
    can_handle: ?*const fn (
        user_data: ?*anyopaque,
        path: [*c]const u8,
        mime_type: [*c]const u8,
    ) callconv(.C) i32,
    /// Not implemented by this example (left null).
    as_sync_extractor: ?*const fn (user_data: ?*anyopaque) callconv(.C) i32,
    /// Not implemented by this example (left null; `user_data` is null too).
    free_user_data: ?*const fn (user_data: ?*anyopaque) callconv(.C) void,
};

/// Registers an extractor under `name`, passing the vtable by value.
/// `main` treats a return value of 0 as success and otherwise reads a message
/// from `out_error`, which it releases with `kreuzberg_free_string`.
extern "kreuzberg_ffi" fn kreuzberg_register_document_extractor(
    name: [*c]const u8,
    vtable: DocumentExtractorVTable,
    user_data: ?*anyopaque,
    out_error: OutString,
) i32;

/// Removes the extractor registered under `name`.
/// `main` treats a return value of 0 as success.
extern "kreuzberg_ffi" fn kreuzberg_unregister_document_extractor(
    name: [*c]const u8,
    out_error: OutString,
) i32;

/// Releases a string handed out by the kreuzberg FFI (used here for the
/// messages written to `out_error`).
extern "kreuzberg_ffi" fn kreuzberg_free_string(ptr: [*c]u8) void;

/// Copies `text` into a fresh NUL-terminated buffer and stores it in `out.*`.
///
/// A null `out` is a successful no-op: nothing is allocated, so nothing can
/// leak. On allocation failure the slot is set to null and the error is
/// returned, so the caller never sees a stale pointer.
///
/// NOTE(review): the buffer comes from `std.heap.c_allocator`, as in the
/// original snippet. Presumably the host takes ownership and frees it; if it
/// does so with libc `free`, `std.heap.raw_c_allocator` may be the safer
/// source on every platform — confirm against the kreuzberg FFI contract.
fn writeCString(out: OutString, text: []const u8) error{OutOfMemory}!void {
    const slot = out orelse return;
    const copy = std.heap.c_allocator.dupeZ(u8, text) catch |err| {
        slot.* = null;
        return err;
    };
    slot.* = copy.ptr;
}

// Implement callback functions for the extractor.

/// Extraction callback for in-memory content.
///
/// Only the MIME type is inspected. For "application/json" a fixed JSON
/// document is written to `out_result` and 0 is returned. Any other MIME type,
/// including a null `mime_type`, writes a message to `out_error` and returns 1.
/// Allocation failure also returns 1.
fn extract_bytes_fn(
    user_data: ?*anyopaque,
    content: [*c]const u8,
    content_len: usize,
    mime_type: [*c]const u8,
    config: [*c]const u8,
    out_result: OutString,
    out_error: OutString,
) callconv(.C) i32 {
    _ = user_data;
    _ = content;
    _ = content_len;
    _ = config;

    // A null C pointer must not reach sliceTo; treat it as "no MIME type".
    const mime: []const u8 = if (mime_type != null) std.mem.sliceTo(mime_type, 0) else "";

    if (std.mem.eql(u8, mime, "application/json")) {
        writeCString(out_result, "{\"content\": \"Extracted from JSON\"}") catch return 1;
        return 0;
    }

    // Best effort: the non-zero status already reports the failure, even when
    // the message itself cannot be allocated.
    writeCString(out_error, "Unsupported MIME type") catch {};
    return 1;
}

/// Writes the JSON array of supported MIME types to `out_result`.
/// Returns 0 on success and 1 if the string cannot be allocated.
fn supported_mime_types_fn(user_data: ?*anyopaque, out_result: OutString) callconv(.C) i32 {
    _ = user_data;
    writeCString(out_result, "[\"application/json\"]") catch return 1;
    return 0;
}

/// Writes the extractor name to `out_name`.
fn name_fn(user_data: ?*anyopaque, out_name: OutString) callconv(.C) void {
    _ = user_data;
    // No status code to return: on allocation failure the slot is left null.
    writeCString(out_name, extractor_name) catch {};
}

/// Writes the extractor version to `out_version`.
fn version_fn(user_data: ?*anyopaque, out_version: OutString) callconv(.C) void {
    _ = user_data;
    // No status code to return: on allocation failure the slot is left null.
    writeCString(out_version, "0.1.0") catch {};
}

/// Initialization hook; this example has no state to set up.
fn initialize_fn(user_data: ?*anyopaque, out_error: OutString) callconv(.C) i32 {
    _ = user_data;
    _ = out_error;
    return 0; // Success
}

/// Shutdown hook; this example has no state to tear down.
fn shutdown_fn(user_data: ?*anyopaque, out_error: OutString) callconv(.C) i32 {
    _ = user_data;
    _ = out_error;
    return 0; // Success
}

/// Reports the extractor priority.
fn priority_fn(user_data: ?*anyopaque) callconv(.C) i32 {
    _ = user_data;
    return 50; // Default priority
}

/// Registers the example extractor, reports the outcome on stdout, and
/// unregisters it again. Failures of either FFI call are printed, together
/// with the message from `out_error` when one was provided.
pub fn main() !void {
    const stdout = std.io.getStdOut().writer();

    // Pointer-sized slot the FFI calls write their error message into.
    var out_error: [*c]u8 = null;
    // Runs at scope exit and therefore sees the final value of `out_error`.
    defer if (out_error != null) kreuzberg_free_string(out_error);

    // Build the vtable.
    const vtable = DocumentExtractorVTable{
        .name_fn = name_fn,
        .version_fn = version_fn,
        .initialize_fn = initialize_fn,
        .shutdown_fn = shutdown_fn,
        .extract_bytes = extract_bytes_fn,
        .extract_file = null,
        .supported_mime_types = supported_mime_types_fn,
        .priority = priority_fn,
        .can_handle = null,
        .as_sync_extractor = null,
        .free_user_data = null,
    };

    // Register the extractor with null user_data (no state).
    const register_rc = kreuzberg_register_document_extractor(
        extractor_name,
        vtable,
        null,
        &out_error,
    );

    if (register_rc != 0) {
        if (out_error != null) {
            try stdout.print("Registration failed: {s}\n", .{std.mem.sliceTo(out_error, 0)});
        } else {
            try stdout.print("Registration failed\n", .{});
        }
        return;
    }

    try stdout.print("Successfully registered {s}\n", .{extractor_name});

    // Release any message left in the slot before reusing it, so it cannot leak.
    if (out_error != null) {
        kreuzberg_free_string(out_error);
        out_error = null;
    }

    // Unregister the extractor when done.
    const unregister_rc = kreuzberg_unregister_document_extractor(extractor_name, &out_error);
    if (unregister_rc == 0) {
        try stdout.print("Successfully unregistered {s}\n", .{extractor_name});
    } else if (out_error != null) {
        try stdout.print("Unregistration failed: {s}\n", .{std.mem.sliceTo(out_error, 0)});
    } else {
        try stdout.print("Unregistration failed\n", .{});
    }
}