This commit is contained in:
30
docs/snippets/c/plugins/clear_plugins.md
Normal file
30
docs/snippets/c/plugins/clear_plugins.md
Normal file
@@ -0,0 +1,30 @@
|
||||
```c title="C"
|
||||
#include <kreuzberg.h>
|
||||
#include <stdio.h>
|
||||
|
||||
/*
 * Clears the post-processor, OCR backend, and validator registries, in that
 * order. The first call that returns non-zero is reported on stderr together
 * with the library's last error code and context, and the program exits
 * with status 1. Exits with 0 once all three calls succeed.
 */
int main(void) {
    int exit_code = 1;

    if (kreuzberg_clear_post_processors() != 0) {
        fprintf(stderr, "clear post-processors failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
    } else if (kreuzberg_clear_ocr_backends() != 0) {
        fprintf(stderr, "clear OCR backends failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
    } else if (kreuzberg_clear_validators() != 0) {
        fprintf(stderr, "clear validators failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
    } else {
        printf("All plugins cleared\n");
        exit_code = 0;
    }

    return exit_code;
}
|
||||
```
|
||||
115
docs/snippets/c/plugins/embedding_backend.md
Normal file
115
docs/snippets/c/plugins/embedding_backend.md
Normal file
@@ -0,0 +1,115 @@
|
||||
```c title="C"
|
||||
#include <kreuzberg.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
/* Fixed embedding dimension produced by this backend. */
|
||||
#define EMBED_DIM 768
|
||||
|
||||
/*
 * Returns a malloc-allocated copy of the NUL-terminated string `s`.
 *
 * Returns NULL when `s` is NULL or when the allocation fails.
 */
static char *dup_cstr(const char *s) {
    if (s == NULL) {
        return NULL; /* strlen(NULL) would be undefined behaviour */
    }

    size_t size = strlen(s) + 1; /* include the terminating NUL */
    char *copy = malloc(size);
    if (copy != NULL) {
        memcpy(copy, s, size);
    }
    return copy;
}
|
||||
|
||||
static uintptr_t dimensions_fn(const void *user_data) {
|
||||
(void)user_data;
|
||||
return (uintptr_t)EMBED_DIM;
|
||||
}
|
||||
|
||||
static int32_t embed_fn(
|
||||
const void *user_data,
|
||||
const char *texts,
|
||||
char **out_result,
|
||||
char **out_error
|
||||
) {
|
||||
(void)user_data;
|
||||
(void)out_error;
|
||||
|
||||
/* `texts` is a JSON array of strings. Count entries by scanning quotes;
|
||||
* a real backend would parse the JSON and call its host model. */
|
||||
size_t count = 0;
|
||||
int in_string = 0;
|
||||
int escape = 0;
|
||||
for (const char *p = texts; *p; ++p) {
|
||||
if (escape) {
|
||||
escape = 0;
|
||||
} else if (*p == '\\') {
|
||||
escape = 1;
|
||||
} else if (*p == '"') {
|
||||
if (!in_string) {
|
||||
in_string = 1;
|
||||
count += 1;
|
||||
} else {
|
||||
in_string = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Build a JSON array of zero vectors of length EMBED_DIM, one per input. */
|
||||
/* Worst case bytes per entry: 2 brackets + EMBED_DIM * 4 ("0.0,") + comma. */
|
||||
size_t cap = 16 + count * (EMBED_DIM * 4 + 4);
|
||||
char *json = (char *)malloc(cap);
|
||||
if (!json) {
|
||||
*out_error = dup_cstr("allocation failure");
|
||||
return 1;
|
||||
}
|
||||
size_t pos = 0;
|
||||
json[pos++] = '[';
|
||||
for (size_t i = 0; i < count; ++i) {
|
||||
if (i > 0) json[pos++] = ',';
|
||||
json[pos++] = '[';
|
||||
for (size_t d = 0; d < EMBED_DIM; ++d) {
|
||||
if (d > 0) json[pos++] = ',';
|
||||
json[pos++] = '0';
|
||||
json[pos++] = '.';
|
||||
json[pos++] = '0';
|
||||
}
|
||||
json[pos++] = ']';
|
||||
}
|
||||
json[pos++] = ']';
|
||||
json[pos] = '\0';
|
||||
|
||||
*out_result = json;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Vtable callback: stores a freshly duplicated copy of the backend name in `*out_name`. */
static void name_fn(const void *user_data, char **out_name) {
    char *name = dup_cstr("my-embedder");

    (void)user_data;
    *out_name = name;
}
|
||||
|
||||
/* Vtable callback: stores a freshly duplicated copy of the version string in `*out_version`. */
static void version_fn(const void *user_data, char **out_version) {
    char *version = dup_cstr("1.0.0");

    (void)user_data;
    *out_version = version;
}
|
||||
|
||||
int main(void) {
|
||||
KREUZBERGKreuzbergEmbeddingBackendVTable vtable = {0};
|
||||
vtable.name_fn = name_fn;
|
||||
vtable.version_fn = version_fn;
|
||||
vtable.dimensions = dimensions_fn;
|
||||
vtable.embed = embed_fn;
|
||||
|
||||
char *err = NULL;
|
||||
int32_t rc = kreuzberg_register_embedding_backend(
|
||||
"my-embedder",
|
||||
vtable,
|
||||
NULL,
|
||||
&err
|
||||
);
|
||||
if (rc != 0) {
|
||||
fprintf(stderr, "register embedding backend failed: %s\n",
|
||||
err ? err : "(no detail)");
|
||||
kreuzberg_free_string(err);
|
||||
return 1;
|
||||
}
|
||||
|
||||
printf("my-embedder registered (dim=%d)\n", EMBED_DIM);
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
27
docs/snippets/c/plugins/extractor_registration.md
Normal file
27
docs/snippets/c/plugins/extractor_registration.md
Normal file
@@ -0,0 +1,27 @@
|
||||
```c title="C"
|
||||
#include <kreuzberg.h>
|
||||
#include <stdio.h>
|
||||
|
||||
/*
|
||||
* The kreuzberg C FFI does not expose a public function for registering
|
||||
* custom DocumentExtractor implementations from C. Document extractors must
|
||||
* be registered from Rust via `kreuzberg::plugins::registry::get_document_extractor_registry()`
|
||||
* before the C library is loaded.
|
||||
*
|
||||
* From C you can inspect which extractors the core has registered:
|
||||
*/
|
||||
|
||||
/*
 * Prints the document extractors reported by the library. A NULL listing is
 * treated as a failure: the last error code and context go to stderr and the
 * program exits with 1.
 */
int main(void) {
    char *extractors = kreuzberg_list_document_extractors();

    if (extractors != NULL) {
        printf("Registered document extractors: %s\n", extractors);
        kreuzberg_free_string(extractors);
        return 0;
    }

    fprintf(stderr, "list document extractors failed (code %d): %s\n",
            kreuzberg_last_error_code(),
            kreuzberg_last_error_context());
    return 1;
}
|
||||
```
|
||||
25
docs/snippets/c/plugins/list_plugins.md
Normal file
25
docs/snippets/c/plugins/list_plugins.md
Normal file
@@ -0,0 +1,25 @@
|
||||
```c title="C"
|
||||
#include <kreuzberg.h>
|
||||
#include <stdio.h>
|
||||
|
||||
/*
 * Prints one registry listing as "<label>: <json>" and then releases `json`
 * with kreuzberg_free_string. When `json` is NULL, reports the library's
 * last error code and context on stderr instead.
 */
static void print_plugin_list(const char *label, char *json) {
    if (json != NULL) {
        printf("%s: %s\n", label, json);
        kreuzberg_free_string(json);
        return;
    }

    fprintf(stderr, "list %s failed (code %d): %s\n",
            label,
            kreuzberg_last_error_code(),
            kreuzberg_last_error_context());
}
|
||||
|
||||
/*
 * Lists every plugin registry in turn; each listing is printed (or its
 * failure reported) by print_plugin_list. Always exits with 0.
 */
int main(void) {
    char *listing;

    listing = kreuzberg_list_document_extractors();
    print_plugin_list("document extractors", listing);

    listing = kreuzberg_list_ocr_backends();
    print_plugin_list("OCR backends", listing);

    listing = kreuzberg_list_post_processors();
    print_plugin_list("post-processors", listing);

    listing = kreuzberg_list_validators();
    print_plugin_list("validators", listing);

    listing = kreuzberg_list_embedding_presets();
    print_plugin_list("embedding presets", listing);

    return 0;
}
|
||||
```
|
||||
90
docs/snippets/c/plugins/min_length_validator.md
Normal file
90
docs/snippets/c/plugins/min_length_validator.md
Normal file
@@ -0,0 +1,90 @@
|
||||
```c title="C"
|
||||
#include <kreuzberg.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
/* user_data carries the minimum length threshold. */
|
||||
/* Per-plugin state passed to the validator callbacks through `user_data`. */
typedef struct {
    size_t min_length; /* threshold compared against the content length */
} MinLengthState;
|
||||
|
||||
/*
 * Returns a malloc-allocated copy of the NUL-terminated string `s`.
 *
 * Returns NULL when `s` is NULL or when the allocation fails.
 */
static char *dup_cstr(const char *s) {
    if (s == NULL) {
        return NULL; /* strlen(NULL) would be undefined behaviour */
    }

    size_t size = strlen(s) + 1; /* include the terminating NUL */
    char *copy = malloc(size);
    if (copy != NULL) {
        memcpy(copy, s, size);
    }
    return copy;
}
|
||||
|
||||
static int32_t validate_fn(
|
||||
const void *user_data,
|
||||
const char *result,
|
||||
const char *config,
|
||||
char **out_error
|
||||
) {
|
||||
(void)config;
|
||||
const MinLengthState *state = (const MinLengthState *)user_data;
|
||||
|
||||
/* `result` is a JSON string of ExtractionResult. We approximate the content
|
||||
* length check by scanning for the "content" field. Production plugins
|
||||
* should parse JSON properly. */
|
||||
const char *content = strstr(result, "\"content\":\"");
|
||||
size_t content_len = 0;
|
||||
if (content) {
|
||||
content += strlen("\"content\":\"");
|
||||
const char *end = strchr(content, '"');
|
||||
if (end) {
|
||||
content_len = (size_t)(end - content);
|
||||
}
|
||||
}
|
||||
|
||||
if (content_len < state->min_length) {
|
||||
char buf[128];
|
||||
snprintf(buf, sizeof(buf),
|
||||
"Content too short: %zu < %zu characters",
|
||||
content_len,
|
||||
state->min_length);
|
||||
*out_error = dup_cstr(buf);
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Vtable callback: returns the constant priority 100; `user_data` is ignored. */
static int32_t priority_fn(const void *user_data) {
    const int32_t priority = 100;

    (void)user_data;
    return priority;
}
|
||||
|
||||
/* Vtable callback: releases the malloc-allocated state passed as `user_data`. */
static void free_user_data(void *user_data) {
    void *owned = user_data;

    free(owned);
}
|
||||
|
||||
int main(void) {
|
||||
MinLengthState *state = (MinLengthState *)malloc(sizeof(MinLengthState));
|
||||
state->min_length = 100;
|
||||
|
||||
KREUZBERGKreuzbergValidatorVTable vtable = {0};
|
||||
vtable.validate = validate_fn;
|
||||
vtable.priority = priority_fn;
|
||||
vtable.free_user_data = free_user_data;
|
||||
|
||||
char *err = NULL;
|
||||
int32_t rc = kreuzberg_register_validator(
|
||||
"min-length-validator",
|
||||
vtable,
|
||||
state,
|
||||
&err
|
||||
);
|
||||
if (rc != 0) {
|
||||
fprintf(stderr, "register validator failed: %s\n", err ? err : "(no detail)");
|
||||
kreuzberg_free_string(err);
|
||||
free(state);
|
||||
return 1;
|
||||
}
|
||||
|
||||
printf("min-length-validator registered\n");
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
85
docs/snippets/c/plugins/pdf_metadata_extractor.md
Normal file
85
docs/snippets/c/plugins/pdf_metadata_extractor.md
Normal file
@@ -0,0 +1,85 @@
|
||||
```c title="C"
|
||||
#include <kreuzberg.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
/*
|
||||
* The C FFI does not expose registration for custom DocumentExtractor
|
||||
* implementations. To add PDF-specific behaviour from C, register a
|
||||
* post-processor that runs only on PDF results and enriches them.
|
||||
*
|
||||
* The example below logs whenever the pipeline emits a PDF result, scoped
|
||||
* via the should_process hook so it never fires for other MIME types.
|
||||
*/
|
||||
|
||||
/*
 * Returns a malloc-allocated copy of the NUL-terminated string `s`.
 *
 * Returns NULL when `s` is NULL or when the allocation fails.
 */
static char *dup_cstr(const char *s) {
    if (s == NULL) {
        return NULL; /* strlen(NULL) would be undefined behaviour */
    }

    size_t size = strlen(s) + 1; /* include the terminating NUL */
    char *copy = malloc(size);
    if (copy != NULL) {
        memcpy(copy, s, size);
    }
    return copy;
}
|
||||
|
||||
/*
 * Vtable callback: logs the byte length of the serialised result to stdout
 * and returns 0. `user_data`, `config`, and `out_error` are unused.
 */
static int32_t process_fn(
    const void *user_data,
    const char *result,
    const char *config,
    char **out_error
) {
    const size_t result_bytes = strlen(result);

    (void)out_error;
    (void)config;
    (void)user_data;

    printf("pdf-metadata-extractor: serialised PDF result is %zu bytes\n", result_bytes);
    return 0;
}
|
||||
|
||||
/*
 * Vtable callback: stores a malloc-allocated copy of the JSON string
 * "Late" in `*out_result`.
 *
 * Returns 0 on success. Returns 1 when the copy could not be allocated, so
 * a NULL result is never reported as success; non-zero is the same failure
 * convention the registration call in main() is checked against.
 */
static int32_t processing_stage_fn(
    const void *user_data,
    char **out_result
) {
    (void)user_data;

    *out_result = dup_cstr("\"Late\"");
    return *out_result != NULL ? 0 : 1;
}
|
||||
|
||||
/*
 * Vtable callback: returns 1 when the serialised result contains the exact
 * text "mime_type":"application/pdf", and 0 otherwise.
 */
static int32_t should_process_fn(
    const void *user_data,
    const char *result,
    const char *config
) {
    const char *pdf_marker = strstr(result, "\"mime_type\":\"application/pdf\"");

    (void)config;
    (void)user_data;

    return pdf_marker ? 1 : 0;
}
|
||||
|
||||
/* Vtable callback: returns the constant priority 75; `user_data` is ignored. */
static int32_t priority_fn(const void *user_data) {
    const int32_t priority = 75;

    (void)user_data;
    return priority;
}
|
||||
|
||||
int main(void) {
|
||||
KREUZBERGKreuzbergPostProcessorVTable vtable = {0};
|
||||
vtable.process = process_fn;
|
||||
vtable.processing_stage = processing_stage_fn;
|
||||
vtable.should_process = should_process_fn;
|
||||
vtable.priority = priority_fn;
|
||||
|
||||
char *err = NULL;
|
||||
int32_t rc = kreuzberg_register_post_processor(
|
||||
"pdf-metadata-extractor",
|
||||
vtable,
|
||||
NULL,
|
||||
&err
|
||||
);
|
||||
if (rc != 0) {
|
||||
fprintf(stderr, "register post-processor failed: %s\n", err ? err : "(no detail)");
|
||||
kreuzberg_free_string(err);
|
||||
return 1;
|
||||
}
|
||||
|
||||
printf("pdf-metadata-extractor registered\n");
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
78
docs/snippets/c/plugins/pdf_only_processor.md
Normal file
78
docs/snippets/c/plugins/pdf_only_processor.md
Normal file
@@ -0,0 +1,78 @@
|
||||
```c title="C"
|
||||
#include <kreuzberg.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
/*
 * Returns a malloc-allocated copy of the NUL-terminated string `s`.
 *
 * Returns NULL when `s` is NULL or when the allocation fails.
 */
static char *dup_cstr(const char *s) {
    if (s == NULL) {
        return NULL; /* strlen(NULL) would be undefined behaviour */
    }

    size_t size = strlen(s) + 1; /* include the terminating NUL */
    char *copy = malloc(size);
    if (copy != NULL) {
        memcpy(copy, s, size);
    }
    return copy;
}
|
||||
|
||||
/*
 * Vtable callback: logs the length of the serialised result to stdout and
 * returns 0. `user_data`, `config`, and `out_error` are unused.
 */
static int32_t process_fn(
    const void *user_data,
    const char *result,
    const char *config,
    char **out_error
) {
    const size_t result_len = strlen(result);

    (void)out_error;
    (void)config;
    (void)user_data;

    printf("pdf-only-processor: handling result of length %zu\n", result_len);
    return 0;
}
|
||||
|
||||
/*
 * Vtable callback: stores a malloc-allocated copy of the JSON string
 * "Middle" in `*out_result`.
 *
 * Returns 0 on success. Returns 1 when the copy could not be allocated, so
 * a NULL result is never reported as success; non-zero is the same failure
 * convention the registration call in main() is checked against.
 */
static int32_t processing_stage_fn(
    const void *user_data,
    char **out_result
) {
    (void)user_data;

    *out_result = dup_cstr("\"Middle\"");
    return *out_result != NULL ? 0 : 1;
}
|
||||
|
||||
/*
 * Vtable callback: limits processing to PDF results. Returns 1 when the
 * serialised result contains the exact text "mime_type":"application/pdf",
 * and 0 otherwise.
 */
static int32_t should_process_fn(
    const void *user_data,
    const char *result,
    const char *config
) {
    const char *pdf_marker = strstr(result, "\"mime_type\":\"application/pdf\"");

    (void)config;
    (void)user_data;

    return pdf_marker ? 1 : 0;
}
|
||||
|
||||
/* Vtable callback: returns the constant priority 50; `user_data` is ignored. */
static int32_t priority_fn(const void *user_data) {
    const int32_t priority = 50;

    (void)user_data;
    return priority;
}
|
||||
|
||||
int main(void) {
|
||||
KREUZBERGKreuzbergPostProcessorVTable vtable = {0};
|
||||
vtable.process = process_fn;
|
||||
vtable.processing_stage = processing_stage_fn;
|
||||
vtable.should_process = should_process_fn;
|
||||
vtable.priority = priority_fn;
|
||||
|
||||
char *err = NULL;
|
||||
int32_t rc = kreuzberg_register_post_processor(
|
||||
"pdf-only-processor",
|
||||
vtable,
|
||||
NULL,
|
||||
&err
|
||||
);
|
||||
if (rc != 0) {
|
||||
fprintf(stderr, "register post-processor failed: %s\n", err ? err : "(no detail)");
|
||||
kreuzberg_free_string(err);
|
||||
return 1;
|
||||
}
|
||||
|
||||
printf("pdf-only-processor registered\n");
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
42
docs/snippets/c/plugins/plugin_extractor.md
Normal file
42
docs/snippets/c/plugins/plugin_extractor.md
Normal file
@@ -0,0 +1,42 @@
|
||||
```c title="C"
|
||||
#include <kreuzberg.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
/*
|
||||
* The C FFI exposes vtable-based registration for OCR backends, post-processors,
|
||||
* validators, and embedding backends. There is no public C entry point for
|
||||
* registering a custom DocumentExtractor — that must be done from Rust.
|
||||
*
|
||||
* From C you can still drive extraction for any MIME type the Rust core knows
|
||||
* how to handle. The example below feeds JSON bytes through the standard
|
||||
* extraction pipeline by passing the explicit MIME type.
|
||||
*/
|
||||
|
||||
int main(void) {
|
||||
const char *json_payload = "{\"message\":\"Hello, world!\"}";
|
||||
const uint8_t *bytes = (const uint8_t *)json_payload;
|
||||
uintptr_t bytes_len = (uintptr_t)strlen(json_payload);
|
||||
|
||||
KREUZBERGExtractionResult *result = kreuzberg_extract_bytes_sync(
|
||||
bytes,
|
||||
bytes_len,
|
||||
"application/json",
|
||||
NULL
|
||||
);
|
||||
|
||||
if (!result) {
|
||||
fprintf(stderr, "extraction failed (code %d): %s\n",
|
||||
kreuzberg_last_error_code(),
|
||||
kreuzberg_last_error_context());
|
||||
return 1;
|
||||
}
|
||||
|
||||
char *content = kreuzberg_extraction_result_content(result);
|
||||
printf("Extracted JSON content: %s\n", content ? content : "(empty)");
|
||||
|
||||
kreuzberg_free_string(content);
|
||||
kreuzberg_extraction_result_free(result);
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
91
docs/snippets/c/plugins/plugin_logging.md
Normal file
91
docs/snippets/c/plugins/plugin_logging.md
Normal file
@@ -0,0 +1,91 @@
|
||||
```c title="C"
|
||||
#include <kreuzberg.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
/* Demonstrates structured logging from a post-processor plugin's lifecycle
|
||||
* hooks (initialize/shutdown) and from the per-result process callback. */
|
||||
|
||||
/*
 * Returns a malloc-allocated copy of the NUL-terminated string `s`.
 *
 * Returns NULL when `s` is NULL or when the allocation fails.
 */
static char *dup_cstr(const char *s) {
    if (s == NULL) {
        return NULL; /* strlen(NULL) would be undefined behaviour */
    }

    size_t size = strlen(s) + 1; /* include the terminating NUL */
    char *copy = malloc(size);
    if (copy != NULL) {
        memcpy(copy, s, size);
    }
    return copy;
}
|
||||
|
||||
/* Lifecycle hook: writes an "initialize" log line to stderr and returns 0. */
static int32_t initialize_fn(const void *user_data, char **out_error) {
    (void)out_error;
    (void)user_data;

    fputs("[INFO] plugin=logging-demo event=initialize\n", stderr);
    return 0;
}
|
||||
|
||||
/* Lifecycle hook: writes a "shutdown" log line to stderr and returns 0. */
static int32_t shutdown_fn(const void *user_data, char **out_error) {
    (void)out_error;
    (void)user_data;

    fputs("[INFO] plugin=logging-demo event=shutdown\n", stderr);
    return 0;
}
|
||||
|
||||
/*
 * Per-result callback: logs the size of the serialised result to stderr and
 * adds a warning line when the result contains the exact text "content":"".
 * Always returns 0.
 */
static int32_t process_fn(
    const void *user_data,
    const char *result,
    const char *config,
    char **out_error
) {
    (void)out_error;
    (void)config;
    (void)user_data;

    const size_t result_bytes = strlen(result);
    fprintf(stderr,
            "[INFO] plugin=logging-demo event=process bytes=%zu\n",
            result_bytes);

    const char *empty_content = strstr(result, "\"content\":\"\"");
    if (empty_content == NULL) {
        return 0;
    }

    fputs("[WARN] plugin=logging-demo event=empty_content\n", stderr);
    return 0;
}
|
||||
|
||||
/*
 * Vtable callback: stores a malloc-allocated copy of the JSON string
 * "Late" in `*out_result`.
 *
 * Returns 0 on success. Returns 1 when the copy could not be allocated, so
 * a NULL result is never reported as success; non-zero is the same failure
 * convention the registration call in main() is checked against.
 */
static int32_t processing_stage_fn(const void *user_data, char **out_result) {
    (void)user_data;

    *out_result = dup_cstr("\"Late\"");
    return *out_result != NULL ? 0 : 1;
}
|
||||
|
||||
/* Vtable callback: returns the constant priority 50; `user_data` is ignored. */
static int32_t priority_fn(const void *user_data) {
    const int32_t priority = 50;

    (void)user_data;
    return priority;
}
|
||||
|
||||
int main(void) {
|
||||
KREUZBERGKreuzbergPostProcessorVTable vtable = {0};
|
||||
vtable.initialize_fn = initialize_fn;
|
||||
vtable.shutdown_fn = shutdown_fn;
|
||||
vtable.process = process_fn;
|
||||
vtable.processing_stage = processing_stage_fn;
|
||||
vtable.priority = priority_fn;
|
||||
|
||||
char *err = NULL;
|
||||
int32_t rc = kreuzberg_register_post_processor(
|
||||
"logging-demo",
|
||||
vtable,
|
||||
NULL,
|
||||
&err
|
||||
);
|
||||
if (rc != 0) {
|
||||
fprintf(stderr, "[ERROR] register post-processor failed: %s\n",
|
||||
err ? err : "(no detail)");
|
||||
kreuzberg_free_string(err);
|
||||
return 1;
|
||||
}
|
||||
|
||||
printf("logging-demo post-processor registered\n");
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
84
docs/snippets/c/plugins/plugin_testing.md
Normal file
84
docs/snippets/c/plugins/plugin_testing.md
Normal file
@@ -0,0 +1,84 @@
|
||||
```c title="C"
|
||||
#include <kreuzberg.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
/* Round-trip test: register a no-op validator, confirm it appears in the
|
||||
* registry list, then unregister and confirm it disappears. */
|
||||
|
||||
/*
 * Returns a malloc-allocated copy of the NUL-terminated string `s`.
 *
 * Returns NULL when `s` is NULL or when the allocation fails.
 * Note: no other function in this snippet calls this helper at present.
 */
static char *dup_cstr(const char *s) {
    if (s == NULL) {
        return NULL; /* strlen(NULL) would be undefined behaviour */
    }

    size_t size = strlen(s) + 1; /* include the terminating NUL */
    char *copy = malloc(size);
    if (copy != NULL) {
        memcpy(copy, s, size);
    }
    return copy;
}
|
||||
|
||||
/* No-op validator callback: ignores every argument and returns 0. */
static int32_t validate_fn(
    const void *user_data,
    const char *result,
    const char *config,
    char **out_error
) {
    const int32_t accepted = 0;

    (void)out_error;
    (void)config;
    (void)result;
    (void)user_data;

    return accepted;
}
|
||||
|
||||
/* Vtable callback: returns the constant priority 50; `user_data` is ignored. */
static int32_t priority_fn(const void *user_data) {
    const int32_t priority = 50;

    (void)user_data;
    return priority;
}
|
||||
|
||||
/*
 * Returns 1 when `name` occurs anywhere inside `json` (plain substring
 * search), and 0 otherwise or when either argument is NULL.
 */
static int contains_name(const char *json, const char *name) {
    int found = 0;

    if (json != NULL && name != NULL) {
        found = (strstr(json, name) != NULL);
    }
    return found;
}
|
||||
|
||||
int main(void) {
|
||||
const char *plugin_name = "noop-validator";
|
||||
|
||||
KREUZBERGKreuzbergValidatorVTable vtable = {0};
|
||||
vtable.validate = validate_fn;
|
||||
vtable.priority = priority_fn;
|
||||
|
||||
char *err = NULL;
|
||||
if (kreuzberg_register_validator(plugin_name, vtable, NULL, &err) != 0) {
|
||||
fprintf(stderr, "register failed: %s\n", err ? err : "(no detail)");
|
||||
kreuzberg_free_string(err);
|
||||
return 1;
|
||||
}
|
||||
|
||||
char *list_after_register = kreuzberg_list_validators();
|
||||
if (!contains_name(list_after_register, plugin_name)) {
|
||||
fprintf(stderr, "FAIL: validator missing after register\n");
|
||||
kreuzberg_free_string(list_after_register);
|
||||
return 1;
|
||||
}
|
||||
printf("PASS: %s present after register\n", plugin_name);
|
||||
kreuzberg_free_string(list_after_register);
|
||||
|
||||
if (kreuzberg_unregister_validator(plugin_name, &err) != 0) {
|
||||
fprintf(stderr, "unregister failed: %s\n", err ? err : "(no detail)");
|
||||
kreuzberg_free_string(err);
|
||||
return 1;
|
||||
}
|
||||
|
||||
char *list_after_unregister = kreuzberg_list_validators();
|
||||
if (contains_name(list_after_unregister, plugin_name)) {
|
||||
fprintf(stderr, "FAIL: validator still present after unregister\n");
|
||||
kreuzberg_free_string(list_after_unregister);
|
||||
return 1;
|
||||
}
|
||||
printf("PASS: %s absent after unregister\n", plugin_name);
|
||||
kreuzberg_free_string(list_after_unregister);
|
||||
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
76
docs/snippets/c/plugins/plugin_validator.md
Normal file
76
docs/snippets/c/plugins/plugin_validator.md
Normal file
@@ -0,0 +1,76 @@
|
||||
```c title="C"
|
||||
#include <kreuzberg.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
/*
|
||||
* Minimal Validator skeleton: implements the required `validate` function
|
||||
* and the optional `priority` and `should_validate` hooks via the C vtable.
|
||||
*/
|
||||
|
||||
/*
 * Returns a malloc-allocated copy of the NUL-terminated string `s`.
 *
 * Returns NULL when `s` is NULL or when the allocation fails.
 */
static char *dup_cstr(const char *s) {
    if (s == NULL) {
        return NULL; /* strlen(NULL) would be undefined behaviour */
    }

    size_t size = strlen(s) + 1; /* include the terminating NUL */
    char *copy = malloc(size);
    if (copy != NULL) {
        memcpy(copy, s, size);
    }
    return copy;
}
|
||||
|
||||
/*
 * Vtable callback: rejects any result whose serialised form contains the
 * token FORBIDDEN. On rejection a malloc-allocated message is stored in
 * `*out_error` and 1 is returned; otherwise returns 0.
 */
static int32_t validate_fn(
    const void *user_data,
    const char *result,
    const char *config,
    char **out_error
) {
    (void)config;
    (void)user_data;

    const char *forbidden = strstr(result, "FORBIDDEN");
    if (forbidden == NULL) {
        return 0;
    }

    *out_error = dup_cstr("Content contains forbidden token");
    return 1;
}
|
||||
|
||||
/* Vtable callback: always returns 1, so the validator runs for every result. */
static int32_t should_validate_fn(
    const void *user_data,
    const char *result,
    const char *config
) {
    const int32_t always_run = 1;

    (void)config;
    (void)result;
    (void)user_data;

    return always_run;
}
|
||||
|
||||
/* Vtable callback: returns the constant priority 50; `user_data` is ignored. */
static int32_t priority_fn(const void *user_data) {
    const int32_t priority = 50;

    (void)user_data;
    return priority;
}
|
||||
|
||||
int main(void) {
|
||||
KREUZBERGKreuzbergValidatorVTable vtable = {0};
|
||||
vtable.validate = validate_fn;
|
||||
vtable.should_validate = should_validate_fn;
|
||||
vtable.priority = priority_fn;
|
||||
|
||||
char *err = NULL;
|
||||
int32_t rc = kreuzberg_register_validator(
|
||||
"forbidden-token-validator",
|
||||
vtable,
|
||||
NULL,
|
||||
&err
|
||||
);
|
||||
if (rc != 0) {
|
||||
fprintf(stderr, "register validator failed: %s\n", err ? err : "(no detail)");
|
||||
kreuzberg_free_string(err);
|
||||
return 1;
|
||||
}
|
||||
|
||||
printf("forbidden-token-validator registered\n");
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
70
docs/snippets/c/plugins/quality_score_validator.md
Normal file
70
docs/snippets/c/plugins/quality_score_validator.md
Normal file
@@ -0,0 +1,70 @@
|
||||
```c title="C"
|
||||
#include <kreuzberg.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
/*
 * Returns a malloc-allocated copy of the NUL-terminated string `s`.
 *
 * Returns NULL when `s` is NULL or when the allocation fails.
 */
static char *dup_cstr(const char *s) {
    if (s == NULL) {
        return NULL; /* strlen(NULL) would be undefined behaviour */
    }

    size_t size = strlen(s) + 1; /* include the terminating NUL */
    char *copy = malloc(size);
    if (copy != NULL) {
        memcpy(copy, s, size);
    }
    return copy;
}
|
||||
|
||||
/*
 * Vtable callback: rejects results whose quality score is below 0.5.
 *
 * The score is read from the first `"quality_score":` key found in the
 * serialised result; production plugins should parse the JSON properly.
 * A missing key, or a value that is not a number, counts as a score of 0.0.
 *
 * Returns 0 when the score is acceptable. Otherwise stores a
 * malloc-allocated message in `*out_error` and returns 1.
 */
static int32_t validate_fn(
    const void *user_data,
    const char *result,
    const char *config,
    char **out_error
) {
    (void)user_data;
    (void)config;

    static const char needle[] = "\"quality_score\":";
    double score = 0.0;

    const char *found = strstr(result, needle);
    if (found) {
        /* strtod, unlike atof, reports whether anything was parsed and has
         * defined behaviour for out-of-range input. */
        const char *value = found + (sizeof(needle) - 1);
        char *end = NULL;
        double parsed = strtod(value, &end);
        if (end != value) {
            score = parsed;
        }
    }

    if (score < 0.5) {
        char buf[128];
        snprintf(buf, sizeof(buf),
                 "Quality score too low: %.2f < 0.50", score);
        *out_error = dup_cstr(buf);
        return 1;
    }
    return 0;
}
|
||||
|
||||
/* Vtable callback: returns the constant priority 50; `user_data` is ignored. */
static int32_t priority_fn(const void *user_data) {
    const int32_t priority = 50;

    (void)user_data;
    return priority;
}
|
||||
|
||||
int main(void) {
|
||||
KREUZBERGKreuzbergValidatorVTable vtable = {0};
|
||||
vtable.validate = validate_fn;
|
||||
vtable.priority = priority_fn;
|
||||
|
||||
char *err = NULL;
|
||||
int32_t rc = kreuzberg_register_validator(
|
||||
"quality-score-validator",
|
||||
vtable,
|
||||
NULL,
|
||||
&err
|
||||
);
|
||||
if (rc != 0) {
|
||||
fprintf(stderr, "register validator failed: %s\n", err ? err : "(no detail)");
|
||||
kreuzberg_free_string(err);
|
||||
return 1;
|
||||
}
|
||||
|
||||
printf("quality-score-validator registered\n");
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
101
docs/snippets/c/plugins/stateful_plugin.md
Normal file
101
docs/snippets/c/plugins/stateful_plugin.md
Normal file
@@ -0,0 +1,101 @@
|
||||
```c title="C"
|
||||
#include <kreuzberg.h>
|
||||
#include <stdatomic.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
/* Shared state lives in `user_data` and is forwarded to every vtable callback.
|
||||
* Use atomics or a mutex if more than one thread can call into the plugin. */
|
||||
|
||||
/* Plugin state handed to every vtable callback through `user_data`. */
typedef struct {
    atomic_size_t call_count; /* number of process callbacks seen so far */
} StatefulState;
|
||||
|
||||
/*
 * Returns a malloc-allocated copy of the NUL-terminated string `s`.
 *
 * Returns NULL when `s` is NULL or when the allocation fails.
 */
static char *dup_cstr(const char *s) {
    if (s == NULL) {
        return NULL; /* strlen(NULL) would be undefined behaviour */
    }

    size_t size = strlen(s) + 1; /* include the terminating NUL */
    char *copy = malloc(size);
    if (copy != NULL) {
        memcpy(copy, s, size);
    }
    return copy;
}
|
||||
|
||||
static int32_t initialize_fn(const void *user_data, char **out_error) {
|
||||
(void)out_error;
|
||||
StatefulState *state = (StatefulState *)user_data;
|
||||
atomic_store(&state->call_count, 0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int32_t shutdown_fn(const void *user_data, char **out_error) {
|
||||
(void)out_error;
|
||||
const StatefulState *state = (const StatefulState *)user_data;
|
||||
size_t count = atomic_load(&state->call_count);
|
||||
fprintf(stderr, "stateful-plugin: shutdown after %zu calls\n", count);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int32_t process_fn(
|
||||
const void *user_data,
|
||||
const char *result,
|
||||
const char *config,
|
||||
char **out_error
|
||||
) {
|
||||
(void)result;
|
||||
(void)config;
|
||||
(void)out_error;
|
||||
StatefulState *state = (StatefulState *)user_data;
|
||||
atomic_fetch_add(&state->call_count, 1);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Vtable callback: stores a malloc-allocated copy of the JSON string
 * "Middle" in `*out_result`.
 *
 * Returns 0 on success. Returns 1 when the copy could not be allocated, so
 * a NULL result is never reported as success; non-zero is the same failure
 * convention the registration call in main() is checked against.
 */
static int32_t processing_stage_fn(const void *user_data, char **out_result) {
    (void)user_data;

    *out_result = dup_cstr("\"Middle\"");
    return *out_result != NULL ? 0 : 1;
}
|
||||
|
||||
/* Vtable callback: returns the constant priority 50; `user_data` is ignored. */
static int32_t priority_fn(const void *user_data) {
    const int32_t priority = 50;

    (void)user_data;
    return priority;
}
|
||||
|
||||
/* Vtable callback: releases the malloc-allocated state passed as `user_data`. */
static void free_user_data(void *user_data) {
    void *owned = user_data;

    free(owned);
}
|
||||
|
||||
int main(void) {
|
||||
StatefulState *state = (StatefulState *)malloc(sizeof(StatefulState));
|
||||
if (!state) {
|
||||
return 1;
|
||||
}
|
||||
atomic_init(&state->call_count, 0);
|
||||
|
||||
KREUZBERGKreuzbergPostProcessorVTable vtable = {0};
|
||||
vtable.initialize_fn = initialize_fn;
|
||||
vtable.shutdown_fn = shutdown_fn;
|
||||
vtable.process = process_fn;
|
||||
vtable.processing_stage = processing_stage_fn;
|
||||
vtable.priority = priority_fn;
|
||||
vtable.free_user_data = free_user_data;
|
||||
|
||||
char *err = NULL;
|
||||
int32_t rc = kreuzberg_register_post_processor(
|
||||
"stateful-plugin",
|
||||
vtable,
|
||||
state,
|
||||
&err
|
||||
);
|
||||
if (rc != 0) {
|
||||
fprintf(stderr, "register post-processor failed: %s\n",
|
||||
err ? err : "(no detail)");
|
||||
kreuzberg_free_string(err);
|
||||
free(state);
|
||||
return 1;
|
||||
}
|
||||
|
||||
printf("stateful-plugin registered\n");
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
31
docs/snippets/c/plugins/unregister_plugins.md
Normal file
31
docs/snippets/c/plugins/unregister_plugins.md
Normal file
@@ -0,0 +1,31 @@
|
||||
```c title="C"
|
||||
#include <kreuzberg.h>
|
||||
#include <stdio.h>
|
||||
|
||||
/*
 * Calls `unregister_fn(name, &detail)` and returns 0 when it reports success.
 * On a non-zero result, logs the failure (with `kind`, `name`, and any error
 * detail) to stderr, frees the detail string, and returns 1.
 */
static int unregister_or_log(
    int32_t (*unregister_fn)(const char *, char **),
    const char *kind,
    const char *name
) {
    char *detail = NULL;

    if (unregister_fn(name, &detail) == 0) {
        return 0;
    }

    fprintf(stderr, "unregister %s '%s' failed: %s\n",
            kind,
            name,
            detail != NULL ? detail : "(no detail)");
    kreuzberg_free_string(detail);
    return 1;
}
|
||||
|
||||
int main(void) {
|
||||
int failures = 0;
|
||||
failures += unregister_or_log(kreuzberg_unregister_post_processor, "post-processor", "word-count");
|
||||
failures += unregister_or_log(kreuzberg_unregister_validator, "validator", "min-length-validator");
|
||||
failures += unregister_or_log(kreuzberg_unregister_ocr_backend, "OCR backend", "my-ocr");
|
||||
failures += unregister_or_log(kreuzberg_unregister_embedding_backend, "embedding backend", "my-embedder");
|
||||
return failures == 0 ? 0 : 1;
|
||||
}
|
||||
```
|
||||
92
docs/snippets/c/plugins/word_count_processor.md
Normal file
92
docs/snippets/c/plugins/word_count_processor.md
Normal file
@@ -0,0 +1,92 @@
|
||||
```c title="C"
|
||||
#include <kreuzberg.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
/*
 * Returns a malloc-allocated copy of the NUL-terminated string `s`.
 *
 * Returns NULL when `s` is NULL or when the allocation fails.
 */
static char *dup_cstr(const char *s) {
    if (s == NULL) {
        return NULL; /* strlen(NULL) would be undefined behaviour */
    }

    size_t size = strlen(s) + 1; /* include the terminating NUL */
    char *copy = malloc(size);
    if (copy != NULL) {
        memcpy(copy, s, size);
    }
    return copy;
}
|
||||
|
||||
/*
 * Vtable callback: counts whitespace-separated tokens in the serialised
 * result, prints the count to stdout, and returns 0.
 *
 * The `result` JSON string is read-only at this layer; for a real mutating
 * post-processor, decode the JSON, mutate, and serialise back via the
 * kreuzberg ExtractionResult helpers in your host language.
 */
static int32_t process_fn(
    const void *user_data,
    const char *result,
    const char *config,
    char **out_error
) {
    (void)out_error;
    (void)config;
    (void)user_data;

    size_t tokens = 0;
    int prev_is_space = 1; /* start of input behaves like preceding whitespace */

    for (const char *cur = result; *cur != '\0'; ++cur) {
        int is_space;

        switch (*cur) {
        case ' ':
        case '\t':
        case '\n':
        case '\r':
            is_space = 1;
            break;
        default:
            is_space = 0;
            break;
        }

        /* A token starts at every non-space byte that follows whitespace. */
        if (!is_space && prev_is_space) {
            tokens += 1;
        }
        prev_is_space = is_space;
    }

    printf("word-count: ~%zu tokens in serialised result\n", tokens);
    return 0;
}
|
||||
|
||||
/*
 * Vtable callback: stores a malloc-allocated copy of the JSON string
 * "Early" in `*out_result`. ProcessingStage is JSON-serialised; "Early"
 * maps to ProcessingStage::Early.
 *
 * Returns 0 on success. Returns 1 when the copy could not be allocated, so
 * a NULL result is never reported as success; non-zero is the same failure
 * convention the registration call in main() is checked against.
 */
static int32_t processing_stage_fn(
    const void *user_data,
    char **out_result
) {
    (void)user_data;

    *out_result = dup_cstr("\"Early\"");
    return *out_result != NULL ? 0 : 1;
}
|
||||
|
||||
/*
 * Vtable callback: skips results with empty content. Returns 0 when the
 * serialised result contains the exact text "content":"", and 1 otherwise.
 */
static int32_t should_process_fn(
    const void *user_data,
    const char *result,
    const char *config
) {
    const char *empty_content = strstr(result, "\"content\":\"\"");

    (void)config;
    (void)user_data;

    return empty_content ? 0 : 1;
}
|
||||
|
||||
/* Vtable callback: returns the constant priority 50; `user_data` is ignored. */
static int32_t priority_fn(const void *user_data) {
    const int32_t priority = 50;

    (void)user_data;
    return priority;
}
|
||||
|
||||
int main(void) {
|
||||
KREUZBERGKreuzbergPostProcessorVTable vtable = {0};
|
||||
vtable.process = process_fn;
|
||||
vtable.processing_stage = processing_stage_fn;
|
||||
vtable.should_process = should_process_fn;
|
||||
vtable.priority = priority_fn;
|
||||
|
||||
char *err = NULL;
|
||||
int32_t rc = kreuzberg_register_post_processor(
|
||||
"word-count",
|
||||
vtable,
|
||||
NULL,
|
||||
&err
|
||||
);
|
||||
if (rc != 0) {
|
||||
fprintf(stderr, "register post-processor failed: %s\n", err ? err : "(no detail)");
|
||||
kreuzberg_free_string(err);
|
||||
return 1;
|
||||
}
|
||||
|
||||
printf("word-count post-processor registered\n");
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
Reference in New Issue
Block a user