This commit is contained in:
33
docs/snippets/c/api/batch_extract_bytes_sync.md
Normal file
33
docs/snippets/c/api/batch_extract_bytes_sync.md
Normal file
@@ -0,0 +1,33 @@
|
||||
```c title="C"
|
||||
#include "kreuzberg.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
int main(void) {
|
||||
/* Items is a JSON array of BatchBytesItem objects.
|
||||
* Each entry has "content" (array of byte integers), "mime_type", and an optional "config". */
|
||||
const char *items_json =
|
||||
"["
|
||||
" {\"content\": [72,101,108,108,111,33], \"mime_type\": \"text/plain\"},"
|
||||
" {\"content\": [87,111,114,108,100,33], \"mime_type\": \"text/plain\"}"
|
||||
"]";
|
||||
|
||||
KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_default();
|
||||
|
||||
/* Returns a JSON array of ExtractionResult objects, or NULL on failure. */
|
||||
char *results_json =
|
||||
kreuzberg_batch_extract_bytes_sync(items_json, config);
|
||||
if (!results_json) {
|
||||
fprintf(stderr, "batch extraction failed (code %d): %s\n",
|
||||
kreuzberg_last_error_code(),
|
||||
kreuzberg_last_error_context());
|
||||
kreuzberg_extraction_config_free(config);
|
||||
return 1;
|
||||
}
|
||||
|
||||
printf("%s\n", results_json);
|
||||
kreuzberg_free_string(results_json);
|
||||
kreuzberg_extraction_config_free(config);
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
34
docs/snippets/c/api/batch_extract_files_sync.md
Normal file
34
docs/snippets/c/api/batch_extract_files_sync.md
Normal file
@@ -0,0 +1,34 @@
|
||||
```c title="C"
|
||||
#include "kreuzberg.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
int main(void) {
|
||||
/* Items is a JSON array of BatchFileItem objects.
|
||||
* Each entry has a "path" field and an optional "config" override. */
|
||||
const char *items_json =
|
||||
"["
|
||||
" {\"path\": \"doc1.pdf\"},"
|
||||
" {\"path\": \"doc2.docx\"},"
|
||||
" {\"path\": \"scan.png\", \"config\": {\"force_ocr\": true}}"
|
||||
"]";
|
||||
|
||||
KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_default();
|
||||
|
||||
/* Returns a JSON array of ExtractionResult objects, or NULL on failure. */
|
||||
char *results_json =
|
||||
kreuzberg_batch_extract_files_sync(items_json, config);
|
||||
if (!results_json) {
|
||||
fprintf(stderr, "batch extraction failed (code %d): %s\n",
|
||||
kreuzberg_last_error_code(),
|
||||
kreuzberg_last_error_context());
|
||||
kreuzberg_extraction_config_free(config);
|
||||
return 1;
|
||||
}
|
||||
|
||||
printf("%s\n", results_json);
|
||||
kreuzberg_free_string(results_json);
|
||||
kreuzberg_extraction_config_free(config);
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
73
docs/snippets/c/api/client_chunk_text.md
Normal file
73
docs/snippets/c/api/client_chunk_text.md
Normal file
@@ -0,0 +1,73 @@
|
||||
<!-- snippet:syntax-only -->
|
||||
|
||||
```c title="C"
|
||||
#include <curl/curl.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
/* Growable, NUL-terminated buffer that accumulates the HTTP response body. */
struct response_buffer {
    char *data;  /* heap storage, NULL until the first chunk arrives */
    size_t size; /* bytes stored, not counting the trailing '\0' */
};

/* Write callback (installed via CURLOPT_WRITEFUNCTION): appends one chunk of
 * size * nmemb bytes to the response_buffer passed as userp and keeps the
 * buffer NUL-terminated.
 *
 * Returns the number of bytes consumed. Returns 0 when the buffer cannot be
 * grown (allocation failure or size overflow); for a non-empty chunk that
 * short count is what tells the caller the write failed. The existing buffer
 * is left intact in that case. */
static size_t write_callback(void *contents, size_t size, size_t nmemb, void *userp) {
    const size_t max_size = (size_t)-1;
    struct response_buffer *buf = (struct response_buffer *)userp;

    /* Reject chunk sizes whose product would wrap around. */
    if (nmemb != 0 && size > max_size / nmemb) {
        return 0;
    }
    size_t total = size * nmemb;

    /* Reject growth where buf->size + total + 1 (for the terminator) wraps. */
    if (total >= max_size - buf->size) {
        return 0;
    }

    /* Grow through a temporary so the old buffer survives a failed realloc. */
    char *resized = realloc(buf->data, buf->size + total + 1);
    if (!resized) {
        return 0;
    }
    buf->data = resized;

    if (total != 0) {
        memcpy(buf->data + buf->size, contents, total);
    }
    buf->size += total;
    buf->data[buf->size] = '\0';
    return total;
}
|
||||
|
||||
int main(void) {
|
||||
curl_global_init(CURL_GLOBAL_DEFAULT);
|
||||
|
||||
CURL *curl = curl_easy_init();
|
||||
if (!curl) {
|
||||
fprintf(stderr, "curl_easy_init failed\n");
|
||||
curl_global_cleanup();
|
||||
return 1;
|
||||
}
|
||||
|
||||
const char *body =
|
||||
"{"
|
||||
"\"text\": \"Lorem ipsum dolor sit amet, consectetur adipiscing elit.\","
|
||||
"\"chunker_type\": \"character\","
|
||||
"\"config\": {\"max_characters\": 256, \"overlap\": 32, \"trim\": true}"
|
||||
"}";
|
||||
|
||||
struct curl_slist *headers = NULL;
|
||||
headers = curl_slist_append(headers, "Content-Type: application/json");
|
||||
headers = curl_slist_append(headers, "Accept: application/json");
|
||||
|
||||
struct response_buffer response = {NULL, 0};
|
||||
|
||||
curl_easy_setopt(curl, CURLOPT_URL, "http://localhost:8000/chunk");
|
||||
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
|
||||
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, body);
|
||||
curl_easy_setopt(curl, CURLOPT_POSTFIELDSIZE, (long)strlen(body));
|
||||
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_callback);
|
||||
curl_easy_setopt(curl, CURLOPT_WRITEDATA, &response);
|
||||
|
||||
CURLcode rc = curl_easy_perform(curl);
|
||||
if (rc != CURLE_OK) {
|
||||
fprintf(stderr, "request failed: %s\n", curl_easy_strerror(rc));
|
||||
} else {
|
||||
long status = 0;
|
||||
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &status);
|
||||
printf("HTTP %ld\n%s\n", status, response.data ? response.data : "(empty)");
|
||||
}
|
||||
|
||||
free(response.data);
|
||||
curl_slist_free_all(headers);
|
||||
curl_easy_cleanup(curl);
|
||||
curl_global_cleanup();
|
||||
return rc == CURLE_OK ? 0 : 1;
|
||||
}
|
||||
```
|
||||
65
docs/snippets/c/api/client_extract_single_file.md
Normal file
65
docs/snippets/c/api/client_extract_single_file.md
Normal file
@@ -0,0 +1,65 @@
|
||||
<!-- snippet:syntax-only -->
|
||||
|
||||
```c title="C"
|
||||
#include <curl/curl.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
/* Growable, NUL-terminated buffer that accumulates the HTTP response body. */
struct response_buffer {
    char *data;  /* heap storage, NULL until the first chunk arrives */
    size_t size; /* bytes stored, not counting the trailing '\0' */
};

/* Write callback (installed via CURLOPT_WRITEFUNCTION): appends one chunk of
 * size * nmemb bytes to the response_buffer passed as userp and keeps the
 * buffer NUL-terminated.
 *
 * Returns the number of bytes consumed. Returns 0 when the buffer cannot be
 * grown (allocation failure or size overflow); for a non-empty chunk that
 * short count is what tells the caller the write failed. The existing buffer
 * is left intact in that case. */
static size_t write_callback(void *contents, size_t size, size_t nmemb, void *userp) {
    const size_t max_size = (size_t)-1;
    struct response_buffer *buf = (struct response_buffer *)userp;

    /* Reject chunk sizes whose product would wrap around. */
    if (nmemb != 0 && size > max_size / nmemb) {
        return 0;
    }
    size_t total = size * nmemb;

    /* Reject growth where buf->size + total + 1 (for the terminator) wraps. */
    if (total >= max_size - buf->size) {
        return 0;
    }

    /* Grow through a temporary so the old buffer survives a failed realloc. */
    char *resized = realloc(buf->data, buf->size + total + 1);
    if (!resized) {
        return 0;
    }
    buf->data = resized;

    if (total != 0) {
        memcpy(buf->data + buf->size, contents, total);
    }
    buf->size += total;
    buf->data[buf->size] = '\0';
    return total;
}
|
||||
|
||||
int main(void) {
|
||||
curl_global_init(CURL_GLOBAL_DEFAULT);
|
||||
|
||||
CURL *curl = curl_easy_init();
|
||||
if (!curl) {
|
||||
fprintf(stderr, "curl_easy_init failed\n");
|
||||
curl_global_cleanup();
|
||||
return 1;
|
||||
}
|
||||
|
||||
struct response_buffer response = {NULL, 0};
|
||||
|
||||
curl_mime *form = curl_mime_init(curl);
|
||||
curl_mimepart *part = curl_mime_addpart(form);
|
||||
curl_mime_name(part, "file");
|
||||
curl_mime_filedata(part, "document.pdf");
|
||||
|
||||
curl_easy_setopt(curl, CURLOPT_URL, "http://localhost:8000/extract");
|
||||
curl_easy_setopt(curl, CURLOPT_MIMEPOST, form);
|
||||
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_callback);
|
||||
curl_easy_setopt(curl, CURLOPT_WRITEDATA, &response);
|
||||
|
||||
CURLcode rc = curl_easy_perform(curl);
|
||||
if (rc != CURLE_OK) {
|
||||
fprintf(stderr, "request failed: %s\n", curl_easy_strerror(rc));
|
||||
} else {
|
||||
long status = 0;
|
||||
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &status);
|
||||
printf("HTTP %ld\n%s\n", status, response.data ? response.data : "(empty)");
|
||||
}
|
||||
|
||||
free(response.data);
|
||||
curl_mime_free(form);
|
||||
curl_easy_cleanup(curl);
|
||||
curl_global_cleanup();
|
||||
return rc == CURLE_OK ? 0 : 1;
|
||||
}
|
||||
```
|
||||
44
docs/snippets/c/api/combining_all_features.md
Normal file
44
docs/snippets/c/api/combining_all_features.md
Normal file
@@ -0,0 +1,44 @@
|
||||
```c title="C"
|
||||
#include "kreuzberg.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
int main(void) {
|
||||
/* Combine chunking, OCR, image extraction, and Markdown output in one config. */
|
||||
const char *config_json =
|
||||
"{"
|
||||
"\"output_format\": \"markdown\","
|
||||
"\"force_ocr\": true,"
|
||||
"\"ocr\": {\"backend\": \"tesseract\", \"languages\": [\"eng\", \"deu\"]},"
|
||||
"\"chunking\": {\"chunker_type\": \"character\", \"max_characters\": 1024, \"overlap\": 128, \"trim\": true},"
|
||||
"\"images\": {\"extract_images\": true, \"target_dpi\": 300, \"inject_placeholders\": true}"
|
||||
"}";
|
||||
|
||||
KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_from_json(config_json);
|
||||
if (!config) {
|
||||
fprintf(stderr, "config parse failed (code %d): %s\n",
|
||||
kreuzberg_last_error_code(),
|
||||
kreuzberg_last_error_context());
|
||||
return 1;
|
||||
}
|
||||
|
||||
KREUZBERGExtractionResult *result =
|
||||
kreuzberg_extract_file("document.pdf", NULL, config);
|
||||
if (!result) {
|
||||
int32_t code = kreuzberg_last_error_code();
|
||||
const char *message = kreuzberg_last_error_context();
|
||||
fprintf(stderr, "extraction failed (code %d): %s\n",
|
||||
code, message ? message : "(no message)");
|
||||
kreuzberg_extraction_config_free(config);
|
||||
return code != 0 ? code : 1;
|
||||
}
|
||||
|
||||
char *content = kreuzberg_extraction_result_content(result);
|
||||
printf("%s\n", content ? content : "(empty)");
|
||||
kreuzberg_free_string(content);
|
||||
|
||||
kreuzberg_extraction_result_free(result);
|
||||
kreuzberg_extraction_config_free(config);
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
29
docs/snippets/c/api/error_handling.md
Normal file
29
docs/snippets/c/api/error_handling.md
Normal file
@@ -0,0 +1,29 @@
|
||||
```c title="C"
|
||||
#include "kreuzberg.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
int main(void) {
|
||||
KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_default();
|
||||
|
||||
/* Pass an unsupported MIME type to trigger an error. */
|
||||
KREUZBERGExtractionResult *result =
|
||||
kreuzberg_extract_bytes_sync(NULL, 0, "application/x-unknown", config);
|
||||
if (!result) {
|
||||
int32_t code = kreuzberg_last_error_code();
|
||||
const char *message = kreuzberg_last_error_context();
|
||||
/* message is valid until the next FFI call on this thread — copy if needed. */
|
||||
fprintf(stderr, "error %d: %s\n", code, message ? message : "(no message)");
|
||||
kreuzberg_extraction_config_free(config);
|
||||
return code != 0 ? code : 1;
|
||||
}
|
||||
|
||||
char *content = kreuzberg_extraction_result_content(result);
|
||||
printf("%s\n", content ? content : "(empty)");
|
||||
kreuzberg_free_string(content);
|
||||
|
||||
kreuzberg_extraction_result_free(result);
|
||||
kreuzberg_extraction_config_free(config);
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
47
docs/snippets/c/api/error_handling_extract.md
Normal file
47
docs/snippets/c/api/error_handling_extract.md
Normal file
@@ -0,0 +1,47 @@
|
||||
```c title="C"
|
||||
#include "kreuzberg.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
int main(void) {
|
||||
/* Mixed-validity batch: a real PDF, a missing file, and an unsupported type. */
|
||||
const char *items_json =
|
||||
"["
|
||||
" {\"path\": \"document.pdf\"},"
|
||||
" {\"path\": \"does-not-exist.pdf\"},"
|
||||
" {\"path\": \"archive.unknownext\"}"
|
||||
"]";
|
||||
|
||||
KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_default();
|
||||
if (!config) {
|
||||
fprintf(stderr, "config init failed (code %d): %s\n",
|
||||
kreuzberg_last_error_code(),
|
||||
kreuzberg_last_error_context());
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Returns a JSON array of ExtractionResult objects (one per input, in order),
|
||||
* or NULL on a system-level failure. Per-item errors are encoded inside
|
||||
* each result object's metadata (e.g. an "errors" array). */
|
||||
char *results_json = kreuzberg_batch_extract_files(items_json, config);
|
||||
if (!results_json) {
|
||||
int32_t code = kreuzberg_last_error_code();
|
||||
const char *message = kreuzberg_last_error_context();
|
||||
/* message is valid until the next FFI call on this thread — copy if needed. */
|
||||
fprintf(stderr, "batch extraction aborted (code %d): %s\n",
|
||||
code, message ? message : "(no message)");
|
||||
kreuzberg_extraction_config_free(config);
|
||||
return code != 0 ? code : 1;
|
||||
}
|
||||
|
||||
/* Walk the returned JSON. A real consumer would feed this to a JSON parser
|
||||
* and inspect each result's metadata.errors / content fields. */
|
||||
size_t len = strlen(results_json);
|
||||
printf("results (%zu bytes):\n%s\n", len, results_json);
|
||||
|
||||
kreuzberg_free_string(results_json);
|
||||
kreuzberg_extraction_config_free(config);
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
35
docs/snippets/c/api/extract_bytes_async.md
Normal file
35
docs/snippets/c/api/extract_bytes_async.md
Normal file
@@ -0,0 +1,35 @@
|
||||
```c title="C"
|
||||
#include "kreuzberg.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
/* kreuzberg_extract_bytes schedules work on the global Tokio runtime and
|
||||
* returns once extraction is complete. For true non-blocking use, call it
|
||||
* from a dedicated OS thread and synchronize via a semaphore or callback. */
|
||||
int main(void) {
|
||||
const char *text = "Hello, kreuzberg!";
|
||||
const uint8_t *bytes = (const uint8_t *)text;
|
||||
size_t len = strlen(text);
|
||||
|
||||
KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_default();
|
||||
|
||||
KREUZBERGExtractionResult *result =
|
||||
kreuzberg_extract_bytes(bytes, len, "text/plain", config);
|
||||
if (!result) {
|
||||
fprintf(stderr, "extraction failed (code %d): %s\n",
|
||||
kreuzberg_last_error_code(),
|
||||
kreuzberg_last_error_context());
|
||||
kreuzberg_extraction_config_free(config);
|
||||
return 1;
|
||||
}
|
||||
|
||||
char *content = kreuzberg_extraction_result_content(result);
|
||||
printf("%s\n", content ? content : "(empty)");
|
||||
kreuzberg_free_string(content);
|
||||
|
||||
kreuzberg_extraction_result_free(result);
|
||||
kreuzberg_extraction_config_free(config);
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
32
docs/snippets/c/api/extract_bytes_sync.md
Normal file
32
docs/snippets/c/api/extract_bytes_sync.md
Normal file
@@ -0,0 +1,32 @@
|
||||
```c title="C"
|
||||
#include "kreuzberg.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
int main(void) {
|
||||
const char *text = "Hello, kreuzberg!";
|
||||
const uint8_t *bytes = (const uint8_t *)text;
|
||||
size_t len = strlen(text);
|
||||
|
||||
KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_default();
|
||||
|
||||
KREUZBERGExtractionResult *result =
|
||||
kreuzberg_extract_bytes_sync(bytes, len, "text/plain", config);
|
||||
if (!result) {
|
||||
fprintf(stderr, "extraction failed (code %d): %s\n",
|
||||
kreuzberg_last_error_code(),
|
||||
kreuzberg_last_error_context());
|
||||
kreuzberg_extraction_config_free(config);
|
||||
return 1;
|
||||
}
|
||||
|
||||
char *content = kreuzberg_extraction_result_content(result);
|
||||
printf("%s\n", content ? content : "(empty)");
|
||||
kreuzberg_free_string(content);
|
||||
|
||||
kreuzberg_extraction_result_free(result);
|
||||
kreuzberg_extraction_config_free(config);
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
30
docs/snippets/c/api/extract_file_async.md
Normal file
30
docs/snippets/c/api/extract_file_async.md
Normal file
@@ -0,0 +1,30 @@
|
||||
```c title="C"
|
||||
#include "kreuzberg.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
/* kreuzberg_extract_file schedules work on the global Tokio runtime and
|
||||
* returns once extraction is complete. For true non-blocking use, call it
|
||||
* from a dedicated OS thread and synchronize via a semaphore or callback. */
|
||||
int main(void) {
|
||||
KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_default();
|
||||
|
||||
KREUZBERGExtractionResult *result =
|
||||
kreuzberg_extract_file("document.pdf", NULL, config);
|
||||
if (!result) {
|
||||
fprintf(stderr, "extraction failed (code %d): %s\n",
|
||||
kreuzberg_last_error_code(),
|
||||
kreuzberg_last_error_context());
|
||||
kreuzberg_extraction_config_free(config);
|
||||
return 1;
|
||||
}
|
||||
|
||||
char *content = kreuzberg_extraction_result_content(result);
|
||||
printf("%s\n", content ? content : "(empty)");
|
||||
kreuzberg_free_string(content);
|
||||
|
||||
kreuzberg_extraction_result_free(result);
|
||||
kreuzberg_extraction_config_free(config);
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
27
docs/snippets/c/api/extract_file_sync.md
Normal file
27
docs/snippets/c/api/extract_file_sync.md
Normal file
@@ -0,0 +1,27 @@
|
||||
```c title="C"
|
||||
#include "kreuzberg.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
int main(void) {
|
||||
KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_default();
|
||||
|
||||
KREUZBERGExtractionResult *result =
|
||||
kreuzberg_extract_file_sync("document.pdf", NULL, config);
|
||||
if (!result) {
|
||||
fprintf(stderr, "extraction failed (code %d): %s\n",
|
||||
kreuzberg_last_error_code(),
|
||||
kreuzberg_last_error_context());
|
||||
kreuzberg_extraction_config_free(config);
|
||||
return 1;
|
||||
}
|
||||
|
||||
char *content = kreuzberg_extraction_result_content(result);
|
||||
printf("%s\n", content ? content : "(empty)");
|
||||
kreuzberg_free_string(content);
|
||||
|
||||
kreuzberg_extraction_result_free(result);
|
||||
kreuzberg_extraction_config_free(config);
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
Reference in New Issue
Block a user