Nomad changes
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s

This commit is contained in:
Henrik Jess Nielsen
2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions

View File

@@ -0,0 +1,11 @@
```bash title="Bash"
# Start the API server with default settings.
# Default: http://127.0.0.1:8000
kreuzberg serve
# Custom host and port (-H selects the host, -p the port)
kreuzberg serve -H 0.0.0.0 -p 3000
# With configuration file (settings are read from kreuzberg.toml)
kreuzberg serve --config kreuzberg.toml
```

View File

@@ -0,0 +1,24 @@
```csharp title="C#"
using System;
using System.Diagnostics;
/// <summary>
/// Launches <c>kreuzberg serve -H 0.0.0.0 -p 8000</c> as a child process and
/// blocks until that process exits.
/// </summary>
class ApiServer
{
    static void Main()
    {
        var processInfo = new ProcessStartInfo
        {
            FileName = "kreuzberg",
            Arguments = "serve -H 0.0.0.0 -p 8000",
            UseShellExecute = false,
            // The child's stdout/stderr are left attached to this process's
            // console. Redirecting them without ever reading the streams can
            // block a long-running, logging child once the pipe buffer fills.
            RedirectStandardOutput = false,
            RedirectStandardError = false
        };

        // Process.Start may return null; in that case there is nothing to wait for.
        using (var process = Process.Start(processInfo))
        {
            process?.WaitForExit();
        }
    }
}
```

View File

@@ -0,0 +1,8 @@
```bash title="Bash"
# Run server on port 8000
docker run -d \
  -p 8000:8000 \
  ghcr.io/kreuzberg-dev/kreuzberg:latest \
  serve -H 0.0.0.0 -p 8000
# With environment variables
docker run -d \
  -e KREUZBERG_CORS_ORIGINS="https://myapp.com" \
  -e KREUZBERG_MAX_MULTIPART_FIELD_BYTES=209715200 \
  -p 8000:8000 \
  ghcr.io/kreuzberg-dev/kreuzberg:latest \
  serve -H 0.0.0.0 -p 8000
```

View File

@@ -0,0 +1,18 @@
```go title="Go"
package main
import (
"log"
"os/exec"
)
// main runs `kreuzberg serve -H 0.0.0.0 -p 8000` as a child process, forwards
// its stdout and stderr to the standard logger's writer, and waits for it to
// finish. Any error from running the command is fatal.
func main() {
	serverArgs := []string{"serve", "-H", "0.0.0.0", "-p", "8000"}
	server := exec.Command("kreuzberg", serverArgs...)

	// Both output streams share the logger's destination.
	logSink := log.Writer()
	server.Stdout = logSink
	server.Stderr = logSink

	err := server.Run()
	if err != nil {
		log.Fatalf("failed to start server: %v", err)
	}
}
```

View File

@@ -0,0 +1,19 @@
```java title="Java"
import java.io.IOException;
/**
 * Launches {@code kreuzberg serve -H 0.0.0.0 -p 8000} as a child process that
 * inherits this JVM's standard streams, then waits for it to exit.
 */
public class ApiServer {
    public static void main(String[] args) {
        ProcessBuilder pb = new ProcessBuilder(
            "kreuzberg", "serve", "-H", "0.0.0.0", "-p", "8000"
        );
        pb.inheritIO();
        try {
            Process process = pb.start();
            process.waitFor();
        } catch (IOException e) {
            System.err.println("Failed to start server: " + e.getMessage());
        } catch (InterruptedException e) {
            // Restore the interrupt flag so code further up the stack can
            // still observe that this thread was interrupted.
            Thread.currentThread().interrupt();
            System.err.println("Failed to start server: " + e.getMessage());
        }
    }
}
```

View File

@@ -0,0 +1,6 @@
```python title="Python"
# Start the server as a background child process (Popen does not wait for it).
import subprocess
import sys

# Use the interpreter running this script rather than whatever "python"
# resolves to on PATH, so the child sees the same installed packages.
subprocess.Popen([sys.executable, "-m", "kreuzberg", "serve", "-H", "0.0.0.0", "-p", "8000"])
```

View File

@@ -0,0 +1,11 @@
```rust title="Rust"
use kreuzberg::{ExtractionConfig, api::serve_with_config};
// Entry point: obtains a configuration via `ExtractionConfig::discover` and
// serves the API on 0.0.0.0:8000 with it. Errors from either step are
// propagated to the caller with `?`.
#[tokio::main]
async fn main() -> kreuzberg::Result<()> {
    let discovered = ExtractionConfig::discover()?;
    serve_with_config("0.0.0.0", 8000, discovered).await?;
    Ok(())
}
```

View File

@@ -0,0 +1,50 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
#include <stdlib.h>
/*
 * Extract "document.pdf" with character chunking and page extraction enabled,
 * then print the chunks and the pages as JSON.
 *
 * Returns 0 on success, 1 if the config cannot be parsed or extraction fails.
 */
int main(void) {
    const char *config_json =
        "{"
        "\"chunking\": {"
        "\"chunker_type\": \"character\","
        "\"max_characters\": 500,"
        "\"overlap\": 50"
        "},"
        "\"pages\": {"
        "\"extract_pages\": true"
        "}"
        "}";

    KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_from_json(config_json);
    if (!config) {
        /* Read code, then message, in separate statements (argument evaluation
         * order is unspecified) and never pass a NULL message to %s. */
        const int code = kreuzberg_last_error_code();
        const char *message = kreuzberg_last_error_context();
        fprintf(stderr, "config parse failed (code %d): %s\n",
                code, message ? message : "(no message)");
        return 1;
    }

    KREUZBERGExtractionResult *result =
        kreuzberg_extract_file_sync("document.pdf", NULL, config);
    if (!result) {
        const int code = kreuzberg_last_error_code();
        const char *message = kreuzberg_last_error_context();
        fprintf(stderr, "extraction failed (code %d): %s\n",
                code, message ? message : "(no message)");
        kreuzberg_extraction_config_free(config);
        return 1;
    }

    /* Each accessor returns a string that is released with kreuzberg_free_string. */
    char *chunks_json = kreuzberg_extraction_result_chunks(result);
    printf("chunks (JSON, includes per-chunk first_page/last_page metadata):\n%s\n",
           chunks_json ? chunks_json : "[]");
    kreuzberg_free_string(chunks_json);

    char *pages_json = kreuzberg_extraction_result_pages(result);
    printf("pages (JSON): %s\n", pages_json ? pages_json : "[]");
    kreuzberg_free_string(pages_json);

    kreuzberg_extraction_result_free(result);
    kreuzberg_extraction_config_free(config);
    return 0;
}
```

View File

@@ -0,0 +1,43 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
#include <stdlib.h>
/*
 * Extract "document.pdf" using the "markdown" chunker with heading context
 * prepended, then print the resulting chunks as JSON.
 *
 * Returns 0 on success, 1 if the config cannot be parsed or extraction fails.
 */
int main(void) {
    const char *config_json =
        "{"
        "\"chunking\": {"
        "\"chunker_type\": \"markdown\","
        "\"max_characters\": 500,"
        "\"overlap\": 50,"
        "\"prepend_heading_context\": true"
        "}"
        "}";

    KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_from_json(config_json);
    if (!config) {
        /* Read code, then message, in separate statements (argument evaluation
         * order is unspecified) and never pass a NULL message to %s. */
        const int code = kreuzberg_last_error_code();
        const char *message = kreuzberg_last_error_context();
        fprintf(stderr, "config parse failed (code %d): %s\n",
                code, message ? message : "(no message)");
        return 1;
    }

    KREUZBERGExtractionResult *result =
        kreuzberg_extract_file_sync("document.pdf", NULL, config);
    if (!result) {
        const int code = kreuzberg_last_error_code();
        const char *message = kreuzberg_last_error_context();
        fprintf(stderr, "extraction failed (code %d): %s\n",
                code, message ? message : "(no message)");
        kreuzberg_extraction_config_free(config);
        return 1;
    }

    /* The chunks string is released with kreuzberg_free_string once printed. */
    char *chunks_json = kreuzberg_extraction_result_chunks(result);
    printf("chunks (JSON): %s\n", chunks_json ? chunks_json : "[]");
    kreuzberg_free_string(chunks_json);

    kreuzberg_extraction_result_free(result);
    kreuzberg_extraction_config_free(config);
    return 0;
}
```

View File

@@ -0,0 +1,47 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
#include <stdlib.h>
/*
 * Extract "research_paper.pdf" with character chunking and an embedding
 * section (preset "balanced", normalize on), then print the chunks as JSON.
 *
 * Returns 0 on success, 1 if the config cannot be parsed or extraction fails.
 */
int main(void) {
    const char *config_json =
        "{"
        "\"chunking\": {"
        "\"chunker_type\": \"character\","
        "\"max_characters\": 500,"
        "\"overlap\": 50,"
        "\"embedding\": {"
        "\"model\": {\"preset\": {\"name\": \"balanced\"}},"
        "\"normalize\": true"
        "}"
        "}"
        "}";

    KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_from_json(config_json);
    if (!config) {
        /* Read code, then message, in separate statements (argument evaluation
         * order is unspecified) and never pass a NULL message to %s. */
        const int code = kreuzberg_last_error_code();
        const char *message = kreuzberg_last_error_context();
        fprintf(stderr, "config parse failed (code %d): %s\n",
                code, message ? message : "(no message)");
        return 1;
    }

    KREUZBERGExtractionResult *result =
        kreuzberg_extract_file_sync("research_paper.pdf", NULL, config);
    if (!result) {
        const int code = kreuzberg_last_error_code();
        const char *message = kreuzberg_last_error_context();
        fprintf(stderr, "extraction failed (code %d): %s\n",
                code, message ? message : "(no message)");
        kreuzberg_extraction_config_free(config);
        return 1;
    }

    /* The chunks string is released with kreuzberg_free_string once printed. */
    char *chunks_json = kreuzberg_extraction_result_chunks(result);
    printf("chunks (JSON, each item includes content, embedding, and metadata.chunk_index/total_chunks/byte_start/byte_end):\n%s\n",
           chunks_json ? chunks_json : "[]");
    kreuzberg_free_string(chunks_json);

    kreuzberg_extraction_result_free(result);
    kreuzberg_extraction_config_free(config);
    return 0;
}
```

View File

@@ -0,0 +1,48 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
#include <stdlib.h>
/*
 * Extract "document.pdf" with character chunking and an embedding section
 * that also sets batch_size and show_download_progress, then print the
 * chunks as JSON.
 *
 * Returns 0 on success, 1 if the config cannot be parsed or extraction fails.
 */
int main(void) {
    const char *config_json =
        "{"
        "\"chunking\": {"
        "\"chunker_type\": \"character\","
        "\"max_characters\": 1024,"
        "\"overlap\": 100,"
        "\"embedding\": {"
        "\"model\": {\"preset\": {\"name\": \"balanced\"}},"
        "\"normalize\": true,"
        "\"batch_size\": 32,"
        "\"show_download_progress\": false"
        "}"
        "}"
        "}";

    KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_from_json(config_json);
    if (!config) {
        /* Read code, then message, in separate statements (argument evaluation
         * order is unspecified) and never pass a NULL message to %s. */
        const int code = kreuzberg_last_error_code();
        const char *message = kreuzberg_last_error_context();
        fprintf(stderr, "config parse failed (code %d): %s\n",
                code, message ? message : "(no message)");
        return 1;
    }

    KREUZBERGExtractionResult *result =
        kreuzberg_extract_file_sync("document.pdf", NULL, config);
    if (!result) {
        const int code = kreuzberg_last_error_code();
        const char *message = kreuzberg_last_error_context();
        fprintf(stderr, "extraction failed (code %d): %s\n",
                code, message ? message : "(no message)");
        kreuzberg_extraction_config_free(config);
        return 1;
    }

    /* The chunks string is released with kreuzberg_free_string once printed. */
    char *chunks_json = kreuzberg_extraction_result_chunks(result);
    printf("chunks with embeddings (JSON):\n%s\n", chunks_json ? chunks_json : "[]");
    kreuzberg_free_string(chunks_json);

    kreuzberg_extraction_result_free(result);
    kreuzberg_extraction_config_free(config);
    return 0;
}
```

View File

@@ -0,0 +1,44 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
#include <stdlib.h>
/*
 * Extract "document.pdf" with a "keywords" config section (algorithm "yake",
 * score threshold, n-gram range, language) and print the extracted keywords
 * as JSON.
 *
 * Returns 0 on success, 1 if the config cannot be parsed or extraction fails.
 */
int main(void) {
    const char *config_json =
        "{"
        "\"keywords\": {"
        "\"algorithm\": \"yake\","
        "\"max_keywords\": 10,"
        "\"min_score\": 0.3,"
        "\"ngram_range\": [1, 3],"
        "\"language\": \"en\""
        "}"
        "}";

    KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_from_json(config_json);
    if (!config) {
        /* Read code, then message, in separate statements (argument evaluation
         * order is unspecified) and never pass a NULL message to %s. */
        const int code = kreuzberg_last_error_code();
        const char *message = kreuzberg_last_error_context();
        fprintf(stderr, "config parse failed (code %d): %s\n",
                code, message ? message : "(no message)");
        return 1;
    }

    KREUZBERGExtractionResult *result =
        kreuzberg_extract_file_sync("document.pdf", NULL, config);
    if (!result) {
        const int code = kreuzberg_last_error_code();
        const char *message = kreuzberg_last_error_context();
        fprintf(stderr, "extraction failed (code %d): %s\n",
                code, message ? message : "(no message)");
        kreuzberg_extraction_config_free(config);
        return 1;
    }

    /* The keywords string is released with kreuzberg_free_string once printed. */
    char *keywords_json = kreuzberg_extraction_result_extracted_keywords(result);
    printf("keywords (JSON): %s\n", keywords_json ? keywords_json : "[]");
    kreuzberg_free_string(keywords_json);

    kreuzberg_extraction_result_free(result);
    kreuzberg_extraction_config_free(config);
    return 0;
}
```

View File

@@ -0,0 +1,46 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
#include <stdlib.h>
/*
 * Extract "research_paper.pdf" with a "keywords" config section and print
 * the extracted keywords, or "(none)" when the accessor returns NULL.
 *
 * Returns 0 on success, 1 if the config cannot be parsed or extraction fails.
 */
int main(void) {
    const char *config_json =
        "{"
        "\"keywords\": {"
        "\"algorithm\": \"yake\","
        "\"max_keywords\": 10,"
        "\"min_score\": 0.3"
        "}"
        "}";

    KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_from_json(config_json);
    if (!config) {
        /* Read code, then message, in separate statements (argument evaluation
         * order is unspecified) and never pass a NULL message to %s. */
        const int code = kreuzberg_last_error_code();
        const char *message = kreuzberg_last_error_context();
        fprintf(stderr, "config parse failed (code %d): %s\n",
                code, message ? message : "(no message)");
        return 1;
    }

    KREUZBERGExtractionResult *result =
        kreuzberg_extract_file_sync("research_paper.pdf", NULL, config);
    if (!result) {
        const int code = kreuzberg_last_error_code();
        const char *message = kreuzberg_last_error_context();
        fprintf(stderr, "extraction failed (code %d): %s\n",
                code, message ? message : "(no message)");
        kreuzberg_extraction_config_free(config);
        return 1;
    }

    char *keywords_json = kreuzberg_extraction_result_extracted_keywords(result);
    if (keywords_json) {
        printf("Keywords: %s\n", keywords_json);
        kreuzberg_free_string(keywords_json);
    } else {
        printf("Keywords: (none)\n");
    }

    kreuzberg_extraction_result_free(result);
    kreuzberg_extraction_config_free(config);
    return 0;
}
```

View File

@@ -0,0 +1,42 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
#include <stdlib.h>
/*
 * Extract "document.pdf" with language detection enabled
 * (detect_multiple = false) and print the detected languages as JSON.
 *
 * Returns 0 on success, 1 if the config cannot be parsed or extraction fails.
 */
int main(void) {
    const char *config_json =
        "{"
        "\"language_detection\": {"
        "\"enabled\": true,"
        "\"min_confidence\": 0.8,"
        "\"detect_multiple\": false"
        "}"
        "}";

    KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_from_json(config_json);
    if (!config) {
        /* Read code, then message, in separate statements (argument evaluation
         * order is unspecified) and never pass a NULL message to %s. */
        const int code = kreuzberg_last_error_code();
        const char *message = kreuzberg_last_error_context();
        fprintf(stderr, "config parse failed (code %d): %s\n",
                code, message ? message : "(no message)");
        return 1;
    }

    KREUZBERGExtractionResult *result =
        kreuzberg_extract_file_sync("document.pdf", NULL, config);
    if (!result) {
        const int code = kreuzberg_last_error_code();
        const char *message = kreuzberg_last_error_context();
        fprintf(stderr, "extraction failed (code %d): %s\n",
                code, message ? message : "(no message)");
        kreuzberg_extraction_config_free(config);
        return 1;
    }

    /* The languages string is released with kreuzberg_free_string once printed. */
    char *langs_json = kreuzberg_extraction_result_detected_languages(result);
    printf("detected languages (JSON): %s\n", langs_json ? langs_json : "[]");
    kreuzberg_free_string(langs_json);

    kreuzberg_extraction_result_free(result);
    kreuzberg_extraction_config_free(config);
    return 0;
}
```

View File

@@ -0,0 +1,42 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
#include <stdlib.h>
/*
 * Extract "multilingual_document.pdf" with language detection enabled
 * (detect_multiple = true) and print the detected languages as JSON.
 *
 * Returns 0 on success, 1 if the config cannot be parsed or extraction fails.
 */
int main(void) {
    const char *config_json =
        "{"
        "\"language_detection\": {"
        "\"enabled\": true,"
        "\"min_confidence\": 0.8,"
        "\"detect_multiple\": true"
        "}"
        "}";

    KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_from_json(config_json);
    if (!config) {
        /* Read code, then message, in separate statements (argument evaluation
         * order is unspecified) and never pass a NULL message to %s. */
        const int code = kreuzberg_last_error_code();
        const char *message = kreuzberg_last_error_context();
        fprintf(stderr, "config parse failed (code %d): %s\n",
                code, message ? message : "(no message)");
        return 1;
    }

    KREUZBERGExtractionResult *result =
        kreuzberg_extract_file_sync("multilingual_document.pdf", NULL, config);
    if (!result) {
        const int code = kreuzberg_last_error_code();
        const char *message = kreuzberg_last_error_context();
        fprintf(stderr, "extraction failed (code %d): %s\n",
                code, message ? message : "(no message)");
        kreuzberg_extraction_config_free(config);
        return 1;
    }

    /* The languages string is released with kreuzberg_free_string once printed. */
    char *langs_json = kreuzberg_extraction_result_detected_languages(result);
    printf("Detected languages: %s\n", langs_json ? langs_json : "[]");
    kreuzberg_free_string(langs_json);

    kreuzberg_extraction_result_free(result);
    kreuzberg_extraction_config_free(config);
    return 0;
}
```

View File

@@ -0,0 +1,37 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
#include <stdlib.h>
/*
 * Extract "document.pdf" with enable_quality_processing set and print the
 * result's quality score.
 *
 * Returns 0 on success, 1 if the config cannot be parsed or extraction fails.
 */
int main(void) {
    const char *config_json =
        "{"
        "\"enable_quality_processing\": true"
        "}";

    KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_from_json(config_json);
    if (!config) {
        /* Read code, then message, in separate statements (argument evaluation
         * order is unspecified) and never pass a NULL message to %s. */
        const int code = kreuzberg_last_error_code();
        const char *message = kreuzberg_last_error_context();
        fprintf(stderr, "config parse failed (code %d): %s\n",
                code, message ? message : "(no message)");
        return 1;
    }

    KREUZBERGExtractionResult *result =
        kreuzberg_extract_file_sync("document.pdf", NULL, config);
    if (!result) {
        const int code = kreuzberg_last_error_code();
        const char *message = kreuzberg_last_error_context();
        fprintf(stderr, "extraction failed (code %d): %s\n",
                code, message ? message : "(no message)");
        kreuzberg_extraction_config_free(config);
        return 1;
    }

    double score = kreuzberg_extraction_result_quality_score(result);
    printf("quality score: %.2f\n", score);

    kreuzberg_extraction_result_free(result);
    kreuzberg_extraction_config_free(config);
    return 0;
}
```

View File

@@ -0,0 +1,45 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
#include <stdlib.h>
/*
 * Extract "scanned_document.pdf" with enable_quality_processing set, warn
 * when the quality score is below 0.5, and print the processing warnings as
 * JSON.
 *
 * Returns 0 on success, 1 if the config cannot be parsed or extraction fails.
 */
int main(void) {
    const char *config_json =
        "{"
        "\"enable_quality_processing\": true"
        "}";

    KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_from_json(config_json);
    if (!config) {
        /* Read code, then message, in separate statements (argument evaluation
         * order is unspecified) and never pass a NULL message to %s. */
        const int code = kreuzberg_last_error_code();
        const char *message = kreuzberg_last_error_context();
        fprintf(stderr, "config parse failed (code %d): %s\n",
                code, message ? message : "(no message)");
        return 1;
    }

    KREUZBERGExtractionResult *result =
        kreuzberg_extract_file_sync("scanned_document.pdf", NULL, config);
    if (!result) {
        const int code = kreuzberg_last_error_code();
        const char *message = kreuzberg_last_error_context();
        fprintf(stderr, "extraction failed (code %d): %s\n",
                code, message ? message : "(no message)");
        kreuzberg_extraction_config_free(config);
        return 1;
    }

    double score = kreuzberg_extraction_result_quality_score(result);
    if (score < 0.5) {
        printf("Warning: Low quality extraction (%.2f)\n", score);
    } else {
        printf("Quality score: %.2f\n", score);
    }

    /* The warnings string is released with kreuzberg_free_string once printed. */
    char *warnings_json = kreuzberg_extraction_result_processing_warnings(result);
    printf("processing warnings (JSON): %s\n", warnings_json ? warnings_json : "[]");
    kreuzberg_free_string(warnings_json);

    kreuzberg_extraction_result_free(result);
    kreuzberg_extraction_config_free(config);
    return 0;
}
```

View File

@@ -0,0 +1,41 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
#include <stdlib.h>
/*
 * Extract "document.pdf" with a "token_reduction" config section
 * (mode "moderate") and print the resulting content.
 *
 * Returns 0 on success, 1 if the config cannot be parsed or extraction fails.
 */
int main(void) {
    const char *config_json =
        "{"
        "\"token_reduction\": {"
        "\"mode\": \"moderate\","
        "\"preserve_important_words\": true"
        "}"
        "}";

    KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_from_json(config_json);
    if (!config) {
        /* Read code, then message, in separate statements (argument evaluation
         * order is unspecified) and never pass a NULL message to %s. */
        const int code = kreuzberg_last_error_code();
        const char *message = kreuzberg_last_error_context();
        fprintf(stderr, "config parse failed (code %d): %s\n",
                code, message ? message : "(no message)");
        return 1;
    }

    KREUZBERGExtractionResult *result =
        kreuzberg_extract_file_sync("document.pdf", NULL, config);
    if (!result) {
        const int code = kreuzberg_last_error_code();
        const char *message = kreuzberg_last_error_context();
        fprintf(stderr, "extraction failed (code %d): %s\n",
                code, message ? message : "(no message)");
        kreuzberg_extraction_config_free(config);
        return 1;
    }

    /* The content string is released with kreuzberg_free_string once printed. */
    char *content = kreuzberg_extraction_result_content(result);
    printf("reduced content:\n%s\n", content ? content : "(empty)");
    kreuzberg_free_string(content);

    kreuzberg_extraction_result_free(result);
    kreuzberg_extraction_config_free(config);
    return 0;
}
```

View File

@@ -0,0 +1,44 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Extract "verbose_document.pdf" with a "token_reduction" config section and
 * print the resulting content together with its length in bytes. Nothing is
 * printed when the content accessor returns NULL.
 *
 * Returns 0 on success, 1 if the config cannot be parsed or extraction fails.
 */
int main(void) {
    const char *config_json =
        "{"
        "\"token_reduction\": {"
        "\"mode\": \"moderate\","
        "\"preserve_important_words\": true"
        "}"
        "}";

    KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_from_json(config_json);
    if (!config) {
        /* Read code, then message, in separate statements (argument evaluation
         * order is unspecified) and never pass a NULL message to %s. */
        const int code = kreuzberg_last_error_code();
        const char *message = kreuzberg_last_error_context();
        fprintf(stderr, "config parse failed (code %d): %s\n",
                code, message ? message : "(no message)");
        return 1;
    }

    KREUZBERGExtractionResult *result =
        kreuzberg_extract_file_sync("verbose_document.pdf", NULL, config);
    if (!result) {
        const int code = kreuzberg_last_error_code();
        const char *message = kreuzberg_last_error_context();
        fprintf(stderr, "extraction failed (code %d): %s\n",
                code, message ? message : "(no message)");
        kreuzberg_extraction_config_free(config);
        return 1;
    }

    char *content = kreuzberg_extraction_result_content(result);
    if (content) {
        printf("reduced content (%zu bytes):\n%s\n", strlen(content), content);
        kreuzberg_free_string(content);
    }

    kreuzberg_extraction_result_free(result);
    kreuzberg_extraction_config_free(config);
    return 0;
}
```

View File

@@ -0,0 +1,55 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
#include <stdlib.h>
/*
 * Extract a document with chunking and embedding configured, then print the
 * document id and the chunks JSON that a caller would upsert into a vector
 * database.
 *
 * Returns 0 on success, 1 if the config cannot be parsed or extraction fails.
 */
int main(void) {
    const char *document_path = "document.pdf";
    const char *document_id = "doc-001";
    const char *config_json =
        "{"
        "\"chunking\": {"
        "\"chunker_type\": \"character\","
        "\"max_characters\": 512,"
        "\"overlap\": 50,"
        "\"embedding\": {"
        "\"model\": {\"preset\": {\"name\": \"balanced\"}},"
        "\"normalize\": true,"
        "\"batch_size\": 32"
        "}"
        "}"
        "}";

    KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_from_json(config_json);
    if (!config) {
        /* Read code, then message, in separate statements (argument evaluation
         * order is unspecified) and never pass a NULL message to %s. */
        const int code = kreuzberg_last_error_code();
        const char *message = kreuzberg_last_error_context();
        fprintf(stderr, "config parse failed (code %d): %s\n",
                code, message ? message : "(no message)");
        return 1;
    }

    KREUZBERGExtractionResult *result =
        kreuzberg_extract_file_sync(document_path, NULL, config);
    if (!result) {
        const int code = kreuzberg_last_error_code();
        const char *message = kreuzberg_last_error_context();
        fprintf(stderr, "extraction failed (code %d): %s\n",
                code, message ? message : "(no message)");
        kreuzberg_extraction_config_free(config);
        return 1;
    }

    /* The chunks JSON array carries content + embedding + metadata for each
       chunk. Pass this directly to your vector database client (pgvector,
       Qdrant, Pinecone, etc.) along with the document_id as a metadata field. */
    char *chunks_json = kreuzberg_extraction_result_chunks(result);
    printf("document_id: %s\n", document_id);
    printf("chunks (JSON, ready to upsert into a vector DB):\n%s\n",
           chunks_json ? chunks_json : "[]");
    kreuzberg_free_string(chunks_json);

    kreuzberg_extraction_result_free(result);
    kreuzberg_extraction_config_free(config);
    return 0;
}
```

View File

@@ -0,0 +1,33 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
#include <stdlib.h>
/*
 * Batch-extract two in-memory text items with the default config and print
 * the returned JSON.
 *
 * Returns 0 on success, 1 if the config cannot be created or the batch call
 * fails.
 */
int main(void) {
    /* Items is a JSON array of BatchBytesItem objects.
     * Each entry has "content" (array of byte integers), "mime_type", and an optional "config". */
    const char *items_json =
        "["
        "  {\"content\": [72,101,108,108,111,33], \"mime_type\": \"text/plain\"},"
        "  {\"content\": [87,111,114,108,100,33], \"mime_type\": \"text/plain\"}"
        "]";

    KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_default();
    if (!config) {
        /* Read code, then message, in separate statements (argument evaluation
         * order is unspecified) and never pass a NULL message to %s. */
        const int code = kreuzberg_last_error_code();
        const char *message = kreuzberg_last_error_context();
        fprintf(stderr, "config init failed (code %d): %s\n",
                code, message ? message : "(no message)");
        return 1;
    }

    /* Returns a JSON array of ExtractionResult objects, or NULL on failure. */
    char *results_json =
        kreuzberg_batch_extract_bytes_sync(items_json, config);
    if (!results_json) {
        const int code = kreuzberg_last_error_code();
        const char *message = kreuzberg_last_error_context();
        fprintf(stderr, "batch extraction failed (code %d): %s\n",
                code, message ? message : "(no message)");
        kreuzberg_extraction_config_free(config);
        return 1;
    }

    printf("%s\n", results_json);
    kreuzberg_free_string(results_json);
    kreuzberg_extraction_config_free(config);
    return 0;
}
```

View File

@@ -0,0 +1,34 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
#include <stdlib.h>
/*
 * Batch-extract three files (one with a per-item config override) using the
 * default config and print the returned JSON.
 *
 * Returns 0 on success, 1 if the config cannot be created or the batch call
 * fails.
 */
int main(void) {
    /* Items is a JSON array of BatchFileItem objects.
     * Each entry has a "path" field and an optional "config" override. */
    const char *items_json =
        "["
        "  {\"path\": \"doc1.pdf\"},"
        "  {\"path\": \"doc2.docx\"},"
        "  {\"path\": \"scan.png\", \"config\": {\"force_ocr\": true}}"
        "]";

    KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_default();
    if (!config) {
        /* Read code, then message, in separate statements (argument evaluation
         * order is unspecified) and never pass a NULL message to %s. */
        const int code = kreuzberg_last_error_code();
        const char *message = kreuzberg_last_error_context();
        fprintf(stderr, "config init failed (code %d): %s\n",
                code, message ? message : "(no message)");
        return 1;
    }

    /* Returns a JSON array of ExtractionResult objects, or NULL on failure. */
    char *results_json =
        kreuzberg_batch_extract_files_sync(items_json, config);
    if (!results_json) {
        const int code = kreuzberg_last_error_code();
        const char *message = kreuzberg_last_error_context();
        fprintf(stderr, "batch extraction failed (code %d): %s\n",
                code, message ? message : "(no message)");
        kreuzberg_extraction_config_free(config);
        return 1;
    }

    printf("%s\n", results_json);
    kreuzberg_free_string(results_json);
    kreuzberg_extraction_config_free(config);
    return 0;
}
```

View File

@@ -0,0 +1,73 @@
<!-- snippet:syntax-only -->
```c title="C"
#include <curl/curl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Growable, NUL-terminated accumulator for a response body. */
struct response_buffer {
    char *data;   /* heap storage, grown with realloc */
    size_t size;  /* bytes stored, not counting the trailing NUL */
};

/*
 * Write callback: append the size * nmemb bytes at `contents` to the
 * response_buffer passed through `userp`, keeping it NUL-terminated.
 *
 * Returns the number of bytes consumed, or 0 when the buffer cannot grow
 * (the existing contents are left untouched in that case).
 */
static size_t write_callback(void *contents, size_t size, size_t nmemb, void *userp) {
    struct response_buffer *sink = userp;
    const size_t chunk_len = size * nmemb;
    const size_t new_len = sink->size + chunk_len;

    /* One extra byte for the terminator. */
    char *grown = realloc(sink->data, new_len + 1);
    if (grown == NULL) {
        return 0;
    }

    memcpy(grown + sink->size, contents, chunk_len);
    grown[new_len] = '\0';
    sink->data = grown;
    sink->size = new_len;
    return chunk_len;
}
/*
 * POST a JSON chunking request to http://localhost:8000/chunk and print the
 * HTTP status and the response body.
 *
 * Returns 0 when the transfer succeeds, 1 on any libcurl failure.
 */
int main(void) {
    if (curl_global_init(CURL_GLOBAL_DEFAULT) != CURLE_OK) {
        fprintf(stderr, "curl_global_init failed\n");
        return 1;
    }
    CURL *curl = curl_easy_init();
    if (!curl) {
        fprintf(stderr, "curl_easy_init failed\n");
        curl_global_cleanup();
        return 1;
    }

    const char *body =
        "{"
        "\"text\": \"Lorem ipsum dolor sit amet, consectetur adipiscing elit.\","
        "\"chunker_type\": \"character\","
        "\"config\": {\"max_characters\": 256, \"overlap\": 32, \"trim\": true}"
        "}";

    static const char *const header_lines[] = {
        "Content-Type: application/json",
        "Accept: application/json",
    };
    struct curl_slist *headers = NULL;
    for (size_t i = 0; i < sizeof header_lines / sizeof header_lines[0]; i++) {
        /* curl_slist_append returns NULL on failure; keep the old list in a
         * separate pointer so it can still be freed instead of leaked. */
        struct curl_slist *grown = curl_slist_append(headers, header_lines[i]);
        if (!grown) {
            fprintf(stderr, "curl_slist_append failed\n");
            curl_slist_free_all(headers);
            curl_easy_cleanup(curl);
            curl_global_cleanup();
            return 1;
        }
        headers = grown;
    }

    struct response_buffer response = {NULL, 0};
    curl_easy_setopt(curl, CURLOPT_URL, "http://localhost:8000/chunk");
    curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
    curl_easy_setopt(curl, CURLOPT_POSTFIELDS, body);
    curl_easy_setopt(curl, CURLOPT_POSTFIELDSIZE, (long)strlen(body));
    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_callback);
    curl_easy_setopt(curl, CURLOPT_WRITEDATA, &response);

    CURLcode rc = curl_easy_perform(curl);
    if (rc != CURLE_OK) {
        fprintf(stderr, "request failed: %s\n", curl_easy_strerror(rc));
    } else {
        long status = 0;
        curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &status);
        printf("HTTP %ld\n%s\n", status, response.data ? response.data : "(empty)");
    }

    free(response.data);
    curl_slist_free_all(headers);
    curl_easy_cleanup(curl);
    curl_global_cleanup();
    return rc == CURLE_OK ? 0 : 1;
}
```

View File

@@ -0,0 +1,65 @@
<!-- snippet:syntax-only -->
```c title="C"
#include <curl/curl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Growable, NUL-terminated accumulator for a response body. */
struct response_buffer {
    char *data;   /* heap storage, grown with realloc */
    size_t size;  /* bytes stored, not counting the trailing NUL */
};

/*
 * Write callback: append the size * nmemb bytes at `contents` to the
 * response_buffer passed through `userp`, keeping it NUL-terminated.
 *
 * Returns the number of bytes consumed, or 0 when the buffer cannot grow
 * (the existing contents are left untouched in that case).
 */
static size_t write_callback(void *contents, size_t size, size_t nmemb, void *userp) {
    struct response_buffer *sink = userp;
    const size_t chunk_len = size * nmemb;
    const size_t new_len = sink->size + chunk_len;

    /* One extra byte for the terminator. */
    char *grown = realloc(sink->data, new_len + 1);
    if (grown == NULL) {
        return 0;
    }

    memcpy(grown + sink->size, contents, chunk_len);
    grown[new_len] = '\0';
    sink->data = grown;
    sink->size = new_len;
    return chunk_len;
}
int main(void) {
curl_global_init(CURL_GLOBAL_DEFAULT);
CURL *curl = curl_easy_init();
if (!curl) {
fprintf(stderr, "curl_easy_init failed\n");
curl_global_cleanup();
return 1;
}
struct response_buffer response = {NULL, 0};
curl_mime *form = curl_mime_init(curl);
curl_mimepart *part = curl_mime_addpart(form);
curl_mime_name(part, "file");
curl_mime_filedata(part, "document.pdf");
curl_easy_setopt(curl, CURLOPT_URL, "http://localhost:8000/extract");
curl_easy_setopt(curl, CURLOPT_MIMEPOST, form);
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_callback);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, &response);
CURLcode rc = curl_easy_perform(curl);
if (rc != CURLE_OK) {
fprintf(stderr, "request failed: %s\n", curl_easy_strerror(rc));
} else {
long status = 0;
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &status);
printf("HTTP %ld\n%s\n", status, response.data ? response.data : "(empty)");
}
free(response.data);
curl_mime_free(form);
curl_easy_cleanup(curl);
curl_global_cleanup();
return rc == CURLE_OK ? 0 : 1;
}
```

View File

@@ -0,0 +1,44 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
#include <stdlib.h>
/*
 * Extract "document.pdf" with one config that sets output format, OCR,
 * chunking and image options together, then print the content.
 *
 * Returns 0 on success, 1 if the config cannot be parsed, and the library's
 * error code (or 1 when that code is 0) if extraction fails.
 */
int main(void) {
    /* Combine chunking, OCR, image extraction, and Markdown output in one config. */
    const char *config_json =
        "{"
        "\"output_format\": \"markdown\","
        "\"force_ocr\": true,"
        "\"ocr\": {\"backend\": \"tesseract\", \"languages\": [\"eng\", \"deu\"]},"
        "\"chunking\": {\"chunker_type\": \"character\", \"max_characters\": 1024, \"overlap\": 128, \"trim\": true},"
        "\"images\": {\"extract_images\": true, \"target_dpi\": 300, \"inject_placeholders\": true}"
        "}";

    KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_from_json(config_json);
    if (!config) {
        /* Same NULL-safe reporting as the extraction failure path below. */
        int32_t code = kreuzberg_last_error_code();
        const char *message = kreuzberg_last_error_context();
        fprintf(stderr, "config parse failed (code %d): %s\n",
                code, message ? message : "(no message)");
        return 1;
    }

    KREUZBERGExtractionResult *result =
        kreuzberg_extract_file("document.pdf", NULL, config);
    if (!result) {
        int32_t code = kreuzberg_last_error_code();
        const char *message = kreuzberg_last_error_context();
        fprintf(stderr, "extraction failed (code %d): %s\n",
                code, message ? message : "(no message)");
        kreuzberg_extraction_config_free(config);
        return code != 0 ? code : 1;
    }

    /* The content string is released with kreuzberg_free_string once printed. */
    char *content = kreuzberg_extraction_result_content(result);
    printf("%s\n", content ? content : "(empty)");
    kreuzberg_free_string(content);

    kreuzberg_extraction_result_free(result);
    kreuzberg_extraction_config_free(config);
    return 0;
}
```

View File

@@ -0,0 +1,29 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
#include <stdlib.h>
/*
 * Error-handling example: call kreuzberg_extract_bytes_sync with no data and
 * the MIME type "application/x-unknown", then report the library's error
 * code and message.
 *
 * Returns 0 if extraction unexpectedly succeeds, 1 if the default config
 * cannot be created, otherwise the library's error code (or 1 when that
 * code is 0).
 */
int main(void) {
    KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_default();
    if (!config) {
        int32_t code = kreuzberg_last_error_code();
        const char *message = kreuzberg_last_error_context();
        fprintf(stderr, "config init failed (code %d): %s\n",
                code, message ? message : "(no message)");
        return 1;
    }

    /* Pass an unsupported MIME type to trigger an error. */
    KREUZBERGExtractionResult *result =
        kreuzberg_extract_bytes_sync(NULL, 0, "application/x-unknown", config);
    if (!result) {
        int32_t code = kreuzberg_last_error_code();
        const char *message = kreuzberg_last_error_context();
        /* message is valid until the next FFI call on this thread — copy if needed. */
        fprintf(stderr, "error %d: %s\n", code, message ? message : "(no message)");
        kreuzberg_extraction_config_free(config);
        return code != 0 ? code : 1;
    }

    char *content = kreuzberg_extraction_result_content(result);
    printf("%s\n", content ? content : "(empty)");
    kreuzberg_free_string(content);

    kreuzberg_extraction_result_free(result);
    kreuzberg_extraction_config_free(config);
    return 0;
}
```

View File

@@ -0,0 +1,47 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Batch-extract a list mixing a valid file, a missing file and an unknown
 * extension, then print the raw results JSON and its size.
 *
 * Returns 0 on success, 1 if the default config cannot be created, and the
 * library's error code (or 1 when that code is 0) if the batch call fails.
 */
int main(void) {
    /* Mixed-validity batch: a real PDF, a missing file, and an unsupported type. */
    const char *items_json =
        "["
        "  {\"path\": \"document.pdf\"},"
        "  {\"path\": \"does-not-exist.pdf\"},"
        "  {\"path\": \"archive.unknownext\"}"
        "]";

    KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_default();
    if (!config) {
        /* Same NULL-safe reporting as the batch failure path below. */
        int32_t code = kreuzberg_last_error_code();
        const char *message = kreuzberg_last_error_context();
        fprintf(stderr, "config init failed (code %d): %s\n",
                code, message ? message : "(no message)");
        return 1;
    }

    /* Returns a JSON array of ExtractionResult objects (one per input, in order),
     * or NULL on a system-level failure. Per-item errors are encoded inside
     * each result object's metadata (e.g. an "errors" array). */
    char *results_json = kreuzberg_batch_extract_files(items_json, config);
    if (!results_json) {
        int32_t code = kreuzberg_last_error_code();
        const char *message = kreuzberg_last_error_context();
        /* message is valid until the next FFI call on this thread — copy if needed. */
        fprintf(stderr, "batch extraction aborted (code %d): %s\n",
                code, message ? message : "(no message)");
        kreuzberg_extraction_config_free(config);
        return code != 0 ? code : 1;
    }

    /* Walk the returned JSON. A real consumer would feed this to a JSON parser
     * and inspect each result's metadata.errors / content fields. */
    size_t len = strlen(results_json);
    printf("results (%zu bytes):\n%s\n", len, results_json);

    kreuzberg_free_string(results_json);
    kreuzberg_extraction_config_free(config);
    return 0;
}
```

View File

@@ -0,0 +1,35 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* kreuzberg_extract_bytes schedules work on the global Tokio runtime and
* returns once extraction is complete. For true non-blocking use, call it
* from a dedicated OS thread and synchronize via a semaphore or callback. */
/*
 * Extract the bytes of a short text string via kreuzberg_extract_bytes with
 * the default config and print the content.
 *
 * Returns 0 on success, 1 if the config cannot be created or extraction fails.
 */
int main(void) {
    const char *text = "Hello, kreuzberg!";
    const uint8_t *bytes = (const uint8_t *)text;
    size_t len = strlen(text);

    KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_default();
    if (!config) {
        /* Read code, then message, in separate statements (argument evaluation
         * order is unspecified) and never pass a NULL message to %s. */
        const int code = kreuzberg_last_error_code();
        const char *message = kreuzberg_last_error_context();
        fprintf(stderr, "config init failed (code %d): %s\n",
                code, message ? message : "(no message)");
        return 1;
    }

    KREUZBERGExtractionResult *result =
        kreuzberg_extract_bytes(bytes, len, "text/plain", config);
    if (!result) {
        const int code = kreuzberg_last_error_code();
        const char *message = kreuzberg_last_error_context();
        fprintf(stderr, "extraction failed (code %d): %s\n",
                code, message ? message : "(no message)");
        kreuzberg_extraction_config_free(config);
        return 1;
    }

    /* The content string is released with kreuzberg_free_string once printed. */
    char *content = kreuzberg_extraction_result_content(result);
    printf("%s\n", content ? content : "(empty)");
    kreuzberg_free_string(content);

    kreuzberg_extraction_result_free(result);
    kreuzberg_extraction_config_free(config);
    return 0;
}
```

View File

@@ -0,0 +1,32 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Extract the bytes of a short text string via kreuzberg_extract_bytes_sync
 * with the default config and print the content.
 *
 * Returns 0 on success, 1 if the config cannot be created or extraction fails.
 */
int main(void) {
    const char *text = "Hello, kreuzberg!";
    const uint8_t *bytes = (const uint8_t *)text;
    size_t len = strlen(text);

    KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_default();
    if (!config) {
        /* Read code, then message, in separate statements (argument evaluation
         * order is unspecified) and never pass a NULL message to %s. */
        const int code = kreuzberg_last_error_code();
        const char *message = kreuzberg_last_error_context();
        fprintf(stderr, "config init failed (code %d): %s\n",
                code, message ? message : "(no message)");
        return 1;
    }

    KREUZBERGExtractionResult *result =
        kreuzberg_extract_bytes_sync(bytes, len, "text/plain", config);
    if (!result) {
        const int code = kreuzberg_last_error_code();
        const char *message = kreuzberg_last_error_context();
        fprintf(stderr, "extraction failed (code %d): %s\n",
                code, message ? message : "(no message)");
        kreuzberg_extraction_config_free(config);
        return 1;
    }

    /* The content string is released with kreuzberg_free_string once printed. */
    char *content = kreuzberg_extraction_result_content(result);
    printf("%s\n", content ? content : "(empty)");
    kreuzberg_free_string(content);

    kreuzberg_extraction_result_free(result);
    kreuzberg_extraction_config_free(config);
    return 0;
}
```

View File

@@ -0,0 +1,30 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
#include <stdlib.h>
/* kreuzberg_extract_file schedules work on the global Tokio runtime and
* returns once extraction is complete. For true non-blocking use, call it
* from a dedicated OS thread and synchronize via a semaphore or callback. */
/*
 * Extract "document.pdf" via kreuzberg_extract_file with the default config
 * and print the content.
 *
 * Returns 0 on success, 1 if the config cannot be created or extraction fails.
 */
int main(void) {
    KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_default();
    if (!config) {
        /* Read code, then message, in separate statements (argument evaluation
         * order is unspecified) and never pass a NULL message to %s. */
        const int code = kreuzberg_last_error_code();
        const char *message = kreuzberg_last_error_context();
        fprintf(stderr, "config init failed (code %d): %s\n",
                code, message ? message : "(no message)");
        return 1;
    }

    KREUZBERGExtractionResult *result =
        kreuzberg_extract_file("document.pdf", NULL, config);
    if (!result) {
        const int code = kreuzberg_last_error_code();
        const char *message = kreuzberg_last_error_context();
        fprintf(stderr, "extraction failed (code %d): %s\n",
                code, message ? message : "(no message)");
        kreuzberg_extraction_config_free(config);
        return 1;
    }

    /* The content string is released with kreuzberg_free_string once printed. */
    char *content = kreuzberg_extraction_result_content(result);
    printf("%s\n", content ? content : "(empty)");
    kreuzberg_free_string(content);

    kreuzberg_extraction_result_free(result);
    kreuzberg_extraction_config_free(config);
    return 0;
}
```

View File

@@ -0,0 +1,27 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
#include <stdlib.h>
/*
 * Extract "document.pdf" via kreuzberg_extract_file_sync with the default
 * config and print the content.
 *
 * Returns 0 on success, 1 if the config cannot be created or extraction fails.
 */
int main(void) {
    KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_default();
    if (!config) {
        /* Read code, then message, in separate statements (argument evaluation
         * order is unspecified) and never pass a NULL message to %s. */
        const int code = kreuzberg_last_error_code();
        const char *message = kreuzberg_last_error_context();
        fprintf(stderr, "config init failed (code %d): %s\n",
                code, message ? message : "(no message)");
        return 1;
    }

    KREUZBERGExtractionResult *result =
        kreuzberg_extract_file_sync("document.pdf", NULL, config);
    if (!result) {
        const int code = kreuzberg_last_error_code();
        const char *message = kreuzberg_last_error_context();
        fprintf(stderr, "extraction failed (code %d): %s\n",
                code, message ? message : "(no message)");
        kreuzberg_extraction_config_free(config);
        return 1;
    }

    /* The content string is released with kreuzberg_free_string once printed. */
    char *content = kreuzberg_extraction_result_content(result);
    printf("%s\n", content ? content : "(empty)");
    kreuzberg_free_string(content);

    kreuzberg_extraction_result_free(result);
    kreuzberg_extraction_config_free(config);
    return 0;
}
```

View File

@@ -0,0 +1,25 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
/*
 * Build a config with the builder API (cache on, document structure on, an
 * OCR JSON section), extract "scan.pdf" with it, and print the content.
 *
 * Returns 0 when extraction succeeds, 1 when the builder or config cannot be
 * created or the extraction does not report success.
 */
int main(void) {
    struct ConfigBuilder *builder = kreuzberg_config_builder_new();
    if (!builder) {
        fprintf(stderr, "config builder creation failed\n");
        return 1;
    }
    kreuzberg_config_builder_set_use_cache(builder, 1);
    kreuzberg_config_builder_set_include_document_structure(builder, 1);
    kreuzberg_config_builder_set_ocr(builder,
        "{\"tesseract\":{\"language\":\"eng\"}}");

    /* NOTE(review): the builder is never released here, as in the original —
     * presumably kreuzberg_config_builder_build consumes it; confirm. */
    ExtractionConfig *config = kreuzberg_config_builder_build(builder);
    if (!config) {
        fprintf(stderr, "config build failed\n");
        return 1;
    }

    /* NOTE(review): ownership of the string returned by
     * kreuzberg_config_to_json is not visible from here; if it is
     * caller-owned it is leaked — confirm against the header. */
    struct CExtractionResult *result =
        kreuzberg_extract_file_sync_with_config("scan.pdf",
            kreuzberg_config_to_json(config));

    int status = 1;
    if (result && result->success) {
        printf("%s\n", result->content);
        status = 0;
    } else {
        /* Report the failure instead of silently exiting with status 0. */
        fprintf(stderr, "extraction failed\n");
    }

    if (result) {
        kreuzberg_free_result(result);
    }
    kreuzberg_config_free(config);
    return status;
}
```

View File

@@ -0,0 +1,43 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
#include <stdlib.h>
/*
 * Extract "document.pdf" with character chunking (max_characters 1000,
 * overlap 200, trim on) and print the content.
 *
 * Returns 0 on success, 1 if the config cannot be parsed or extraction fails.
 */
int main(void) {
    const char *config_json =
        "{"
        "\"chunking\": {"
        "\"chunker_type\": \"character\","
        "\"max_characters\": 1000,"
        "\"overlap\": 200,"
        "\"trim\": true"
        "}"
        "}";

    KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_from_json(config_json);
    if (!config) {
        /* Read code, then message, in separate statements (argument evaluation
         * order is unspecified) and never pass a NULL message to %s. */
        const int code = kreuzberg_last_error_code();
        const char *message = kreuzberg_last_error_context();
        fprintf(stderr, "config parse failed (code %d): %s\n",
                code, message ? message : "(no message)");
        return 1;
    }

    KREUZBERGExtractionResult *result =
        kreuzberg_extract_file_sync("document.pdf", NULL, config);
    if (!result) {
        const int code = kreuzberg_last_error_code();
        const char *message = kreuzberg_last_error_context();
        fprintf(stderr, "extraction failed (code %d): %s\n",
                code, message ? message : "(no message)");
        kreuzberg_extraction_config_free(config);
        return 1;
    }

    /* The content string is released with kreuzberg_free_string once printed. */
    char *content = kreuzberg_extraction_result_content(result);
    printf("%s\n", content ? content : "(empty)");
    kreuzberg_free_string(content);

    kreuzberg_extraction_result_free(result);
    kreuzberg_extraction_config_free(config);
    return 0;
}
```

View File

@@ -0,0 +1,38 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
/* Parse a config that sets "use_cache" and "enable_quality_processing" to
 * true, extract "document.pdf" with it and print the content.
 * Returns 0 on success, 1 on failure. */
int main(void) {
    const char *settings_json =
        "{"
        "\"use_cache\": true,"
        "\"enable_quality_processing\": true"
        "}";
    int status = 1;

    KREUZBERGExtractionConfig *cfg =
        kreuzberg_extraction_config_from_json(settings_json);
    if (cfg == NULL) {
        fprintf(stderr, "config parse failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
        return status;
    }

    KREUZBERGExtractionResult *res =
        kreuzberg_extract_file_sync("document.pdf", NULL, cfg);
    if (res == NULL) {
        fprintf(stderr, "extraction failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
    } else {
        char *text = kreuzberg_extraction_result_content(res);
        printf("%s\n", text != NULL ? text : "(empty)");
        kreuzberg_free_string(text);
        kreuzberg_extraction_result_free(res);
        status = 0;
    }

    /* The config is released on both the success and the failure path. */
    kreuzberg_extraction_config_free(cfg);
    return status;
}
```

View File

@@ -0,0 +1,62 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* The C FFI does not expose config-file auto-discovery directly. Load the
* file contents in your application and pass the JSON to
* kreuzberg_extraction_config_from_json. For TOML/YAML, convert in your
* application before calling the FFI. */
/* Read the whole file at `path` into a newly allocated, NUL-terminated
 * buffer.
 *
 * Returns NULL if the file cannot be opened, its size cannot be determined,
 * the allocation fails, or a read error occurs. On success the caller owns
 * the buffer and must release it with free(). */
static char *read_text_file(const char *path) {
    FILE *fp = fopen(path, "rb");
    if (!fp) {
        return NULL;
    }

    /* Determine the size by seeking to the end. ftell() reports failure as
     * -1, which must not reach the allocation or indexing below. */
    long size = -1;
    if (fseek(fp, 0, SEEK_END) == 0) {
        size = ftell(fp);
    }
    if (size < 0 || fseek(fp, 0, SEEK_SET) != 0) {
        fclose(fp);
        return NULL;
    }

    char *buf = malloc((size_t)size + 1);
    if (!buf) {
        fclose(fp);
        return NULL;
    }

    size_t nread = fread(buf, 1, (size_t)size, fp);
    if (nread != (size_t)size && ferror(fp)) {
        free(buf);
        fclose(fp);
        return NULL;
    }

    /* Terminate after the bytes actually read, so a short read never leaves
     * uninitialized bytes inside the returned string. */
    buf[nread] = '\0';
    fclose(fp);
    return buf;
}
/* Load "kreuzberg.json" when it can be read and parse it as the extraction
 * config; otherwise fall back to the default config. Then extract
 * "document.pdf" and print the content. Returns 0 on success, 1 on failure. */
int main(void) {
    KREUZBERGExtractionConfig *cfg;
    char *file_text = read_text_file("kreuzberg.json");
    if (file_text != NULL) {
        cfg = kreuzberg_extraction_config_from_json(file_text);
    } else {
        cfg = kreuzberg_extraction_config_default();
    }
    /* The file text is released right after the config has been created. */
    free(file_text);

    if (cfg == NULL) {
        fprintf(stderr, "config load failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
        return 1;
    }

    int status = 1;
    KREUZBERGExtractionResult *res =
        kreuzberg_extract_file_sync("document.pdf", NULL, cfg);
    if (res == NULL) {
        fprintf(stderr, "extraction failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
    } else {
        char *text = kreuzberg_extraction_result_content(res);
        printf("%s\n", text != NULL ? text : "(empty)");
        kreuzberg_free_string(text);
        kreuzberg_extraction_result_free(res);
        status = 0;
    }

    kreuzberg_extraction_config_free(cfg);
    return status;
}
```

View File

@@ -0,0 +1,40 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
/* Parse a config whose "ocr" section names the "tesseract" backend with
 * language "eng", extract "scanned.pdf" with it and print the content.
 * Returns 0 on success, 1 on failure. */
int main(void) {
    const char *settings_json =
        "{"
        "\"ocr\": {"
        "\"backend\": \"tesseract\","
        "\"language\": \"eng\""
        "}"
        "}";
    int status = 1;

    KREUZBERGExtractionConfig *cfg =
        kreuzberg_extraction_config_from_json(settings_json);
    if (cfg == NULL) {
        fprintf(stderr, "config parse failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
        return status;
    }

    KREUZBERGExtractionResult *res =
        kreuzberg_extract_file_sync("scanned.pdf", NULL, cfg);
    if (res == NULL) {
        fprintf(stderr, "extraction failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
    } else {
        char *text = kreuzberg_extraction_result_content(res);
        printf("%s\n", text != NULL ? text : "(empty)");
        kreuzberg_free_string(text);
        kreuzberg_extraction_result_free(res);
        status = 0;
    }

    /* The config is released on both the success and the failure path. */
    kreuzberg_extraction_config_free(cfg);
    return status;
}
```

View File

@@ -0,0 +1,48 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
/* Parse a combined config (cache and quality processing enabled, an "ocr"
 * section for Tesseract with "eng+deu" and psm 6, and a "chunking" section),
 * extract "document.pdf" with it and print the content.
 * Returns 0 on success, 1 on failure. */
int main(void) {
    const char *settings_json =
        "{"
        "\"use_cache\": true,"
        "\"enable_quality_processing\": true,"
        "\"ocr\": {"
        "\"backend\": \"tesseract\","
        "\"language\": \"eng+deu\","
        "\"tesseract_config\": {\"psm\": 6}"
        "},"
        "\"chunking\": {"
        "\"chunker_type\": \"character\","
        "\"max_characters\": 1000,"
        "\"overlap\": 200"
        "}"
        "}";
    int status = 1;

    KREUZBERGExtractionConfig *cfg =
        kreuzberg_extraction_config_from_json(settings_json);
    if (cfg == NULL) {
        fprintf(stderr, "config parse failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
        return status;
    }

    KREUZBERGExtractionResult *res =
        kreuzberg_extract_file_sync("document.pdf", NULL, cfg);
    if (res == NULL) {
        fprintf(stderr, "extraction failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
    } else {
        char *text = kreuzberg_extraction_result_content(res);
        printf("%s\n", text != NULL ? text : "(empty)");
        kreuzberg_free_string(text);
        kreuzberg_extraction_result_free(res);
        status = 0;
    }

    /* The config is released on both the success and the failure path. */
    kreuzberg_extraction_config_free(cfg);
    return status;
}
```

View File

@@ -0,0 +1,37 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
/* Parse a config that sets "include_document_structure" to true, extract
 * "document.pdf" with it and print the content.
 * Returns 0 on success, 1 on failure. */
int main(void) {
    const char *settings_json =
        "{"
        "\"include_document_structure\": true"
        "}";
    int status = 1;

    KREUZBERGExtractionConfig *cfg =
        kreuzberg_extraction_config_from_json(settings_json);
    if (cfg == NULL) {
        fprintf(stderr, "config parse failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
        return status;
    }

    KREUZBERGExtractionResult *res =
        kreuzberg_extract_file_sync("document.pdf", NULL, cfg);
    if (res == NULL) {
        fprintf(stderr, "extraction failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
    } else {
        char *text = kreuzberg_extraction_result_content(res);
        printf("%s\n", text != NULL ? text : "(empty)");
        kreuzberg_free_string(text);
        kreuzberg_extraction_result_free(res);
        status = 0;
    }

    /* The config is released on both the success and the failure path. */
    kreuzberg_extraction_config_free(cfg);
    return status;
}
```

View File

@@ -0,0 +1,37 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
/* Parse a config that sets "result_format" to "element_based", extract
 * "document.pdf" with it and print the content.
 * Returns 0 on success, 1 on failure. */
int main(void) {
    const char *settings_json =
        "{"
        "\"result_format\": \"element_based\""
        "}";
    int status = 1;

    KREUZBERGExtractionConfig *cfg =
        kreuzberg_extraction_config_from_json(settings_json);
    if (cfg == NULL) {
        fprintf(stderr, "config parse failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
        return status;
    }

    KREUZBERGExtractionResult *res =
        kreuzberg_extract_file_sync("document.pdf", NULL, cfg);
    if (res == NULL) {
        fprintf(stderr, "extraction failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
    } else {
        char *text = kreuzberg_extraction_result_content(res);
        printf("%s\n", text != NULL ? text : "(empty)");
        kreuzberg_free_string(text);
        kreuzberg_extraction_result_free(res);
        status = 0;
    }

    /* The config is released on both the success and the failure path. */
    kreuzberg_extraction_config_free(cfg);
    return status;
}
```

View File

@@ -0,0 +1,47 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
/* Parse a config whose "chunking" section carries an "embedding" block
 * (preset model "balanced", batch_size 16, normalize and
 * show_download_progress true), extract "document.pdf" with it and print
 * the content. Returns 0 on success, 1 on failure. */
int main(void) {
    const char *settings_json =
        "{"
        "\"chunking\": {"
        "\"chunker_type\": \"character\","
        "\"max_characters\": 1000,"
        "\"overlap\": 200,"
        "\"embedding\": {"
        "\"model\": {\"preset\": {\"name\": \"balanced\"}},"
        "\"batch_size\": 16,"
        "\"normalize\": true,"
        "\"show_download_progress\": true"
        "}"
        "}"
        "}";
    int status = 1;

    KREUZBERGExtractionConfig *cfg =
        kreuzberg_extraction_config_from_json(settings_json);
    if (cfg == NULL) {
        fprintf(stderr, "config parse failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
        return status;
    }

    KREUZBERGExtractionResult *res =
        kreuzberg_extract_file_sync("document.pdf", NULL, cfg);
    if (res == NULL) {
        fprintf(stderr, "extraction failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
    } else {
        char *text = kreuzberg_extraction_result_content(res);
        printf("%s\n", text != NULL ? text : "(empty)");
        kreuzberg_free_string(text);
        kreuzberg_extraction_result_free(res);
        status = 0;
    }

    /* The config is released on both the success and the failure path. */
    kreuzberg_extraction_config_free(cfg);
    return status;
}
```

View File

@@ -0,0 +1,40 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
/* Parse a config that sets "output_format" to "html" with the "github"
 * theme under "html_output", extract "document.pdf" with it and print the
 * content. Returns 0 on success, 1 on failure. */
int main(void) {
    const char *settings_json =
        "{"
        "\"output_format\": \"html\","
        "\"html_output\": {"
        "\"theme\": \"github\""
        "}"
        "}";
    int status = 1;

    KREUZBERGExtractionConfig *cfg =
        kreuzberg_extraction_config_from_json(settings_json);
    if (cfg == NULL) {
        fprintf(stderr, "config parse failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
        return status;
    }

    KREUZBERGExtractionResult *res =
        kreuzberg_extract_file_sync("document.pdf", NULL, cfg);
    if (res == NULL) {
        fprintf(stderr, "extraction failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
    } else {
        char *text = kreuzberg_extraction_result_content(res);
        printf("%s\n", text != NULL ? text : "(empty)");
        kreuzberg_free_string(text);
        kreuzberg_extraction_result_free(res);
        status = 0;
    }

    /* The config is released on both the success and the failure path. */
    kreuzberg_extraction_config_free(cfg);
    return status;
}
```

View File

@@ -0,0 +1,43 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
/* Parse a config with a "keywords" section (algorithm "yake",
 * max_keywords 10, min_score 0.1, ngram_range [1, 3], language "en"),
 * extract "document.pdf" with it and print the content.
 * Returns 0 on success, 1 on failure. */
int main(void) {
    const char *settings_json =
        "{"
        "\"keywords\": {"
        "\"algorithm\": \"yake\","
        "\"max_keywords\": 10,"
        "\"min_score\": 0.1,"
        "\"ngram_range\": [1, 3],"
        "\"language\": \"en\""
        "}"
        "}";
    int status = 1;

    KREUZBERGExtractionConfig *cfg =
        kreuzberg_extraction_config_from_json(settings_json);
    if (cfg == NULL) {
        fprintf(stderr, "config parse failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
        return status;
    }

    KREUZBERGExtractionResult *res =
        kreuzberg_extract_file_sync("document.pdf", NULL, cfg);
    if (res == NULL) {
        fprintf(stderr, "extraction failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
    } else {
        char *text = kreuzberg_extraction_result_content(res);
        printf("%s\n", text != NULL ? text : "(empty)");
        kreuzberg_free_string(text);
        kreuzberg_extraction_result_free(res);
        status = 0;
    }

    /* The config is released on both the success and the failure path. */
    kreuzberg_extraction_config_free(cfg);
    return status;
}
```

View File

@@ -0,0 +1,41 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
/* Parse a config with a "language_detection" section (enabled,
 * min_confidence 0.8, detect_multiple true), extract "document.pdf" with it
 * and print the content. Returns 0 on success, 1 on failure. */
int main(void) {
    const char *settings_json =
        "{"
        "\"language_detection\": {"
        "\"enabled\": true,"
        "\"min_confidence\": 0.8,"
        "\"detect_multiple\": true"
        "}"
        "}";
    int status = 1;

    KREUZBERGExtractionConfig *cfg =
        kreuzberg_extraction_config_from_json(settings_json);
    if (cfg == NULL) {
        fprintf(stderr, "config parse failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
        return status;
    }

    KREUZBERGExtractionResult *res =
        kreuzberg_extract_file_sync("document.pdf", NULL, cfg);
    if (res == NULL) {
        fprintf(stderr, "extraction failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
    } else {
        char *text = kreuzberg_extraction_result_content(res);
        printf("%s\n", text != NULL ? text : "(empty)");
        kreuzberg_free_string(text);
        kreuzberg_extraction_result_free(res);
        status = 0;
    }

    /* The config is released on both the success and the failure path. */
    kreuzberg_extraction_config_free(cfg);
    return status;
}
```

View File

@@ -0,0 +1,44 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
/* Parse a config with an "images" section (extract_images true,
 * target_dpi 300, max_image_dimension 4096, auto_adjust_dpi true,
 * min_dpi 150, max_dpi 600), extract "document.pdf" with it and print the
 * content. Returns 0 on success, 1 on failure. */
int main(void) {
    const char *settings_json =
        "{"
        "\"images\": {"
        "\"extract_images\": true,"
        "\"target_dpi\": 300,"
        "\"max_image_dimension\": 4096,"
        "\"auto_adjust_dpi\": true,"
        "\"min_dpi\": 150,"
        "\"max_dpi\": 600"
        "}"
        "}";
    int status = 1;

    KREUZBERGExtractionConfig *cfg =
        kreuzberg_extraction_config_from_json(settings_json);
    if (cfg == NULL) {
        fprintf(stderr, "config parse failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
        return status;
    }

    KREUZBERGExtractionResult *res =
        kreuzberg_extract_file_sync("document.pdf", NULL, cfg);
    if (res == NULL) {
        fprintf(stderr, "extraction failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
    } else {
        char *text = kreuzberg_extraction_result_content(res);
        printf("%s\n", text != NULL ? text : "(empty)");
        kreuzberg_free_string(text);
        kreuzberg_extraction_result_free(res);
        status = 0;
    }

    /* The config is released on both the success and the failure path. */
    kreuzberg_extraction_config_free(cfg);
    return status;
}
```

View File

@@ -0,0 +1,41 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
/* Parse a config with a "pdf_options" section (extract_images true, one
 * entry in "passwords", extract_metadata true), extract "encrypted.pdf"
 * with it and print the content. Returns 0 on success, 1 on failure. */
int main(void) {
    const char *settings_json =
        "{"
        "\"pdf_options\": {"
        "\"extract_images\": true,"
        "\"passwords\": [\"password123\"],"
        "\"extract_metadata\": true"
        "}"
        "}";
    int status = 1;

    KREUZBERGExtractionConfig *cfg =
        kreuzberg_extraction_config_from_json(settings_json);
    if (cfg == NULL) {
        fprintf(stderr, "config parse failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
        return status;
    }

    KREUZBERGExtractionResult *res =
        kreuzberg_extract_file_sync("encrypted.pdf", NULL, cfg);
    if (res == NULL) {
        fprintf(stderr, "extraction failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
    } else {
        char *text = kreuzberg_extraction_result_content(res);
        printf("%s\n", text != NULL ? text : "(empty)");
        kreuzberg_free_string(text);
        kreuzberg_extraction_result_free(res);
        status = 0;
    }

    /* The config is released on both the success and the failure path. */
    kreuzberg_extraction_config_free(cfg);
    return status;
}
```

View File

@@ -0,0 +1,45 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
/* Parse a config whose "pdf_options" section contains a "hierarchy" block
 * (enabled, detection_threshold 0.75, ocr_coverage_threshold 0.8,
 * min_level 1, max_level 5), extract "document.pdf" with it and print the
 * content. Returns 0 on success, 1 on failure. */
int main(void) {
    const char *settings_json =
        "{"
        "\"pdf_options\": {"
        "\"hierarchy\": {"
        "\"enabled\": true,"
        "\"detection_threshold\": 0.75,"
        "\"ocr_coverage_threshold\": 0.8,"
        "\"min_level\": 1,"
        "\"max_level\": 5"
        "}"
        "}"
        "}";
    int status = 1;

    KREUZBERGExtractionConfig *cfg =
        kreuzberg_extraction_config_from_json(settings_json);
    if (cfg == NULL) {
        fprintf(stderr, "config parse failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
        return status;
    }

    KREUZBERGExtractionResult *res =
        kreuzberg_extract_file_sync("document.pdf", NULL, cfg);
    if (res == NULL) {
        fprintf(stderr, "extraction failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
    } else {
        char *text = kreuzberg_extraction_result_content(res);
        printf("%s\n", text != NULL ? text : "(empty)");
        kreuzberg_free_string(text);
        kreuzberg_extraction_result_free(res);
        status = 0;
    }

    /* The config is released on both the success and the failure path. */
    kreuzberg_extraction_config_free(cfg);
    return status;
}
```

View File

@@ -0,0 +1,40 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
/* Parse a config with a "postprocessor" section (enabled, with
 * "whitespace_normalizer" and "unicode_normalizer" listed under
 * "enabled_processors"), extract "document.pdf" with it and print the
 * content. Returns 0 on success, 1 on failure. */
int main(void) {
    const char *settings_json =
        "{"
        "\"postprocessor\": {"
        "\"enabled\": true,"
        "\"enabled_processors\": [\"whitespace_normalizer\", \"unicode_normalizer\"]"
        "}"
        "}";
    int status = 1;

    KREUZBERGExtractionConfig *cfg =
        kreuzberg_extraction_config_from_json(settings_json);
    if (cfg == NULL) {
        fprintf(stderr, "config parse failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
        return status;
    }

    KREUZBERGExtractionResult *res =
        kreuzberg_extract_file_sync("document.pdf", NULL, cfg);
    if (res == NULL) {
        fprintf(stderr, "extraction failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
    } else {
        char *text = kreuzberg_extraction_result_content(res);
        printf("%s\n", text != NULL ? text : "(empty)");
        kreuzberg_free_string(text);
        kreuzberg_extraction_result_free(res);
        status = 0;
    }

    /* The config is released on both the success and the failure path. */
    kreuzberg_extraction_config_free(cfg);
    return status;
}
```

View File

@@ -0,0 +1,38 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
/* Parse a config that sets "enable_quality_processing" and "use_cache" to
 * true, extract "document.pdf" with it and print the content.
 * Returns 0 on success, 1 on failure. */
int main(void) {
    const char *settings_json =
        "{"
        "\"enable_quality_processing\": true,"
        "\"use_cache\": true"
        "}";
    int status = 1;

    KREUZBERGExtractionConfig *cfg =
        kreuzberg_extraction_config_from_json(settings_json);
    if (cfg == NULL) {
        fprintf(stderr, "config parse failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
        return status;
    }

    KREUZBERGExtractionResult *res =
        kreuzberg_extract_file_sync("document.pdf", NULL, cfg);
    if (res == NULL) {
        fprintf(stderr, "extraction failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
    } else {
        char *text = kreuzberg_extraction_result_content(res);
        printf("%s\n", text != NULL ? text : "(empty)");
        kreuzberg_free_string(text);
        kreuzberg_extraction_result_free(res);
        status = 0;
    }

    /* The config is released on both the success and the failure path. */
    kreuzberg_extraction_config_free(cfg);
    return status;
}
```

View File

@@ -0,0 +1,44 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
/* Parse a config whose "ocr" section names the "tesseract" backend with
 * language "eng+deu" and a "tesseract_config" of psm 6 / oem 3, extract
 * "scanned.pdf" with it and print the content.
 * Returns 0 on success, 1 on failure. */
int main(void) {
    const char *settings_json =
        "{"
        "\"ocr\": {"
        "\"backend\": \"tesseract\","
        "\"language\": \"eng+deu\","
        "\"tesseract_config\": {"
        "\"psm\": 6,"
        "\"oem\": 3"
        "}"
        "}"
        "}";
    int status = 1;

    KREUZBERGExtractionConfig *cfg =
        kreuzberg_extraction_config_from_json(settings_json);
    if (cfg == NULL) {
        fprintf(stderr, "config parse failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
        return status;
    }

    KREUZBERGExtractionResult *res =
        kreuzberg_extract_file_sync("scanned.pdf", NULL, cfg);
    if (res == NULL) {
        fprintf(stderr, "extraction failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
    } else {
        char *text = kreuzberg_extraction_result_content(res);
        printf("%s\n", text != NULL ? text : "(empty)");
        kreuzberg_free_string(text);
        kreuzberg_extraction_result_free(res);
        status = 0;
    }

    /* The config is released on both the success and the failure path. */
    kreuzberg_extraction_config_free(cfg);
    return status;
}
```

View File

@@ -0,0 +1,40 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
/* Parse a config with a "token_reduction" section (mode "moderate",
 * preserve_important_words true), extract "document.pdf" with it and print
 * the content. Returns 0 on success, 1 on failure. */
int main(void) {
    const char *settings_json =
        "{"
        "\"token_reduction\": {"
        "\"mode\": \"moderate\","
        "\"preserve_important_words\": true"
        "}"
        "}";
    int status = 1;

    KREUZBERGExtractionConfig *cfg =
        kreuzberg_extraction_config_from_json(settings_json);
    if (cfg == NULL) {
        fprintf(stderr, "config parse failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
        return status;
    }

    KREUZBERGExtractionResult *res =
        kreuzberg_extract_file_sync("document.pdf", NULL, cfg);
    if (res == NULL) {
        fprintf(stderr, "extraction failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
    } else {
        char *text = kreuzberg_extraction_result_content(res);
        printf("%s\n", text != NULL ? text : "(empty)");
        kreuzberg_free_string(text);
        kreuzberg_extraction_result_free(res);
        status = 0;
    }

    /* The config is released on both the success and the failure path. */
    kreuzberg_extraction_config_free(cfg);
    return status;
}
```

View File

@@ -0,0 +1,28 @@
```c title="C"
#include <kreuzberg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Extract "document.pdf" with the default extraction config and print the
 * content. Returns 0 on success, 1 if the extraction returned NULL. */
int main(void) {
    int status = 1;
    KREUZBERGExtractionConfig *cfg = kreuzberg_extraction_config_default();

    KREUZBERGExtractionResult *res =
        kreuzberg_extract_file("document.pdf", NULL, cfg);
    if (res == NULL) {
        fprintf(stderr, "extraction failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
    } else {
        char *text = kreuzberg_extraction_result_content(res);
        printf("%s\n", text != NULL ? text : "(empty)");
        kreuzberg_free_string(text);
        kreuzberg_extraction_result_free(res);
        status = 0;
    }

    /* The config is released on both the success and the failure path. */
    kreuzberg_extraction_config_free(cfg);
    return status;
}
```

View File

@@ -0,0 +1,39 @@
```c title="C"
#include <kreuzberg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Extract "document.pdf" without a config (NULL) and print its content,
 * its tables as a JSON string, and the title and language read from the
 * metadata handle. Returns 0 on success, 1 if the extraction returned NULL. */
int main(void) {
    KREUZBERGExtractionResult *res =
        kreuzberg_extract_file("document.pdf", NULL, NULL);
    if (res == NULL) {
        fprintf(stderr, "extraction failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
        return 1;
    }

    char *text = kreuzberg_extraction_result_content(res);
    printf("content:\n%s\n", text != NULL ? text : "(empty)");
    kreuzberg_free_string(text);

    char *tables = kreuzberg_extraction_result_tables(res);
    printf("tables (JSON): %s\n", tables != NULL ? tables : "[]");
    kreuzberg_free_string(tables);

    KREUZBERGMetadata *meta = kreuzberg_extraction_result_metadata(res);
    if (meta != NULL) {
        /* Both strings are fetched before printing and freed before the
         * metadata handle itself is released. */
        char *doc_title = kreuzberg_metadata_title(meta);
        char *doc_language = kreuzberg_metadata_language(meta);
        printf("title: %s\n", doc_title != NULL ? doc_title : "(none)");
        printf("language: %s\n", doc_language != NULL ? doc_language : "(none)");
        kreuzberg_free_string(doc_title);
        kreuzberg_free_string(doc_language);
        kreuzberg_metadata_free(meta);
    }

    kreuzberg_extraction_result_free(res);
    return 0;
}
```

View File

@@ -0,0 +1,46 @@
```c title="C"
#include <kreuzberg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Parse a config that sets "force_ocr" to true with a Tesseract/"eng" OCR
 * section, extract "scanned.pdf" with it, and print the content followed by
 * the detected-languages string. Returns 0 on success, 1 on failure. */
int main(void) {
    const char *settings_json =
        "{"
        "\"force_ocr\": true,"
        "\"ocr\": {\"backend\": \"tesseract\", \"language\": \"eng\"}"
        "}";
    int status = 1;

    KREUZBERGExtractionConfig *cfg =
        kreuzberg_extraction_config_from_json(settings_json);
    if (cfg == NULL) {
        fprintf(stderr, "config init failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
        return status;
    }

    KREUZBERGExtractionResult *res =
        kreuzberg_extract_file("scanned.pdf", NULL, cfg);
    if (res == NULL) {
        fprintf(stderr, "extraction failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
    } else {
        char *text = kreuzberg_extraction_result_content(res);
        printf("%s\n", text != NULL ? text : "(empty)");
        kreuzberg_free_string(text);

        char *languages = kreuzberg_extraction_result_detected_languages(res);
        printf("detected languages: %s\n",
               languages != NULL ? languages : "(none)");
        kreuzberg_free_string(languages);

        kreuzberg_extraction_result_free(res);
        status = 0;
    }

    /* The config is released on both the success and the failure path. */
    kreuzberg_extraction_config_free(cfg);
    return status;
}
```

View File

@@ -0,0 +1,24 @@
```c title="C"
#include <kreuzberg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Extract "document.pdf" without a config (NULL) and print the content.
 * Returns 0 on success, 1 if the extraction returned NULL. */
int main(void) {
    KREUZBERGExtractionResult *res =
        kreuzberg_extract_file("document.pdf", NULL, NULL);
    if (res != NULL) {
        char *text = kreuzberg_extraction_result_content(res);
        printf("%s\n", text != NULL ? text : "(empty)");
        kreuzberg_free_string(text);
        kreuzberg_extraction_result_free(res);
        return 0;
    }

    fprintf(stderr, "extraction failed (code %d): %s\n",
            kreuzberg_last_error_code(),
            kreuzberg_last_error_context());
    return 1;
}
```

View File

@@ -0,0 +1,12 @@
```c title="C"
#include <kreuzberg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Print the string returned by kreuzberg_version(), or "(unknown)" when it
 * is NULL. Always returns 0. */
int main(void) {
    const char *reported = kreuzberg_version();
    if (reported == NULL) {
        reported = "(unknown)";
    }
    printf("kreuzberg version: %s\n", reported);
    return 0;
}
```

View File

@@ -0,0 +1,38 @@
```c title="C"
#include <kreuzberg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Extract "document.pdf" without a config (NULL), print the content with
 * its byte length, then print the tables JSON string with its byte length.
 * Returns 0 on success, 1 if the extraction returned NULL. */
int main(void) {
    KREUZBERGExtractionResult *res =
        kreuzberg_extract_file("document.pdf", NULL, NULL);
    if (res == NULL) {
        fprintf(stderr, "extraction failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
        return 1;
    }

    char *text = kreuzberg_extraction_result_content(res);
    if (text != NULL) {
        printf("content length: %zu bytes\n", strlen(text));
        printf("%s\n", text);
    }
    kreuzberg_free_string(text);

    /* The tables arrive as one JSON array string; a real consumer would
     * hand it to a JSON parser and walk each table's grid. */
    char *tables = kreuzberg_extraction_result_tables(res);
    if (tables == NULL) {
        printf("tables JSON: (none)\n");
    } else {
        printf("tables JSON (%zu bytes):\n%s\n",
               strlen(tables), tables);
    }
    kreuzberg_free_string(tables);

    kreuzberg_extraction_result_free(res);
    return 0;
}
```

View File

@@ -0,0 +1,57 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
#include <stdlib.h>
/* Parse a config with a "structured_extraction" section (a JSON schema
 * requiring title, authors and date, the "openai/gpt-4o-mini" model, strict
 * true), extract "paper.pdf" with it and print the structured output string.
 * Returns 0 on success, 1 on failure. */
int main(void) {
    const char *settings_json =
        "{"
        "\"structured_extraction\": {"
        "\"schema\": {"
        "\"type\": \"object\","
        "\"properties\": {"
        "\"title\": {\"type\": \"string\"},"
        "\"authors\": {\"type\": \"array\", \"items\": {\"type\": \"string\"}},"
        "\"date\": {\"type\": \"string\"}"
        "},"
        "\"required\": [\"title\", \"authors\", \"date\"],"
        "\"additionalProperties\": false"
        "},"
        "\"llm\": {\"model\": \"openai/gpt-4o-mini\"},"
        "\"strict\": true"
        "}"
        "}";
    int status = 1;

    KREUZBERGExtractionConfig *cfg =
        kreuzberg_extraction_config_from_json(settings_json);
    if (cfg == NULL) {
        fprintf(stderr, "config parse failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
        return status;
    }

    KREUZBERGExtractionResult *res =
        kreuzberg_extract_file_sync("paper.pdf", NULL, cfg);
    if (res == NULL) {
        fprintf(stderr, "extraction failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
    } else {
        char *answer = kreuzberg_extraction_result_structured_output(res);
        if (answer == NULL) {
            printf("structured output: (none)\n");
        } else {
            printf("structured output (JSON):\n%s\n", answer);
            kreuzberg_free_string(answer);
        }
        kreuzberg_extraction_result_free(res);
        status = 0;
    }

    /* The config is released on both the success and the failure path. */
    kreuzberg_extraction_config_free(cfg);
    return status;
}
```
<!-- snippet:syntax-only --> Requires network access to the configured LLM provider and a valid API key in the host environment.

View File

@@ -0,0 +1,56 @@
```c title="C"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
/* The kreuzberg C FFI does not bundle an MCP client. Drive the kreuzberg
* CLI's stdio MCP transport from a C host that also links libkreuzberg. */
/* Spawn `kreuzberg mcp` with its stdin/stdout connected to two pipes, send
 * one tools/call request, and copy everything the child writes to our
 * stdout until it closes its end.
 *
 * Returns 0 on success, 1 if a pipe, fork, write or read call fails. The
 * child exits with 127 when it cannot set up its descriptors or exec. */
int main(void) {
    int request_pipe[2];
    int response_pipe[2];
    if (pipe(request_pipe) < 0) {
        perror("pipe");
        return 1;
    }
    if (pipe(response_pipe) < 0) {
        perror("pipe");
        close(request_pipe[0]);
        close(request_pipe[1]);
        return 1;
    }

    pid_t pid = fork();
    if (pid < 0) {
        perror("fork");
        close(request_pipe[0]);
        close(request_pipe[1]);
        close(response_pipe[0]);
        close(response_pipe[1]);
        return 1;
    }

    if (pid == 0) {
        if (dup2(request_pipe[0], STDIN_FILENO) < 0 ||
            dup2(response_pipe[1], STDOUT_FILENO) < 0) {
            perror("dup2");
            _exit(127);
        }
        /* Drop the original pipe descriptors so the exec'd program inherits
         * only stdin/stdout. A descriptor that already is the target number
         * must stay open. */
        if (request_pipe[0] != STDIN_FILENO) {
            close(request_pipe[0]);
        }
        if (response_pipe[1] != STDOUT_FILENO) {
            close(response_pipe[1]);
        }
        close(request_pipe[1]);
        close(response_pipe[0]);
        execlp("kreuzberg", "kreuzberg", "mcp", (char *)NULL);
        perror("execlp");
        _exit(127);
    }

    /* Parent keeps only the write end of the request pipe and the read end
     * of the response pipe. */
    close(request_pipe[0]);
    close(response_pipe[1]);

    /* NOTE(review): this message carries no "jsonrpc"/"id" members and no
     * initialize handshake precedes it - confirm the server accepts it. */
    const char *request =
        "{\"method\":\"tools/call\","
        "\"params\":{\"name\":\"extract_file\","
        "\"arguments\":{\"path\":\"document.pdf\",\"async\":true}}}\n";

    int status = 0;
    if (write(request_pipe[1], request, strlen(request)) < 0) {
        perror("write");
        status = 1;
    }
    /* Closing the write end signals end-of-input to the child. */
    close(request_pipe[1]);

    if (status == 0) {
        /* One read() returns at most one buffer's worth and may stop at a
         * partial message, so keep reading until end-of-file.
         * NOTE(review): this assumes the child closes its stdout once its
         * stdin reaches end-of-file - confirm for `kreuzberg mcp`. */
        char buffer[4096];
        ssize_t bytes_read;
        while ((bytes_read = read(response_pipe[0], buffer, sizeof(buffer))) > 0) {
            fwrite(buffer, 1, (size_t)bytes_read, stdout);
        }
        if (bytes_read < 0) {
            perror("read");
            status = 1;
        }
    }

    close(response_pipe[0]);
    return status;
}
```
<!-- snippet:syntax-only --> No MCP client is exposed by libkreuzberg; this snippet drives the MCP CLI over stdio.

View File

@@ -0,0 +1,29 @@
```c title="C"
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
/* The kreuzberg C FFI does not embed the MCP server. Spawn the kreuzberg
* CLI from a host process that uses libkreuzberg for in-process extraction. */
/* Fork, run `kreuzberg mcp` in the child, and wait for it in the parent.
 * Returns the child's exit status when it exited normally, otherwise 1
 * (also 1 when fork or waitpid fails). The child exits with 127 when
 * execlp fails. */
int main(void) {
    const pid_t child = fork();
    if (child == 0) {
        execlp("kreuzberg", "kreuzberg", "mcp", (char *)NULL);
        /* Only reached when the exec itself failed. */
        perror("execlp");
        _exit(127);
    }
    if (child < 0) {
        perror("fork");
        return 1;
    }

    int wait_status = 0;
    if (waitpid(child, &wait_status, 0) < 0) {
        perror("waitpid");
        return 1;
    }
    if (!WIFEXITED(wait_status)) {
        return 1;
    }
    return WEXITSTATUS(wait_status);
}
```
<!-- snippet:syntax-only --> The MCP server is exposed only through the kreuzberg CLI; libkreuzberg's C FFI offers no MCP entry point. This snippet spawns the CLI from a host that already links against libkreuzberg.

View File

@@ -0,0 +1,56 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
#include <string.h>
/* Parse a config with a "language_detection" section (enabled,
 * min_confidence 0.9, detect_multiple false), extract "document.pdf" with
 * it, then print the metadata language (when present) and the
 * detected-languages string. Returns 0 on success, 1 on failure. */
int main(void) {
    const char *settings_json =
        "{"
        "\"language_detection\": {"
        "\"enabled\": true,"
        "\"min_confidence\": 0.9,"
        "\"detect_multiple\": false"
        "}"
        "}";
    int status = 1;

    KREUZBERGExtractionConfig *cfg =
        kreuzberg_extraction_config_from_json(settings_json);
    if (cfg == NULL) {
        fprintf(stderr, "config parse failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
        return status;
    }

    KREUZBERGExtractionResult *res =
        kreuzberg_extract_file_sync("document.pdf", NULL, cfg);
    if (res == NULL) {
        fprintf(stderr, "extraction failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
    } else {
        KREUZBERGMetadata *meta = kreuzberg_extraction_result_metadata(res);
        if (meta != NULL) {
            char *doc_language = kreuzberg_metadata_language(meta);
            if (doc_language != NULL) {
                printf("Metadata language: %s\n", doc_language);
                kreuzberg_free_string(doc_language);
            }
            kreuzberg_metadata_free(meta);
        }

        char *languages = kreuzberg_extraction_result_detected_languages(res);
        if (languages == NULL) {
            printf("No languages detected\n");
        } else {
            printf("Detected languages: %s\n", languages);
            kreuzberg_free_string(languages);
        }

        kreuzberg_extraction_result_free(res);
        status = 0;
    }

    /* The config is released on both the success and the failure path. */
    kreuzberg_extraction_config_free(cfg);
    return status;
}
```

View File

@@ -0,0 +1,46 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
#include <string.h>
/* Parse a config with a "language_detection" section (enabled,
 * min_confidence 0.8, detect_multiple true), extract
 * "multilingual_document.pdf" with it and print the detected-languages
 * string. Returns 0 on success, 1 on failure. */
int main(void) {
    const char *settings_json =
        "{"
        "\"language_detection\": {"
        "\"enabled\": true,"
        "\"min_confidence\": 0.8,"
        "\"detect_multiple\": true"
        "}"
        "}";
    int status = 1;

    KREUZBERGExtractionConfig *cfg =
        kreuzberg_extraction_config_from_json(settings_json);
    if (cfg == NULL) {
        fprintf(stderr, "config parse failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
        return status;
    }

    KREUZBERGExtractionResult *res =
        kreuzberg_extract_file_sync("multilingual_document.pdf", NULL, cfg);
    if (res == NULL) {
        fprintf(stderr, "extraction failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
    } else {
        char *languages = kreuzberg_extraction_result_detected_languages(res);
        if (languages == NULL) {
            printf("No languages detected\n");
        } else {
            printf("Detected languages (JSON array): %s\n", languages);
            kreuzberg_free_string(languages);
        }
        kreuzberg_extraction_result_free(res);
        status = 0;
    }

    /* The config is released on both the success and the failure path. */
    kreuzberg_extraction_config_free(cfg);
    return status;
}
```

View File

@@ -0,0 +1,27 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
/* Extract "document.pdf" and print its content and MIME type, followed by
 * the language, date, subject and metadata JSON fields that are non-NULL.
 * Returns 0 on success, 1 when the call returned NULL or a result whose
 * `success` flag is unset. */
int main(void) {
    struct CExtractionResult *result = kreuzberg_extract_file_sync("document.pdf");
    if (!result || !result->success) {
        fprintf(stderr, "Error: %s\n", kreuzberg_get_error_details().message);
        /* A non-NULL result with success unset was still handed to us, so
         * release it before leaving instead of leaking it. */
        if (result) {
            kreuzberg_free_result(result);
        }
        return 1;
    }

    printf("Content: %s\n", result->content);
    printf("MIME: %s\n", result->mime_type);
    /* The remaining fields are optional and printed only when set. */
    if (result->language) {
        printf("Language: %s\n", result->language);
    }
    if (result->date) {
        printf("Date: %s\n", result->date);
    }
    if (result->subject) {
        printf("Subject: %s\n", result->subject);
    }
    if (result->metadata_json) {
        printf("Metadata: %s\n", result->metadata_json);
    }

    kreuzberg_free_result(result);
    return 0;
}
```

View File

@@ -0,0 +1,62 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
#include <string.h>
/* Parse a config that sets "pages.extract_pages" to true, extract
 * "document.pdf" with it, then print the content length, the total page
 * count and the page-boundaries JSON string read from the metadata.
 * Returns 0 on success, 1 on failure. */
int main(void) {
    const char *settings_json =
        "{"
        "\"pages\": {"
        "\"extract_pages\": true"
        "}"
        "}";
    int status = 1;

    KREUZBERGExtractionConfig *cfg =
        kreuzberg_extraction_config_from_json(settings_json);
    if (cfg == NULL) {
        fprintf(stderr, "config parse failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
        return status;
    }

    KREUZBERGExtractionResult *res =
        kreuzberg_extract_file_sync("document.pdf", NULL, cfg);
    if (res == NULL) {
        fprintf(stderr, "extraction failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
    } else {
        char *text = kreuzberg_extraction_result_content(res);
        if (text != NULL) {
            printf("Total content length: %zu bytes\n", strlen(text));
            kreuzberg_free_string(text);
        }

        KREUZBERGMetadata *meta = kreuzberg_extraction_result_metadata(res);
        if (meta != NULL) {
            KREUZBERGPageStructure *layout = kreuzberg_metadata_pages(meta);
            if (layout == NULL) {
                printf("No page structure available\n");
            } else {
                printf("Total pages: %zu\n",
                       kreuzberg_page_structure_total_count(layout));
                char *bounds = kreuzberg_page_structure_boundaries(layout);
                if (bounds == NULL) {
                    printf("No page boundaries available\n");
                } else {
                    printf("Page boundaries (JSON): %s\n", bounds);
                    kreuzberg_free_string(bounds);
                }
                kreuzberg_page_structure_free(layout);
            }
            kreuzberg_metadata_free(meta);
        }

        kreuzberg_extraction_result_free(res);
        status = 0;
    }

    /* The config is released on both the success and the failure path. */
    kreuzberg_extraction_config_free(cfg);
    return status;
}
```

View File

@@ -0,0 +1,54 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
#include <string.h>
/* Parse a config that sets "pages.extract_pages" to true, extract
 * "document.pdf" with it, then print the pages JSON string and the total
 * page count read from the metadata. Returns 0 on success, 1 on failure. */
int main(void) {
    const char *settings_json =
        "{"
        "\"pages\": {"
        "\"extract_pages\": true"
        "}"
        "}";
    int status = 1;

    KREUZBERGExtractionConfig *cfg =
        kreuzberg_extraction_config_from_json(settings_json);
    if (cfg == NULL) {
        fprintf(stderr, "config parse failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
        return status;
    }

    KREUZBERGExtractionResult *res =
        kreuzberg_extract_file_sync("document.pdf", NULL, cfg);
    if (res == NULL) {
        fprintf(stderr, "extraction failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
    } else {
        char *page_list = kreuzberg_extraction_result_pages(res);
        if (page_list == NULL) {
            printf("No pages available\n");
        } else {
            printf("Pages (JSON array): %s\n", page_list);
            kreuzberg_free_string(page_list);
        }

        KREUZBERGMetadata *meta = kreuzberg_extraction_result_metadata(res);
        if (meta != NULL) {
            KREUZBERGPageStructure *layout = kreuzberg_metadata_pages(meta);
            if (layout != NULL) {
                printf("Total page count: %zu\n",
                       kreuzberg_page_structure_total_count(layout));
                kreuzberg_page_structure_free(layout);
            }
            kreuzberg_metadata_free(meta);
        }

        kreuzberg_extraction_result_free(res);
        status = 0;
    }

    /* The config is released on both the success and the failure path. */
    kreuzberg_extraction_config_free(cfg);
    return status;
}
```

View File

@@ -0,0 +1,21 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
/*
 * Extracts "spreadsheet.xlsx" and prints the tables JSON carried by the
 * result, or a notice when no tables were found. Exits 1 when extraction
 * fails, 0 otherwise.
 */
int main(void) {
    struct CExtractionResult *result = kreuzberg_extract_file_sync("spreadsheet.xlsx");
    if (!result || !result->success) {
        fprintf(stderr, "Error: %s\n", kreuzberg_get_error_details().message);
        /* A non-NULL result with success == 0 was previously leaked: release
         * it the same way the success path does before bailing out. */
        if (result) {
            kreuzberg_free_result(result);
        }
        return 1;
    }
    if (result->tables_json) {
        printf("Tables (JSON): %s\n", result->tables_json);
    } else {
        printf("No tables found\n");
    }
    kreuzberg_free_result(result);
    return 0;
}
```

View File

@@ -0,0 +1,60 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
#include <string.h>
/*
 * Extracts "document.pdf" with character chunking and an embedding preset
 * configured, prints the chunk JSON and, when available, the document title.
 * Exits 0 on success and 1 on config-parse or extraction failure.
 */
int main(void) {
    const char *chunk_settings =
        "{"
        "\"chunking\": {"
        "\"chunker_type\": \"character\","
        "\"max_characters\": 512,"
        "\"overlap\": 50,"
        "\"embedding\": {"
        "\"model\": {\"preset\": {\"name\": \"balanced\"}},"
        "\"normalize\": true"
        "}"
        "}"
        "}";
    int status = 1;
    KREUZBERGExtractionResult *res = NULL;

    KREUZBERGExtractionConfig *cfg = kreuzberg_extraction_config_from_json(chunk_settings);
    if (cfg == NULL) {
        fprintf(stderr, "config parse failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
        return 1;
    }

    res = kreuzberg_extract_file_sync("document.pdf", NULL, cfg);
    if (res == NULL) {
        fprintf(stderr, "extraction failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
        goto release_config;
    }

    /* Chunks (with their embeddings) come back as one JSON string. */
    char *chunk_dump = kreuzberg_extraction_result_chunks(res);
    if (chunk_dump == NULL) {
        printf("No chunks produced\n");
    } else {
        printf("Chunks with embeddings (JSON): %s\n", chunk_dump);
        kreuzberg_free_string(chunk_dump);
    }

    KREUZBERGMetadata *meta = kreuzberg_extraction_result_metadata(res);
    if (meta != NULL) {
        char *doc_title = kreuzberg_metadata_title(meta);
        if (doc_title != NULL) {
            printf("Document title: %s\n", doc_title);
            kreuzberg_free_string(doc_title);
        }
        kreuzberg_metadata_free(meta);
    }

    kreuzberg_extraction_result_free(res);
    status = 0;

release_config:
    kreuzberg_extraction_config_free(cfg);
    return status;
}
```

View File

@@ -0,0 +1,42 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
/*
 * Runs OCR on "scanned.pdf" through the backend named "cloud-ocr" and prints
 * the extracted content. Cloud OCR backends are registered as custom plugins
 * via the Rust core; this example only selects one by name through the OCR
 * config. Exits 0 on success, 1 on config-parse or extraction failure.
 */
int main(void) {
    const char *ocr_settings =
        "{"
        "\"ocr\": {"
        "\"backend\": \"cloud-ocr\","
        "\"language\": \"eng\""
        "}"
        "}";

    KREUZBERGExtractionConfig *cfg = kreuzberg_extraction_config_from_json(ocr_settings);
    if (cfg != NULL) {
        KREUZBERGExtractionResult *extraction =
            kreuzberg_extract_file_sync("scanned.pdf", NULL, cfg);
        if (extraction != NULL) {
            char *text = kreuzberg_extraction_result_content(extraction);
            const char *shown = text ? text : "(empty)";
            printf("%s\n", shown);
            kreuzberg_free_string(text);
            kreuzberg_extraction_result_free(extraction);
            kreuzberg_extraction_config_free(cfg);
            return 0;
        }
        fprintf(stderr, "extraction failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
        kreuzberg_extraction_config_free(cfg);
        return 1;
    }

    fprintf(stderr, "config parse failed (code %d): %s\n",
            kreuzberg_last_error_code(),
            kreuzberg_last_error_context());
    return 1;
}
```

View File

@@ -0,0 +1,39 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
/*
 * Extracts "document.pdf" with `images.extract_images` enabled and prints the
 * extracted text content. Exits 0 on success, 1 on config-parse or extraction
 * failure.
 */
int main(void) {
    const char *image_settings =
        "{"
        "\"images\": {"
        "\"extract_images\": true"
        "}"
        "}";
    int exit_code = 1;

    KREUZBERGExtractionConfig *cfg = kreuzberg_extraction_config_from_json(image_settings);
    if (cfg == NULL) {
        fprintf(stderr, "config parse failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
        return exit_code;
    }

    KREUZBERGExtractionResult *extraction =
        kreuzberg_extract_file_sync("document.pdf", NULL, cfg);
    if (extraction == NULL) {
        fprintf(stderr, "extraction failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
    } else {
        char *text = kreuzberg_extraction_result_content(extraction);
        printf("%s\n", text ? text : "(empty)");
        kreuzberg_free_string(text);
        kreuzberg_extraction_result_free(extraction);
        exit_code = 0;
    }

    kreuzberg_extraction_config_free(cfg);
    return exit_code;
}
```

View File

@@ -0,0 +1,44 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
/*
 * Extracts "document.pdf" with image extraction plus explicit DPI settings
 * (target, min/max, auto-adjust, maximum image dimension) and prints the
 * extracted text content. Exits 0 on success, 1 on config-parse or extraction
 * failure.
 */
int main(void) {
    const char *dpi_settings =
        "{"
        "\"images\": {"
        "\"extract_images\": true,"
        "\"target_dpi\": 300,"
        "\"max_image_dimension\": 4096,"
        "\"auto_adjust_dpi\": true,"
        "\"min_dpi\": 150,"
        "\"max_dpi\": 600"
        "}"
        "}";
    int status = 1;
    KREUZBERGExtractionResult *res = NULL;

    KREUZBERGExtractionConfig *cfg = kreuzberg_extraction_config_from_json(dpi_settings);
    if (cfg == NULL) {
        fprintf(stderr, "config parse failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
        return 1;
    }

    res = kreuzberg_extract_file_sync("document.pdf", NULL, cfg);
    if (res == NULL) {
        fprintf(stderr, "extraction failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
        goto release_config;
    }

    char *text = kreuzberg_extraction_result_content(res);
    printf("%s\n", text ? text : "(empty)");
    kreuzberg_free_string(text);
    kreuzberg_extraction_result_free(res);
    status = 0;

release_config:
    kreuzberg_extraction_config_free(cfg);
    return status;
}
```

View File

@@ -0,0 +1,40 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
/*
 * Extracts "document.pdf" using the "easyocr" OCR backend with language "en"
 * and prints the extracted content. Exits 0 on success, 1 on config-parse or
 * extraction failure.
 */
int main(void) {
    const char *ocr_settings =
        "{"
        "\"ocr\": {"
        "\"backend\": \"easyocr\","
        "\"language\": \"en\""
        "}"
        "}";

    KREUZBERGExtractionConfig *cfg = kreuzberg_extraction_config_from_json(ocr_settings);
    if (cfg != NULL) {
        KREUZBERGExtractionResult *extraction =
            kreuzberg_extract_file_sync("document.pdf", NULL, cfg);
        if (extraction != NULL) {
            char *text = kreuzberg_extraction_result_content(extraction);
            const char *shown = text ? text : "(empty)";
            printf("%s\n", shown);
            kreuzberg_free_string(text);
            kreuzberg_extraction_result_free(extraction);
            kreuzberg_extraction_config_free(cfg);
            return 0;
        }
        fprintf(stderr, "extraction failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
        kreuzberg_extraction_config_free(cfg);
        return 1;
    }

    fprintf(stderr, "config parse failed (code %d): %s\n",
            kreuzberg_last_error_code(),
            kreuzberg_last_error_context());
    return 1;
}
```

View File

@@ -0,0 +1,43 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
/*
 * Extracts "scanned.pdf" using the "paddleocr" backend with
 * `element_config.include_elements` switched on, then prints the extracted
 * content. Exits 0 on success, 1 on config-parse or extraction failure.
 */
int main(void) {
    const char *ocr_settings =
        "{"
        "\"ocr\": {"
        "\"backend\": \"paddleocr\","
        "\"language\": \"en\","
        "\"element_config\": {"
        "\"include_elements\": true"
        "}"
        "}"
        "}";
    int exit_code = 1;

    KREUZBERGExtractionConfig *cfg = kreuzberg_extraction_config_from_json(ocr_settings);
    if (cfg == NULL) {
        fprintf(stderr, "config parse failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
        return exit_code;
    }

    KREUZBERGExtractionResult *extraction =
        kreuzberg_extract_file_sync("scanned.pdf", NULL, cfg);
    if (extraction == NULL) {
        fprintf(stderr, "extraction failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
    } else {
        char *text = kreuzberg_extraction_result_content(extraction);
        printf("%s\n", text ? text : "(empty)");
        kreuzberg_free_string(text);
        kreuzberg_extraction_result_free(extraction);
        exit_code = 0;
    }

    kreuzberg_extraction_config_free(cfg);
    return exit_code;
}
```

View File

@@ -0,0 +1,26 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
/*
 * Builds an extraction config with Tesseract OCR (language "eng") through the
 * config builder, serialises it to JSON, runs OCR on "scanned.png" and prints
 * the recognised text.
 *
 * Returns 0 when OCR succeeds and 1 on any failure. Previously the example
 * exited 0 even when OCR failed and never checked the intermediate handles.
 */
int main(void) {
    struct ConfigBuilder *builder = kreuzberg_config_builder_new();
    if (!builder) {
        fprintf(stderr, "failed to create config builder\n");
        return 1;
    }
    kreuzberg_config_builder_set_ocr(builder,
                                     "{\"tesseract\":{\"language\":\"eng\"}}");

    /* NOTE(review): the original never frees `builder`, so build() presumably
     * consumes it — confirm against the header before adding a free here. */
    ExtractionConfig *config = kreuzberg_config_builder_build(builder);
    if (!config) {
        fprintf(stderr, "failed to build extraction config\n");
        return 1;
    }

    char *config_json = kreuzberg_config_to_json(config);
    if (!config_json) {
        fprintf(stderr, "failed to serialise extraction config\n");
        kreuzberg_config_free(config);
        return 1;
    }

    int exit_code = 0;
    struct CExtractionResult *result =
        kreuzberg_extract_file_sync_with_config("scanned.png", config_json);
    if (result && result->success) {
        printf("OCR text: %s\n", result->content);
    } else {
        fprintf(stderr, "OCR error: %s\n", kreuzberg_get_error_details().message);
        exit_code = 1; /* report the failure to the caller's shell */
    }

    /* Only hand a real result back to the library. */
    if (result) {
        kreuzberg_free_result(result);
    }
    kreuzberg_free_string(config_json);
    kreuzberg_config_free(config);
    return exit_code;
}
```

View File

@@ -0,0 +1,41 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
/*
 * Extracts "document.pdf" with `force_ocr` enabled and the "tesseract"
 * backend (language "eng"), then prints the extracted content. Exits 0 on
 * success, 1 on config-parse or extraction failure.
 */
int main(void) {
    const char *ocr_settings =
        "{"
        "\"force_ocr\": true,"
        "\"ocr\": {"
        "\"backend\": \"tesseract\","
        "\"language\": \"eng\""
        "}"
        "}";
    int status = 1;
    KREUZBERGExtractionResult *res = NULL;

    KREUZBERGExtractionConfig *cfg = kreuzberg_extraction_config_from_json(ocr_settings);
    if (cfg == NULL) {
        fprintf(stderr, "config parse failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
        return 1;
    }

    res = kreuzberg_extract_file_sync("document.pdf", NULL, cfg);
    if (res == NULL) {
        fprintf(stderr, "extraction failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
        goto release_config;
    }

    char *text = kreuzberg_extraction_result_content(res);
    printf("%s\n", text ? text : "(empty)");
    kreuzberg_free_string(text);
    kreuzberg_extraction_result_free(res);
    status = 0;

release_config:
    kreuzberg_extraction_config_free(cfg);
    return status;
}
```

View File

@@ -0,0 +1,40 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
/*
 * Extracts "multilingual.pdf" with the "tesseract" backend and the combined
 * language string "eng+deu+fra", then prints the extracted content. Exits 0
 * on success, 1 on config-parse or extraction failure.
 */
int main(void) {
    const char *ocr_settings =
        "{"
        "\"ocr\": {"
        "\"backend\": \"tesseract\","
        "\"language\": \"eng+deu+fra\""
        "}"
        "}";

    KREUZBERGExtractionConfig *cfg = kreuzberg_extraction_config_from_json(ocr_settings);
    if (cfg != NULL) {
        KREUZBERGExtractionResult *extraction =
            kreuzberg_extract_file_sync("multilingual.pdf", NULL, cfg);
        if (extraction != NULL) {
            char *text = kreuzberg_extraction_result_content(extraction);
            const char *shown = text ? text : "(empty)";
            printf("%s\n", shown);
            kreuzberg_free_string(text);
            kreuzberg_extraction_result_free(extraction);
            kreuzberg_extraction_config_free(cfg);
            return 0;
        }
        fprintf(stderr, "extraction failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
        kreuzberg_extraction_config_free(cfg);
        return 1;
    }

    fprintf(stderr, "config parse failed (code %d): %s\n",
            kreuzberg_last_error_code(),
            kreuzberg_last_error_context());
    return 1;
}
```

View File

@@ -0,0 +1,40 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
/*
 * Extracts "document.pdf" using the "paddleocr" backend with language "en"
 * and prints the extracted content. Exits 0 on success, 1 on config-parse or
 * extraction failure.
 */
int main(void) {
    const char *ocr_settings =
        "{"
        "\"ocr\": {"
        "\"backend\": \"paddleocr\","
        "\"language\": \"en\""
        "}"
        "}";
    int exit_code = 1;

    KREUZBERGExtractionConfig *cfg = kreuzberg_extraction_config_from_json(ocr_settings);
    if (cfg == NULL) {
        fprintf(stderr, "config parse failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
        return exit_code;
    }

    KREUZBERGExtractionResult *extraction =
        kreuzberg_extract_file_sync("document.pdf", NULL, cfg);
    if (extraction == NULL) {
        fprintf(stderr, "extraction failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
    } else {
        char *text = kreuzberg_extraction_result_content(extraction);
        printf("%s\n", text ? text : "(empty)");
        kreuzberg_free_string(text);
        kreuzberg_extraction_result_free(extraction);
        exit_code = 0;
    }

    kreuzberg_extraction_config_free(cfg);
    return exit_code;
}
```

View File

@@ -0,0 +1,30 @@
```c title="C"
#include <kreuzberg.h>
#include <stdio.h>
/*
 * Clears the post-processor, OCR-backend and validator registries in that
 * order, stopping at the first call that reports a non-zero status. Exits 0
 * only when all three succeed.
 */
int main(void) {
    int exit_code = 1;

    if (kreuzberg_clear_post_processors()) {
        fprintf(stderr, "clear post-processors failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
    } else if (kreuzberg_clear_ocr_backends()) {
        fprintf(stderr, "clear OCR backends failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
    } else if (kreuzberg_clear_validators()) {
        fprintf(stderr, "clear validators failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
    } else {
        printf("All plugins cleared\n");
        exit_code = 0;
    }

    return exit_code;
}
```

View File

@@ -0,0 +1,115 @@
```c title="C"
#include <kreuzberg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Fixed embedding dimension produced by this backend. */
#define EMBED_DIM 768
/* Returns a heap-allocated copy of `s` (terminator included), or NULL when
 * malloc fails. */
static char *dup_cstr(const char *s) {
    const size_t size = strlen(s) + 1; /* bytes to copy, counting the NUL */
    char *copy = (char *)malloc(size);
    if (copy == NULL) {
        return NULL;
    }
    memcpy(copy, s, size);
    return copy;
}
/* vtable `dimensions` hook: reports the fixed vector length EMBED_DIM.
 * `user_data` is not consulted. */
static uintptr_t dimensions_fn(const void *user_data) {
    const uintptr_t vector_length = (uintptr_t)EMBED_DIM;
    (void)user_data;
    return vector_length;
}
/*
 * vtable `embed` hook: produces one all-zero vector of length EMBED_DIM for
 * every string found in `texts`.
 *
 * texts       JSON array of strings (only scanned for quote pairs, not parsed).
 * out_result  receives a malloc'd JSON array of arrays on success.
 * out_error   receives a malloc'd message on failure (may be NULL).
 *
 * Returns 0 on success and 1 on invalid arguments or allocation failure.
 * A real backend would parse the JSON and call its host model.
 */
static int32_t embed_fn(
    const void *user_data,
    const char *texts,
    char **out_result,
    char **out_error
) {
    (void)user_data;

    /* Refuse to scan or write through NULL pointers instead of crashing. */
    if (!texts || !out_result) {
        if (out_error) {
            *out_error = dup_cstr("invalid arguments");
        }
        return 1;
    }

    /* Count entries by pairing up unescaped double quotes: each opening
     * quote starts a new string, a backslash protects the next character. */
    size_t count = 0;
    int in_string = 0;
    int escape = 0;
    for (const char *p = texts; *p; ++p) {
        if (escape) {
            escape = 0;
        } else if (*p == '\\') {
            escape = 1;
        } else if (*p == '"') {
            if (!in_string) {
                in_string = 1;
                count += 1;
            } else {
                in_string = 0;
            }
        }
    }

    /* Each vector needs '[' + EMBED_DIM * "0.0" + (EMBED_DIM - 1) commas + ']'
     * plus at most one separating comma, i.e. EMBED_DIM * 4 + 2 bytes; the
     * extra slack covers the outer brackets and the terminator. */
    size_t cap = 16 + count * (EMBED_DIM * 4 + 4);
    char *json = (char *)malloc(cap);
    if (!json) {
        if (out_error) {
            *out_error = dup_cstr("allocation failure");
        }
        return 1;
    }

    size_t pos = 0;
    json[pos++] = '[';
    for (size_t i = 0; i < count; ++i) {
        if (i > 0) {
            json[pos++] = ',';
        }
        json[pos++] = '[';
        for (size_t d = 0; d < EMBED_DIM; ++d) {
            if (d > 0) {
                json[pos++] = ',';
            }
            json[pos++] = '0';
            json[pos++] = '.';
            json[pos++] = '0';
        }
        json[pos++] = ']';
    }
    json[pos++] = ']';
    json[pos] = '\0';

    *out_result = json;
    return 0;
}
/* vtable `name_fn` hook: stores a malloc'd copy of the backend name
 * "my-embedder" in `*out_name` (NULL if the copy could not be allocated). */
static void name_fn(const void *user_data, char **out_name) {
    char *backend_name = dup_cstr("my-embedder");
    (void)user_data;
    *out_name = backend_name;
}
/* vtable `version_fn` hook: stores a malloc'd copy of the version string
 * "1.0.0" in `*out_version` (NULL if the copy could not be allocated). */
static void version_fn(const void *user_data, char **out_version) {
    char *backend_version = dup_cstr("1.0.0");
    (void)user_data;
    *out_version = backend_version;
}
int main(void) {
KREUZBERGKreuzbergEmbeddingBackendVTable vtable = {0};
vtable.name_fn = name_fn;
vtable.version_fn = version_fn;
vtable.dimensions = dimensions_fn;
vtable.embed = embed_fn;
char *err = NULL;
int32_t rc = kreuzberg_register_embedding_backend(
"my-embedder",
vtable,
NULL,
&err
);
if (rc != 0) {
fprintf(stderr, "register embedding backend failed: %s\n",
err ? err : "(no detail)");
kreuzberg_free_string(err);
return 1;
}
printf("my-embedder registered (dim=%d)\n", EMBED_DIM);
return 0;
}
```

View File

@@ -0,0 +1,27 @@
```c title="C"
#include <kreuzberg.h>
#include <stdio.h>
/*
* The kreuzberg C FFI does not expose a public function for registering
* custom DocumentExtractor implementations from C. Document extractors must
* be registered from Rust via `kreuzberg::plugins::registry::get_document_extractor_registry()`
* before the C library is loaded.
*
* From C you can inspect which extractors the core has registered:
*/
/* Prints the JSON list of document extractors known to the core, or the last
 * error and exit code 1 when the list cannot be obtained. */
int main(void) {
    char *extractor_list = kreuzberg_list_document_extractors();
    if (extractor_list != NULL) {
        printf("Registered document extractors: %s\n", extractor_list);
        kreuzberg_free_string(extractor_list);
        return 0;
    }

    fprintf(stderr, "list document extractors failed (code %d): %s\n",
            kreuzberg_last_error_code(),
            kreuzberg_last_error_context());
    return 1;
}
```

View File

@@ -0,0 +1,25 @@
```c title="C"
#include <kreuzberg.h>
#include <stdio.h>
/*
 * Prints "<label>: <json>" and releases `json` with kreuzberg_free_string.
 * When `json` is NULL the library's last error code/context is reported on
 * stderr instead and nothing is freed.
 */
static void print_plugin_list(const char *label, char *json) {
    if (json != NULL) {
        printf("%s: %s\n", label, json);
        kreuzberg_free_string(json);
    } else {
        fprintf(stderr, "list %s failed (code %d): %s\n",
                label,
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
    }
}
/* Lists every plugin registry in turn; each listing is printed (or its
 * failure reported) before the next registry is queried. Always exits 0. */
int main(void) {
    char *listing = kreuzberg_list_document_extractors();
    print_plugin_list("document extractors", listing);

    listing = kreuzberg_list_ocr_backends();
    print_plugin_list("OCR backends", listing);

    listing = kreuzberg_list_post_processors();
    print_plugin_list("post-processors", listing);

    listing = kreuzberg_list_validators();
    print_plugin_list("validators", listing);

    listing = kreuzberg_list_embedding_presets();
    print_plugin_list("embedding presets", listing);

    return 0;
}
```

View File

@@ -0,0 +1,90 @@
```c title="C"
#include <kreuzberg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Plugin state passed to the validator as `user_data`: it carries the minimum
 * length threshold that validate_fn compares the measured content length
 * against. */
typedef struct {
/* Smallest content length that validate_fn accepts. */
size_t min_length;
} MinLengthState;
/* Duplicates the NUL-terminated string `s` into malloc'd storage; yields NULL
 * if the allocation fails. */
static char *dup_cstr(const char *s) {
    const size_t bytes = strlen(s) + 1; /* include the terminator */
    char *clone = (char *)malloc(bytes);
    return clone ? (char *)memcpy(clone, s, bytes) : NULL;
}
/*
 * vtable `validate` hook: rejects results whose "content" value is shorter
 * than the threshold stored in the MinLengthState passed as `user_data`.
 *
 * result     JSON string of the extraction result; only the first
 *            `"content":"` field is inspected (no real JSON parsing).
 * out_error  receives a malloc'd message when validation fails.
 *
 * Returns 0 when the content is long enough, 1 otherwise. Production plugins
 * should parse the JSON properly.
 */
static int32_t validate_fn(
    const void *user_data,
    const char *result,
    const char *config,
    char **out_error
) {
    (void)config;
    const MinLengthState *state = (const MinLengthState *)user_data;
    static const char key[] = "\"content\":\"";

    size_t content_len = 0;
    const char *content = strstr(result, key);
    if (content) {
        content += sizeof(key) - 1;
        /* Walk to the closing quote, treating a backslash escape as a single
         * character. A plain strchr() stopped at the first \" inside the text
         * and under-reported the length of content containing quotes. */
        size_t seen = 0;
        const char *p = content;
        while (*p != '\0' && *p != '"') {
            if (*p == '\\' && p[1] != '\0') {
                ++p; /* skip the escaped character */
            }
            ++p;
            ++seen;
        }
        /* An unterminated value keeps the length at 0, as before. */
        if (*p == '"') {
            content_len = seen;
        }
    }

    if (content_len < state->min_length) {
        char buf[128];
        snprintf(buf, sizeof(buf),
                 "Content too short: %zu < %zu characters",
                 content_len,
                 state->min_length);
        *out_error = dup_cstr(buf);
        return 1;
    }
    return 0;
}
/* vtable `priority` hook: always reports 100; `user_data` is not consulted. */
static int32_t priority_fn(const void *user_data) {
    const int32_t fixed_priority = 100;
    (void)user_data;
    return fixed_priority;
}
/* vtable `free_user_data` hook: hands the `user_data` block (allocated with
 * malloc in main) back to free(). */
static void free_user_data(void *user_data) {
    void *state_block = user_data;
    free(state_block);
}
/*
 * Registers "min-length-validator" with a heap-allocated MinLengthState
 * (threshold 100) as its user data. Exits 0 once registered and 1 when the
 * state cannot be allocated or registration fails.
 */
int main(void) {
    MinLengthState *state = (MinLengthState *)malloc(sizeof(MinLengthState));
    /* malloc may fail; writing through NULL would be undefined behaviour. */
    if (!state) {
        fprintf(stderr, "register validator failed: %s\n", "out of memory");
        return 1;
    }
    state->min_length = 100;

    KREUZBERGKreuzbergValidatorVTable vtable = {0};
    vtable.validate = validate_fn;
    vtable.priority = priority_fn;
    vtable.free_user_data = free_user_data;

    char *err = NULL;
    int32_t rc = kreuzberg_register_validator(
        "min-length-validator",
        vtable,
        state,
        &err
    );
    if (rc != 0) {
        fprintf(stderr, "register validator failed: %s\n", err ? err : "(no detail)");
        kreuzberg_free_string(err);
        /* Registration did not succeed, so the state is released here. */
        free(state);
        return 1;
    }
    printf("min-length-validator registered\n");
    return 0;
}
```

View File

@@ -0,0 +1,85 @@
```c title="C"
#include <kreuzberg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
* The C FFI does not expose registration for custom DocumentExtractor
* implementations. To add PDF-specific behaviour from C, register a
* post-processor that runs only on PDF results and enriches them.
*
* The example below logs whenever the pipeline emits a PDF result, scoped
* via the should_process hook so it never fires for other MIME types.
*/
/* Copies `s`, terminator included, into a new malloc'd buffer. Returns NULL
 * when the allocation fails. */
static char *dup_cstr(const char *s) {
    size_t with_nul = strlen(s) + 1;
    char *buffer = (char *)malloc(with_nul);
    if (buffer != NULL) {
        memcpy(buffer, s, with_nul);
    }
    return buffer;
}
/* vtable `process` hook: logs the byte length of the serialised result to
 * stdout and reports success (0). The result itself is not modified. */
static int32_t process_fn(
    const void *user_data,
    const char *result,
    const char *config,
    char **out_error
) {
    const size_t serialised_len = strlen(result);

    (void)out_error;
    (void)config;
    (void)user_data;

    printf("pdf-metadata-extractor: serialised PDF result is %zu bytes\n", serialised_len);
    return 0;
}
/* vtable `processing_stage` hook: writes a malloc'd copy of the JSON string
 * "\"Late\"" to `*out_result` and returns 0. */
static int32_t processing_stage_fn(
    const void *user_data,
    char **out_result
) {
    char *stage_json = dup_cstr("\"Late\"");
    (void)user_data;
    *out_result = stage_json;
    return 0;
}
/* vtable `should_process` hook: returns 1 only when the serialised result
 * contains the exact substring "mime_type":"application/pdf", else 0. */
static int32_t should_process_fn(
    const void *user_data,
    const char *result,
    const char *config
) {
    static const char pdf_marker[] = "\"mime_type\":\"application/pdf\"";

    (void)config;
    (void)user_data;

    if (strstr(result, pdf_marker) == NULL) {
        return 0;
    }
    return 1;
}
/* vtable `priority` hook: always reports 75; `user_data` is not consulted. */
static int32_t priority_fn(const void *user_data) {
    const int32_t fixed_priority = 75;
    (void)user_data;
    return fixed_priority;
}
int main(void) {
KREUZBERGKreuzbergPostProcessorVTable vtable = {0};
vtable.process = process_fn;
vtable.processing_stage = processing_stage_fn;
vtable.should_process = should_process_fn;
vtable.priority = priority_fn;
char *err = NULL;
int32_t rc = kreuzberg_register_post_processor(
"pdf-metadata-extractor",
vtable,
NULL,
&err
);
if (rc != 0) {
fprintf(stderr, "register post-processor failed: %s\n", err ? err : "(no detail)");
kreuzberg_free_string(err);
return 1;
}
printf("pdf-metadata-extractor registered\n");
return 0;
}
```

View File

@@ -0,0 +1,78 @@
```c title="C"
#include <kreuzberg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Returns a heap-allocated copy of `s` (terminator included), or NULL when
 * malloc fails. */
static char *dup_cstr(const char *s) {
    const size_t size = strlen(s) + 1; /* bytes to copy, counting the NUL */
    char *copy = (char *)malloc(size);
    if (copy == NULL) {
        return NULL;
    }
    memcpy(copy, s, size);
    return copy;
}
/* vtable `process` hook: prints the length of the serialised result to
 * stdout and reports success (0). The result itself is not modified. */
static int32_t process_fn(
    const void *user_data,
    const char *result,
    const char *config,
    char **out_error
) {
    const size_t result_len = strlen(result);

    (void)out_error;
    (void)config;
    (void)user_data;

    printf("pdf-only-processor: handling result of length %zu\n", result_len);
    return 0;
}
/* vtable `processing_stage` hook: writes a malloc'd copy of the JSON string
 * "\"Middle\"" to `*out_result` and returns 0. */
static int32_t processing_stage_fn(
    const void *user_data,
    char **out_result
) {
    char *stage_json = dup_cstr("\"Middle\"");
    (void)user_data;
    *out_result = stage_json;
    return 0;
}
/* vtable `should_process` hook: restricts the processor to PDF results by
 * looking for the exact substring "mime_type":"application/pdf" in the
 * serialised result. Returns 1 on a match, 0 otherwise. */
static int32_t should_process_fn(
    const void *user_data,
    const char *result,
    const char *config
) {
    const char *pdf_hit = strstr(result, "\"mime_type\":\"application/pdf\"");

    (void)config;
    (void)user_data;

    return pdf_hit ? 1 : 0;
}
/* vtable `priority` hook: always reports 50; `user_data` is not consulted. */
static int32_t priority_fn(const void *user_data) {
    const int32_t fixed_priority = 50;
    (void)user_data;
    return fixed_priority;
}
/*
 * Registers the PDF-only post-processor under the name "pdf-only-processor"
 * with no user data. Exits 0 once registered; on failure prints the library's
 * detail string (if any) and exits 1.
 */
int main(void) {
    KREUZBERGKreuzbergPostProcessorVTable hooks = {0};
    char *failure = NULL;
    int32_t status;

    hooks.priority = priority_fn;
    hooks.should_process = should_process_fn;
    hooks.processing_stage = processing_stage_fn;
    hooks.process = process_fn;

    status = kreuzberg_register_post_processor("pdf-only-processor", hooks, NULL, &failure);
    if (status == 0) {
        printf("pdf-only-processor registered\n");
        return 0;
    }

    fprintf(stderr, "register post-processor failed: %s\n", failure ? failure : "(no detail)");
    kreuzberg_free_string(failure);
    return 1;
}
```

View File

@@ -0,0 +1,42 @@
```c title="C"
#include <kreuzberg.h>
#include <stdio.h>
#include <string.h>
/*
* The C FFI exposes vtable-based registration for OCR backends, post-processors,
* validators, and embedding backends. There is no public C entry point for
* registering a custom DocumentExtractor — that must be done from Rust.
*
* From C you can still drive extraction for any MIME type the Rust core knows
* how to handle. The example below feeds JSON bytes through the standard
* extraction pipeline by passing the explicit MIME type.
*/
/*
 * Feeds an in-memory JSON payload through the standard extraction pipeline
 * with the explicit MIME type "application/json" and no config, then prints
 * the extracted content. Exits 0 on success and 1 when extraction fails.
 */
int main(void) {
    static const char payload[] = "{\"message\":\"Hello, world!\"}";
    const uintptr_t payload_len = (uintptr_t)strlen(payload);

    KREUZBERGExtractionResult *extraction = kreuzberg_extract_bytes_sync(
        (const uint8_t *)payload,
        payload_len,
        "application/json",
        NULL
    );
    if (extraction != NULL) {
        char *text = kreuzberg_extraction_result_content(extraction);
        printf("Extracted JSON content: %s\n", text ? text : "(empty)");
        kreuzberg_free_string(text);
        kreuzberg_extraction_result_free(extraction);
        return 0;
    }

    fprintf(stderr, "extraction failed (code %d): %s\n",
            kreuzberg_last_error_code(),
            kreuzberg_last_error_context());
    return 1;
}
```

View File

@@ -0,0 +1,91 @@
```c title="C"
#include <kreuzberg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Demonstrates structured logging from a post-processor plugin's lifecycle
* hooks (initialize/shutdown) and from the per-result process callback. */
/* Duplicates the NUL-terminated string `s` into malloc'd storage; yields NULL
 * if the allocation fails. */
static char *dup_cstr(const char *s) {
    const size_t bytes = strlen(s) + 1; /* include the terminator */
    char *clone = (char *)malloc(bytes);
    return clone ? (char *)memcpy(clone, s, bytes) : NULL;
}
/* vtable `initialize_fn` hook: writes one INFO line to stderr and returns 0. */
static int32_t initialize_fn(const void *user_data, char **out_error) {
    (void)out_error;
    (void)user_data;
    fputs("[INFO] plugin=logging-demo event=initialize\n", stderr);
    return 0;
}
/* vtable `shutdown_fn` hook: writes one INFO line to stderr and returns 0. */
static int32_t shutdown_fn(const void *user_data, char **out_error) {
    (void)out_error;
    (void)user_data;
    fputs("[INFO] plugin=logging-demo event=shutdown\n", stderr);
    return 0;
}
/* vtable `process` hook: logs the serialised result's byte length to stderr
 * and adds a WARN line when the result contains an empty "content" field.
 * Always returns 0 and leaves the result untouched. */
static int32_t process_fn(
    const void *user_data,
    const char *result,
    const char *config,
    char **out_error
) {
    (void)out_error;
    (void)config;
    (void)user_data;

    fprintf(stderr,
            "[INFO] plugin=logging-demo event=process bytes=%zu\n",
            strlen(result));

    const int content_is_empty = strstr(result, "\"content\":\"\"") != NULL;
    if (content_is_empty) {
        fputs("[WARN] plugin=logging-demo event=empty_content\n", stderr);
    }
    return 0;
}
/* vtable `processing_stage` hook: writes a malloc'd copy of the JSON string
 * "\"Late\"" to `*out_result` and returns 0. */
static int32_t processing_stage_fn(const void *user_data, char **out_result) {
    char *stage_json = dup_cstr("\"Late\"");
    (void)user_data;
    *out_result = stage_json;
    return 0;
}
/* vtable `priority` hook: always reports 50; `user_data` is not consulted. */
static int32_t priority_fn(const void *user_data) {
    const int32_t fixed_priority = 50;
    (void)user_data;
    return fixed_priority;
}
int main(void) {
KREUZBERGKreuzbergPostProcessorVTable vtable = {0};
vtable.initialize_fn = initialize_fn;
vtable.shutdown_fn = shutdown_fn;
vtable.process = process_fn;
vtable.processing_stage = processing_stage_fn;
vtable.priority = priority_fn;
char *err = NULL;
int32_t rc = kreuzberg_register_post_processor(
"logging-demo",
vtable,
NULL,
&err
);
if (rc != 0) {
fprintf(stderr, "[ERROR] register post-processor failed: %s\n",
err ? err : "(no detail)");
kreuzberg_free_string(err);
return 1;
}
printf("logging-demo post-processor registered\n");
return 0;
}
```

View File

@@ -0,0 +1,84 @@
```c title="C"
#include <kreuzberg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Round-trip test: register a no-op validator, confirm it appears in the
* registry list, then unregister and confirm it disappears. */
/* Copies `s`, terminator included, into a new malloc'd buffer. Returns NULL
 * when the allocation fails. */
static char *dup_cstr(const char *s) {
    size_t with_nul = strlen(s) + 1;
    char *buffer = (char *)malloc(with_nul);
    if (buffer != NULL) {
        memcpy(buffer, s, with_nul);
    }
    return buffer;
}
/* vtable `validate` hook: a no-op validator that ignores every argument and
 * accepts each result by returning 0. */
static int32_t validate_fn(
    const void *user_data,
    const char *result,
    const char *config,
    char **out_error
) {
    const int32_t accepted = 0;

    (void)out_error;
    (void)config;
    (void)result;
    (void)user_data;

    return accepted;
}
/* vtable `priority` hook: always reports 50; `user_data` is not consulted. */
static int32_t priority_fn(const void *user_data) {
    const int32_t fixed_priority = 50;
    (void)user_data;
    return fixed_priority;
}
/* Returns 1 when `name` occurs anywhere inside `json` (plain substring
 * search), 0 otherwise or when either argument is NULL. */
static int contains_name(const char *json, const char *name) {
    return (json != NULL && name != NULL && strstr(json, name) != NULL) ? 1 : 0;
}
/*
 * Round-trip check for the validator registry: registers "noop-validator",
 * verifies that the name shows up in the validator list, unregisters it and
 * verifies that the name is gone. Prints PASS/FAIL lines and exits 1 at the
 * first failed step, 0 when both checks pass.
 */
int main(void) {
    const char *plugin_name = "noop-validator";
    char *failure = NULL;
    char *listing = NULL;
    int present = 0;

    KREUZBERGKreuzbergValidatorVTable hooks = {0};
    hooks.priority = priority_fn;
    hooks.validate = validate_fn;

    /* Step 1: register. */
    if (kreuzberg_register_validator(plugin_name, hooks, NULL, &failure) != 0) {
        fprintf(stderr, "register failed: %s\n", failure ? failure : "(no detail)");
        kreuzberg_free_string(failure);
        return 1;
    }

    /* Step 2: the name must be listed now. */
    listing = kreuzberg_list_validators();
    present = contains_name(listing, plugin_name);
    if (!present) {
        fprintf(stderr, "FAIL: validator missing after register\n");
    } else {
        printf("PASS: %s present after register\n", plugin_name);
    }
    kreuzberg_free_string(listing);
    if (!present) {
        return 1;
    }

    /* Step 3: unregister. */
    if (kreuzberg_unregister_validator(plugin_name, &failure) != 0) {
        fprintf(stderr, "unregister failed: %s\n", failure ? failure : "(no detail)");
        kreuzberg_free_string(failure);
        return 1;
    }

    /* Step 4: the name must no longer be listed. */
    listing = kreuzberg_list_validators();
    present = contains_name(listing, plugin_name);
    if (present) {
        fprintf(stderr, "FAIL: validator still present after unregister\n");
    } else {
        printf("PASS: %s absent after unregister\n", plugin_name);
    }
    kreuzberg_free_string(listing);
    return present ? 1 : 0;
}
```

View File

@@ -0,0 +1,76 @@
```c title="C"
#include <kreuzberg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
* Minimal Validator skeleton: implements the required `validate` function
* and the optional `priority` and `should_validate` hooks via the C vtable.
*/
/* Returns a heap-allocated copy of `s` (terminator included), or NULL when
 * malloc fails. */
static char *dup_cstr(const char *s) {
    const size_t size = strlen(s) + 1; /* bytes to copy, counting the NUL */
    char *copy = (char *)malloc(size);
    if (copy == NULL) {
        return NULL;
    }
    memcpy(copy, s, size);
    return copy;
}
/* vtable `validate` hook: rejects (returns 1 and sets `*out_error`) any result
 * whose serialised form contains the token "FORBIDDEN"; otherwise returns 0
 * and leaves `*out_error` untouched. */
static int32_t validate_fn(
    const void *user_data,
    const char *result,
    const char *config,
    char **out_error
) {
    (void)config;
    (void)user_data;

    /* Nothing forbidden in the serialised text: accept. */
    if (strstr(result, "FORBIDDEN") == NULL) {
        return 0;
    }

    *out_error = dup_cstr("Content contains forbidden token");
    return 1;
}
/* vtable `should_validate` hook: returns 1 unconditionally, so the validator
 * runs for every result. */
static int32_t should_validate_fn(
    const void *user_data,
    const char *result,
    const char *config
) {
    const int32_t always_run = 1;

    (void)config;
    (void)result;
    (void)user_data;

    return always_run;
}
/* vtable `priority` hook: always reports 50; `user_data` is not consulted. */
static int32_t priority_fn(const void *user_data) {
    const int32_t fixed_priority = 50;
    (void)user_data;
    return fixed_priority;
}
int main(void) {
KREUZBERGKreuzbergValidatorVTable vtable = {0};
vtable.validate = validate_fn;
vtable.should_validate = should_validate_fn;
vtable.priority = priority_fn;
char *err = NULL;
int32_t rc = kreuzberg_register_validator(
"forbidden-token-validator",
vtable,
NULL,
&err
);
if (rc != 0) {
fprintf(stderr, "register validator failed: %s\n", err ? err : "(no detail)");
kreuzberg_free_string(err);
return 1;
}
printf("forbidden-token-validator registered\n");
return 0;
}
```

View File

@@ -0,0 +1,70 @@
```c title="C"
#include <kreuzberg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Duplicates the NUL-terminated string `s` into malloc'd storage; yields NULL
 * if the allocation fails. */
static char *dup_cstr(const char *s) {
    const size_t bytes = strlen(s) + 1; /* include the terminator */
    char *clone = (char *)malloc(bytes);
    return clone ? (char *)memcpy(clone, s, bytes) : NULL;
}
/* vtable `validate` hook: finds the first `"quality_score":` key in the
 * serialised result, parses the number after it with atof, and rejects the
 * result (returns 1, sets `*out_error`) when the score is below 0.5. A
 * missing key is treated as a score of 0.0. Production plugins should parse
 * the JSON properly. */
static int32_t validate_fn(
    const void *user_data,
    const char *result,
    const char *config,
    char **out_error
) {
    static const char score_key[] = "\"quality_score\":";
    const char *hit = strstr(result, score_key);
    /* sizeof includes the terminator, so subtract it to land on the value. */
    const double score = hit ? atof(hit + (sizeof(score_key) - 1)) : 0.0;

    (void)config;
    (void)user_data;

    if (!(score < 0.5)) {
        return 0;
    }

    char message[128];
    snprintf(message, sizeof(message),
             "Quality score too low: %.2f < 0.50", score);
    *out_error = dup_cstr(message);
    return 1;
}
/* vtable `priority` hook: always reports 50; `user_data` is not consulted. */
static int32_t priority_fn(const void *user_data) {
    const int32_t fixed_priority = 50;
    (void)user_data;
    return fixed_priority;
}
/*
 * Registers the quality-score validator under the name
 * "quality-score-validator" with no user data. Exits 0 once registered; on
 * failure prints the library's detail string (if any) and exits 1.
 */
int main(void) {
    KREUZBERGKreuzbergValidatorVTable hooks = {0};
    char *failure = NULL;
    int32_t status;

    hooks.priority = priority_fn;
    hooks.validate = validate_fn;

    status = kreuzberg_register_validator("quality-score-validator", hooks, NULL, &failure);
    if (status == 0) {
        printf("quality-score-validator registered\n");
        return 0;
    }

    fprintf(stderr, "register validator failed: %s\n", failure ? failure : "(no detail)");
    kreuzberg_free_string(failure);
    return 1;
}
```

View File

@@ -0,0 +1,101 @@
```c title="C"
#include <kreuzberg.h>
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Shared state lives in `user_data` and is forwarded to every vtable callback.
* Use atomics or a mutex if more than one thread can call into the plugin.
* In this example the state is a single atomic counter. */
typedef struct {
/* Reset to 0 by initialize_fn, incremented once per process_fn call and
* reported on stderr by shutdown_fn. */
atomic_size_t call_count;
} StatefulState;
/* Copies `s`, terminator included, into a new malloc'd buffer. Returns NULL
 * when the allocation fails. */
static char *dup_cstr(const char *s) {
    size_t with_nul = strlen(s) + 1;
    char *buffer = (char *)malloc(with_nul);
    if (buffer != NULL) {
        memcpy(buffer, s, with_nul);
    }
    return buffer;
}
/* vtable `initialize_fn` hook: resets the shared call counter to 0 and
 * returns 0. The const is cast away because the callback receives
 * `user_data` as a pointer to const but must write to the counter. */
static int32_t initialize_fn(const void *user_data, char **out_error) {
    StatefulState *shared = (StatefulState *)user_data;
    (void)out_error;
    atomic_store(&shared->call_count, 0);
    return 0;
}
/* vtable `shutdown_fn` hook: reads the shared call counter and reports it on
 * stderr, then returns 0. */
static int32_t shutdown_fn(const void *user_data, char **out_error) {
    const StatefulState *shared = (const StatefulState *)user_data;
    size_t total_calls;

    (void)out_error;
    total_calls = atomic_load(&shared->call_count);
    fprintf(stderr, "stateful-plugin: shutdown after %zu calls\n", total_calls);
    return 0;
}
/* vtable `process` hook: atomically bumps the shared call counter by one and
 * returns 0. The result and config are not inspected. */
static int32_t process_fn(
    const void *user_data,
    const char *result,
    const char *config,
    char **out_error
) {
    StatefulState *shared = (StatefulState *)user_data;

    (void)out_error;
    (void)config;
    (void)result;

    atomic_fetch_add(&shared->call_count, 1);
    return 0;
}
/* vtable `processing_stage` hook: writes a malloc'd copy of the JSON string
 * "\"Middle\"" to `*out_result` and returns 0. */
static int32_t processing_stage_fn(const void *user_data, char **out_result) {
    char *stage_json = dup_cstr("\"Middle\"");
    (void)user_data;
    *out_result = stage_json;
    return 0;
}
/* vtable `priority` hook: always reports 50; `user_data` is not consulted. */
static int32_t priority_fn(const void *user_data) {
    const int32_t fixed_priority = 50;
    (void)user_data;
    return fixed_priority;
}
/* vtable `free_user_data` hook: hands the `user_data` block (allocated with
 * malloc in main) back to free(). */
static void free_user_data(void *user_data) {
    void *state_block = user_data;
    free(state_block);
}
int main(void) {
StatefulState *state = (StatefulState *)malloc(sizeof(StatefulState));
if (!state) {
return 1;
}
atomic_init(&state->call_count, 0);
KREUZBERGKreuzbergPostProcessorVTable vtable = {0};
vtable.initialize_fn = initialize_fn;
vtable.shutdown_fn = shutdown_fn;
vtable.process = process_fn;
vtable.processing_stage = processing_stage_fn;
vtable.priority = priority_fn;
vtable.free_user_data = free_user_data;
char *err = NULL;
int32_t rc = kreuzberg_register_post_processor(
"stateful-plugin",
vtable,
state,
&err
);
if (rc != 0) {
fprintf(stderr, "register post-processor failed: %s\n",
err ? err : "(no detail)");
kreuzberg_free_string(err);
free(state);
return 1;
}
printf("stateful-plugin registered\n");
return 0;
}
```

View File

@@ -0,0 +1,31 @@
```c title="C"
#include <kreuzberg.h>
#include <stdio.h>
/*
 * Calls `unregister_fn(name, &detail)` and returns 0 when it reports success.
 * On a non-zero status the failure is logged to stderr (using `kind` and
 * `name` for context), the detail string is released and 1 is returned so the
 * caller can count failures.
 */
static int unregister_or_log(
    int32_t (*unregister_fn)(const char *, char **),
    const char *kind,
    const char *name
) {
    char *detail = NULL;

    if (unregister_fn(name, &detail) == 0) {
        return 0;
    }

    fprintf(stderr, "unregister %s '%s' failed: %s\n",
            kind,
            name,
            detail ? detail : "(no detail)");
    kreuzberg_free_string(detail);
    return 1;
}
/*
 * Unregisters the four example plugins in a fixed order. Every entry is
 * attempted even if an earlier one fails.
 *
 * Returns 0 when all unregistrations succeed, 1 otherwise.
 */
int main(void) {
    const struct {
        int32_t (*unregister_fn)(const char *, char **);
        const char *kind;
        const char *name;
    } plugins[] = {
        { kreuzberg_unregister_post_processor, "post-processor", "word-count" },
        { kreuzberg_unregister_validator, "validator", "min-length-validator" },
        { kreuzberg_unregister_ocr_backend, "OCR backend", "my-ocr" },
        { kreuzberg_unregister_embedding_backend, "embedding backend", "my-embedder" },
    };

    int failed = 0;
    for (size_t i = 0; i < sizeof plugins / sizeof plugins[0]; ++i) {
        failed += unregister_or_log(plugins[i].unregister_fn,
                                    plugins[i].kind,
                                    plugins[i].name);
    }
    return failed != 0;
}
```

View File

@@ -0,0 +1,92 @@
```c title="C"
#include <kreuzberg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Returns a malloc()-allocated copy of the NUL-terminated string `s`.
 *
 * Returns NULL when `s` is NULL or when the allocation fails. Ownership of
 * the copy passes to the caller.
 */
static char *dup_cstr(const char *s) {
    /* strlen(NULL) is undefined behaviour, so reject NULL up front. */
    if (s == NULL) {
        return NULL;
    }

    size_t size = strlen(s) + 1; /* include the terminating NUL */
    char *copy = malloc(size);
    if (copy != NULL) {
        memcpy(copy, s, size);
    }
    return copy;
}
/*
 * Post-processor callback that counts whitespace-separated tokens in the
 * serialised `result` string and prints the count to stdout.
 *
 * Space, tab, newline and carriage return act as separators. `user_data`,
 * `config` and `out_error` are not used. Always returns 0.
 */
static int32_t process_fn(
    const void *user_data,
    const char *result,
    const char *config,
    char **out_error
) {
    (void)user_data;
    (void)config;
    (void)out_error;

    /* `result` is a read-only JSON string at this layer. A real mutating
     * post-processor would decode the JSON, mutate it, and serialise it back
     * via the kreuzberg ExtractionResult helpers in the host language. */
    size_t token_count = 0;
    int inside_token = 0;
    const char *cursor = result;

    while (*cursor != '\0') {
        switch (*cursor) {
        case ' ':
        case '\t':
        case '\n':
        case '\r':
            inside_token = 0;
            break;
        default:
            /* First non-separator character after a gap starts a token. */
            if (!inside_token) {
                inside_token = 1;
                token_count += 1;
            }
            break;
        }
        ++cursor;
    }

    printf("word-count: ~%zu tokens in serialised result\n", token_count);
    return 0;
}
/*
 * Reports the processing stage of this plugin.
 *
 * ProcessingStage is JSON-serialised; "Early" maps to ProcessingStage::Early.
 *
 * On success stores a heap-allocated copy of the JSON string in
 * `*out_result` and returns 0. If the copy cannot be allocated,
 * `*out_result` is set to NULL and 1 is returned, so the caller is never
 * handed a NULL string together with a success code.
 */
static int32_t processing_stage_fn(
    const void *user_data,
    char **out_result
) {
    (void)user_data;

    char *stage_json = dup_cstr("\"Early\"");
    *out_result = stage_json;
    if (stage_json == NULL) {
        return 1;
    }
    return 0;
}
/*
 * Decides whether the post-processor should run for `result`.
 *
 * Returns 0 when the serialised result contains the exact substring
 * "content":"" (used here to skip empty content), and 1 otherwise.
 * `user_data` and `config` are not used.
 */
static int32_t should_process_fn(
    const void *user_data,
    const char *result,
    const char *config
) {
    static const char empty_content_marker[] = "\"content\":\"\"";

    (void)config;
    (void)user_data;

    if (strstr(result, empty_content_marker) != NULL) {
        return 0;
    }
    return 1;
}
/*
 * Returns the fixed priority value (50) of the word-count post-processor.
 * `user_data` is not used.
 */
static int32_t priority_fn(const void *user_data) {
    enum { WORD_COUNT_PRIORITY = 50 };

    (void)user_data;
    return WORD_COUNT_PRIORITY;
}
int main(void) {
KREUZBERGKreuzbergPostProcessorVTable vtable = {0};
vtable.process = process_fn;
vtable.processing_stage = processing_stage_fn;
vtable.should_process = should_process_fn;
vtable.priority = priority_fn;
char *err = NULL;
int32_t rc = kreuzberg_register_post_processor(
"word-count",
vtable,
NULL,
&err
);
if (rc != 0) {
fprintf(stderr, "register post-processor failed: %s\n", err ? err : "(no detail)");
kreuzberg_free_string(err);
return 1;
}
printf("word-count post-processor registered\n");
return 0;
}
```

View File

@@ -0,0 +1,41 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
#include <stdlib.h>
/*
 * Extracts "document.pdf" with a chunking configuration (max 1500
 * characters, overlap 200) and prints the resulting chunks as JSON.
 *
 * Returns 0 on success, 1 when the config cannot be parsed or the
 * extraction fails.
 */
int main(void) {
    const char *config_json =
        "{"
            "\"chunking\": {"
                "\"max_characters\": 1500,"
                "\"overlap\": 200"
            "}"
        "}";

    KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_from_json(config_json);
    if (!config) {
        /* Guard the context: passing NULL to %s is undefined behaviour. */
        const int code = kreuzberg_last_error_code();
        const char *context = kreuzberg_last_error_context();
        fprintf(stderr, "config parse failed (code %d): %s\n",
                code,
                context ? context : "(no detail)");
        return 1;
    }

    KREUZBERGExtractionResult *result =
        kreuzberg_extract_file_sync("document.pdf", NULL, config);
    if (!result) {
        const int code = kreuzberg_last_error_code();
        const char *context = kreuzberg_last_error_context();
        fprintf(stderr, "extraction failed (code %d): %s\n",
                code,
                context ? context : "(no detail)");
        kreuzberg_extraction_config_free(config);
        return 1;
    }

    /* Fall back to an empty JSON array when no chunk string is returned. */
    char *chunks_json = kreuzberg_extraction_result_chunks(result);
    printf("chunks (JSON): %s\n", chunks_json ? chunks_json : "[]");
    kreuzberg_free_string(chunks_json);

    kreuzberg_extraction_result_free(result);
    kreuzberg_extraction_config_free(config);
    return 0;
}
```

View File

@@ -0,0 +1,49 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
#include <stdlib.h>
/*
 * Extracts "research_paper.pdf" with character chunking (max 500
 * characters, overlap 50) plus an embedding configuration, and prints the
 * resulting chunks as JSON.
 *
 * Returns 0 on success, 1 when the config cannot be parsed or the
 * extraction fails.
 */
int main(void) {
    const char *config_json =
        "{"
            "\"chunking\": {"
                "\"chunker_type\": \"character\","
                "\"max_characters\": 500,"
                "\"overlap\": 50,"
                "\"embedding\": {"
                    "\"model\": {\"preset\": {\"name\": \"balanced\"}},"
                    "\"normalize\": true"
                "}"
            "}"
        "}";

    KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_from_json(config_json);
    if (!config) {
        /* Guard the context: passing NULL to %s is undefined behaviour. */
        const int code = kreuzberg_last_error_code();
        const char *context = kreuzberg_last_error_context();
        fprintf(stderr, "config parse failed (code %d): %s\n",
                code,
                context ? context : "(no detail)");
        return 1;
    }

    KREUZBERGExtractionResult *result =
        kreuzberg_extract_file_sync("research_paper.pdf", NULL, config);
    if (!result) {
        const int code = kreuzberg_last_error_code();
        const char *context = kreuzberg_last_error_context();
        fprintf(stderr, "extraction failed (code %d): %s\n",
                code,
                context ? context : "(no detail)");
        kreuzberg_extraction_config_free(config);
        return 1;
    }

    /* Each chunk JSON entry contains content, embedding, and metadata
       (chunk_index, total_chunks, byte_start, byte_end). Pipe this directly
       into a vector database client. */
    char *chunks_json = kreuzberg_extraction_result_chunks(result);
    printf("chunks (JSON):\n%s\n", chunks_json ? chunks_json : "[]");
    kreuzberg_free_string(chunks_json);

    kreuzberg_extraction_result_free(result);
    kreuzberg_extraction_config_free(config);
    return 0;
}
```

View File

@@ -0,0 +1,48 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
#include <stdlib.h>
/*
 * Extracts "document.pdf" with character chunking (max 1024 characters,
 * overlap 100) and an embedding configuration (balanced preset, normalised,
 * batch size 32, no download progress), then prints the chunks as JSON.
 *
 * Returns 0 on success, 1 when the config cannot be parsed or the
 * extraction fails.
 */
int main(void) {
    const char *config_json =
        "{"
            "\"chunking\": {"
                "\"chunker_type\": \"character\","
                "\"max_characters\": 1024,"
                "\"overlap\": 100,"
                "\"embedding\": {"
                    "\"model\": {\"preset\": {\"name\": \"balanced\"}},"
                    "\"normalize\": true,"
                    "\"batch_size\": 32,"
                    "\"show_download_progress\": false"
                "}"
            "}"
        "}";

    KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_from_json(config_json);
    if (!config) {
        /* Guard the context: passing NULL to %s is undefined behaviour. */
        const int code = kreuzberg_last_error_code();
        const char *context = kreuzberg_last_error_context();
        fprintf(stderr, "config parse failed (code %d): %s\n",
                code,
                context ? context : "(no detail)");
        return 1;
    }

    KREUZBERGExtractionResult *result =
        kreuzberg_extract_file_sync("document.pdf", NULL, config);
    if (!result) {
        const int code = kreuzberg_last_error_code();
        const char *context = kreuzberg_last_error_context();
        fprintf(stderr, "extraction failed (code %d): %s\n",
                code,
                context ? context : "(no detail)");
        kreuzberg_extraction_config_free(config);
        return 1;
    }

    /* Fall back to an empty JSON array when no chunk string is returned. */
    char *chunks_json = kreuzberg_extraction_result_chunks(result);
    printf("chunks with embeddings (JSON):\n%s\n", chunks_json ? chunks_json : "[]");
    kreuzberg_free_string(chunks_json);

    kreuzberg_extraction_result_free(result);
    kreuzberg_extraction_config_free(config);
    return 0;
}
```

View File

@@ -0,0 +1,46 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
#include <stdlib.h>
/*
 * Extracts "research_paper.pdf" with keyword extraction configured
 * (algorithm "yake", at most 10 keywords, minimum score 0.3) and prints the
 * extracted keywords, or "(none)" when no keyword string is returned.
 *
 * Returns 0 on success, 1 when the config cannot be parsed or the
 * extraction fails.
 */
int main(void) {
    const char *config_json =
        "{"
            "\"keywords\": {"
                "\"algorithm\": \"yake\","
                "\"max_keywords\": 10,"
                "\"min_score\": 0.3"
            "}"
        "}";

    KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_from_json(config_json);
    if (!config) {
        /* Guard the context: passing NULL to %s is undefined behaviour. */
        const int code = kreuzberg_last_error_code();
        const char *context = kreuzberg_last_error_context();
        fprintf(stderr, "config parse failed (code %d): %s\n",
                code,
                context ? context : "(no detail)");
        return 1;
    }

    KREUZBERGExtractionResult *result =
        kreuzberg_extract_file_sync("research_paper.pdf", NULL, config);
    if (!result) {
        const int code = kreuzberg_last_error_code();
        const char *context = kreuzberg_last_error_context();
        fprintf(stderr, "extraction failed (code %d): %s\n",
                code,
                context ? context : "(no detail)");
        kreuzberg_extraction_config_free(config);
        return 1;
    }

    char *keywords_json = kreuzberg_extraction_result_extracted_keywords(result);
    if (keywords_json) {
        printf("Keywords: %s\n", keywords_json);
        kreuzberg_free_string(keywords_json);
    } else {
        printf("Keywords: (none)\n");
    }

    kreuzberg_extraction_result_free(result);
    kreuzberg_extraction_config_free(config);
    return 0;
}
```

View File

@@ -0,0 +1,45 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
#include <stdlib.h>
/*
 * Extracts "scanned_document.pdf" with quality processing enabled, prints
 * the quality score (with a warning when it is below 0.5) and the
 * processing warnings as JSON.
 *
 * Returns 0 on success, 1 when the config cannot be parsed or the
 * extraction fails.
 */
int main(void) {
    const char *config_json =
        "{"
            "\"enable_quality_processing\": true"
        "}";

    KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_from_json(config_json);
    if (!config) {
        /* Guard the context: passing NULL to %s is undefined behaviour. */
        const int code = kreuzberg_last_error_code();
        const char *context = kreuzberg_last_error_context();
        fprintf(stderr, "config parse failed (code %d): %s\n",
                code,
                context ? context : "(no detail)");
        return 1;
    }

    KREUZBERGExtractionResult *result =
        kreuzberg_extract_file_sync("scanned_document.pdf", NULL, config);
    if (!result) {
        const int code = kreuzberg_last_error_code();
        const char *context = kreuzberg_last_error_context();
        fprintf(stderr, "extraction failed (code %d): %s\n",
                code,
                context ? context : "(no detail)");
        kreuzberg_extraction_config_free(config);
        return 1;
    }

    double score = kreuzberg_extraction_result_quality_score(result);
    if (score < 0.5) {
        printf("Warning: low quality extraction (%.2f)\n", score);
    } else {
        printf("Quality score: %.2f\n", score);
    }

    /* Fall back to an empty JSON array when no warnings string is returned. */
    char *warnings_json = kreuzberg_extraction_result_processing_warnings(result);
    printf("processing warnings (JSON): %s\n", warnings_json ? warnings_json : "[]");
    kreuzberg_free_string(warnings_json);

    kreuzberg_extraction_result_free(result);
    kreuzberg_extraction_config_free(config);
    return 0;
}
```

View File

@@ -0,0 +1,39 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
#include <stdlib.h>
/*
 * Builds an embedding configuration (balanced preset, normalised), embeds
 * two sample texts and prints the returned embeddings JSON.
 *
 * Returns 0 on success, 1 when the config cannot be parsed or the
 * embedding call fails.
 */
int main(void) {
    const char *config_json =
        "{"
            "\"model\": {\"preset\": {\"name\": \"balanced\"}},"
            "\"normalize\": true"
        "}";

    KREUZBERGEmbeddingConfig *config = kreuzberg_embedding_config_from_json(config_json);
    if (!config) {
        /* Guard the context: passing NULL to %s is undefined behaviour. */
        const int code = kreuzberg_last_error_code();
        const char *context = kreuzberg_last_error_context();
        fprintf(stderr, "config parse failed (code %d): %s\n",
                code,
                context ? context : "(no detail)");
        return 1;
    }

    /* Embed input is a JSON-encoded array of strings. */
    const char *texts_json = "[\"Hello, world!\", \"Kreuzberg is fast\"]";
    char *embeddings_json = kreuzberg_embed_texts(texts_json, config);
    if (!embeddings_json) {
        const int code = kreuzberg_last_error_code();
        const char *context = kreuzberg_last_error_context();
        fprintf(stderr, "embedding failed (code %d): %s\n",
                code,
                context ? context : "(no detail)");
        kreuzberg_embedding_config_free(config);
        return 1;
    }

    printf("embeddings (JSON, 2D float array):\n%s\n", embeddings_json);
    kreuzberg_free_string(embeddings_json);
    kreuzberg_embedding_config_free(config);
    return 0;
}
```

View File

@@ -0,0 +1,41 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
#include <stdlib.h>
/*
 * Extracts "document.pdf" with token reduction configured (mode
 * "moderate", preserving important words) and prints the resulting content,
 * or "(empty)" when no content string is returned.
 *
 * Returns 0 on success, 1 when the config cannot be parsed or the
 * extraction fails.
 */
int main(void) {
    const char *config_json =
        "{"
            "\"token_reduction\": {"
                "\"mode\": \"moderate\","
                "\"preserve_important_words\": true"
            "}"
        "}";

    KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_from_json(config_json);
    if (!config) {
        /* Guard the context: passing NULL to %s is undefined behaviour. */
        const int code = kreuzberg_last_error_code();
        const char *context = kreuzberg_last_error_context();
        fprintf(stderr, "config parse failed (code %d): %s\n",
                code,
                context ? context : "(no detail)");
        return 1;
    }

    KREUZBERGExtractionResult *result =
        kreuzberg_extract_file_sync("document.pdf", NULL, config);
    if (!result) {
        const int code = kreuzberg_last_error_code();
        const char *context = kreuzberg_last_error_context();
        fprintf(stderr, "extraction failed (code %d): %s\n",
                code,
                context ? context : "(no detail)");
        kreuzberg_extraction_config_free(config);
        return 1;
    }

    char *content = kreuzberg_extraction_result_content(result);
    printf("reduced content:\n%s\n", content ? content : "(empty)");
    kreuzberg_free_string(content);

    kreuzberg_extraction_result_free(result);
    kreuzberg_extraction_config_free(config);
    return 0;
}
```

View File

@@ -0,0 +1,44 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Extracts "verbose_document.pdf" with token reduction configured (mode
 * "moderate", preserving important words) and prints the reduced content
 * together with its length in bytes. Nothing is printed when no content
 * string is returned.
 *
 * Returns 0 on success, 1 when the config cannot be parsed or the
 * extraction fails.
 */
int main(void) {
    const char *config_json =
        "{"
            "\"token_reduction\": {"
                "\"mode\": \"moderate\","
                "\"preserve_important_words\": true"
            "}"
        "}";

    KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_from_json(config_json);
    if (!config) {
        /* Guard the context: passing NULL to %s is undefined behaviour. */
        const int code = kreuzberg_last_error_code();
        const char *context = kreuzberg_last_error_context();
        fprintf(stderr, "config parse failed (code %d): %s\n",
                code,
                context ? context : "(no detail)");
        return 1;
    }

    KREUZBERGExtractionResult *result =
        kreuzberg_extract_file_sync("verbose_document.pdf", NULL, config);
    if (!result) {
        const int code = kreuzberg_last_error_code();
        const char *context = kreuzberg_last_error_context();
        fprintf(stderr, "extraction failed (code %d): %s\n",
                code,
                context ? context : "(no detail)");
        kreuzberg_extraction_config_free(config);
        return 1;
    }

    char *content = kreuzberg_extraction_result_content(result);
    if (content) {
        printf("reduced content (%zu bytes):\n%s\n", strlen(content), content);
        kreuzberg_free_string(content);
    }

    kreuzberg_extraction_result_free(result);
    kreuzberg_extraction_config_free(config);
    return 0;
}
```

View File

@@ -0,0 +1,55 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
#include <stdlib.h>
/*
 * Extracts "document.pdf" with character chunking (max 512 characters,
 * overlap 50) and an embedding configuration (balanced preset, normalised,
 * batch size 32), then prints the document id and the chunks JSON.
 *
 * Returns 0 on success, 1 when the config cannot be parsed or the
 * extraction fails.
 */
int main(void) {
    const char *document_path = "document.pdf";
    const char *document_id = "doc-001";
    const char *config_json =
        "{"
            "\"chunking\": {"
                "\"chunker_type\": \"character\","
                "\"max_characters\": 512,"
                "\"overlap\": 50,"
                "\"embedding\": {"
                    "\"model\": {\"preset\": {\"name\": \"balanced\"}},"
                    "\"normalize\": true,"
                    "\"batch_size\": 32"
                "}"
            "}"
        "}";

    KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_from_json(config_json);
    if (!config) {
        /* Guard the context: passing NULL to %s is undefined behaviour. */
        const int code = kreuzberg_last_error_code();
        const char *context = kreuzberg_last_error_context();
        fprintf(stderr, "config parse failed (code %d): %s\n",
                code,
                context ? context : "(no detail)");
        return 1;
    }

    KREUZBERGExtractionResult *result =
        kreuzberg_extract_file_sync(document_path, NULL, config);
    if (!result) {
        const int code = kreuzberg_last_error_code();
        const char *context = kreuzberg_last_error_context();
        fprintf(stderr, "extraction failed (code %d): %s\n",
                code,
                context ? context : "(no detail)");
        kreuzberg_extraction_config_free(config);
        return 1;
    }

    /* The chunks JSON array carries content + embedding + metadata for each
       chunk. Pass this directly to your vector database client (pgvector,
       Qdrant, Pinecone, etc.) along with the document_id as a metadata field. */
    char *chunks_json = kreuzberg_extraction_result_chunks(result);
    printf("document_id: %s\n", document_id);
    printf("chunks (JSON, ready to upsert into a vector DB):\n%s\n",
           chunks_json ? chunks_json : "[]");
    kreuzberg_free_string(chunks_json);

    kreuzberg_extraction_result_free(result);
    kreuzberg_extraction_config_free(config);
    return 0;
}
```

View File

@@ -0,0 +1,7 @@
```bash title="Bash"
# Process multiple files
kreuzberg extract doc1.pdf doc2.docx doc3.pptx
# Use glob patterns. In Bash, `**` only matches across nested directories
# when the globstar option is enabled; without it, `**` behaves like `*`.
shopt -s globstar
kreuzberg extract documents/**/*.pdf
```

Some files were not shown because too many files have changed in this diff Show More