Nomad changes
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s

This commit is contained in:
Henrik Jess Nielsen
2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions

View File

@@ -0,0 +1,25 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
int main(void) {
struct ConfigBuilder *builder = kreuzberg_config_builder_new();
kreuzberg_config_builder_set_use_cache(builder, 1);
kreuzberg_config_builder_set_include_document_structure(builder, 1);
kreuzberg_config_builder_set_ocr(builder,
"{\"tesseract\":{\"language\":\"eng\"}}");
ExtractionConfig *config = kreuzberg_config_builder_build(builder);
struct CExtractionResult *result =
kreuzberg_extract_file_sync_with_config("scan.pdf",
kreuzberg_config_to_json(config));
if (result && result->success) {
printf("%s\n", result->content);
}
kreuzberg_free_result(result);
kreuzberg_config_free(config);
return 0;
}
```

View File

@@ -0,0 +1,43 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
#include <stdlib.h>
int main(void) {
const char *config_json =
"{"
"\"chunking\": {"
"\"chunker_type\": \"character\","
"\"max_characters\": 1000,"
"\"overlap\": 200,"
"\"trim\": true"
"}"
"}";
KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_from_json(config_json);
if (!config) {
fprintf(stderr, "config parse failed (code %d): %s\n",
kreuzberg_last_error_code(),
kreuzberg_last_error_context());
return 1;
}
KREUZBERGExtractionResult *result =
kreuzberg_extract_file_sync("document.pdf", NULL, config);
if (!result) {
fprintf(stderr, "extraction failed (code %d): %s\n",
kreuzberg_last_error_code(),
kreuzberg_last_error_context());
kreuzberg_extraction_config_free(config);
return 1;
}
char *content = kreuzberg_extraction_result_content(result);
printf("%s\n", content ? content : "(empty)");
kreuzberg_free_string(content);
kreuzberg_extraction_result_free(result);
kreuzberg_extraction_config_free(config);
return 0;
}
```

View File

@@ -0,0 +1,38 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
int main(void) {
const char *config_json =
"{"
"\"use_cache\": true,"
"\"enable_quality_processing\": true"
"}";
KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_from_json(config_json);
if (!config) {
fprintf(stderr, "config parse failed (code %d): %s\n",
kreuzberg_last_error_code(),
kreuzberg_last_error_context());
return 1;
}
KREUZBERGExtractionResult *result =
kreuzberg_extract_file_sync("document.pdf", NULL, config);
if (!result) {
fprintf(stderr, "extraction failed (code %d): %s\n",
kreuzberg_last_error_code(),
kreuzberg_last_error_context());
kreuzberg_extraction_config_free(config);
return 1;
}
char *content = kreuzberg_extraction_result_content(result);
printf("%s\n", content ? content : "(empty)");
kreuzberg_free_string(content);
kreuzberg_extraction_result_free(result);
kreuzberg_extraction_config_free(config);
return 0;
}
```

View File

@@ -0,0 +1,62 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* The C FFI does not expose config-file auto-discovery directly. Load the
* file contents in your application and pass the JSON to
* kreuzberg_extraction_config_from_json. For TOML/YAML, convert in your
* application before calling the FFI. */
static char *read_text_file(const char *path) {
FILE *fp = fopen(path, "rb");
if (!fp) {
return NULL;
}
fseek(fp, 0, SEEK_END);
long size = ftell(fp);
fseek(fp, 0, SEEK_SET);
char *buf = (char *)malloc((size_t)size + 1);
if (!buf) {
fclose(fp);
return NULL;
}
fread(buf, 1, (size_t)size, fp);
buf[size] = '\0';
fclose(fp);
return buf;
}
int main(void) {
char *json = read_text_file("kreuzberg.json");
KREUZBERGExtractionConfig *config = json
? kreuzberg_extraction_config_from_json(json)
: kreuzberg_extraction_config_default();
free(json);
if (!config) {
fprintf(stderr, "config load failed (code %d): %s\n",
kreuzberg_last_error_code(),
kreuzberg_last_error_context());
return 1;
}
KREUZBERGExtractionResult *result =
kreuzberg_extract_file_sync("document.pdf", NULL, config);
if (!result) {
fprintf(stderr, "extraction failed (code %d): %s\n",
kreuzberg_last_error_code(),
kreuzberg_last_error_context());
kreuzberg_extraction_config_free(config);
return 1;
}
char *content = kreuzberg_extraction_result_content(result);
printf("%s\n", content ? content : "(empty)");
kreuzberg_free_string(content);
kreuzberg_extraction_result_free(result);
kreuzberg_extraction_config_free(config);
return 0;
}
```

View File

@@ -0,0 +1,40 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
int main(void) {
const char *config_json =
"{"
"\"ocr\": {"
"\"backend\": \"tesseract\","
"\"language\": \"eng\""
"}"
"}";
KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_from_json(config_json);
if (!config) {
fprintf(stderr, "config parse failed (code %d): %s\n",
kreuzberg_last_error_code(),
kreuzberg_last_error_context());
return 1;
}
KREUZBERGExtractionResult *result =
kreuzberg_extract_file_sync("scanned.pdf", NULL, config);
if (!result) {
fprintf(stderr, "extraction failed (code %d): %s\n",
kreuzberg_last_error_code(),
kreuzberg_last_error_context());
kreuzberg_extraction_config_free(config);
return 1;
}
char *content = kreuzberg_extraction_result_content(result);
printf("%s\n", content ? content : "(empty)");
kreuzberg_free_string(content);
kreuzberg_extraction_result_free(result);
kreuzberg_extraction_config_free(config);
return 0;
}
```

View File

@@ -0,0 +1,48 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
int main(void) {
const char *config_json =
"{"
"\"use_cache\": true,"
"\"enable_quality_processing\": true,"
"\"ocr\": {"
"\"backend\": \"tesseract\","
"\"language\": \"eng+deu\","
"\"tesseract_config\": {\"psm\": 6}"
"},"
"\"chunking\": {"
"\"chunker_type\": \"character\","
"\"max_characters\": 1000,"
"\"overlap\": 200"
"}"
"}";
KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_from_json(config_json);
if (!config) {
fprintf(stderr, "config parse failed (code %d): %s\n",
kreuzberg_last_error_code(),
kreuzberg_last_error_context());
return 1;
}
KREUZBERGExtractionResult *result =
kreuzberg_extract_file_sync("document.pdf", NULL, config);
if (!result) {
fprintf(stderr, "extraction failed (code %d): %s\n",
kreuzberg_last_error_code(),
kreuzberg_last_error_context());
kreuzberg_extraction_config_free(config);
return 1;
}
char *content = kreuzberg_extraction_result_content(result);
printf("%s\n", content ? content : "(empty)");
kreuzberg_free_string(content);
kreuzberg_extraction_result_free(result);
kreuzberg_extraction_config_free(config);
return 0;
}
```

View File

@@ -0,0 +1,37 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
int main(void) {
const char *config_json =
"{"
"\"include_document_structure\": true"
"}";
KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_from_json(config_json);
if (!config) {
fprintf(stderr, "config parse failed (code %d): %s\n",
kreuzberg_last_error_code(),
kreuzberg_last_error_context());
return 1;
}
KREUZBERGExtractionResult *result =
kreuzberg_extract_file_sync("document.pdf", NULL, config);
if (!result) {
fprintf(stderr, "extraction failed (code %d): %s\n",
kreuzberg_last_error_code(),
kreuzberg_last_error_context());
kreuzberg_extraction_config_free(config);
return 1;
}
char *content = kreuzberg_extraction_result_content(result);
printf("%s\n", content ? content : "(empty)");
kreuzberg_free_string(content);
kreuzberg_extraction_result_free(result);
kreuzberg_extraction_config_free(config);
return 0;
}
```

View File

@@ -0,0 +1,37 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
int main(void) {
const char *config_json =
"{"
"\"result_format\": \"element_based\""
"}";
KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_from_json(config_json);
if (!config) {
fprintf(stderr, "config parse failed (code %d): %s\n",
kreuzberg_last_error_code(),
kreuzberg_last_error_context());
return 1;
}
KREUZBERGExtractionResult *result =
kreuzberg_extract_file_sync("document.pdf", NULL, config);
if (!result) {
fprintf(stderr, "extraction failed (code %d): %s\n",
kreuzberg_last_error_code(),
kreuzberg_last_error_context());
kreuzberg_extraction_config_free(config);
return 1;
}
char *content = kreuzberg_extraction_result_content(result);
printf("%s\n", content ? content : "(empty)");
kreuzberg_free_string(content);
kreuzberg_extraction_result_free(result);
kreuzberg_extraction_config_free(config);
return 0;
}
```

View File

@@ -0,0 +1,47 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
int main(void) {
const char *config_json =
"{"
"\"chunking\": {"
"\"chunker_type\": \"character\","
"\"max_characters\": 1000,"
"\"overlap\": 200,"
"\"embedding\": {"
"\"model\": {\"preset\": {\"name\": \"balanced\"}},"
"\"batch_size\": 16,"
"\"normalize\": true,"
"\"show_download_progress\": true"
"}"
"}"
"}";
KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_from_json(config_json);
if (!config) {
fprintf(stderr, "config parse failed (code %d): %s\n",
kreuzberg_last_error_code(),
kreuzberg_last_error_context());
return 1;
}
KREUZBERGExtractionResult *result =
kreuzberg_extract_file_sync("document.pdf", NULL, config);
if (!result) {
fprintf(stderr, "extraction failed (code %d): %s\n",
kreuzberg_last_error_code(),
kreuzberg_last_error_context());
kreuzberg_extraction_config_free(config);
return 1;
}
char *content = kreuzberg_extraction_result_content(result);
printf("%s\n", content ? content : "(empty)");
kreuzberg_free_string(content);
kreuzberg_extraction_result_free(result);
kreuzberg_extraction_config_free(config);
return 0;
}
```

View File

@@ -0,0 +1,40 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
int main(void) {
const char *config_json =
"{"
"\"output_format\": \"html\","
"\"html_output\": {"
"\"theme\": \"github\""
"}"
"}";
KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_from_json(config_json);
if (!config) {
fprintf(stderr, "config parse failed (code %d): %s\n",
kreuzberg_last_error_code(),
kreuzberg_last_error_context());
return 1;
}
KREUZBERGExtractionResult *result =
kreuzberg_extract_file_sync("document.pdf", NULL, config);
if (!result) {
fprintf(stderr, "extraction failed (code %d): %s\n",
kreuzberg_last_error_code(),
kreuzberg_last_error_context());
kreuzberg_extraction_config_free(config);
return 1;
}
char *content = kreuzberg_extraction_result_content(result);
printf("%s\n", content ? content : "(empty)");
kreuzberg_free_string(content);
kreuzberg_extraction_result_free(result);
kreuzberg_extraction_config_free(config);
return 0;
}
```

View File

@@ -0,0 +1,43 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
int main(void) {
const char *config_json =
"{"
"\"keywords\": {"
"\"algorithm\": \"yake\","
"\"max_keywords\": 10,"
"\"min_score\": 0.1,"
"\"ngram_range\": [1, 3],"
"\"language\": \"en\""
"}"
"}";
KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_from_json(config_json);
if (!config) {
fprintf(stderr, "config parse failed (code %d): %s\n",
kreuzberg_last_error_code(),
kreuzberg_last_error_context());
return 1;
}
KREUZBERGExtractionResult *result =
kreuzberg_extract_file_sync("document.pdf", NULL, config);
if (!result) {
fprintf(stderr, "extraction failed (code %d): %s\n",
kreuzberg_last_error_code(),
kreuzberg_last_error_context());
kreuzberg_extraction_config_free(config);
return 1;
}
char *content = kreuzberg_extraction_result_content(result);
printf("%s\n", content ? content : "(empty)");
kreuzberg_free_string(content);
kreuzberg_extraction_result_free(result);
kreuzberg_extraction_config_free(config);
return 0;
}
```

View File

@@ -0,0 +1,41 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
int main(void) {
const char *config_json =
"{"
"\"language_detection\": {"
"\"enabled\": true,"
"\"min_confidence\": 0.8,"
"\"detect_multiple\": true"
"}"
"}";
KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_from_json(config_json);
if (!config) {
fprintf(stderr, "config parse failed (code %d): %s\n",
kreuzberg_last_error_code(),
kreuzberg_last_error_context());
return 1;
}
KREUZBERGExtractionResult *result =
kreuzberg_extract_file_sync("document.pdf", NULL, config);
if (!result) {
fprintf(stderr, "extraction failed (code %d): %s\n",
kreuzberg_last_error_code(),
kreuzberg_last_error_context());
kreuzberg_extraction_config_free(config);
return 1;
}
char *content = kreuzberg_extraction_result_content(result);
printf("%s\n", content ? content : "(empty)");
kreuzberg_free_string(content);
kreuzberg_extraction_result_free(result);
kreuzberg_extraction_config_free(config);
return 0;
}
```

View File

@@ -0,0 +1,44 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
int main(void) {
const char *config_json =
"{"
"\"images\": {"
"\"extract_images\": true,"
"\"target_dpi\": 300,"
"\"max_image_dimension\": 4096,"
"\"auto_adjust_dpi\": true,"
"\"min_dpi\": 150,"
"\"max_dpi\": 600"
"}"
"}";
KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_from_json(config_json);
if (!config) {
fprintf(stderr, "config parse failed (code %d): %s\n",
kreuzberg_last_error_code(),
kreuzberg_last_error_context());
return 1;
}
KREUZBERGExtractionResult *result =
kreuzberg_extract_file_sync("document.pdf", NULL, config);
if (!result) {
fprintf(stderr, "extraction failed (code %d): %s\n",
kreuzberg_last_error_code(),
kreuzberg_last_error_context());
kreuzberg_extraction_config_free(config);
return 1;
}
char *content = kreuzberg_extraction_result_content(result);
printf("%s\n", content ? content : "(empty)");
kreuzberg_free_string(content);
kreuzberg_extraction_result_free(result);
kreuzberg_extraction_config_free(config);
return 0;
}
```

View File

@@ -0,0 +1,41 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
int main(void) {
const char *config_json =
"{"
"\"pdf_options\": {"
"\"extract_images\": true,"
"\"passwords\": [\"password123\"],"
"\"extract_metadata\": true"
"}"
"}";
KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_from_json(config_json);
if (!config) {
fprintf(stderr, "config parse failed (code %d): %s\n",
kreuzberg_last_error_code(),
kreuzberg_last_error_context());
return 1;
}
KREUZBERGExtractionResult *result =
kreuzberg_extract_file_sync("encrypted.pdf", NULL, config);
if (!result) {
fprintf(stderr, "extraction failed (code %d): %s\n",
kreuzberg_last_error_code(),
kreuzberg_last_error_context());
kreuzberg_extraction_config_free(config);
return 1;
}
char *content = kreuzberg_extraction_result_content(result);
printf("%s\n", content ? content : "(empty)");
kreuzberg_free_string(content);
kreuzberg_extraction_result_free(result);
kreuzberg_extraction_config_free(config);
return 0;
}
```

View File

@@ -0,0 +1,45 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
int main(void) {
const char *config_json =
"{"
"\"pdf_options\": {"
"\"hierarchy\": {"
"\"enabled\": true,"
"\"detection_threshold\": 0.75,"
"\"ocr_coverage_threshold\": 0.8,"
"\"min_level\": 1,"
"\"max_level\": 5"
"}"
"}"
"}";
KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_from_json(config_json);
if (!config) {
fprintf(stderr, "config parse failed (code %d): %s\n",
kreuzberg_last_error_code(),
kreuzberg_last_error_context());
return 1;
}
KREUZBERGExtractionResult *result =
kreuzberg_extract_file_sync("document.pdf", NULL, config);
if (!result) {
fprintf(stderr, "extraction failed (code %d): %s\n",
kreuzberg_last_error_code(),
kreuzberg_last_error_context());
kreuzberg_extraction_config_free(config);
return 1;
}
char *content = kreuzberg_extraction_result_content(result);
printf("%s\n", content ? content : "(empty)");
kreuzberg_free_string(content);
kreuzberg_extraction_result_free(result);
kreuzberg_extraction_config_free(config);
return 0;
}
```

View File

@@ -0,0 +1,40 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
int main(void) {
const char *config_json =
"{"
"\"postprocessor\": {"
"\"enabled\": true,"
"\"enabled_processors\": [\"whitespace_normalizer\", \"unicode_normalizer\"]"
"}"
"}";
KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_from_json(config_json);
if (!config) {
fprintf(stderr, "config parse failed (code %d): %s\n",
kreuzberg_last_error_code(),
kreuzberg_last_error_context());
return 1;
}
KREUZBERGExtractionResult *result =
kreuzberg_extract_file_sync("document.pdf", NULL, config);
if (!result) {
fprintf(stderr, "extraction failed (code %d): %s\n",
kreuzberg_last_error_code(),
kreuzberg_last_error_context());
kreuzberg_extraction_config_free(config);
return 1;
}
char *content = kreuzberg_extraction_result_content(result);
printf("%s\n", content ? content : "(empty)");
kreuzberg_free_string(content);
kreuzberg_extraction_result_free(result);
kreuzberg_extraction_config_free(config);
return 0;
}
```

View File

@@ -0,0 +1,38 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
int main(void) {
const char *config_json =
"{"
"\"enable_quality_processing\": true,"
"\"use_cache\": true"
"}";
KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_from_json(config_json);
if (!config) {
fprintf(stderr, "config parse failed (code %d): %s\n",
kreuzberg_last_error_code(),
kreuzberg_last_error_context());
return 1;
}
KREUZBERGExtractionResult *result =
kreuzberg_extract_file_sync("document.pdf", NULL, config);
if (!result) {
fprintf(stderr, "extraction failed (code %d): %s\n",
kreuzberg_last_error_code(),
kreuzberg_last_error_context());
kreuzberg_extraction_config_free(config);
return 1;
}
char *content = kreuzberg_extraction_result_content(result);
printf("%s\n", content ? content : "(empty)");
kreuzberg_free_string(content);
kreuzberg_extraction_result_free(result);
kreuzberg_extraction_config_free(config);
return 0;
}
```

View File

@@ -0,0 +1,44 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
int main(void) {
const char *config_json =
"{"
"\"ocr\": {"
"\"backend\": \"tesseract\","
"\"language\": \"eng+deu\","
"\"tesseract_config\": {"
"\"psm\": 6,"
"\"oem\": 3"
"}"
"}"
"}";
KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_from_json(config_json);
if (!config) {
fprintf(stderr, "config parse failed (code %d): %s\n",
kreuzberg_last_error_code(),
kreuzberg_last_error_context());
return 1;
}
KREUZBERGExtractionResult *result =
kreuzberg_extract_file_sync("scanned.pdf", NULL, config);
if (!result) {
fprintf(stderr, "extraction failed (code %d): %s\n",
kreuzberg_last_error_code(),
kreuzberg_last_error_context());
kreuzberg_extraction_config_free(config);
return 1;
}
char *content = kreuzberg_extraction_result_content(result);
printf("%s\n", content ? content : "(empty)");
kreuzberg_free_string(content);
kreuzberg_extraction_result_free(result);
kreuzberg_extraction_config_free(config);
return 0;
}
```

View File

@@ -0,0 +1,40 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
int main(void) {
const char *config_json =
"{"
"\"token_reduction\": {"
"\"mode\": \"moderate\","
"\"preserve_important_words\": true"
"}"
"}";
KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_from_json(config_json);
if (!config) {
fprintf(stderr, "config parse failed (code %d): %s\n",
kreuzberg_last_error_code(),
kreuzberg_last_error_context());
return 1;
}
KREUZBERGExtractionResult *result =
kreuzberg_extract_file_sync("document.pdf", NULL, config);
if (!result) {
fprintf(stderr, "extraction failed (code %d): %s\n",
kreuzberg_last_error_code(),
kreuzberg_last_error_context());
kreuzberg_extraction_config_free(config);
return 1;
}
char *content = kreuzberg_extraction_result_content(result);
printf("%s\n", content ? content : "(empty)");
kreuzberg_free_string(content);
kreuzberg_extraction_result_free(result);
kreuzberg_extraction_config_free(config);
return 0;
}
```