Nomad changes
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s

This commit is contained in:
Henrik Jess Nielsen
2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions

View File

@@ -0,0 +1,50 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
#include <stdlib.h>
/*
 * Example: chunk a document and report page data.
 *
 * Builds an extraction config from a JSON string holding "chunking" and
 * "pages" sections, runs a synchronous extraction of "document.pdf", then
 * prints the chunks JSON followed by the pages JSON on stdout.
 *
 * Returns 0 on success. Returns 1 after writing the last error code and
 * context to stderr when either the config or the extraction result is NULL.
 */
int main(void) {
    const char *settings =
        "{"
            "\"chunking\": {"
                "\"chunker_type\": \"character\","
                "\"max_characters\": 500,"
                "\"overlap\": 50"
            "},"
            "\"pages\": {"
                "\"extract_pages\": true"
            "}"
        "}";
    int exit_code = 1;
    KREUZBERGExtractionResult *extraction = NULL;
    char *chunk_text = NULL;
    char *page_text = NULL;

    KREUZBERGExtractionConfig *cfg = kreuzberg_extraction_config_from_json(settings);
    if (cfg == NULL) {
        fprintf(stderr, "config parse failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
        goto done;
    }

    extraction = kreuzberg_extract_file_sync("document.pdf", NULL, cfg);
    if (extraction == NULL) {
        fprintf(stderr, "extraction failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
        goto release_config;
    }

    /* Chunks first; a NULL string is shown as an empty JSON array. */
    chunk_text = kreuzberg_extraction_result_chunks(extraction);
    printf("chunks (JSON, includes per-chunk first_page/last_page metadata):\n%s\n",
           chunk_text != NULL ? chunk_text : "[]");
    /* NOTE(review): called even when chunk_text is NULL; assumes
       kreuzberg_free_string tolerates NULL, confirm against kreuzberg.h. */
    kreuzberg_free_string(chunk_text);

    /* Pages are fetched only after the chunk string has been released. */
    page_text = kreuzberg_extraction_result_pages(extraction);
    printf("pages (JSON): %s\n", page_text != NULL ? page_text : "[]");
    kreuzberg_free_string(page_text);

    kreuzberg_extraction_result_free(extraction);
    exit_code = 0;

release_config:
    kreuzberg_extraction_config_free(cfg);
done:
    return exit_code;
}
```

View File

@@ -0,0 +1,43 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
#include <stdlib.h>
/*
 * Example: chunking with the "markdown" chunker type.
 *
 * Parses a JSON config whose "chunking" section also sets
 * "prepend_heading_context", extracts "document.pdf" synchronously and
 * prints the resulting chunks JSON on stdout.
 *
 * Exit status is 0 when both the config and the extraction succeeded,
 * otherwise 1 (the failure is reported on stderr).
 */
int main(void) {
    const char *settings =
        "{"
            "\"chunking\": {"
                "\"chunker_type\": \"markdown\","
                "\"max_characters\": 500,"
                "\"overlap\": 50,"
                "\"prepend_heading_context\": true"
            "}"
        "}";
    int status = 1;

    KREUZBERGExtractionConfig *cfg = kreuzberg_extraction_config_from_json(settings);
    if (cfg != NULL) {
        KREUZBERGExtractionResult *extraction =
            kreuzberg_extract_file_sync("document.pdf", NULL, cfg);
        if (extraction != NULL) {
            char *chunk_text = kreuzberg_extraction_result_chunks(extraction);
            const char *shown = "[]"; /* fallback when no string came back */
            if (chunk_text != NULL) {
                shown = chunk_text;
            }
            printf("chunks (JSON): %s\n", shown);
            /* NOTE(review): may receive NULL here; assumes the free function
               accepts it, confirm against kreuzberg.h. */
            kreuzberg_free_string(chunk_text);
            kreuzberg_extraction_result_free(extraction);
            status = 0;
        } else {
            fprintf(stderr, "extraction failed (code %d): %s\n",
                    kreuzberg_last_error_code(),
                    kreuzberg_last_error_context());
        }
        /* The config is released on both the success and the failure path. */
        kreuzberg_extraction_config_free(cfg);
    } else {
        fprintf(stderr, "config parse failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
    }
    return status;
}
```

View File

@@ -0,0 +1,47 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
#include <stdlib.h>
/*
 * Example: chunking with an embedding preset.
 *
 * The JSON config nests an "embedding" object (preset name "balanced",
 * "normalize": true) inside "chunking". The program extracts
 * "research_paper.pdf" synchronously and prints the chunks JSON on stdout.
 *
 * Returns 1 (with a message on stderr) if config parsing or extraction
 * yields NULL, 0 otherwise.
 */
int main(void) {
    static const char settings[] =
        "{"
            "\"chunking\": {"
                "\"chunker_type\": \"character\","
                "\"max_characters\": 500,"
                "\"overlap\": 50,"
                "\"embedding\": {"
                    "\"model\": {\"preset\": {\"name\": \"balanced\"}},"
                    "\"normalize\": true"
                "}"
            "}"
        "}";

    KREUZBERGExtractionConfig *cfg = kreuzberg_extraction_config_from_json(settings);
    if (cfg == NULL) {
        fprintf(stderr, "config parse failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
        return 1;
    }

    KREUZBERGExtractionResult *extraction =
        kreuzberg_extract_file_sync("research_paper.pdf", NULL, cfg);
    if (extraction == NULL) {
        fprintf(stderr, "extraction failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
        kreuzberg_extraction_config_free(cfg);
        return 1;
    }

    char *chunk_text = kreuzberg_extraction_result_chunks(extraction);
    const char *shown = chunk_text;
    if (shown == NULL) {
        shown = "[]"; /* print an empty JSON array when nothing was returned */
    }
    printf("chunks (JSON, each item includes content, embedding, and metadata.chunk_index/total_chunks/byte_start/byte_end):\n%s\n",
           shown);
    /* NOTE(review): chunk_text may be NULL at this point; assumes
       kreuzberg_free_string accepts NULL, confirm against kreuzberg.h. */
    kreuzberg_free_string(chunk_text);

    kreuzberg_extraction_result_free(extraction);
    kreuzberg_extraction_config_free(cfg);
    return 0;
}
```

View File

@@ -0,0 +1,48 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
#include <stdlib.h>
/*
 * Example: embedding configuration with batch options.
 *
 * Supplies a "chunking" config whose "embedding" object sets "normalize",
 * "batch_size" and "show_download_progress", extracts "document.pdf"
 * synchronously and prints the chunks JSON on stdout.
 *
 * Returns 0 on success and 1 on a NULL config or NULL result; failures are
 * reported on stderr with the last error code and context.
 */
int main(void) {
    const char *settings =
        "{"
            "\"chunking\": {"
                "\"chunker_type\": \"character\","
                "\"max_characters\": 1024,"
                "\"overlap\": 100,"
                "\"embedding\": {"
                    "\"model\": {\"preset\": {\"name\": \"balanced\"}},"
                    "\"normalize\": true,"
                    "\"batch_size\": 32,"
                    "\"show_download_progress\": false"
                "}"
            "}"
        "}";
    int exit_code = 1;
    KREUZBERGExtractionResult *extraction = NULL;
    char *chunk_text = NULL;

    KREUZBERGExtractionConfig *cfg = kreuzberg_extraction_config_from_json(settings);
    if (cfg == NULL) {
        fprintf(stderr, "config parse failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
        goto done;
    }

    extraction = kreuzberg_extract_file_sync("document.pdf", NULL, cfg);
    if (extraction == NULL) {
        fprintf(stderr, "extraction failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
        goto release_config;
    }

    chunk_text = kreuzberg_extraction_result_chunks(extraction);
    printf("chunks with embeddings (JSON):\n%s\n",
           chunk_text != NULL ? chunk_text : "[]");
    /* NOTE(review): invoked even for a NULL string; assumes the free
       function tolerates NULL, confirm against kreuzberg.h. */
    kreuzberg_free_string(chunk_text);

    kreuzberg_extraction_result_free(extraction);
    exit_code = 0;

release_config:
    kreuzberg_extraction_config_free(cfg);
done:
    return exit_code;
}
```

View File

@@ -0,0 +1,44 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
#include <stdlib.h>
/*
 * Example: keyword extraction configuration.
 *
 * Parses a JSON config with a "keywords" section ("algorithm",
 * "max_keywords", "min_score", "ngram_range", "language"), extracts
 * "document.pdf" synchronously and prints the extracted-keywords JSON.
 *
 * Exit status: 0 on success, 1 if the config or the extraction result is
 * NULL (the error code and context are written to stderr).
 */
int main(void) {
    const char *settings =
        "{"
            "\"keywords\": {"
                "\"algorithm\": \"yake\","
                "\"max_keywords\": 10,"
                "\"min_score\": 0.3,"
                "\"ngram_range\": [1, 3],"
                "\"language\": \"en\""
            "}"
        "}";
    int status = 1;

    KREUZBERGExtractionConfig *cfg = kreuzberg_extraction_config_from_json(settings);
    if (cfg != NULL) {
        KREUZBERGExtractionResult *extraction =
            kreuzberg_extract_file_sync("document.pdf", NULL, cfg);
        if (extraction != NULL) {
            char *keyword_text = kreuzberg_extraction_result_extracted_keywords(extraction);
            printf("keywords (JSON): %s\n",
                   keyword_text != NULL ? keyword_text : "[]");
            /* NOTE(review): keyword_text can be NULL here; assumes
               kreuzberg_free_string accepts NULL, confirm against kreuzberg.h. */
            kreuzberg_free_string(keyword_text);
            kreuzberg_extraction_result_free(extraction);
            status = 0;
        } else {
            fprintf(stderr, "extraction failed (code %d): %s\n",
                    kreuzberg_last_error_code(),
                    kreuzberg_last_error_context());
        }
        kreuzberg_extraction_config_free(cfg);
    } else {
        fprintf(stderr, "config parse failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
    }
    return status;
}
```

View File

@@ -0,0 +1,46 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
#include <stdlib.h>
/*
 * Example: extract keywords from "research_paper.pdf".
 *
 * Uses a "keywords" config ("algorithm", "max_keywords", "min_score"),
 * runs a synchronous extraction and prints either the keywords JSON or
 * "(none)" when no keyword string was returned.
 *
 * Returns 0 on success; 1 when config parsing or extraction returns NULL,
 * after reporting the last error code and context on stderr.
 */
int main(void) {
    static const char settings[] =
        "{"
            "\"keywords\": {"
                "\"algorithm\": \"yake\","
                "\"max_keywords\": 10,"
                "\"min_score\": 0.3"
            "}"
        "}";

    KREUZBERGExtractionConfig *cfg = kreuzberg_extraction_config_from_json(settings);
    if (cfg == NULL) {
        fprintf(stderr, "config parse failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
        return 1;
    }

    KREUZBERGExtractionResult *extraction =
        kreuzberg_extract_file_sync("research_paper.pdf", NULL, cfg);
    if (extraction == NULL) {
        fprintf(stderr, "extraction failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
        kreuzberg_extraction_config_free(cfg);
        return 1;
    }

    char *keyword_text = kreuzberg_extraction_result_extracted_keywords(extraction);
    if (keyword_text == NULL) {
        printf("Keywords: (none)\n");
    } else {
        printf("Keywords: %s\n", keyword_text);
        /* The string is released only on the branch where one was returned. */
        kreuzberg_free_string(keyword_text);
    }

    kreuzberg_extraction_result_free(extraction);
    kreuzberg_extraction_config_free(cfg);
    return 0;
}
```

View File

@@ -0,0 +1,42 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
#include <stdlib.h>
/*
 * Example: language detection with "detect_multiple" set to false.
 *
 * Parses a "language_detection" config, extracts "document.pdf"
 * synchronously and prints the detected-languages JSON on stdout.
 *
 * Returns 0 on success, 1 when the config or the extraction result is NULL
 * (the last error code and context go to stderr).
 */
int main(void) {
    const char *settings =
        "{"
            "\"language_detection\": {"
                "\"enabled\": true,"
                "\"min_confidence\": 0.8,"
                "\"detect_multiple\": false"
            "}"
        "}";
    int exit_code = 1;
    KREUZBERGExtractionResult *extraction = NULL;
    char *language_text = NULL;

    KREUZBERGExtractionConfig *cfg = kreuzberg_extraction_config_from_json(settings);
    if (cfg == NULL) {
        fprintf(stderr, "config parse failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
        goto done;
    }

    extraction = kreuzberg_extract_file_sync("document.pdf", NULL, cfg);
    if (extraction == NULL) {
        fprintf(stderr, "extraction failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
        goto release_config;
    }

    language_text = kreuzberg_extraction_result_detected_languages(extraction);
    printf("detected languages (JSON): %s\n",
           language_text != NULL ? language_text : "[]");
    /* NOTE(review): called with NULL when no string was returned; assumes
       kreuzberg_free_string tolerates that, confirm against kreuzberg.h. */
    kreuzberg_free_string(language_text);

    kreuzberg_extraction_result_free(extraction);
    exit_code = 0;

release_config:
    kreuzberg_extraction_config_free(cfg);
done:
    return exit_code;
}
```

View File

@@ -0,0 +1,42 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
#include <stdlib.h>
/*
 * Example: language detection with "detect_multiple" set to true.
 *
 * Extracts "multilingual_document.pdf" synchronously using a
 * "language_detection" config and prints the detected-languages JSON.
 *
 * Returns 1 (after reporting on stderr) when config parsing or extraction
 * returns NULL; returns 0 otherwise.
 */
int main(void) {
    static const char settings[] =
        "{"
            "\"language_detection\": {"
                "\"enabled\": true,"
                "\"min_confidence\": 0.8,"
                "\"detect_multiple\": true"
            "}"
        "}";

    KREUZBERGExtractionConfig *cfg = kreuzberg_extraction_config_from_json(settings);
    if (cfg == NULL) {
        fprintf(stderr, "config parse failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
        return 1;
    }

    KREUZBERGExtractionResult *extraction =
        kreuzberg_extract_file_sync("multilingual_document.pdf", NULL, cfg);
    if (extraction == NULL) {
        fprintf(stderr, "extraction failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
        kreuzberg_extraction_config_free(cfg);
        return 1;
    }

    char *language_text = kreuzberg_extraction_result_detected_languages(extraction);
    const char *shown = language_text;
    if (shown == NULL) {
        shown = "[]"; /* empty JSON array stands in for a missing string */
    }
    printf("Detected languages: %s\n", shown);
    /* NOTE(review): language_text may be NULL; assumes the free function
       accepts NULL, confirm against kreuzberg.h. */
    kreuzberg_free_string(language_text);

    kreuzberg_extraction_result_free(extraction);
    kreuzberg_extraction_config_free(cfg);
    return 0;
}
```

View File

@@ -0,0 +1,37 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
#include <stdlib.h>
/*
 * Example: read the quality score of an extraction.
 *
 * Parses a config with "enable_quality_processing": true, extracts
 * "document.pdf" synchronously and prints the quality score with two
 * decimal places.
 *
 * Exit status: 0 on success, 1 when the config or the result is NULL
 * (details are written to stderr).
 */
int main(void) {
    const char *settings =
        "{"
            "\"enable_quality_processing\": true"
        "}";
    int status = 1;

    KREUZBERGExtractionConfig *cfg = kreuzberg_extraction_config_from_json(settings);
    if (cfg != NULL) {
        KREUZBERGExtractionResult *extraction =
            kreuzberg_extract_file_sync("document.pdf", NULL, cfg);
        if (extraction != NULL) {
            double quality = kreuzberg_extraction_result_quality_score(extraction);
            printf("quality score: %.2f\n", quality);
            kreuzberg_extraction_result_free(extraction);
            status = 0;
        } else {
            fprintf(stderr, "extraction failed (code %d): %s\n",
                    kreuzberg_last_error_code(),
                    kreuzberg_last_error_context());
        }
        /* Reached on both the success and the extraction-failure path. */
        kreuzberg_extraction_config_free(cfg);
    } else {
        fprintf(stderr, "config parse failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
    }
    return status;
}
```

View File

@@ -0,0 +1,45 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
#include <stdlib.h>
/*
 * Example: quality score check plus processing warnings.
 *
 * Extracts "scanned_document.pdf" synchronously with
 * "enable_quality_processing": true. Prints a low-quality warning when the
 * score is below 0.5 and the plain score otherwise, then prints the
 * processing-warnings JSON.
 *
 * Returns 0 on success; 1 when the config or the extraction result is NULL,
 * after reporting the last error code and context on stderr.
 */
int main(void) {
    const char *settings =
        "{"
            "\"enable_quality_processing\": true"
        "}";
    int exit_code = 1;
    double quality = 0.0;
    KREUZBERGExtractionResult *extraction = NULL;
    char *warning_text = NULL;

    KREUZBERGExtractionConfig *cfg = kreuzberg_extraction_config_from_json(settings);
    if (cfg == NULL) {
        fprintf(stderr, "config parse failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
        goto done;
    }

    extraction = kreuzberg_extract_file_sync("scanned_document.pdf", NULL, cfg);
    if (extraction == NULL) {
        fprintf(stderr, "extraction failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
        goto release_config;
    }

    quality = kreuzberg_extraction_result_quality_score(extraction);
    /* Written as a negated "< 0.5" test so that a NaN score takes the
       non-warning branch. */
    if (!(quality < 0.5)) {
        printf("Quality score: %.2f\n", quality);
    } else {
        printf("Warning: Low quality extraction (%.2f)\n", quality);
    }

    warning_text = kreuzberg_extraction_result_processing_warnings(extraction);
    printf("processing warnings (JSON): %s\n",
           warning_text != NULL ? warning_text : "[]");
    /* NOTE(review): may be passed NULL; assumes kreuzberg_free_string
       tolerates it, confirm against kreuzberg.h. */
    kreuzberg_free_string(warning_text);

    kreuzberg_extraction_result_free(extraction);
    exit_code = 0;

release_config:
    kreuzberg_extraction_config_free(cfg);
done:
    return exit_code;
}
```

View File

@@ -0,0 +1,41 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
#include <stdlib.h>
/*
 * Example: token reduction.
 *
 * Parses a "token_reduction" config ("mode": "moderate",
 * "preserve_important_words": true), extracts "document.pdf" synchronously
 * and prints the result content, or "(empty)" when no content string was
 * returned.
 *
 * Returns 1 (with a message on stderr) if the config or the extraction
 * result is NULL, 0 otherwise.
 */
int main(void) {
    static const char settings[] =
        "{"
            "\"token_reduction\": {"
                "\"mode\": \"moderate\","
                "\"preserve_important_words\": true"
            "}"
        "}";

    KREUZBERGExtractionConfig *cfg = kreuzberg_extraction_config_from_json(settings);
    if (cfg == NULL) {
        fprintf(stderr, "config parse failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
        return 1;
    }

    KREUZBERGExtractionResult *extraction =
        kreuzberg_extract_file_sync("document.pdf", NULL, cfg);
    if (extraction == NULL) {
        fprintf(stderr, "extraction failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
        kreuzberg_extraction_config_free(cfg);
        return 1;
    }

    char *body = kreuzberg_extraction_result_content(extraction);
    const char *shown = body;
    if (shown == NULL) {
        shown = "(empty)";
    }
    printf("reduced content:\n%s\n", shown);
    /* NOTE(review): body may be NULL here; assumes kreuzberg_free_string
       accepts NULL, confirm against kreuzberg.h. */
    kreuzberg_free_string(body);

    kreuzberg_extraction_result_free(extraction);
    kreuzberg_extraction_config_free(cfg);
    return 0;
}
```

View File

@@ -0,0 +1,44 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Example: token reduction with a byte count.
 *
 * Extracts "verbose_document.pdf" synchronously using a "token_reduction"
 * config. When a content string is returned, prints its strlen() byte count
 * together with the content; prints nothing when the content is NULL.
 *
 * Exit status: 0 on success, 1 when the config or the extraction result is
 * NULL (the last error code and context are written to stderr).
 */
int main(void) {
    const char *settings =
        "{"
            "\"token_reduction\": {"
                "\"mode\": \"moderate\","
                "\"preserve_important_words\": true"
            "}"
        "}";
    int status = 1;

    KREUZBERGExtractionConfig *cfg = kreuzberg_extraction_config_from_json(settings);
    if (cfg != NULL) {
        KREUZBERGExtractionResult *extraction =
            kreuzberg_extract_file_sync("verbose_document.pdf", NULL, cfg);
        if (extraction != NULL) {
            char *body = kreuzberg_extraction_result_content(extraction);
            if (body != NULL) {
                size_t byte_count = strlen(body);
                printf("reduced content (%zu bytes):\n%s\n", byte_count, body);
                /* Released only when a string was actually returned. */
                kreuzberg_free_string(body);
            }
            kreuzberg_extraction_result_free(extraction);
            status = 0;
        } else {
            fprintf(stderr, "extraction failed (code %d): %s\n",
                    kreuzberg_last_error_code(),
                    kreuzberg_last_error_context());
        }
        kreuzberg_extraction_config_free(cfg);
    } else {
        fprintf(stderr, "config parse failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
    }
    return status;
}
```

View File

@@ -0,0 +1,55 @@
```c title="C"
#include "kreuzberg.h"
#include <stdio.h>
#include <stdlib.h>
/*
 * Example: prepare chunk data for a vector database.
 *
 * Extracts the file named by `source_path` synchronously with a "chunking"
 * config that carries an "embedding" object, then prints the document id
 * and the chunks JSON on stdout.
 *
 * Returns 0 on success; 1 when the config or the extraction result is NULL,
 * after writing the last error code and context to stderr.
 */
int main(void) {
    const char *source_path = "document.pdf";
    const char *doc_id = "doc-001";
    const char *settings =
        "{"
            "\"chunking\": {"
                "\"chunker_type\": \"character\","
                "\"max_characters\": 512,"
                "\"overlap\": 50,"
                "\"embedding\": {"
                    "\"model\": {\"preset\": {\"name\": \"balanced\"}},"
                    "\"normalize\": true,"
                    "\"batch_size\": 32"
                "}"
            "}"
        "}";
    int exit_code = 1;
    KREUZBERGExtractionResult *extraction = NULL;
    char *chunk_text = NULL;

    KREUZBERGExtractionConfig *cfg = kreuzberg_extraction_config_from_json(settings);
    if (cfg == NULL) {
        fprintf(stderr, "config parse failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
        goto done;
    }

    extraction = kreuzberg_extract_file_sync(source_path, NULL, cfg);
    if (extraction == NULL) {
        fprintf(stderr, "extraction failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
        goto release_config;
    }

    /* The chunks JSON array holds content, embedding and metadata for each
       chunk. Hand it to your vector database client (pgvector, Qdrant,
       Pinecone, etc.), attaching doc_id as a metadata field. */
    chunk_text = kreuzberg_extraction_result_chunks(extraction);
    printf("document_id: %s\n", doc_id);
    printf("chunks (JSON, ready to upsert into a vector DB):\n%s\n",
           chunk_text != NULL ? chunk_text : "[]");
    /* NOTE(review): invoked even when chunk_text is NULL; assumes
       kreuzberg_free_string tolerates NULL, confirm against kreuzberg.h. */
    kreuzberg_free_string(chunk_text);

    kreuzberg_extraction_result_free(extraction);
    exit_code = 0;

release_config:
    kreuzberg_extraction_config_free(cfg);
done:
    return exit_code;
}
```