This commit is contained in:
49
docs/snippets/c/utils/chunking_rag.md
Normal file
49
docs/snippets/c/utils/chunking_rag.md
Normal file
@@ -0,0 +1,49 @@
|
||||
```c title="C"
|
||||
#include "kreuzberg.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
int main(void) {
|
||||
const char *config_json =
|
||||
"{"
|
||||
"\"chunking\": {"
|
||||
"\"chunker_type\": \"character\","
|
||||
"\"max_characters\": 500,"
|
||||
"\"overlap\": 50,"
|
||||
"\"embedding\": {"
|
||||
"\"model\": {\"preset\": {\"name\": \"balanced\"}},"
|
||||
"\"normalize\": true"
|
||||
"}"
|
||||
"}"
|
||||
"}";
|
||||
|
||||
KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_from_json(config_json);
|
||||
if (!config) {
|
||||
fprintf(stderr, "config parse failed (code %d): %s\n",
|
||||
kreuzberg_last_error_code(),
|
||||
kreuzberg_last_error_context());
|
||||
return 1;
|
||||
}
|
||||
|
||||
KREUZBERGExtractionResult *result =
|
||||
kreuzberg_extract_file_sync("research_paper.pdf", NULL, config);
|
||||
if (!result) {
|
||||
fprintf(stderr, "extraction failed (code %d): %s\n",
|
||||
kreuzberg_last_error_code(),
|
||||
kreuzberg_last_error_context());
|
||||
kreuzberg_extraction_config_free(config);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Each chunk JSON entry contains content, embedding, and metadata
|
||||
(chunk_index, total_chunks, byte_start, byte_end). Pipe this directly
|
||||
into a vector database client. */
|
||||
char *chunks_json = kreuzberg_extraction_result_chunks(result);
|
||||
printf("chunks (JSON):\n%s\n", chunks_json ? chunks_json : "[]");
|
||||
kreuzberg_free_string(chunks_json);
|
||||
|
||||
kreuzberg_extraction_result_free(result);
|
||||
kreuzberg_extraction_config_free(config);
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
Reference in New Issue
Block a user