Files
fil/docs/snippets/c/utils/vector_database_integration.md
Henrik Jess Nielsen b4c07d3693
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s
Nomad changes
2026-06-01 23:40:55 +02:00

1.8 KiB

#include "kreuzberg.h"
#include <stdio.h>
#include <stdlib.h>

int main(void) {
    const char *document_path = "document.pdf";
    const char *document_id = "doc-001";

    const char *config_json =
        "{"
        "\"chunking\": {"
        "\"chunker_type\": \"character\","
        "\"max_characters\": 512,"
        "\"overlap\": 50,"
        "\"embedding\": {"
        "\"model\": {\"preset\": {\"name\": \"balanced\"}},"
        "\"normalize\": true,"
        "\"batch_size\": 32"
        "}"
        "}"
        "}";

    KREUZBERGExtractionConfig *config = kreuzberg_extraction_config_from_json(config_json);
    if (!config) {
        fprintf(stderr, "config parse failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
        return 1;
    }

    KREUZBERGExtractionResult *result =
        kreuzberg_extract_file_sync(document_path, NULL, config);
    if (!result) {
        fprintf(stderr, "extraction failed (code %d): %s\n",
                kreuzberg_last_error_code(),
                kreuzberg_last_error_context());
        kreuzberg_extraction_config_free(config);
        return 1;
    }

    /* The chunks JSON array carries content + embedding + metadata for each
       chunk. Pass this directly to your vector database client (pgvector,
       Qdrant, Pinecone, etc.) along with the document_id as a metadata field. */
    char *chunks_json = kreuzberg_extraction_result_chunks(result);
    printf("document_id: %s\n", document_id);
    printf("chunks (JSON, ready to upsert into a vector DB):\n%s\n",
           chunks_json ? chunks_json : "[]");
    kreuzberg_free_string(chunks_json);

    kreuzberg_extraction_result_free(result);
    kreuzberg_extraction_config_free(config);
    return 0;
}