Nomad changes

2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions
--- a/docs/snippets/r/utils/vector_database_integration.md
+++ b/docs/snippets/r/utils/vector_database_integration.md
@@ -0,0 +1,26 @@
+```r title="R"
+library(kreuzberg)
+
+# Assemble the config inside-out: embedding options first, then chunking.
+embedding_options <- list(
+  model = list(type = "preset", name = "balanced"),
+  normalize = TRUE,
+  batch_size = 32L
+)
+chunking_options <- list(
+  max_characters = 512L,
+  overlap = 50L,
+  embedding = embedding_options
+)
+config <- list(chunking = chunking_options)
+
+# Extract the PDF, then parse the returned JSON; arrays stay as R lists.
+payload <- extract_file_sync("document.pdf", "application/pdf", config)
+extraction <- jsonlite::fromJSON(payload, simplifyVector = FALSE)
+
+# Chunks carry $content, $embedding, and $metadata. Send them to a vector DB
+# client (pgvector, Qdrant, Pinecone, etc.) with document_id as metadata.
+document_id <- "doc-001"
+cat(sprintf("document_id: %s\n", document_id))
+cat(sprintf("chunks ready for upsert: %d\n", length(extraction$chunks)))
+```