Nomad changes

2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions
--- a/docs/snippets/r/config/chunking_config.md
+++ b/docs/snippets/r/config/chunking_config.md
@@ -0,0 +1,50 @@
+```r title="R"
+library(kreuzberg)
+
+# Example 1: Basic character-based chunking
+config <- list(
+  chunking = list(max_characters = 1000L, overlap = 200L)
+)
+
+# The extraction result is JSON; simplifyVector = FALSE keeps it as nested lists.
+json <- extract_file_sync("document.pdf", "application/pdf", config)
+result <- jsonlite::fromJSON(json, simplifyVector = FALSE)
+
+num_chunks <- length(result$chunks)
+cat(sprintf("Document split into %d chunks\n", num_chunks))
+
+# Preview at most the first three chunks; seq_len() gives an empty loop when
+# there are no chunks.
+for (i in seq_len(min(3L, num_chunks))) {
+  chunk <- result$chunks[[i]]
+  # A chunk that is a JSON object is parsed as a named list. Calling nchar() on
+  # the list itself returns one count per field, so measure the text instead.
+  # NOTE(review): assumes the chunk text is stored under `content`; confirm
+  # against the extraction result schema.
+  chunk_text <- if (is.list(chunk)) chunk$content else chunk
+  cat(sprintf("Chunk %d: %d characters\n", i, nchar(chunk_text)))
+}
+```
+
+```r title="R - Markdown chunker with token-based sizing"
+library(kreuzberg)
+
+# Markdown chunker whose sizing is delegated to a tokenizer model.
+token_sizing <- list(type = "tokenizer", model = "Xenova/gpt-4o")
+chunking_opts <- list(chunker_type = "markdown", sizing = token_sizing)
+config <- list(chunking = chunking_opts)
+
+# Parse the JSON result into nested lists (no simplification to vectors).
+response_json <- extract_file_sync("document.md", "text/markdown", config)
+result <- jsonlite::fromJSON(response_json, simplifyVector = FALSE)
+
+chunk_count <- length(result$chunks)
+cat(sprintf("Markdown document split into %d chunks\n", chunk_count))
+```
+
+```r title="R - Prepend heading context"
+library(kreuzberg)
+
+# Enable the `prepend_heading_context` option on the markdown chunker.
+chunking_opts <- list(
+  chunker_type = "markdown",
+  prepend_heading_context = TRUE
+)
+config <- list(chunking = chunking_opts)
+
+# Parse the JSON result into nested lists (no simplification to vectors).
+response_json <- extract_file_sync("document.md", "text/markdown", config)
+result <- jsonlite::fromJSON(response_json, simplifyVector = FALSE)
+
+chunk_count <- length(result$chunks)
+cat(sprintf(
+  "Document split into %d chunks with prepended headings\n",
+  chunk_count
+))
+```