Files
fil/docs/snippets/r/advanced/embedding_with_chunking.md

24 lines
581 B
Markdown
Raw Normal View History

2026-06-01 23:40:55 +02:00
```r title="R"
# Extract a PDF with chunking enabled and collect one record per chunk
# (id, text, character count) as input for a later embedding step.
library(kreuzberg)

# Chunking options forwarded to the extractor; both values are integers.
config <- list(
  chunking = list(max_characters = 1000L, overlap = 200L)
)

# The extractor output is parsed as JSON. simplifyVector = FALSE keeps JSON
# arrays as R lists, so chunks are always addressed with [[i]].
json <- extract_file_sync("document.pdf", "application/pdf", config)
result <- jsonlite::fromJSON(json, simplifyVector = FALSE)

# Look the field up once, with exact name matching (`$` partial-matches on
# lists and could silently pick up a differently named field).
chunks <- result[["chunks"]]
cat(sprintf("Preparing %d chunks for embedding:\n", length(chunks)))

# Build all records in one pass rather than growing a list inside a loop.
# seq_along() makes the empty case yield an empty list.
# NOTE(review): this assumes each chunk deserialises to a single character
# string. If chunks are objects (e.g. with a text field plus metadata),
# nchar() would return one value per field instead of the text length --
# confirm against the kreuzberg result schema.
embeddings_data <- lapply(seq_along(chunks), function(i) {
  list(
    chunk_id = i,
    text = chunks[[i]],
    length = nchar(chunks[[i]])
  )
})
cat(sprintf("Ready to embed %d chunks\n", length(embeddings_data)))
```