Nomad changes
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s

This commit is contained in:
Henrik Jess Nielsen
2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions

View File

@@ -0,0 +1,19 @@
```r title="R"
library(kreuzberg)

# Read each file into memory and submit the whole batch in one call.
# Every item carries the file's bytes (as integers) and its MIME type.
paths <- c("report.pdf", "notes.txt")
mimes <- c("application/pdf", "text/plain")
items <- jsonlite::toJSON(lapply(seq_along(paths), function(i) {
  bytes <- readBin(paths[i], what = "raw", n = file.info(paths[i])$size)
  # I() protects the byte vector from auto_unbox: without it a one-byte file
  # would be serialised as a bare JSON number instead of a one-element array.
  list(content = I(as.integer(bytes)), mime_type = mimes[i])
}), auto_unbox = TRUE)
json <- batch_extract_bytes_sync(
  items = items,
  config = ExtractionConfig$default()
)

# The result is decoded as nested lists, one entry per returned item.
results <- jsonlite::fromJSON(json, simplifyVector = FALSE)
for (i in seq_along(results)) {
  cat(sprintf("[%d] mime=%s chars=%d\n",
              i, results[[i]]$mime_type, nchar(results[[i]]$content)))
}
```

View File

@@ -0,0 +1,17 @@
```r title="R"
library(kreuzberg)

# Describe the batch as a JSON array holding one {"path": ...} object per file.
doc_paths <- c("report.pdf", "slides.pptx", "data.xlsx")
items <- jsonlite::toJSON(
  lapply(doc_paths, function(p) list(path = p)),
  auto_unbox = TRUE
)

# Run the batch with the default configuration and decode the JSON reply
# into plain nested lists.
raw_json <- batch_extract_files_sync(
  items = items,
  config = ExtractionConfig$default()
)
results <- jsonlite::fromJSON(raw_json, simplifyVector = FALSE)

# Print one summary line per returned entry.
for (idx in seq_along(results)) {
  entry <- results[[idx]]
  cat(sprintf("[%d] mime=%s chars=%d\n",
              idx, entry$mime_type, nchar(entry$content)))
}
```

View File

@@ -0,0 +1,29 @@
<!-- snippet:syntax-only -->
```r title="R"
library(kreuzberg)
library(httr2)

# Request body: the text to split plus the chunker settings.
chunk_options <- list(max_characters = 1000, overlap = 50, trim = TRUE)
payload <- list(
  text = "Your long text content here...",
  chunker_type = "text",
  config = chunk_options
)

# Build the POST request step by step, then send it.
chunk_request <- request("http://localhost:8000/chunk")
chunk_request <- req_method(chunk_request, "POST")
chunk_request <- req_body_json(chunk_request, payload)
response <- req_perform(chunk_request)

# Report the chunk count followed by a 50-character preview of each chunk.
result <- resp_body_json(response)
cat(sprintf("Created %d chunks\n", result$chunk_count))
for (piece in result$chunks) {
  cat(sprintf("Chunk %d: %s...\n",
              piece$chunk_index, substr(piece$content, 1, 50)))
}
```

View File

@@ -0,0 +1,18 @@
<!-- snippet:syntax-only -->
```r title="R"
library(kreuzberg)
library(httr2)

# Upload document.pdf to the local /extract endpoint as multipart/form-data.
# httr2 builds multipart bodies with req_body_multipart(); file parts are
# created with curl::form_file(), which also carries the part's content type.
response <- request("http://localhost:8000/extract") |>
  req_method("POST") |>
  req_body_multipart(
    files = curl::form_file("document.pdf", type = "application/pdf")
  ) |>
  req_perform()

# Decode the JSON reply and pretty-print it.
data <- resp_body_json(response)
cat(jsonlite::toJSON(data, auto_unbox = TRUE, pretty = TRUE))
```

View File

@@ -0,0 +1,34 @@
```r title="R"
library(kreuzberg)

# Assemble the nested configuration from named pieces before serialising it.
ocr_settings <- list(backend = "tesseract", language = "eng", dpi = 300L)
chunking_settings <- list(
  chunker_type = "markdown",
  max_characters = 1000L,
  overlap = 200L
)
settings <- list(
  output_format = "markdown",
  force_ocr = TRUE,
  extract_tables = TRUE,
  extract_metadata = TRUE,
  ocr = ocr_settings,
  chunking = chunking_settings
)
config_json <- jsonlite::toJSON(settings, auto_unbox = TRUE)
config <- ExtractionConfig$from_json(config_json)

# Extract the document with that configuration and decode the JSON result.
json <- extract_file_sync(
  path = "scanned_report.pdf",
  mime_type = "application/pdf",
  config = config
)
result <- jsonlite::fromJSON(json, simplifyVector = FALSE)

# Summarise the result; print a placeholder when no title is present.
cat(sprintf("Chunks: %d\n", length(result$chunks)))
cat(sprintf("Tables: %d\n", length(result$tables)))
title <- result$metadata$title
if (is.null(title)) {
  title <- "<none>"
}
cat(sprintf("Title: %s\n", title))
```

View File

@@ -0,0 +1,26 @@
```r title="R"
library(kreuzberg)

content <- charToRaw("Hello, world!")

# Error handler: log the failure and yield NULL so the caller can branch on it.
report_failure <- function(e) {
  message(sprintf("Extraction failed: %s", conditionMessage(e)))
  NULL
}

# Attempt the extraction with a MIME type named "x-nonexistent"; any error
# raised by the extraction or the JSON decoding goes to report_failure().
result <- tryCatch(
  {
    json <- extract_bytes_sync(
      content = content,
      mime_type = "application/x-nonexistent",
      config = ExtractionConfig$default()
    )
    jsonlite::fromJSON(json, simplifyVector = FALSE)
  },
  error = report_failure
)

if (!is.null(result)) {
  cat(sprintf("Extracted %d characters\n", nchar(result$content)))
} else {
  cat("No content extracted; falling back to original bytes\n")
}
```

View File

@@ -0,0 +1,35 @@
```r title="R"
library(kreuzberg)

# Three batch items, each identified only by its path.
items <- jsonlite::toJSON(list(
  list(path = "doc1.pdf"),
  list(path = "doc2.docx"),
  list(path = "missing.html")
), auto_unbox = TRUE)

# An error from the batch call or the JSON decoding is logged and turned
# into NULL.
result <- tryCatch(
  {
    json <- batch_extract_files_sync(
      items = items,
      config = ExtractionConfig$default()
    )
    jsonlite::fromJSON(json, simplifyVector = FALSE)
  },
  error = function(e) {
    message(sprintf("Batch extraction failed: %s", conditionMessage(e)))
    NULL
  }
)

if (!is.null(result)) {
  # Entries with metadata$error set are reported as errors; all others get
  # a character and table count.
  for (i in seq_along(result)) {
    item <- result[[i]]
    err <- item$metadata$error
    if (is.null(err)) {
      cat(sprintf("Document %d: %d chars, %d tables\n",
                  i, nchar(item$content), length(item$tables)))
    } else {
      cat(sprintf("Document %d: ERROR - %s\n", i, err))
    }
  }
} else {
  cat("No results returned\n")
}
```

View File

@@ -0,0 +1,18 @@
```r title="R"
library(kreuzberg)

# extract_bytes is the async variant. The calling R thread still waits here
# until the underlying tokio task has finished; reach for future/promises
# when work has to be fanned out without blocking.
path <- "document.pdf"
n_bytes <- file.info(path)$size
content <- readBin(path, what = "raw", n = n_bytes)
json <- extract_bytes(
  content = content,
  mime_type = "application/pdf",
  config = ExtractionConfig$default()
)

# Decode the JSON result and report the length of the extracted text.
result <- jsonlite::fromJSON(json, simplifyVector = FALSE)
n_chars <- nchar(result$content)
cat(sprintf("Extracted %d characters\n", n_chars))
```

View File

@@ -0,0 +1,16 @@
```r title="R"
library(kreuzberg)

# Load the whole file into a raw vector.
path <- "document.pdf"
n_bytes <- file.info(path)$size
content <- readBin(path, what = "raw", n = n_bytes)

# Extract from the in-memory bytes using the default configuration.
json <- extract_bytes_sync(
  content = content,
  mime_type = "application/pdf",
  config = ExtractionConfig$default()
)
result <- jsonlite::fromJSON(json, simplifyVector = FALSE)

# Show the MIME type and the first 200 characters of the content.
cat(sprintf("MIME type: %s\n", result$mime_type))
preview <- substr(result$content, 1, 200)
cat(sprintf("Content preview: %s\n", preview))
```

View File

@@ -0,0 +1,15 @@
```r title="R"
library(kreuzberg)

# extract_file is the async variant: extendr drives the tokio runtime, so the
# call returns only once extraction has completed. R has no native async;
# wrap the call with the future/promises packages when non-blocking dispatch
# is required.
json <- extract_file(
  path = "document.pdf",
  mime_type = "application/pdf",
  config = ExtractionConfig$default()
)

# Decode the JSON result and report content length and MIME type.
result <- jsonlite::fromJSON(json, simplifyVector = FALSE)
summary_line <- sprintf(
  "Extracted %d characters from %s\n",
  nchar(result$content),
  result$mime_type
)
cat(summary_line)
```

View File

@@ -0,0 +1,13 @@
```r title="R"
library(kreuzberg)

# Extract a file from disk using the default configuration.
default_config <- ExtractionConfig$default()
json <- extract_file_sync(
  path = "document.pdf",
  mime_type = "application/pdf",
  config = default_config
)

# Decode the JSON result and print the MIME type and content length.
result <- jsonlite::fromJSON(json, simplifyVector = FALSE)
cat(sprintf("MIME type: %s\n", result$mime_type))
content_length <- nchar(result$content)
cat(sprintf("Content length: %d characters\n", content_length))
```