This commit is contained in:
19
docs/snippets/r/api/batch_extract_bytes_sync.md
Normal file
19
docs/snippets/r/api/batch_extract_bytes_sync.md
Normal file
@@ -0,0 +1,19 @@
|
||||
```r title="R"
|
||||
library(kreuzberg)

# Parallel vectors: mimes[i] is the MIME type passed along with paths[i].
paths <- c("report.pdf", "notes.txt")
mimes <- c("application/pdf", "text/plain")

# Build one list(content, mime_type) entry per input file. Iterating over
# indices (rather than the paths themselves) keeps the list unnamed, so it is
# encoded as a JSON array.
batch_items <- lapply(seq_along(paths), function(idx) {
  file_path <- paths[idx]
  raw_bytes <- readBin(file_path, what = "raw", n = file.size(file_path))
  # I() shields the byte vector from auto_unbox: without it a one-byte file
  # would be encoded as a bare number instead of a one-element array.
  list(content = I(as.integer(raw_bytes)), mime_type = mimes[idx])
})
items <- jsonlite::toJSON(batch_items, auto_unbox = TRUE)

json <- batch_extract_bytes_sync(
  items = items,
  config = ExtractionConfig$default()
)
results <- jsonlite::fromJSON(json, simplifyVector = FALSE)

# Print a one-line summary (index, MIME type, content length) per result.
for (idx in seq_along(results)) {
  entry <- results[[idx]]
  cat(sprintf(
    "[%d] mime=%s chars=%d\n",
    idx, entry$mime_type, nchar(entry$content)
  ))
}
|
||||
```
|
||||
17
docs/snippets/r/api/batch_extract_files_sync.md
Normal file
17
docs/snippets/r/api/batch_extract_files_sync.md
Normal file
@@ -0,0 +1,17 @@
|
||||
```r title="R"
|
||||
library(kreuzberg)

# One list(path = ...) entry per document; the unnamed outer list is encoded
# as a JSON array of objects.
documents <- list(
  list(path = "report.pdf"),
  list(path = "slides.pptx"),
  list(path = "data.xlsx")
)
items <- jsonlite::toJSON(documents, auto_unbox = TRUE)

json <- batch_extract_files_sync(
  items = items,
  config = ExtractionConfig$default()
)
results <- jsonlite::fromJSON(json, simplifyVector = FALSE)

# Print a one-line summary (index, MIME type, content length) per result.
for (idx in seq_along(results)) {
  entry <- results[[idx]]
  cat(sprintf(
    "[%d] mime=%s chars=%d\n",
    idx, entry$mime_type, nchar(entry$content)
  ))
}
|
||||
```
|
||||
29
docs/snippets/r/api/client_chunk_text.md
Normal file
29
docs/snippets/r/api/client_chunk_text.md
Normal file
@@ -0,0 +1,29 @@
|
||||
<!-- snippet:syntax-only -->
|
||||
|
||||
```r title="R"
|
||||
library(kreuzberg)
library(httr2)

# JSON body for the /chunk endpoint: the text to split plus chunker settings.
chunk_settings <- list(
  max_characters = 1000,
  overlap = 50,
  trim = TRUE
)
payload <- list(
  text = "Your long text content here...",
  chunker_type = "text",
  config = chunk_settings
)

# Build the request step by step, then send it.
chunk_request <- request("http://localhost:8000/chunk")
chunk_request <- req_method(chunk_request, "POST")
chunk_request <- req_body_json(chunk_request, payload)
response <- req_perform(chunk_request)

result <- resp_body_json(response)

# Report the chunk count, then the first 50 characters of every chunk.
cat(sprintf("Created %d chunks\n", result$chunk_count))
for (chunk in result$chunks) {
  cat(sprintf(
    "Chunk %d: %s...\n",
    chunk$chunk_index, substr(chunk$content, 1, 50)
  ))
}
|
||||
```
|
||||
18
docs/snippets/r/api/client_extract_single_file.md
Normal file
18
docs/snippets/r/api/client_extract_single_file.md
Normal file
@@ -0,0 +1,18 @@
|
||||
<!-- snippet:syntax-only -->
|
||||
|
||||
```r title="R"
|
||||
library(kreuzberg)
library(httr2)

# Upload the document as the "files" part of a multipart/form-data body.
# httr2 builds multipart bodies with req_body_multipart(); a file part is
# described with curl::form_file() (curl is a dependency of httr2).
response <- request("http://localhost:8000/extract") |>
  req_method("POST") |>
  req_body_multipart(
    files = curl::form_file("document.pdf", type = "application/pdf")
  ) |>
  req_perform()

# Parse the JSON response and pretty-print it. The variable is not called
# `data` so that it does not mask utils::data().
extraction <- resp_body_json(response)
cat(jsonlite::toJSON(extraction, auto_unbox = TRUE, pretty = TRUE))
|
||||
```
|
||||
34
docs/snippets/r/api/combining_all_features.md
Normal file
34
docs/snippets/r/api/combining_all_features.md
Normal file
@@ -0,0 +1,34 @@
|
||||
```r title="R"
|
||||
library(kreuzberg)

# Nested option groups, kept separate so the top-level settings stay readable.
ocr_options <- list(
  backend = "tesseract",
  language = "eng",
  dpi = 300L
)
chunking_options <- list(
  chunker_type = "markdown",
  max_characters = 1000L,
  overlap = 200L
)

settings <- list(
  output_format = "markdown",
  force_ocr = TRUE,
  extract_tables = TRUE,
  extract_metadata = TRUE,
  ocr = ocr_options,
  chunking = chunking_options
)

# The configuration object is created from the JSON encoding of the settings.
config_json <- jsonlite::toJSON(settings, auto_unbox = TRUE)
config <- ExtractionConfig$from_json(config_json)

json <- extract_file_sync(
  path = "scanned_report.pdf",
  mime_type = "application/pdf",
  config = config
)
result <- jsonlite::fromJSON(json, simplifyVector = FALSE)

cat(sprintf("Chunks: %d\n", length(result$chunks)))
cat(sprintf("Tables: %d\n", length(result$tables)))

# Substitute a placeholder when the metadata carries no title.
title <- result$metadata$title
if (is.null(title)) {
  title <- "<none>"
}
cat(sprintf("Title: %s\n", title))
|
||||
```
|
||||
26
docs/snippets/r/api/error_handling.md
Normal file
26
docs/snippets/r/api/error_handling.md
Normal file
@@ -0,0 +1,26 @@
|
||||
```r title="R"
|
||||
library(kreuzberg)

content <- charToRaw("Hello, world!")

# Error handler: report the condition message and yield NULL so the caller
# can test for failure afterwards.
report_failure <- function(e) {
  message(sprintf("Extraction failed: %s", conditionMessage(e)))
  NULL
}

# The MIME type below is deliberately one that is not expected to be handled,
# so this snippet exercises the error branch.
result <- tryCatch(
  {
    json <- extract_bytes_sync(
      content = content,
      mime_type = "application/x-nonexistent",
      config = ExtractionConfig$default()
    )
    jsonlite::fromJSON(json, simplifyVector = FALSE)
  },
  error = report_failure
)

if (!is.null(result)) {
  cat(sprintf("Extracted %d characters\n", nchar(result$content)))
} else {
  cat("No content extracted; falling back to original bytes\n")
}
|
||||
```
|
||||
35
docs/snippets/r/api/error_handling_extract.md
Normal file
35
docs/snippets/r/api/error_handling_extract.md
Normal file
@@ -0,0 +1,35 @@
|
||||
```r title="R"
|
||||
library(kreuzberg)

# Three inputs, encoded as a JSON array of {path} objects.
documents <- list(
  list(path = "doc1.pdf"),
  list(path = "doc2.docx"),
  list(path = "missing.html")
)
items <- jsonlite::toJSON(documents, auto_unbox = TRUE)

# Error handler for a failure of the batch call as a whole: report it and
# yield NULL.
report_batch_failure <- function(e) {
  message(sprintf("Batch extraction failed: %s", conditionMessage(e)))
  NULL
}

result <- tryCatch(
  {
    json <- batch_extract_files_sync(
      items = items,
      config = ExtractionConfig$default()
    )
    jsonlite::fromJSON(json, simplifyVector = FALSE)
  },
  error = report_batch_failure
)

if (!is.null(result)) {
  # Per-document reporting: an entry with metadata$error set is printed as an
  # error, any other entry as a content/table summary.
  for (idx in seq_along(result)) {
    item <- result[[idx]]
    err <- item$metadata$error
    if (is.null(err)) {
      cat(sprintf(
        "Document %d: %d chars, %d tables\n",
        idx, nchar(item$content), length(item$tables)
      ))
    } else {
      cat(sprintf("Document %d: ERROR - %s\n", idx, err))
    }
  }
} else {
  cat("No results returned\n")
}
|
||||
```
|
||||
18
docs/snippets/r/api/extract_bytes_async.md
Normal file
18
docs/snippets/r/api/extract_bytes_async.md
Normal file
@@ -0,0 +1,18 @@
|
||||
```r title="R"
|
||||
library(kreuzberg)

# extract_bytes is the async variant. From R the call still blocks the calling
# thread until the underlying tokio task has finished; reach for the
# future/promises packages when you need to fan out without blocking.
path <- "document.pdf"
byte_count <- file.size(path)
content <- readBin(path, what = "raw", n = byte_count)

json <- extract_bytes(
  content = content,
  mime_type = "application/pdf",
  config = ExtractionConfig$default()
)
result <- jsonlite::fromJSON(json, simplifyVector = FALSE)

char_count <- nchar(result$content)
cat(sprintf("Extracted %d characters\n", char_count))
|
||||
```
|
||||
16
docs/snippets/r/api/extract_bytes_sync.md
Normal file
16
docs/snippets/r/api/extract_bytes_sync.md
Normal file
@@ -0,0 +1,16 @@
|
||||
```r title="R"
|
||||
library(kreuzberg)

# Read the whole file into a raw vector.
path <- "document.pdf"
byte_count <- file.size(path)
content <- readBin(path, what = "raw", n = byte_count)

json <- extract_bytes_sync(
  content = content,
  mime_type = "application/pdf",
  config = ExtractionConfig$default()
)
result <- jsonlite::fromJSON(json, simplifyVector = FALSE)

# Show the reported MIME type and the first 200 characters of the content.
preview <- substr(result$content, 1, 200)
cat(sprintf("MIME type: %s\n", result$mime_type))
cat(sprintf("Content preview: %s\n", preview))
|
||||
```
|
||||
15
docs/snippets/r/api/extract_file_async.md
Normal file
15
docs/snippets/r/api/extract_file_async.md
Normal file
@@ -0,0 +1,15 @@
|
||||
```r title="R"
|
||||
library(kreuzberg)

# extract_file is the async variant: extendr drives the tokio runtime, so the
# call returns once extraction has completed. R has no native async; wrap the
# call with the future/promises packages if non-blocking dispatch is required.
json <- extract_file(
  path = "document.pdf",
  mime_type = "application/pdf",
  config = ExtractionConfig$default()
)
result <- jsonlite::fromJSON(json, simplifyVector = FALSE)

cat(sprintf(
  "Extracted %d characters from %s\n",
  nchar(result$content), result$mime_type
))
|
||||
```
|
||||
13
docs/snippets/r/api/extract_file_sync.md
Normal file
13
docs/snippets/r/api/extract_file_sync.md
Normal file
@@ -0,0 +1,13 @@
|
||||
```r title="R"
|
||||
library(kreuzberg)

# Extract a single file, then parse the returned JSON into nested lists.
config <- ExtractionConfig$default()
json <- extract_file_sync(
  path = "document.pdf",
  mime_type = "application/pdf",
  config = config
)
result <- jsonlite::fromJSON(json, simplifyVector = FALSE)

char_count <- nchar(result$content)
cat(sprintf("MIME type: %s\n", result$mime_type))
cat(sprintf("Content length: %d characters\n", char_count))
|
||||
```
|
||||
Reference in New Issue
Block a user