Files
fil/docs/snippets/r/api/combining_all_features.md
Henrik Jess Nielsen b4c07d3693
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s
Nomad changes
2026-06-01 23:40:55 +02:00

35 lines
829 B
Markdown

```r title="R"
# Combine forced OCR, table and metadata extraction, and chunking in a single
# extraction call, then print a short summary of the parsed result.
library(kreuzberg)

# Build the configuration from plain nested lists, one per sub-section.
ocr_settings <- list(
  backend = "tesseract",
  language = "eng",
  dpi = 300L
)
chunking_settings <- list(
  chunker_type = "markdown",
  max_characters = 1000L,
  overlap = 200L
)
settings <- list(
  output_format = "markdown",
  force_ocr = TRUE,
  extract_tables = TRUE,
  extract_metadata = TRUE,
  ocr = ocr_settings,
  chunking = chunking_settings
)

# auto_unbox = TRUE writes length-one vectors as JSON scalars, not arrays.
config_json_text <- jsonlite::toJSON(settings, auto_unbox = TRUE)
config <- ExtractionConfig$from_json(config_json_text)

raw_json <- extract_file_sync(
  path = "scanned_report.pdf",
  mime_type = "application/pdf",
  config = config
)

# The returned value is parsed as JSON; simplifyVector = FALSE keeps arrays as
# plain lists so length() counts their elements.
result <- jsonlite::fromJSON(raw_json, simplifyVector = FALSE)

chunk_count <- length(result$chunks)
table_count <- length(result$tables)
cat(sprintf("Chunks: %d\n", chunk_count))
cat(sprintf("Tables: %d\n", table_count))

# Fall back to a placeholder when the metadata carries no title.
title <- result$metadata$title
if (is.null(title)) {
  title <- "<none>"
}
cat(sprintf("Title: %s\n", title))
```