Nomad changes
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s

This commit is contained in:
Henrik Jess Nielsen
2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions

View File

@@ -0,0 +1,7 @@
```r
library(kreuzberg)
# Run a synchronous extraction on the PDF, then print its content field.
extraction <- extract_file_sync("document.pdf")
cat(extraction$content)
```

View File

@@ -0,0 +1,15 @@
```r title="R"
library(kreuzberg)
# Start from the package's default extraction configuration.
default_config <- ExtractionConfig$default()
# Every argument is spelled out by name; mime_type is passed as NULL.
raw_json <- extract_file_sync(
  path = "document.pdf",
  mime_type = NULL,
  config = default_config
)
# Parse the returned JSON; simplifyVector = FALSE keeps arrays as R lists.
parsed <- jsonlite::fromJSON(raw_json, simplifyVector = FALSE)
# Print the extracted text, then the MIME type on a line of its own.
cat(parsed$content)
mime_line <- sprintf("\nMIME Type: %s\n", parsed$mime_type)
cat(mime_line)
```

View File

@@ -0,0 +1,14 @@
```r title="R"
library(kreuzberg)
# Run the extraction once; every report line below reads from this result.
extraction <- extract_file_sync("document.pdf")
# Basic facts about the result, each read through its accessor helper.
doc_mime <- mime_type(extraction)
cat(sprintf("MIME type: %s\n", doc_mime))
char_count <- nchar(content(extraction))
cat(sprintf("Content length: %d characters\n", char_count))
n_pages <- page_count(extraction)
cat(sprintf("Page count: %d\n", n_pages))
# Additional metadata: the detected language.
doc_language <- detected_language(extraction)
cat(sprintf("Detected language: %s\n", doc_language))
```

View File

@@ -0,0 +1,19 @@
```r title="R"
library(kreuzberg)
# Configure OCR settings via a plain list mirroring the config JSON.
# NOTE(review): field names (force_ocr, ocr$backend, ocr$language) are taken
# as-is from the config schema; confirm against the kreuzberg reference.
config <- list(
  force_ocr = TRUE,
  ocr = list(
    backend = "tesseract",
    language = "eng"
  )
)
# Extract an image file with OCR enabled. Arguments after the path are passed
# by name so the call reads unambiguously and matches the other snippets.
json <- extract_file_sync(
  "image.png",
  mime_type = "image/png",
  config = config
)
# Parse the returned JSON; simplifyVector = FALSE keeps arrays as R lists.
result <- jsonlite::fromJSON(json, simplifyVector = FALSE)
cat("Extracted text from image:\n")
cat(result$content)
```

View File

@@ -0,0 +1,12 @@
```r title="R"
library(kreuzberg)
# Run the extraction on the example PDF.
extraction <- extract_file_sync("example.pdf")
# Keep only the first 200 characters of the extracted text for the preview.
full_text <- content(extraction)
preview <- substr(full_text, start = 1L, stop = 200L)
# Header, preview text, then a marker line showing the text was cut short.
cat("Content preview:\n")
cat(preview)
cat("\n...\n")
```

View File

@@ -0,0 +1,7 @@
```r title="R"
library(kreuzberg)
# Listing the registered extractors confirms the native extension loaded;
# the count is then reported in a single status line.
extractor_count <- length(list_document_extractors())
status_line <- sprintf(
  "kreuzberg ready: %d document extractors registered\n",
  extractor_count
)
cat(status_line)
```

View File

@@ -0,0 +1,7 @@
```r
# Install from source (requires Rust toolchain)
# install.packages("kreuzberg")
# Or install from the GitHub repository (requires the remotes package)
# remotes::install_github("kreuzberg-dev/kreuzberg", subdir = "packages/r")
```

View File

@@ -0,0 +1,20 @@
```r title="R"
library(kreuzberg)
# Run the extraction on a Word document.
doc <- extract_file_sync("document.docx")
# Core content fields, read through the accessor helpers.
doc_mime <- mime_type(doc)
cat(sprintf("MIME type: %s\n", doc_mime))
char_count <- nchar(content(doc))
cat(sprintf("Content length: %d characters\n", char_count))
# Structured data: number of tables and the detected language.
table_count <- length(doc$tables)
cat(sprintf("Number of tables: %d\n", table_count))
doc_language <- detected_language(doc)
cat(sprintf("Detected language: %s\n", doc_language))
# Metadata: the author line is printed only when the lookup is not NULL.
doc_author <- metadata_field(doc, "author")
if (!is.null(doc_author)) {
  cat(sprintf("Document author: %s\n", doc_author))
}
```