This commit is contained in:
7
docs/snippets/r/getting-started/basic_extraction.md
Normal file
7
docs/snippets/r/getting-started/basic_extraction.md
Normal file
@@ -0,0 +1,7 @@
|
||||
```r title="R"
|
||||
library(kreuzberg)
|
||||
|
||||
# Extract text from a PDF file
|
||||
result <- extract_file_sync("document.pdf")
|
||||
cat(result$content)
|
||||
```
|
||||
15
docs/snippets/r/getting-started/basic_usage.md
Normal file
15
docs/snippets/r/getting-started/basic_usage.md
Normal file
@@ -0,0 +1,15 @@
|
||||
```r title="R"
|
||||
library(kreuzberg)
|
||||
|
||||
config <- ExtractionConfig$default()
|
||||
|
||||
json <- extract_file_sync(
|
||||
path = "document.pdf",
|
||||
mime_type = NULL,
|
||||
config = config
|
||||
)
|
||||
result <- jsonlite::fromJSON(json, simplifyVector = FALSE)
|
||||
|
||||
cat(result$content)
|
||||
cat(sprintf("\nMIME Type: %s\n", result$mime_type))
|
||||
```
|
||||
14
docs/snippets/r/getting-started/extract_file.md
Normal file
14
docs/snippets/r/getting-started/extract_file.md
Normal file
@@ -0,0 +1,14 @@
|
||||
```r title="R"
|
||||
library(kreuzberg)
|
||||
|
||||
# Extract a file and inspect the result
|
||||
result <- extract_file_sync("document.pdf")
|
||||
|
||||
# Print result information
|
||||
cat(sprintf("MIME type: %s\n", mime_type(result)))
|
||||
cat(sprintf("Content length: %d characters\n", nchar(content(result))))
|
||||
cat(sprintf("Page count: %d\n", page_count(result)))
|
||||
|
||||
# View additional metadata
|
||||
cat(sprintf("Detected language: %s\n", detected_language(result)))
|
||||
```
|
||||
19
docs/snippets/r/getting-started/extract_with_ocr.md
Normal file
19
docs/snippets/r/getting-started/extract_with_ocr.md
Normal file
@@ -0,0 +1,19 @@
|
||||
```r title="R"
|
||||
library(kreuzberg)
|
||||
|
||||
# Configure OCR settings via a plain list mirroring the config JSON.
|
||||
config <- list(
|
||||
force_ocr = TRUE,
|
||||
ocr = list(
|
||||
backend = "tesseract",
|
||||
language = "eng"
|
||||
)
|
||||
)
|
||||
|
||||
# Extract text from an image file with OCR enabled
|
||||
json <- extract_file_sync("image.png", "image/png", config)
|
||||
result <- jsonlite::fromJSON(json, simplifyVector = FALSE)
|
||||
|
||||
cat("Extracted text from image:\n")
|
||||
cat(result$content)
|
||||
```
|
||||
12
docs/snippets/r/getting-started/hello_world.md
Normal file
12
docs/snippets/r/getting-started/hello_world.md
Normal file
@@ -0,0 +1,12 @@
|
||||
```r title="R"
|
||||
library(kreuzberg)
|
||||
|
||||
# Extract a PDF file
|
||||
result <- extract_file_sync("example.pdf")
|
||||
|
||||
# Print a preview of the extracted content
|
||||
content_preview <- substr(content(result), 1L, 200L)
|
||||
cat("Content preview:\n")
|
||||
cat(content_preview)
|
||||
cat("\n...\n")
|
||||
```
|
||||
7
docs/snippets/r/getting-started/install_verify.md
Normal file
7
docs/snippets/r/getting-started/install_verify.md
Normal file
@@ -0,0 +1,7 @@
|
||||
```r title="R"
|
||||
library(kreuzberg)
|
||||
|
||||
# Confirm the native extension loaded by listing registered extractors
|
||||
extractors <- list_document_extractors()
|
||||
cat(sprintf("kreuzberg ready: %d document extractors registered\n", length(extractors)))
|
||||
```
|
||||
7
docs/snippets/r/getting-started/installation.md
Normal file
7
docs/snippets/r/getting-started/installation.md
Normal file
@@ -0,0 +1,7 @@
|
||||
```r
|
||||
# Install with install.packages() (building from source requires a Rust toolchain)
|
||||
# install.packages("kreuzberg")
|
||||
|
||||
# Or install from GitHub
|
||||
# remotes::install_github("kreuzberg-dev/kreuzberg", subdir = "packages/r")
|
||||
```
|
||||
20
docs/snippets/r/getting-started/read_content.md
Normal file
20
docs/snippets/r/getting-started/read_content.md
Normal file
@@ -0,0 +1,20 @@
|
||||
```r title="R"
|
||||
library(kreuzberg)
|
||||
|
||||
# Extract a document
|
||||
result <- extract_file_sync("document.docx")
|
||||
|
||||
# Access core content fields
|
||||
cat(sprintf("MIME type: %s\n", mime_type(result)))
|
||||
cat(sprintf("Content length: %d characters\n", nchar(content(result))))
|
||||
|
||||
# Access structured data
|
||||
cat(sprintf("Number of tables: %d\n", length(result$tables)))
|
||||
cat(sprintf("Detected language: %s\n", detected_language(result)))
|
||||
|
||||
# Access metadata
|
||||
author <- metadata_field(result, "author")
|
||||
if (!is.null(author)) {
|
||||
cat(sprintf("Document author: %s\n", author))
|
||||
}
|
||||
```
|
||||
Reference in New Issue
Block a user