Nomad changes
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s

This commit is contained in:
Henrik Jess Nielsen
2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions

View File

@@ -0,0 +1,23 @@
```r title="R"
library(kreuzberg)

# Extract a document and report its detected language, once by reading the
# result field directly and once through the detected_language() helper.
result <- extract_file_sync("document.pdf")

cat("Language Detection Results:\n\n")

cat("Using direct field access:\n")
cat("Detected Language:", result$detected_language, "\n\n")

cat("Using S3 helper function:\n")
lang <- detected_language(result)
cat("Language (via helper):", lang, "\n\n")

cat("Language Information:\n")
# `if` needs a single non-missing logical. Comparing NULL, NA, or a vector of
# several codes against "en" would abort the script, so handle that case first.
if (length(lang) != 1L || is.na(lang)) {
  cat("No single language could be detected\n")
} else if (lang == "en") {
  cat("This is an English document\n")
} else if (lang == "es") {
  cat("This is a Spanish document\n")
} else {
  cat(sprintf("This is a %s document\n", lang))
}
```

View File

@@ -0,0 +1,13 @@
```r title="R"
library(kreuzberg)

# Run extraction with language detection enabled over several PDFs and print
# the detected language reported for each one.
pdf_paths <- c("english.pdf", "spanish.pdf", "french.pdf")
detect_config <- list(language_detection = list(enabled = TRUE))

for (pdf_path in pdf_paths) {
  # NOTE(review): the return value is parsed as a JSON string here, while
  # other snippets use it directly as a result object -- confirm which is right.
  raw_json <- extract_file_sync(pdf_path, "application/pdf", detect_config)
  parsed <- jsonlite::fromJSON(raw_json, simplifyVector = FALSE)

  report_line <- sprintf(
    "%s: detected language = %s\n",
    pdf_path, parsed$detected_language
  )
  cat(report_line)
}
```

View File

@@ -0,0 +1,25 @@
```r title="R"
library(kreuzberg)

# Extract a document, print its top-level fields, then print each metadata
# field for which metadata_field() returns something other than NULL.
doc <- extract_file_sync("document.pdf")

cat("Detected Language:", doc$detected_language, "\n")
cat("Quality Score:", doc$quality_score, "\n")
keyword_summary <- toString(doc$keywords)
cat("Keywords:", keyword_summary, "\n\n")

cat("Metadata fields:\n")

doc_authors <- metadata_field(doc, "authors")
if (!is.null(doc_authors)) {
  cat("Authors:", toString(doc_authors), "\n")
}

created_on <- metadata_field(doc, "created_date")
if (!is.null(created_on)) {
  cat("Created Date:", created_on, "\n")
}

n_pages <- metadata_field(doc, "page_count")
if (!is.null(n_pages)) {
  cat("Pages:", n_pages, "\n")
}
```

View File

@@ -0,0 +1,22 @@
```r title="R"
library(kreuzberg)

# Slice the extracted text into pages using the byte offsets stored in
# result$metadata$pages$boundaries, and preview the first three pages.
result <- extract_file_sync("document.pdf")
boundaries <- result$metadata$pages$boundaries

if (!is.null(boundaries) && length(boundaries) > 0L) {
  # Offsets are byte positions, so index the raw bytes rather than characters.
  content_bytes <- charToRaw(result$content)
  content_encoding <- Encoding(result$content)

  for (i in seq_len(min(3L, length(boundaries)))) {
    boundary <- boundaries[[i]]

    # byte_start is used as a 0-based, end-exclusive offset (hence the shift
    # by one). Build the index from a length so that an empty page
    # (byte_end == byte_start) selects nothing; `(start + 1):end` would count
    # backwards and pick up bytes that belong to neighbouring pages.
    n_bytes <- max(0L, boundary$byte_end - boundary$byte_start)
    page_bytes <- content_bytes[boundary$byte_start + seq_len(n_bytes)]

    # rawToChar() drops the encoding mark; restore it so nchar()/substr()
    # count characters, not bytes, for multibyte text.
    page_text <- rawToChar(page_bytes)
    Encoding(page_text) <- content_encoding

    preview_end <- min(100L, nchar(page_text))
    cat(sprintf("Page %d:\n", boundary$page_number))
    cat(sprintf(" Byte range: %d-%d\n", boundary$byte_start, boundary$byte_end))
    cat(sprintf(" Preview: %s...\n", substr(page_text, 1L, preview_end)))
  }
}
```

View File

@@ -0,0 +1,20 @@
```r title="R"
library(kreuzberg)

# Print the total page count, then for each page its element count, text
# length and (when the page has text) a preview of up to 100 characters.
doc <- extract_file_sync("document.pdf")
cat("Total pages:", page_count(doc), "\n\n")

for (page_no in seq_along(doc$pages)) {
  current_page <- doc$pages[[page_no]]

  cat(sprintf("Page %d:\n", page_no))
  cat(" Elements:", length(current_page$elements), "\n")

  text_length <- nchar(current_page$content)
  cat(" Text content length:", text_length, "chars\n")

  if (text_length > 0L) {
    cat(sprintf(" Preview: %s...\n", substr(current_page$content, 1L, 100L)))
  }
  cat("\n")
}
```

View File

@@ -0,0 +1,22 @@
```r title="R"
library(kreuzberg)

# Report how many tables were extracted from a spreadsheet and, for each one,
# its dimensions, column names and the first three rows.
workbook <- extract_file_sync("spreadsheet.xlsx")
cat("Tables extracted:", length(workbook$tables), "\n\n")

for (idx in seq_along(workbook$tables)) {
  # Named `tbl` rather than `table` so base::table() is not shadowed.
  tbl <- workbook$tables[[idx]]

  cat(sprintf("Table %d:\n", idx))
  row_total <- nrow(tbl)
  cat(" Rows:", row_total, "\n")
  cat(" Columns:", ncol(tbl), "\n")
  cat(" Column names:", toString(colnames(tbl)), "\n")
  cat("\n")

  if (row_total > 0L) {
    cat(" Preview (first 3 rows):\n")
    print(head(tbl, n = 3L))
    cat("\n")
  }
}
```

View File

@@ -0,0 +1,24 @@
```r title="R"
library(kreuzberg)

# Extract a PDF with chunking configured, then shape the first three chunks
# into records of the form a vector database entry might take.
chunk_config <- list(
  chunking = list(max_characters = 1000L, overlap = 200L)
)

# NOTE(review): the return value is parsed as a JSON string here, while other
# snippets use it directly as a result object -- confirm which is right.
raw_json <- extract_file_sync("document.pdf", "application/pdf", chunk_config)
parsed <- jsonlite::fromJSON(raw_json, simplifyVector = FALSE)

preview_count <- min(3L, length(parsed$chunks))
for (idx in seq_len(preview_count)) {
  chunk_text <- parsed$chunks[[idx]]
  chunk_chars <- nchar(chunk_text)

  # Built for illustration only; the snippet does not send it anywhere.
  vector_doc <- list(
    id = sprintf("doc_%d", idx),
    text = chunk_text,
    metadata = list(
      source = "document.pdf",
      chunk_index = idx,
      length = chunk_chars
    )
  )

  cat(sprintf("Vector DB entry %d: %d chars\n", idx, chunk_chars))
}
```