Files
fil/e2e/r/tests/test_format_specific.R

37 lines
2.5 KiB
R
Raw Normal View History

2026-06-01 23:40:55 +02:00
# This file is auto-generated by alef — DO NOT EDIT.
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
# To regenerate: alef generate
# To verify freshness: alef verify --exit-code
# Issues & docs: https://github.com/kreuzberg-dev/alef
# E2e tests for category: format_specific
# DOCX fixture handed to extract_bytes_sync as raw bytes; the JSON it returns
# is decoded and its `content` field must hold at least 20 characters.
test_that("format_docx_standalone: Standalone DOCX extraction using extract_bytes_sync", {
  docx_mime <- "application/vnd.openxmlformats-officedocument.wordprocessingml.document"

  # Read the whole file: `n` is the fixture's size on disk.
  docx_bytes <- readBin(
    .resolve_fixture("docx/fake.docx"),
    what = "raw",
    n = file.info(.resolve_fixture("docx/fake.docx"))$size
  )

  raw_json <- extract_bytes_sync(
    content = docx_bytes,
    mime_type = docx_mime,
    config = ExtractionConfig$default()
  )
  # simplifyVector = FALSE keeps the decoded JSON as nested lists.
  result <- jsonlite::fromJSON(raw_json, simplifyVector = FALSE)

  # Unconditional pass emitted by the generator; kept so the expectation
  # count of this test stays the same.
  expect_true(TRUE)
  expect_true(nchar(result$content) >= 20)
})
# HWPX fixture handed to extract_bytes_sync as raw bytes; the decoded result
# must hold at least 20 characters and contain the literal "Hello from HWPX".
test_that("format_hwpx_standalone: Standalone HWPX extraction using extract_bytes_sync", {
  hwpx_mime <- "application/haansofthwpx"

  # Read the whole file: `n` is the fixture's size on disk.
  hwpx_bytes <- readBin(
    .resolve_fixture("hwpx/simple.hwpx"),
    what = "raw",
    n = file.info(.resolve_fixture("hwpx/simple.hwpx"))$size
  )

  raw_json <- extract_bytes_sync(
    content = hwpx_bytes,
    mime_type = hwpx_mime,
    config = ExtractionConfig$default()
  )
  # simplifyVector = FALSE keeps the decoded JSON as nested lists.
  result <- jsonlite::fromJSON(raw_json, simplifyVector = FALSE)

  # Unconditional pass emitted by the generator; kept so the expectation
  # count of this test stays the same.
  expect_true(TRUE)
  expect_true(nchar(result$content) >= 20)
  # fixed = TRUE: match the marker text literally, not as a regex.
  expect_true(grepl("Hello from HWPX", result$content, fixed = TRUE))
})
# PDF fixture handed to extract_bytes_sync as raw bytes; the decoded result
# must hold at least 50 characters and mention at least one expected keyword.
test_that("format_pdf_text: Standalone PDF text extraction using extract_bytes_sync", {
  # Resolve the fixture once and reuse the path for both the size lookup and
  # the read, instead of resolving it twice.
  pdf_path <- .resolve_fixture("pdf/fake_memo.pdf")
  pdf_bytes <- readBin(pdf_path, what = "raw", n = file.info(pdf_path)$size)

  raw_json <- extract_bytes_sync(
    content = pdf_bytes,
    mime_type = "application/pdf",
    config = ExtractionConfig$default()
  )
  # simplifyVector = FALSE keeps the decoded JSON as nested lists.
  result <- jsonlite::fromJSON(raw_json, simplifyVector = FALSE)

  # Unconditional pass emitted by the generator; kept so the expectation
  # count of this test stays the same.
  expect_true(TRUE)
  expect_true(nchar(result$content) >= 50)

  # grepl() only honours the first pattern, so each keyword is checked
  # separately. vapply() pins the per-keyword result to a single logical, so a
  # missing or malformed `content` fails here with a clear type error rather
  # than sapply() silently changing shape and any() erroring on a list.
  expected_any <- c("Mallori", "May")
  keyword_found <- vapply(
    expected_any,
    function(v) grepl(v, result$content, fixed = TRUE),
    logical(1)
  )
  expect_true(any(keyword_found))
})
# PPTX fixture extracted by path via extract_file_sync. Nothing is asserted on
# the decoded result: the test passes as long as extraction and JSON decoding
# complete without raising an error.
test_that("format_pptx: PPTX presentation extraction using extract_file_sync", {
  pptx_mime <- "application/vnd.openxmlformats-officedocument.presentationml.presentation"

  raw_json <- extract_file_sync(
    path = .resolve_fixture("pptx/simple.pptx"),
    mime_type = pptx_mime,
    config = ExtractionConfig$default()
  )
  # simplifyVector = FALSE keeps the decoded JSON as nested lists.
  result <- jsonlite::fromJSON(raw_json, simplifyVector = FALSE)

  # The only expectation in this test; an unconditional pass emitted by the
  # generator.
  expect_true(TRUE)
})
# XLSX fixture extracted by path via extract_file_sync. Nothing is asserted on
# the decoded result: the test passes as long as extraction and JSON decoding
# complete without raising an error.
test_that("format_xlsx: XLSX spreadsheet extraction using extract_file_sync", {
  xlsx_mime <- "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"

  raw_json <- extract_file_sync(
    path = .resolve_fixture("xlsx/stanley_cups.xlsx"),
    mime_type = xlsx_mime,
    config = ExtractionConfig$default()
  )
  # simplifyVector = FALSE keeps the decoded JSON as nested lists.
  result <- jsonlite::fromJSON(raw_json, simplifyVector = FALSE)

  # The only expectation in this test; an unconditional pass emitted by the
  # generator.
  expect_true(TRUE)
})