# This file is auto-generated by alef — DO NOT EDIT.
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
# To regenerate: alef generate
# To verify freshness: alef verify --exit-code
# Issues & docs: https://github.com/kreuzberg-dev/alef

# E2e tests for category: format_specific

# Reads the DOCX fixture into a raw vector, extracts it from memory, and
# checks that the parsed JSON result carries at least 20 characters of content.
test_that("format_docx_standalone: Standalone DOCX extraction using extract_bytes_sync", {
  # Resolve the fixture once; the same path feeds both the size and the read.
  fixture_path <- .resolve_fixture("docx/fake.docx")
  fixture_bytes <- readBin(
    fixture_path,
    what = "raw",
    n = file.size(fixture_path)
  )
  result <- jsonlite::fromJSON(
    extract_bytes_sync(
      content = fixture_bytes,
      mime_type =
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
      config = ExtractionConfig$default()
    ),
    simplifyVector = FALSE
  )
  # Unconditional expectation: only reached when extraction and JSON parsing
  # above completed without raising an error.
  expect_true(TRUE)
  expect_gte(nchar(result$content), 20)
})

# Reads the HWPX fixture into a raw vector, extracts it from memory, and
# checks both the content length and the presence of a known phrase.
test_that("format_hwpx_standalone: Standalone HWPX extraction using extract_bytes_sync", {
  fixture_path <- .resolve_fixture("hwpx/simple.hwpx")
  fixture_bytes <- readBin(
    fixture_path,
    what = "raw",
    n = file.size(fixture_path)
  )
  result <- jsonlite::fromJSON(
    extract_bytes_sync(
      content = fixture_bytes,
      mime_type = "application/haansofthwpx",
      config = ExtractionConfig$default()
    ),
    simplifyVector = FALSE
  )
  # Unconditional expectation: only reached when the calls above did not error.
  expect_true(TRUE)
  expect_gte(nchar(result$content), 20)
  # Literal (non-regex) substring match.
  expect_match(result$content, "Hello from HWPX", fixed = TRUE)
})

# Reads the PDF fixture into a raw vector, extracts it from memory, and
# checks the content length plus the presence of at least one expected word.
test_that("format_pdf_text: Standalone PDF text extraction using extract_bytes_sync", {
  fixture_path <- .resolve_fixture("pdf/fake_memo.pdf")
  fixture_bytes <- readBin(
    fixture_path,
    what = "raw",
    n = file.size(fixture_path)
  )
  result <- jsonlite::fromJSON(
    extract_bytes_sync(
      content = fixture_bytes,
      mime_type = "application/pdf",
      config = ExtractionConfig$default()
    ),
    simplifyVector = FALSE
  )
  # Unconditional expectation: only reached when the calls above did not error.
  expect_true(TRUE)
  expect_gte(nchar(result$content), 50)
  # Passes when ANY of the candidate words occurs literally in the content.
  # vapply() pins the per-word result to a single logical value.
  found <- vapply(
    c("Mallori", "May"),
    function(v) grepl(v, result$content, fixed = TRUE),
    logical(1)
  )
  expect_true(any(found))
})

# Extracts the PPTX fixture directly from its path. The only expectation is
# unconditional, so the test fails only if extraction or JSON parsing errors.
test_that("format_pptx: PPTX presentation extraction using extract_file_sync", {
  result <- jsonlite::fromJSON(
    extract_file_sync(
      path = .resolve_fixture("pptx/simple.pptx"),
      mime_type =
        "application/vnd.openxmlformats-officedocument.presentationml.presentation",
      config = ExtractionConfig$default()
    ),
    simplifyVector = FALSE
  )
  # Keeps the test from having zero expectations; `result` is not inspected.
  expect_true(TRUE)
})

# Extracts the XLSX fixture directly from its path. The only expectation is
# unconditional, so the test fails only if extraction or JSON parsing errors.
test_that("format_xlsx: XLSX spreadsheet extraction using extract_file_sync", {
  result <- jsonlite::fromJSON(
    extract_file_sync(
      path = .resolve_fixture("xlsx/stanley_cups.xlsx"),
      mime_type =
        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
      config = ExtractionConfig$default()
    ),
    simplifyVector = FALSE
  )
  # Keeps the test from having zero expectations; `result` is not inspected.
  expect_true(TRUE)
})