# This file is auto-generated by alef — DO NOT EDIT.
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
# To regenerate: alef generate
# To verify freshness: alef verify --exit-code
# Issues & docs: https://github.com/kreuzberg-dev/alef

# E2e tests for category: format_specific
defmodule E2e.FormatSpecificTest do
  @moduledoc """
  End-to-end extraction tests for the `format_specific` category.

  Each test hands a fixture from `../../test_documents` to
  `Kreuzberg.extract_bytes_sync/2` or `Kreuzberg.extract_file_sync/2` and
  checks the returned `content`. The DOCX, HWPX and PDF tests enforce a
  minimum content size (and, for HWPX and PDF, expected substrings); the
  PPTX and XLSX tests only require that extraction returns `{:ok, _}`.

  Fixture paths are relative, so they resolve against the working directory
  the test run is started from.

  NOTE(review): the header marks this file as generated by alef, so manual
  changes are presumably overwritten by `alef generate` — mirror them in the
  generator if they should persist.
  """

  use ExUnit.Case, async: false

  describe "format_docx_standalone" do
    test "format_docx_standalone" do
      content = File.read!("../../test_documents/docx/fake.docx")

      assert {:ok, result} =
               Kreuzberg.extract_bytes_sync(
                 content,
                 "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
               )

      assert content_size(result.content) >= 20
    end
  end

  describe "format_hwpx_standalone" do
    test "format_hwpx_standalone" do
      content = File.read!("../../test_documents/hwpx/simple.hwpx")

      assert {:ok, result} = Kreuzberg.extract_bytes_sync(content, "application/haansofthwpx")

      assert content_size(result.content) >= 20
      assert String.contains?(to_string(result.content), "Hello from HWPX")
    end
  end

  describe "format_pdf_text" do
    test "format_pdf_text" do
      content = File.read!("../../test_documents/pdf/fake_memo.pdf")

      assert {:ok, result} = Kreuzberg.extract_bytes_sync(content, "application/pdf")

      assert content_size(result.content) >= 50

      # A list of patterns makes String.contains?/2 succeed when any one of
      # them occurs in the text.
      assert String.contains?(to_string(result.content), ["Mallori", "May"])
    end
  end

  describe "format_pptx" do
    test "format_pptx" do
      # Only a successful extraction is required here; the result itself is
      # not inspected, hence the underscore-prefixed binding.
      assert {:ok, _result} =
               Kreuzberg.extract_file_sync(
                 "../../test_documents/pptx/simple.pptx",
                 mime_type:
                   "application/vnd.openxmlformats-officedocument.presentationml.presentation"
               )
    end
  end

  describe "format_xlsx" do
    test "format_xlsx" do
      # Only a successful extraction is required here; the result itself is
      # not inspected, hence the underscore-prefixed binding.
      assert {:ok, _result} =
               Kreuzberg.extract_file_sync(
                 "../../test_documents/xlsx/stanley_cups.xlsx",
                 mime_type: "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
               )
    end
  end

  # Size of extracted content as used by the minimum-length assertions:
  # byte count for a binary, element count for a list. Any other value has no
  # matching clause and raises FunctionClauseError, which fails the calling
  # test (the previous inline check could only raise in that situation too).
  defp content_size(content) when is_binary(content), do: byte_size(content)
  defp content_size(content) when is_list(content), do: length(content)
end