# Source listing: fil/e2e/elixir/test/format_specific_test.exs (48 lines, 2.5 KiB, Elixir)
# Listing timestamp: 2026-06-01 23:40:55 +02:00
# This file is auto-generated by alef — DO NOT EDIT.
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
# To regenerate: alef generate
# To verify freshness: alef verify --exit-code
# Issues & docs: https://github.com/kreuzberg-dev/alef
# E2e tests for category: format_specific
# End-to-end tests for the `format_specific` category. Each test hands one sample
# document to Kreuzberg (as raw bytes or as a file path) together with its MIME type
# and asserts on the `{:ok, result}` tuple that comes back.
#
# Fixture paths are relative, so the tests depend on the working directory the
# suite is started from.
defmodule E2e.FormatSpecificTest do
  use ExUnit.Case, async: false

  describe "format_docx_standalone" do
    test "format_docx_standalone" do
      content = File.read!("../../test_documents/docx/fake.docx")

      assert {:ok, result} =
               Kreuzberg.extract_bytes_sync(
                 content,
                 "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
               )

      assert content_size(result.content) >= 20
    end
  end

  describe "format_hwpx_standalone" do
    test "format_hwpx_standalone" do
      content = File.read!("../../test_documents/hwpx/simple.hwpx")

      assert {:ok, result} = Kreuzberg.extract_bytes_sync(content, "application/haansofthwpx")

      assert content_size(result.content) >= 20
      assert String.contains?(to_string(result.content), "Hello from HWPX")
    end
  end

  describe "format_pdf_text" do
    test "format_pdf_text" do
      content = File.read!("../../test_documents/pdf/fake_memo.pdf")

      assert {:ok, result} = Kreuzberg.extract_bytes_sync(content, "application/pdf")

      assert content_size(result.content) >= 50

      # Passes when at least one of the expected words occurs in the extracted text;
      # `String.contains?/2` with a list of patterns is true if any pattern matches.
      assert String.contains?(to_string(result.content), ["Mallori", "May"])
    end
  end

  describe "format_pptx" do
    test "format_pptx" do
      # Only checks that extraction succeeds; the result itself is not inspected.
      assert {:ok, _result} =
               Kreuzberg.extract_file_sync(
                 "../../test_documents/pptx/simple.pptx",
                 mime_type:
                   "application/vnd.openxmlformats-officedocument.presentationml.presentation"
               )
    end
  end

  describe "format_xlsx" do
    test "format_xlsx" do
      # Only checks that extraction succeeds; the result itself is not inspected.
      assert {:ok, _result} =
               Kreuzberg.extract_file_sync(
                 "../../test_documents/xlsx/stanley_cups.xlsx",
                 mime_type: "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
               )
    end
  end

  # Size of the extracted content: byte count for a binary, element count for a list.
  # Any other shape fails the test with an explicit message instead of crashing inside
  # a string function that cannot handle it.
  defp content_size(content) when is_binary(content), do: byte_size(content)
  defp content_size(content) when is_list(content), do: length(content)

  defp content_size(other) do
    flunk("expected content to be a binary or a list, got: #{inspect(other)}")
  end
end