This commit is contained in:
118
e2e/elixir/test/smoke_test.exs
generated
Normal file
118
e2e/elixir/test/smoke_test.exs
generated
Normal file
@@ -0,0 +1,118 @@
|
||||
# This file is auto-generated by alef — DO NOT EDIT.
|
||||
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
# To regenerate: alef generate
|
||||
# To verify freshness: alef verify --exit-code
|
||||
# Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
# E2e tests for category: smoke
|
||||
defmodule E2e.SmokeTest do
|
||||
use ExUnit.Case, async: false
|
||||
|
||||
# Turns an item into the list of text fragments it carries.
#
# A binary is returned as a one-element list. Anything else is treated as a map (or
# struct) and probed, in a fixed order, for the keys :kind, :name, :signature, :path,
# :alias, :text and :source. Keys that are absent or hold nil contribute nothing; atom
# values are stringified and capitalized; every other value is rendered with inspect/1.
defp alef_e2e_item_texts(text) when is_binary(text), do: [text]

defp alef_e2e_item_texts(item) do
  Enum.flat_map([:kind, :name, :signature, :path, :alias, :text, :source], fn field ->
    case Map.fetch(item, field) do
      # Key not present on this item.
      :error -> []
      # Key present but unset.
      {:ok, nil} -> []
      {:ok, value} when is_atom(value) -> [String.capitalize(Atom.to_string(value))]
      {:ok, value} -> [inspect(value)]
    end
  end)
end
|
||||
|
||||
# Reduces a format value to a display string.
#
# A binary is returned untouched. Otherwise the argument's `image`, `pdf` and `html`
# fields are consulted in that order, each one only when the previous did not match:
#   * `image` is a map whose :format is a binary -> that binary
#   * `pdf` is a map                             -> "PDF"
#   * `html` is a map                            -> "HTML"
#   * none of the above                          -> inspect/1 of the whole argument
defp alef_e2e_format_to_string(format) when is_binary(format), do: format

defp alef_e2e_format_to_string(metadata) do
  image = metadata.image

  # cond evaluates top to bottom, so `pdf` and `html` are only read when needed.
  cond do
    match?(%{format: fmt} when is_binary(fmt), image) -> image.format
    is_map(metadata.pdf) -> "PDF"
    is_map(metadata.html) -> "HTML"
    true -> inspect(metadata)
  end
end
|
||||
|
||||
describe "ocr_image_png" do
  # Passes the raw bytes of a PNG fixture to Kreuzberg.extract_bytes_async/3 and checks
  # the reported MIME type, that some content came back, and that the content mentions
  # at least one of the expected words.
  test "ocr_image_png" do
    image_bytes = File.read!("../../test_documents/images/test_hello_world.png")
    {:ok, result} = Kreuzberg.extract_bytes_async(image_bytes, "image/png", "{}")

    assert String.trim(result.mime_type) == "image/png"

    content = result.content

    # Binaries are sized in bytes, lists in elements. Any other shape fails the
    # assertion: String.length/1 only accepts binaries, so nothing else can be sized.
    assert (is_binary(content) and byte_size(content) >= 1) or
             (is_list(content) and length(content) >= 1)

    text = to_string(content)
    assert Enum.any?(["Hello", "World", "hello", "world"], &String.contains?(text, &1))
  end
end
|
||||
|
||||
describe "smoke_docx_basic" do
  # Extracts a DOCX fixture through Kreuzberg.extract_file_async/2 and checks the
  # reported MIME type, a minimum content size, and the presence of at least one of the
  # expected words.
  test "smoke_docx_basic" do
    mime_type = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"

    {:ok, result} =
      Kreuzberg.extract_file_async("../../test_documents/docx/fake.docx",
        mime_type: mime_type,
        config: "{}"
      )

    assert String.trim(result.mime_type) == mime_type

    content = result.content

    # Binaries are sized in bytes, lists in elements. Any other shape fails the
    # assertion: String.length/1 only accepts binaries, so nothing else can be sized.
    assert (is_binary(content) and byte_size(content) >= 20) or
             (is_list(content) and length(content) >= 20)

    text = to_string(content)
    assert Enum.any?(["Lorem", "ipsum", "document", "text"], &String.contains?(text, &1))
  end
end
|
||||
|
||||
describe "smoke_html_basic" do
  # Extracts an HTML fixture through Kreuzberg.extract_file_async/2 and checks the
  # reported MIME type, a minimum content size, and the presence of at least one of the
  # expected phrases.
  test "smoke_html_basic" do
    {:ok, result} =
      Kreuzberg.extract_file_async("../../test_documents/html/simple_table.html",
        mime_type: "text/html",
        config: "{}"
      )

    assert String.trim(result.mime_type) == "text/html"

    content = result.content

    # Binaries are sized in bytes, lists in elements. Any other shape fails the
    # assertion: String.length/1 only accepts binaries, so nothing else can be sized.
    assert (is_binary(content) and byte_size(content) >= 10) or
             (is_list(content) and length(content) >= 10)

    text = to_string(content)
    expected_any = ["Sample Data Table", "Laptop", "Electronics", "Product"]
    assert Enum.any?(expected_any, &String.contains?(text, &1))
  end
end
|
||||
|
||||
describe "smoke_image_png" do
  # Extracts a PNG fixture with the "disable_ocr" config flag set to true and checks
  # only the reported MIME type; no MIME type hint is passed to the call.
  test "smoke_image_png" do
    fixture = "../../test_documents/images/sample.png"
    options = [config: "{\"disable_ocr\":true}"]

    {:ok, extraction} = Kreuzberg.extract_file_async(fixture, options)

    assert String.trim(extraction.mime_type) == "image/png"
  end
end
|
||||
|
||||
describe "smoke_json_basic" do
  # Extracts a JSON fixture through Kreuzberg.extract_file_async/2 and checks the
  # reported MIME type and a minimum content size.
  test "smoke_json_basic" do
    {:ok, result} =
      Kreuzberg.extract_file_async("../../test_documents/json/simple.json",
        mime_type: "application/json",
        config: "{}"
      )

    assert String.trim(result.mime_type) == "application/json"

    content = result.content

    # Binaries are sized in bytes, lists in elements. Any other shape fails the
    # assertion: String.length/1 only accepts binaries, so nothing else can be sized.
    assert (is_binary(content) and byte_size(content) >= 5) or
             (is_list(content) and length(content) >= 5)
  end
end
|
||||
|
||||
describe "smoke_pdf_basic" do
  # Extracts a PDF fixture through Kreuzberg.extract_file_async/2 and checks the
  # reported MIME type, a minimum content size, and the presence of at least one of the
  # expected phrases.
  test "smoke_pdf_basic" do
    {:ok, result} =
      Kreuzberg.extract_file_async("../../test_documents/pdf/fake_memo.pdf",
        mime_type: "application/pdf",
        config: "{}"
      )

    assert String.trim(result.mime_type) == "application/pdf"

    content = result.content

    # Binaries are sized in bytes, lists in elements. Any other shape fails the
    # assertion: String.length/1 only accepts binaries, so nothing else can be sized.
    assert (is_binary(content) and byte_size(content) >= 50) or
             (is_list(content) and length(content) >= 50)

    text = to_string(content)
    assert Enum.any?(["May 5, 2023", "To Whom it May Concern"], &String.contains?(text, &1))
  end
end
|
||||
|
||||
describe "smoke_txt_basic" do
  # Extracts a plain-text fixture through Kreuzberg.extract_file_async/2 and checks the
  # reported MIME type and a minimum content size.
  test "smoke_txt_basic" do
    {:ok, result} =
      Kreuzberg.extract_file_async("../../test_documents/text/report.txt",
        mime_type: "text/plain",
        config: "{}"
      )

    assert String.trim(result.mime_type) == "text/plain"

    content = result.content

    # Binaries are sized in bytes, lists in elements. Any other shape fails the
    # assertion: String.length/1 only accepts binaries, so nothing else can be sized.
    assert (is_binary(content) and byte_size(content) >= 5) or
             (is_list(content) and length(content) >= 5)
  end
end
|
||||
|
||||
describe "smoke_xlsx_basic" do
  # Extracts an XLSX fixture through Kreuzberg.extract_file_async/2 and checks the
  # reported MIME type, a minimum content size, and that every expected header, team
  # name and abbreviation appears in the extracted content.
  test "smoke_xlsx_basic" do
    mime_type = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"

    {:ok, result} =
      Kreuzberg.extract_file_async("../../test_documents/xlsx/stanley_cups.xlsx",
        mime_type: mime_type,
        config: "{}"
      )

    assert String.trim(result.mime_type) == mime_type

    content = result.content

    # Binaries are sized in bytes, lists in elements. Any other shape fails the
    # assertion: String.length/1 only accepts binaries, so nothing else can be sized.
    assert (is_binary(content) and byte_size(content) >= 100) or
             (is_list(content) and length(content) >= 100)

    # Convert once; every substring below must be present (checked in this order).
    text = to_string(content)

    expected_all = [
      "Team",
      "Location",
      "Stanley Cups",
      "Blues",
      "Flyers",
      "Maple Leafs",
      "STL",
      "PHI",
      "TOR"
    ]

    for expected <- expected_all do
      assert String.contains?(text, expected)
    end

    # skipped: field 'tables' not available on result type
    # skipped: field 'metadata.format.excel.sheet_count' not available on result type
    # skipped: field 'metadata.format.excel.sheet_names' not available on result type
  end
end
|
||||
end
|
||||
Reference in New Issue
Block a user