Files
fil/e2e/elixir/test/smoke_test.exs
Henrik Jess Nielsen b4c07d3693
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s
Nomad changes
2026-06-01 23:40:55 +02:00

119 lines
6.6 KiB
Elixir
Generated

# This file is auto-generated by alef — DO NOT EDIT.
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
# To regenerate: alef generate
# To verify freshness: alef verify --exit-code
# Issues & docs: https://github.com/kreuzberg-dev/alef
# E2e tests for category: smoke
defmodule E2e.SmokeTest do
  @moduledoc """
  End-to-end smoke tests for the `smoke` category.

  Each test runs one fixture document through `Kreuzberg` and checks the reported
  MIME type and, where applicable, a minimum content size and expected substrings.
  """
  use ExUnit.Case, async: false

  # Size of the extracted content: bytes for a binary, element count for a list.
  defp content_size(content) when is_binary(content), do: byte_size(content)
  defp content_size(content) when is_list(content), do: length(content)

  # Asserts that `result.content` is a binary or a list holding at least `min`
  # bytes/elements. Any other content type fails with a readable message; the
  # earlier inline check fell through to String.length/1 for such values, which
  # raises instead of producing an assertion failure.
  defp assert_min_content_size(result, min) do
    content = result.content

    assert is_binary(content) or is_list(content),
           "expected content to be a binary or a list, got: #{inspect(content)}"

    size = content_size(content)
    assert size >= min, "expected content size >= #{min}, got: #{size}"
  end

  # Asserts that the stringified content contains at least one of `candidates`.
  defp assert_content_contains_any(result, candidates) do
    text = to_string(result.content)

    # String.contains?/2 accepts a list of patterns and is true if any one matches.
    assert String.contains?(text, candidates),
           "expected content to contain one of #{inspect(candidates)}"
  end

  # Asserts that the stringified content contains every string in `expected`.
  # The content is stringified once; the first missing fragment fails the test.
  defp assert_content_contains_all(result, expected) do
    text = to_string(result.content)

    Enum.each(expected, fn fragment ->
      assert String.contains?(text, fragment),
             "expected content to contain #{inspect(fragment)}"
    end)
  end

  describe "ocr_image_png" do
    test "ocr_image_png" do
      content = File.read!("../../test_documents/images/test_hello_world.png")

      assert {:ok, result} = Kreuzberg.extract_bytes_async(content, "image/png", "{}")
      assert String.trim(result.mime_type) == "image/png"
      assert_min_content_size(result, 1)
      assert_content_contains_any(result, ["Hello", "World", "hello", "world"])
    end
  end

  describe "smoke_docx_basic" do
    test "smoke_docx_basic" do
      mime = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"

      assert {:ok, result} =
               Kreuzberg.extract_file_async("../../test_documents/docx/fake.docx",
                 mime_type: mime,
                 config: "{}"
               )

      assert String.trim(result.mime_type) == mime
      assert_min_content_size(result, 20)
      assert_content_contains_any(result, ["Lorem", "ipsum", "document", "text"])
    end
  end

  describe "smoke_html_basic" do
    test "smoke_html_basic" do
      assert {:ok, result} =
               Kreuzberg.extract_file_async("../../test_documents/html/simple_table.html",
                 mime_type: "text/html",
                 config: "{}"
               )

      assert String.trim(result.mime_type) == "text/html"
      assert_min_content_size(result, 10)

      assert_content_contains_any(result, [
        "Sample Data Table",
        "Laptop",
        "Electronics",
        "Product"
      ])
    end
  end

  describe "smoke_image_png" do
    test "smoke_image_png" do
      # No explicit mime_type is passed here and OCR is disabled through the config,
      # so only the reported MIME type is checked.
      assert {:ok, result} =
               Kreuzberg.extract_file_async("../../test_documents/images/sample.png",
                 config: "{\"disable_ocr\":true}"
               )

      assert String.trim(result.mime_type) == "image/png"
    end
  end

  describe "smoke_json_basic" do
    test "smoke_json_basic" do
      assert {:ok, result} =
               Kreuzberg.extract_file_async("../../test_documents/json/simple.json",
                 mime_type: "application/json",
                 config: "{}"
               )

      assert String.trim(result.mime_type) == "application/json"
      assert_min_content_size(result, 5)
    end
  end

  describe "smoke_pdf_basic" do
    test "smoke_pdf_basic" do
      assert {:ok, result} =
               Kreuzberg.extract_file_async("../../test_documents/pdf/fake_memo.pdf",
                 mime_type: "application/pdf",
                 config: "{}"
               )

      assert String.trim(result.mime_type) == "application/pdf"
      assert_min_content_size(result, 50)
      assert_content_contains_any(result, ["May 5, 2023", "To Whom it May Concern"])
    end
  end

  describe "smoke_txt_basic" do
    test "smoke_txt_basic" do
      assert {:ok, result} =
               Kreuzberg.extract_file_async("../../test_documents/text/report.txt",
                 mime_type: "text/plain",
                 config: "{}"
               )

      assert String.trim(result.mime_type) == "text/plain"
      assert_min_content_size(result, 5)
    end
  end

  describe "smoke_xlsx_basic" do
    test "smoke_xlsx_basic" do
      mime = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"

      assert {:ok, result} =
               Kreuzberg.extract_file_async("../../test_documents/xlsx/stanley_cups.xlsx",
                 mime_type: mime,
                 config: "{}"
               )

      assert String.trim(result.mime_type) == mime
      assert_min_content_size(result, 100)

      assert_content_contains_all(result, [
        "Team",
        "Location",
        "Stanley Cups",
        "Blues",
        "Flyers",
        "Maple Leafs",
        "STL",
        "PHI",
        "TOR"
      ])

      # skipped: field 'tables' not available on result type
      # skipped: field 'metadata.format.excel.sheet_count' not available on result type
      # skipped: field 'metadata.format.excel.sheet_names' not available on result type
    end
  end
end