# This file is auto-generated by alef — DO NOT EDIT.
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
# To regenerate: alef generate
# To verify freshness: alef verify --exit-code
# Issues & docs: https://github.com/kreuzberg-dev/alef
# E2e tests for category: smoke

defmodule E2e.SmokeTest do
  # Smoke tests: each test extracts one fixture document through Kreuzberg and
  # checks the reported MIME type, a minimum content size, and (where the
  # fixture text is known) a few expected substrings.
  #
  # NOTE(review): the header says this file is generated; presumably
  # `alef generate` overwrites it, so any change made here should also be
  # ported to the generator template — confirm.
  use ExUnit.Case, async: false

  # Asserts that `content` carries at least `min` units of extracted data:
  # bytes when it is a binary, elements when it is a list. Any other type is
  # reported as an explicit test failure, because neither size measure applies.
  defp assert_min_content(content, min) when is_binary(content) do
    assert byte_size(content) >= min
  end

  defp assert_min_content(content, min) when is_list(content) do
    assert length(content) >= min
  end

  defp assert_min_content(content, _min) do
    flunk("expected content to be a binary or a list, got: #{inspect(content)}")
  end

  # Asserts that the stringified `content` contains at least one of `needles`.
  defp assert_content_contains_any(content, needles) do
    assert String.contains?(to_string(content), needles),
           "expected content to contain at least one of #{inspect(needles)}"
  end

  # Asserts that the stringified `content` contains every one of `needles`,
  # checking them in order and naming the first one that is missing.
  defp assert_content_contains_all(content, needles) do
    text = to_string(content)

    for needle <- needles do
      assert String.contains?(text, needle),
             "expected content to contain #{inspect(needle)}"
    end

    :ok
  end

  describe "ocr_image_png" do
    test "ocr_image_png" do
      content = File.read!("../../test_documents/images/test_hello_world.png")

      assert {:ok, result} = Kreuzberg.extract_bytes_async(content, "image/png", "{}")

      assert String.trim(result.mime_type) == "image/png"
      assert_min_content(result.content, 1)
      assert_content_contains_any(result.content, ["Hello", "World", "hello", "world"])
    end
  end

  describe "smoke_docx_basic" do
    test "smoke_docx_basic" do
      assert {:ok, result} =
               Kreuzberg.extract_file_async("../../test_documents/docx/fake.docx",
                 mime_type:
                   "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
                 config: "{}"
               )

      assert String.trim(result.mime_type) ==
               "application/vnd.openxmlformats-officedocument.wordprocessingml.document"

      assert_min_content(result.content, 20)
      assert_content_contains_any(result.content, ["Lorem", "ipsum", "document", "text"])
    end
  end

  describe "smoke_html_basic" do
    test "smoke_html_basic" do
      assert {:ok, result} =
               Kreuzberg.extract_file_async("../../test_documents/html/simple_table.html",
                 mime_type: "text/html",
                 config: "{}"
               )

      assert String.trim(result.mime_type) == "text/html"
      assert_min_content(result.content, 10)

      assert_content_contains_any(
        result.content,
        ["Sample Data Table", "Laptop", "Electronics", "Product"]
      )
    end
  end

  describe "smoke_image_png" do
    test "smoke_image_png" do
      # OCR is disabled through the config, so only the MIME type is checked.
      assert {:ok, result} =
               Kreuzberg.extract_file_async("../../test_documents/images/sample.png",
                 config: "{\"disable_ocr\":true}"
               )

      assert String.trim(result.mime_type) == "image/png"
    end
  end

  describe "smoke_json_basic" do
    test "smoke_json_basic" do
      assert {:ok, result} =
               Kreuzberg.extract_file_async("../../test_documents/json/simple.json",
                 mime_type: "application/json",
                 config: "{}"
               )

      assert String.trim(result.mime_type) == "application/json"
      assert_min_content(result.content, 5)
    end
  end

  describe "smoke_pdf_basic" do
    test "smoke_pdf_basic" do
      assert {:ok, result} =
               Kreuzberg.extract_file_async("../../test_documents/pdf/fake_memo.pdf",
                 mime_type: "application/pdf",
                 config: "{}"
               )

      assert String.trim(result.mime_type) == "application/pdf"
      assert_min_content(result.content, 50)
      assert_content_contains_any(result.content, ["May 5, 2023", "To Whom it May Concern"])
    end
  end

  describe "smoke_txt_basic" do
    test "smoke_txt_basic" do
      assert {:ok, result} =
               Kreuzberg.extract_file_async("../../test_documents/text/report.txt",
                 mime_type: "text/plain",
                 config: "{}"
               )

      assert String.trim(result.mime_type) == "text/plain"
      assert_min_content(result.content, 5)
    end
  end

  describe "smoke_xlsx_basic" do
    test "smoke_xlsx_basic" do
      assert {:ok, result} =
               Kreuzberg.extract_file_async("../../test_documents/xlsx/stanley_cups.xlsx",
                 mime_type: "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
                 config: "{}"
               )

      assert String.trim(result.mime_type) ==
               "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"

      assert_min_content(result.content, 100)

      assert_content_contains_all(result.content, [
        "Team",
        "Location",
        "Stanley Cups",
        "Blues",
        "Flyers",
        "Maple Leafs",
        "STL",
        "PHI",
        "TOR"
      ])

      # skipped: field 'tables' not available on result type
      # skipped: field 'metadata.format.excel.sheet_count' not available on result type
      # skipped: field 'metadata.format.excel.sheet_names' not available on result type
    end
  end
end