Nomad changes
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s

This commit is contained in:
Henrik Jess Nielsen
2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions

32
e2e/elixir/test/async_test.exs generated Normal file
View File

@@ -0,0 +1,32 @@
# This file is auto-generated by alef — DO NOT EDIT.
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
# To regenerate: alef generate
# To verify freshness: alef verify --exit-code
# Issues & docs: https://github.com/kreuzberg-dev/alef
# E2e tests for category: async
defmodule E2e.AsyncTest do
  # End-to-end checks for `Kreuzberg.extract_bytes_async`.
  # Fixture paths are relative, so they resolve against the current working
  # directory of the test run.
  use ExUnit.Case, async: false

  describe "async_extract_bytes" do
    test "async_extract_bytes" do
      content = File.read!("../../test_documents/pdf/fake_memo.pdf")

      assert {:ok, result} = Kreuzberg.extract_bytes_async(content, "application/pdf")
      assert String.trim(result.mime_type) == "application/pdf"
      assert content_size(result.content) >= 50
    end
  end

  describe "async_extract_bytes_empty_mime" do
    test "async_extract_bytes_empty_mime" do
      content = File.read!("../../test_documents/text/plain.txt")

      assert {:error, _} = Kreuzberg.extract_bytes_async(content, "", "{}")
    end
  end

  describe "async_extract_bytes_invalid_mime" do
    test "async_extract_bytes_invalid_mime" do
      content = File.read!("../../test_documents/text/plain.txt")

      assert {:error, _} =
               Kreuzberg.extract_bytes_async(content, "application/x-nonexistent", "{}")
    end
  end

  # Size of an extracted payload: bytes for binaries, element count for lists.
  # Any other shape fails the test with a readable message instead of raising
  # from a string function applied to a non-string.
  defp content_size(value) when is_binary(value), do: byte_size(value)
  defp content_size(value) when is_list(value), do: length(value)
  defp content_size(other), do: flunk("expected a binary or a list, got: #{inspect(other)}")
end

89
e2e/elixir/test/batch_test.exs generated Normal file
View File

@@ -0,0 +1,89 @@
# This file is auto-generated by alef — DO NOT EDIT.
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
# To regenerate: alef generate
# To verify freshness: alef verify --exit-code
# Issues & docs: https://github.com/kreuzberg-dev/alef
# E2e tests for category: batch
defmodule E2e.BatchTest do
  # Placeholder suite for the batch fixtures. Every test below is tagged
  # `:skip` and only returns `:ok`; the generator's stated reason is that batch
  # functions are excluded from the Elixir binding (unsafe NIF tuple
  # marshalling).
  use ExUnit.Case, async: false

  describe "batch_bytes_invalid_mime" do
    @tag :skip
    test "batch_bytes_invalid_mime" do
      # Excluded from the Elixir binding: unsafe NIF tuple marshalling.
      :ok
    end
  end

  describe "batch_extract_bytes_happy" do
    @tag :skip
    test "batch_extract_bytes_happy" do
      # Excluded from the Elixir binding: unsafe NIF tuple marshalling.
      :ok
    end
  end

  describe "batch_extract_bytes_mixed_format" do
    @tag :skip
    test "batch_extract_bytes_mixed_format" do
      # Excluded from the Elixir binding: unsafe NIF tuple marshalling.
      :ok
    end
  end

  describe "batch_extract_bytes_sync_empty_list" do
    @tag :skip
    test "batch_extract_bytes_sync_empty_list" do
      # Excluded from the Elixir binding: unsafe NIF tuple marshalling.
      :ok
    end
  end

  describe "batch_extract_bytes_sync_invalid_mime" do
    @tag :skip
    test "batch_extract_bytes_sync_invalid_mime" do
      # Excluded from the Elixir binding: unsafe NIF tuple marshalling.
      :ok
    end
  end

  describe "batch_file_async_basic" do
    @tag :skip
    test "batch_file_async_basic" do
      # Excluded from the Elixir binding: unsafe NIF tuple marshalling.
      :ok
    end
  end

  describe "batch_file_async_not_found" do
    @tag :skip
    test "batch_file_async_not_found" do
      # Excluded from the Elixir binding: unsafe NIF tuple marshalling.
      :ok
    end
  end

  describe "batch_file_not_found" do
    @tag :skip
    test "batch_file_not_found" do
      # Excluded from the Elixir binding: unsafe NIF tuple marshalling.
      :ok
    end
  end

  describe "batch_file_partial" do
    @tag :skip
    test "batch_file_partial" do
      # Excluded from the Elixir binding: unsafe NIF tuple marshalling.
      :ok
    end
  end

  describe "batch_file_sync_basic" do
    @tag :skip
    test "batch_file_sync_basic" do
      # Excluded from the Elixir binding: unsafe NIF tuple marshalling.
      :ok
    end
  end
end

19
e2e/elixir/test/code_test.exs generated Normal file
View File

@@ -0,0 +1,19 @@
# This file is auto-generated by alef — DO NOT EDIT.
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
# To regenerate: alef generate
# To verify freshness: alef verify --exit-code
# Issues & docs: https://github.com/kreuzberg-dev/alef
# E2e tests for category: code
defmodule E2e.CodeTest do
  # End-to-end check that a shell script fixture is extracted as source code
  # when the MIME type is passed explicitly.
  use ExUnit.Case, async: false

  describe "code_shebang_detection" do
    test "code_shebang_detection" do
      assert {:ok, result} =
               Kreuzberg.extract_file_sync("../../test_documents/code/script.sh",
                 mime_type: "text/x-source-code"
               )

      assert String.trim(result.mime_type) == "text/x-source-code"
      assert content_size(result.content) >= 10

      # Both words are expected to appear in the extracted script text.
      text = to_string(result.content)
      assert String.contains?(text, "build")
      assert String.contains?(text, "clean")
    end
  end

  # Size of an extracted payload: bytes for binaries, element count for lists.
  # Any other shape fails the test with a readable message instead of raising
  # from a string function applied to a non-string.
  defp content_size(value) when is_binary(value), do: byte_size(value)
  defp content_size(value) when is_list(value), do: length(value)
  defp content_size(other), do: flunk("expected a binary or a list, got: #{inspect(other)}")
end

183
e2e/elixir/test/contract_test.exs generated Normal file
View File

@@ -0,0 +1,183 @@
# This file is auto-generated by alef — DO NOT EDIT.
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
# To regenerate: alef generate
# To verify freshness: alef verify --exit-code
# Issues & docs: https://github.com/kreuzberg-dev/alef
# E2e tests for category: contract
defmodule E2e.ContractTest do
  # End-to-end contract checks for the extraction API: the async/sync file and
  # byte entry points, plus a series of JSON-encoded configuration variants.
  # Fixture paths are relative, so they resolve against the current working
  # directory of the test run.
  use ExUnit.Case, async: false

  @memo_pdf "../../test_documents/pdf/fake_memo.pdf"
  @markdown_config "{\"output_format\":\"markdown\"}"

  describe "api_batch_bytes_async" do
    test "api_batch_bytes_async" do
      assert {:ok, result} = Kreuzberg.extract_file_async(@memo_pdf)
      assert String.trim(result.mime_type) == "application/pdf"
      assert content_size(result.content) >= 10
      assert String.contains?(to_string(result.content), ["May 5, 2023", "Mallori"])
    end
  end

  describe "api_batch_bytes_with_configs_async" do
    test "api_batch_bytes_with_configs_async" do
      assert {:ok, result} = Kreuzberg.extract_file_async(@memo_pdf, config: @markdown_config)
      assert String.trim(result.mime_type) == "application/pdf"
      assert content_size(result.content) >= 10
      # skipped: field 'metadata.output_format' not available on result type
    end
  end

  describe "api_batch_file_async" do
    test "api_batch_file_async" do
      assert {:ok, result} = Kreuzberg.extract_file_async(@memo_pdf)
      assert String.trim(result.mime_type) == "application/pdf"
      assert content_size(result.content) >= 10
      assert String.contains?(to_string(result.content), ["May 5, 2023", "Mallori"])
    end
  end

  describe "api_batch_file_with_configs_async" do
    test "api_batch_file_with_configs_async" do
      assert {:ok, result} = Kreuzberg.extract_file_async(@memo_pdf, config: @markdown_config)
      assert String.trim(result.mime_type) == "application/pdf"
      assert content_size(result.content) >= 10
      # skipped: field 'metadata.output_format' not available on result type
    end
  end

  describe "api_extract_bytes_async" do
    test "api_extract_bytes_async" do
      assert {:ok, result} = Kreuzberg.extract_file_async(@memo_pdf)
      assert String.trim(result.mime_type) == "application/pdf"
      assert content_size(result.content) >= 10
      assert String.contains?(to_string(result.content), ["May 5, 2023", "Mallori"])
    end
  end

  describe "api_extract_file_async" do
    test "api_extract_file_async" do
      assert {:ok, result} = Kreuzberg.extract_file_async(@memo_pdf)
      assert String.trim(result.mime_type) == "application/pdf"
      assert content_size(result.content) >= 10
      assert String.contains?(to_string(result.content), ["May 5, 2023", "Mallori"])
    end
  end

  describe "config_chunking_prepend_heading_context" do
    test "config_chunking_prepend_heading_context" do
      assert {:ok, result} =
               Kreuzberg.extract_file_sync("../../test_documents/markdown/extraction_test.md",
                 config:
                   "{\"chunking\":{\"chunker_type\":\"markdown\",\"max_chars\":300,\"max_overlap\":50,\"prepend_heading_context\":true}}"
               )

      assert content_size(result.content) >= 10

      # NOTE(review): the generator emitted a "field 'chunks' not available on
      # result type" skip marker at this point, yet the assertions below read
      # `result.chunks` — confirm the field exists on the result struct.
      # A nil chunk list is treated as empty, so the `Enum.all?/2` checks pass
      # vacuously and only the first-chunk check fails in that case.
      chunks = result.chunks || []

      assert Enum.all?(chunks, fn c -> c.content != nil and c.content != "" end)
      assert Enum.all?(chunks, fn c -> c.metadata != nil and c.metadata.heading_context != nil end)

      first_chunk_starts_with_heading? =
        case List.first(chunks) do
          chunk when is_map(chunk) ->
            (chunk.content || "")
            |> String.trim_leading()
            |> String.starts_with?("#")

          _ ->
            false
        end

      assert first_chunk_starts_with_heading?
    end
  end

  describe "config_document_structure_with_headings" do
    test "config_document_structure_with_headings" do
      assert {:ok, result} =
               Kreuzberg.extract_file_sync("../../test_documents/docx/fake.docx",
                 config: "{\"include_document_structure\":true}"
               )

      assert String.trim(result.mime_type) ==
               "application/vnd.openxmlformats-officedocument.wordprocessingml.document"

      # skipped: field 'document' not available on result type
      # skipped: field 'document.nodes' not available on result type
    end
  end

  describe "config_element_types" do
    test "config_element_types" do
      assert {:ok, result} =
               Kreuzberg.extract_file_sync("../../test_documents/docx/unit_test_headers.docx",
                 config: "{\"result_format\":\"element_based\"}"
               )

      assert String.contains?(
               to_string(result.mime_type),
               "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
             )

      # skipped: field 'elements' not available on result type
    end
  end

  describe "config_extraction_timeout" do
    test "config_extraction_timeout" do
      assert {:ok, result} =
               Kreuzberg.extract_file_sync(@memo_pdf,
                 config: "{\"extraction_timeout_secs\":300}"
               )

      assert String.trim(result.mime_type) == "application/pdf"
      assert content_size(result.content) >= 10
    end
  end

  describe "config_keywords" do
    test "config_keywords" do
      assert {:ok, result} =
               Kreuzberg.extract_file_sync(@memo_pdf,
                 config: "{\"keywords\":{\"algorithm\":\"yake\",\"max_keywords\":10}}"
               )

      assert String.trim(result.mime_type) == "application/pdf"
      assert content_size(result.content) >= 10
      # skipped (2 assertions): field 'keywords' not available on Elixir ExtractionResult
    end
  end

  describe "config_pages" do
    test "config_pages" do
      assert {:ok, result} =
               Kreuzberg.extract_file_sync(@memo_pdf,
                 config: "{\"pages\":{\"extract_pages\":true,\"insert_page_markers\":true}}"
               )

      assert String.trim(result.mime_type) == "application/pdf"
      assert content_size(result.content) >= 10
      assert String.contains?(to_string(result.content), "PAGE")
    end
  end

  describe "config_quality_enabled" do
    test "config_quality_enabled" do
      assert {:ok, result} =
               Kreuzberg.extract_file_sync(@memo_pdf,
                 config: "{\"enable_quality_processing\":true}"
               )

      assert String.trim(result.mime_type) == "application/pdf"
      assert content_size(result.content) >= 10
      # skipped (3 assertions): field 'quality_score' not available on result type
    end
  end

  describe "config_security_limits" do
    test "config_security_limits" do
      assert {:ok, result} =
               Kreuzberg.extract_file_sync("../../test_documents/archives/documents.zip",
                 config:
                   "{\"security_limits\":{\"max_archive_size\":104857600,\"max_compression_ratio\":50,\"max_files_in_archive\":100}}"
               )

      assert String.contains?(
               to_string(result.mime_type),
               ["application/zip", "application/x-zip-compressed"]
             )

      assert content_size(result.content) >= 10
    end
  end

  describe "config_tree_sitter" do
    test "config_tree_sitter" do
      assert {:ok, result} =
               Kreuzberg.extract_file_sync("../../test_documents/code/hello.py",
                 config:
                   "{\"tree_sitter\":{\"groups\":[\"web\"],\"languages\":[\"python\",\"rust\"],\"process\":{\"comments\":false,\"diagnostics\":false,\"docstrings\":false,\"exports\":true,\"imports\":true,\"structure\":true,\"symbols\":false}}}"
               )

      assert String.trim(result.mime_type) == "text/x-source-code"
      assert content_size(result.content) >= 5
    end
  end

  describe "output_format_bytes_markdown" do
    test "output_format_bytes_markdown" do
      content = File.read!(@memo_pdf)

      assert {:ok, result} =
               Kreuzberg.extract_bytes_sync(content, "application/pdf", @markdown_config)

      assert String.trim(result.mime_type) == "application/pdf"
      assert content_size(result.content) >= 10
      # skipped: field 'metadata.output_format' not available on result type
    end
  end

  describe "output_format_markdown" do
    test "output_format_markdown" do
      assert {:ok, result} = Kreuzberg.extract_file_sync(@memo_pdf, config: @markdown_config)
      assert String.trim(result.mime_type) == "application/pdf"
      assert content_size(result.content) >= 10
      # skipped: field 'metadata.output_format' not available on result type
    end
  end

  # Size of an extracted payload: bytes for binaries, element count for lists.
  # Any other shape fails the test with a readable message instead of raising
  # from a string function applied to a non-string.
  defp content_size(value) when is_binary(value), do: byte_size(value)
  defp content_size(value) when is_list(value), do: length(value)
  defp content_size(other), do: flunk("expected a binary or a list, got: #{inspect(other)}")
end

36
e2e/elixir/test/detection_test.exs generated Normal file
View File

@@ -0,0 +1,36 @@
# This file is auto-generated by alef — DO NOT EDIT.
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
# To regenerate: alef generate
# To verify freshness: alef verify --exit-code
# Issues & docs: https://github.com/kreuzberg-dev/alef
# E2e tests for category: detection
defmodule E2e.DetectionTest do
  # End-to-end checks for MIME detection from raw bytes and for the
  # MIME-to-extension lookup. The detection tests only require an `{:ok, _}`
  # result; they do not inspect the detected value.
  use ExUnit.Case, async: false

  describe "detect_mime_bytes_html" do
    test "detect_mime_bytes_html" do
      content = File.read!("../../test_documents/html/html.html")

      assert {:ok, _result} = Kreuzberg.detect_mime_type_from_bytes(content)
    end
  end

  describe "detect_mime_bytes_pdf" do
    test "detect_mime_bytes_pdf" do
      content = File.read!("../../test_documents/pdf/fake_memo.pdf")

      assert {:ok, _result} = Kreuzberg.detect_mime_type_from_bytes(content)
    end
  end

  describe "detect_mime_bytes_png" do
    test "detect_mime_bytes_png" do
      content = File.read!("../../test_documents/images/test_hello_world.png")

      assert {:ok, _result} = Kreuzberg.detect_mime_type_from_bytes(content)
    end
  end

  describe "get_extensions_unknown_mime" do
    test "get_extensions_unknown_mime" do
      assert {:error, _} = Kreuzberg.get_extensions_for_mime("application/x-totally-unknown")
    end
  end
end

View File

@@ -0,0 +1,21 @@
# This file is auto-generated by alef — DO NOT EDIT.
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
# To regenerate: alef generate
# To verify freshness: alef verify --exit-code
# Issues & docs: https://github.com/kreuzberg-dev/alef
# E2e tests for category: document_extractor_management
defmodule E2e.DocumentExtractorManagementTest do
  # Smoke tests for the document-extractor registry calls. Neither test asserts
  # on the return value; each passes as long as the call does not raise.
  use ExUnit.Case, async: false

  describe "document_extractors_clear" do
    test "document_extractors_clear" do
      _result = Kreuzberg.clear_document_extractors()
    end
  end

  describe "extractors_list" do
    test "extractors_list" do
      _result = Kreuzberg.list_document_extractors()
    end
  end
end

View File

@@ -0,0 +1,29 @@
# This file is auto-generated by alef — DO NOT EDIT.
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
# To regenerate: alef generate
# To verify freshness: alef verify --exit-code
# Issues & docs: https://github.com/kreuzberg-dev/alef
# E2e tests for category: embed_async_pending
defmodule E2e.EmbedAsyncPendingTest do
  # End-to-end checks for `Kreuzberg.embed_texts_async`: empty input, a
  # two-text call, and a call that selects a preset via JSON config.
  use ExUnit.Case, async: false

  describe "embed_texts_async_empty_input" do
    test "embed_texts_async_empty_input" do
      assert {:ok, result} = Kreuzberg.embed_texts_async([])
      # Comparing against [] avoids walking the list just to test emptiness.
      assert result == []
    end
  end

  describe "embed_texts_async_happy" do
    test "embed_texts_async_happy" do
      assert {:ok, result} = Kreuzberg.embed_texts_async(["First", "Second"])
      assert length(result) >= 2
    end
  end

  describe "embed_texts_async_preset_switch" do
    test "embed_texts_async_preset_switch" do
      # Only the `{:ok, _}` shape is checked; the embeddings are not inspected.
      assert {:ok, _result} =
               Kreuzberg.embed_texts_async(
                 ["Text"],
                 "{\"model\":{\"name\":\"balanced\",\"type\":\"preset\"}}"
               )
    end
  end
end

15
e2e/elixir/test/embed_extra_test.exs generated Normal file
View File

@@ -0,0 +1,15 @@
# This file is auto-generated by alef — DO NOT EDIT.
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
# To regenerate: alef generate
# To verify freshness: alef verify --exit-code
# Issues & docs: https://github.com/kreuzberg-dev/alef
# E2e tests for category: embed_extra
defmodule E2e.EmbedExtraTest do
  # End-to-end check that `Kreuzberg.embed_texts` succeeds for a two-text batch
  # with the "balanced" preset. Only the `{:ok, _}` shape is checked.
  use ExUnit.Case, async: false

  describe "embed_texts_batch" do
    test "embed_texts_batch" do
      assert {:ok, _result} =
               Kreuzberg.embed_texts(
                 ["Hello", "World"],
                 "{\"model\":{\"name\":\"balanced\",\"type\":\"preset\"}}"
               )
    end
  end
end

View File

@@ -0,0 +1,21 @@
# This file is auto-generated by alef — DO NOT EDIT.
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
# To regenerate: alef generate
# To verify freshness: alef verify --exit-code
# Issues & docs: https://github.com/kreuzberg-dev/alef
# E2e tests for category: embedding_backend_management
defmodule E2e.EmbeddingBackendManagementTest do
  # Smoke tests for the embedding-backend registry calls. Neither test asserts
  # on the return value; each passes as long as the call does not raise.
  use ExUnit.Case, async: false

  describe "embedding_backends_clear" do
    test "embedding_backends_clear" do
      _result = Kreuzberg.clear_embedding_backends()
    end
  end

  describe "embedding_backends_list" do
    test "embedding_backends_list" do
      _result = Kreuzberg.list_embedding_backends()
    end
  end
end

42
e2e/elixir/test/embeddings_test.exs generated Normal file
View File

@@ -0,0 +1,42 @@
# This file is auto-generated by alef — DO NOT EDIT.
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
# To regenerate: alef generate
# To verify freshness: alef verify --exit-code
# Issues & docs: https://github.com/kreuzberg-dev/alef
# E2e tests for category: embeddings
defmodule E2e.EmbeddingsTest do
  # End-to-end checks for embedding generation and the embedding-preset
  # lookup/listing functions.
  use ExUnit.Case, async: false

  describe "embed_texts_different_preset" do
    test "embed_texts_different_preset" do
      assert {:ok, result} =
               Kreuzberg.embed_texts(
                 ["Hello world", "Test"],
                 "{\"model\":{\"name\":\"multilingual\",\"type\":\"preset\"}}"
               )

      assert length(result) >= 2
    end
  end

  describe "get_embedding_preset_known" do
    test "get_embedding_preset_known" do
      # Smoke test: passes as long as the lookup does not raise.
      _result = Kreuzberg.get_embedding_preset("balanced")
    end
  end

  describe "get_embedding_preset_nominal" do
    test "get_embedding_preset_nominal" do
      # Smoke test: passes as long as the lookup does not raise.
      _result = Kreuzberg.get_embedding_preset("balanced")
    end
  end

  describe "get_embedding_preset_unknown" do
    test "get_embedding_preset_unknown" do
      result = Kreuzberg.get_embedding_preset("nonexistent-xyz")

      # An unknown preset must come back as nil or a blank string.
      assert is_nil(result) or String.trim(result) == ""
    end
  end

  describe "list_embedding_presets_sanity" do
    test "list_embedding_presets_sanity" do
      result = Kreuzberg.list_embedding_presets()

      assert result != ""
    end
  end
end

44
e2e/elixir/test/error_test.exs generated Normal file
View File

@@ -0,0 +1,44 @@
# This file is auto-generated by alef — DO NOT EDIT.
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
# To regenerate: alef generate
# To verify freshness: alef verify --exit-code
# Issues & docs: https://github.com/kreuzberg-dev/alef
# E2e tests for category: error
defmodule E2e.ErrorTest do
  # End-to-end checks for how `Kreuzberg.extract_bytes_sync` responds to
  # degenerate input: an empty file, bad MIME types, and a contradictory OCR
  # configuration.
  use ExUnit.Case, async: false

  describe "error_empty_bytes" do
    test "error_empty_bytes" do
      content = File.read!("../../test_documents/text/empty.txt")

      # Despite the test name, this fixture expects a successful result.
      assert {:ok, _result} = Kreuzberg.extract_bytes_sync(content, "text/plain", "{}")
    end
  end

  describe "error_empty_mime" do
    test "error_empty_mime" do
      content = File.read!("../../test_documents/text/plain.txt")

      assert {:error, _} = Kreuzberg.extract_bytes_sync(content, "", "{}")
    end
  end

  describe "error_extract_bytes_conflicting_ocr" do
    test "error_extract_bytes_conflicting_ocr" do
      content = File.read!("../../test_documents/text/fake_text.txt")

      # disable_ocr and force_ocr are both set; the call must be rejected.
      assert {:error, _} =
               Kreuzberg.extract_bytes_sync(
                 content,
                 "text/plain",
                 "{\"disable_ocr\":true,\"force_ocr\":true}"
               )
    end
  end

  describe "error_invalid_mime_format" do
    test "error_invalid_mime_format" do
      content = File.read!("../../test_documents/text/plain.txt")

      assert {:error, _} = Kreuzberg.extract_bytes_sync(content, "not-a-mime", "{}")
    end
  end

  describe "error_unsupported_mime" do
    test "error_unsupported_mime" do
      content = File.read!("../../test_documents/text/plain.txt")

      assert {:error, _} =
               Kreuzberg.extract_bytes_sync(content, "application/x-nonexistent", "{}")
    end
  end
end

47
e2e/elixir/test/format_specific_test.exs generated Normal file
View File

@@ -0,0 +1,47 @@
# This file is auto-generated by alef — DO NOT EDIT.
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
# To regenerate: alef generate
# To verify freshness: alef verify --exit-code
# Issues & docs: https://github.com/kreuzberg-dev/alef
# E2e tests for category: format_specific
defmodule E2e.FormatSpecificTest do
  # End-to-end extraction checks for individual document formats (DOCX, HWPX,
  # PDF, PPTX, XLSX). The PPTX and XLSX tests only require an `{:ok, _}`
  # result; the others also check the extracted content.
  use ExUnit.Case, async: false

  describe "format_docx_standalone" do
    test "format_docx_standalone" do
      content = File.read!("../../test_documents/docx/fake.docx")

      assert {:ok, result} =
               Kreuzberg.extract_bytes_sync(
                 content,
                 "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
               )

      assert content_size(result.content) >= 20
    end
  end

  describe "format_hwpx_standalone" do
    test "format_hwpx_standalone" do
      content = File.read!("../../test_documents/hwpx/simple.hwpx")

      assert {:ok, result} = Kreuzberg.extract_bytes_sync(content, "application/haansofthwpx")
      assert content_size(result.content) >= 20
      assert String.contains?(to_string(result.content), "Hello from HWPX")
    end
  end

  describe "format_pdf_text" do
    test "format_pdf_text" do
      content = File.read!("../../test_documents/pdf/fake_memo.pdf")

      assert {:ok, result} = Kreuzberg.extract_bytes_sync(content, "application/pdf")
      assert content_size(result.content) >= 50
      assert String.contains?(to_string(result.content), ["Mallori", "May"])
    end
  end

  describe "format_pptx" do
    test "format_pptx" do
      assert {:ok, _result} =
               Kreuzberg.extract_file_sync("../../test_documents/pptx/simple.pptx",
                 mime_type:
                   "application/vnd.openxmlformats-officedocument.presentationml.presentation"
               )
    end
  end

  describe "format_xlsx" do
    test "format_xlsx" do
      assert {:ok, _result} =
               Kreuzberg.extract_file_sync("../../test_documents/xlsx/stanley_cups.xlsx",
                 mime_type: "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
               )
    end
  end

  # Size of an extracted payload: bytes for binaries, element count for lists.
  # Any other shape fails the test with a readable message instead of raising
  # from a string function applied to a non-string.
  defp content_size(value) when is_binary(value), do: byte_size(value)
  defp content_size(value) when is_list(value), do: length(value)
  defp content_size(other), do: flunk("expected a binary or a list, got: #{inspect(other)}")
end

32
e2e/elixir/test/mime_utilities_test.exs generated Normal file
View File

@@ -0,0 +1,32 @@
# This file is auto-generated by alef — DO NOT EDIT.
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
# To regenerate: alef generate
# To verify freshness: alef verify --exit-code
# Issues & docs: https://github.com/kreuzberg-dev/alef
# E2e tests for category: mime_utilities
defmodule E2e.MimeUtilitiesTest do
  # End-to-end checks for the MIME helpers: detection from raw bytes and the
  # MIME-to-extension lookup. Each result is stringified and searched for the
  # expected format name.
  use ExUnit.Case, async: false

  describe "mime_detect_bytes" do
    test "mime_detect_bytes" do
      bytes = File.read!("../../test_documents/pdf/fake_memo.pdf")
      {:ok, detected} = Kreuzberg.detect_mime_type_from_bytes(bytes)

      assert detected |> to_string() |> String.contains?("pdf")
    end
  end

  describe "mime_detect_image" do
    test "mime_detect_image" do
      bytes = File.read!("../../test_documents/images/test_hello_world.png")
      {:ok, detected} = Kreuzberg.detect_mime_type_from_bytes(bytes)

      assert detected |> to_string() |> String.contains?("png")
    end
  end

  describe "mime_get_extensions" do
    test "mime_get_extensions" do
      {:ok, extensions} = Kreuzberg.get_extensions_for_mime("application/pdf")

      assert extensions |> to_string() |> String.contains?("pdf")
    end
  end
end

View File

@@ -0,0 +1,27 @@
# This file is auto-generated by alef — DO NOT EDIT.
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
# To regenerate: alef generate
# To verify freshness: alef verify --exit-code
# Issues & docs: https://github.com/kreuzberg-dev/alef
# E2e tests for category: ocr_backend_management
defmodule E2e.OcrBackendManagementTest do
  # Smoke tests for the OCR-backend registry calls. No test asserts on the
  # return value; each passes as long as the call does not raise.
  use ExUnit.Case, async: false

  describe "ocr_backends_clear" do
    test "ocr_backends_clear" do
      _result = Kreuzberg.clear_ocr_backends()
    end
  end

  describe "ocr_backends_list" do
    test "ocr_backends_list" do
      _result = Kreuzberg.list_ocr_backends()
    end
  end

  describe "ocr_backends_unregister" do
    test "ocr_backends_unregister" do
      _result = Kreuzberg.unregister_ocr_backend("nonexistent-backend-xyz")
    end
  end
end

24
e2e/elixir/test/pdf_test.exs generated Normal file
View File

@@ -0,0 +1,24 @@
# This file is auto-generated by alef — DO NOT EDIT.
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
# To regenerate: alef generate
# To verify freshness: alef verify --exit-code
# Issues & docs: https://github.com/kreuzberg-dev/alef
# E2e tests for category: pdf
defmodule E2e.PdfTest do
  # End-to-end checks for rendering a single PDF page to PNG: the first page
  # must produce a non-trivial payload, and an out-of-range index must fail.
  use ExUnit.Case, async: false

  describe "render_pdf_page_first" do
    test "render_pdf_page_first" do
      pdf_bytes = File.read!("../../test_documents/pdf/fake_memo.pdf")

      assert {:ok, result} = Kreuzberg.render_pdf_page_to_png(pdf_bytes, 0)
      assert content_size(result) >= 100
    end
  end

  describe "render_pdf_page_out_of_range" do
    test "render_pdf_page_out_of_range" do
      pdf_bytes = File.read!("../../test_documents/pdf/fake_memo.pdf")

      assert {:error, _} = Kreuzberg.render_pdf_page_to_png(pdf_bytes, 999)
    end
  end

  # Size of a rendered payload: bytes for binaries, element count for lists.
  # Any other shape fails the test with a readable message instead of raising
  # from a string function applied to a non-string.
  defp content_size(value) when is_binary(value), do: byte_size(value)
  defp content_size(value) when is_list(value), do: length(value)
  defp content_size(other), do: flunk("expected a binary or a list, got: #{inspect(other)}")
end

327
e2e/elixir/test/plugin_api_test.exs generated Normal file
View File

@@ -0,0 +1,327 @@
# This file is auto-generated by alef — DO NOT EDIT.
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
# To regenerate: alef generate
# To verify freshness: alef verify --exit-code
# Issues & docs: https://github.com/kreuzberg-dev/alef
# E2e tests for category: plugin_api
# Defined only when not already loaded, so evaluating this file more than once
# does not redefine the module.
unless Code.ensure_loaded?(E2e.TestStubs.TestStubRegisterDocumentExtractorTraitBridge) do
  defmodule E2e.TestStubs.TestStubRegisterDocumentExtractorTraitBridge do
    @moduledoc false
    # Stub document extractor used by the plugin API tests. Every function
    # ignores its arguments and returns a fixed value.

    def name, do: "test-extractor"
    def version, do: "test"
    def initialize, do: :ok
    def shutdown, do: :ok

    # Arguments are intentionally unused; the stub always reports an empty result.
    def extract_bytes(_content, _mime_type, _config), do: {:ok, %{}}
    def extract_file(_path, _mime_type, _config), do: {:ok, %{}}

    def supported_mime_types, do: []
    def priority, do: 0
    def can_handle(_path, _mime_type), do: false
  end
end
# Defined only when not already loaded, so evaluating this file more than once
# does not redefine the module.
unless Code.ensure_loaded?(E2e.TestStubs.TestStubRegisterDocumentExtractorTraitBridgeGenServer) do
  defmodule E2e.TestStubs.TestStubRegisterDocumentExtractorTraitBridgeGenServer do
    @moduledoc false
    # GenServer that answers `:trait_call` messages by dispatching to the stub
    # document extractor and reporting the JSON-encoded result through
    # `Kreuzberg.Native.complete_trait_call/2`. It keeps no state.
    use GenServer

    alias E2e.TestStubs.TestStubRegisterDocumentExtractorTraitBridge, as: Stub

    def start_link(_opts) do
      GenServer.start_link(__MODULE__, nil)
    end

    @impl true
    def init(_), do: {:ok, nil}

    # Handles `{:trait_call, method, args_json, reply_id}`:
    # decodes the JSON argument map, turns it into a positional argument list,
    # applies the matching stub function, and sends the encoded result back
    # under `reply_id`.
    @impl true
    def handle_info({:trait_call, method_atom, args_json, reply_id}, state) do
      args = Jason.decode!(args_json)
      method_name = to_string(method_atom)
      ordered_args = __alef_ordered_args__(method_name, args)

      # to_existing_atom/1 never creates new atoms from the incoming name.
      result = apply(Stub, String.to_existing_atom(method_name), ordered_args)

      Kreuzberg.Native.complete_trait_call(reply_id, Jason.encode!(result))
      {:noreply, state}
    end

    # Maps a method name plus its decoded argument map to the positional
    # argument list of the stub function. Methods without an explicit clause
    # are accepted only with an empty argument map; anything else raises
    # FunctionClauseError.
    defp __alef_ordered_args__("extract_bytes", args),
      do: [args["content"], args["mime_type"], args["config"]]

    defp __alef_ordered_args__("extract_file", args),
      do: [args["path"], args["mime_type"], args["config"]]

    defp __alef_ordered_args__("supported_mime_types", _args), do: []
    defp __alef_ordered_args__("priority", _args), do: []
    defp __alef_ordered_args__("can_handle", args), do: [args["_path"], args["_mime_type"]]
    defp __alef_ordered_args__("version", _args), do: []
    defp __alef_ordered_args__("initialize", _args), do: []
    defp __alef_ordered_args__("shutdown", _args), do: []
    defp __alef_ordered_args__(_method, args) when map_size(args) == 0, do: []
  end
end
# Defined only when not already loaded, so evaluating this file more than once
# does not redefine the module.
unless Code.ensure_loaded?(E2e.TestStubs.TestStubRegisterEmbeddingBackendTraitBridge) do
  defmodule E2e.TestStubs.TestStubRegisterEmbeddingBackendTraitBridge do
    @moduledoc false
    # Stub embedding backend used by the plugin API tests. Every function
    # returns a fixed value.

    def name, do: "test-embedding-backend"
    def version, do: "test"
    def initialize, do: :ok
    def shutdown, do: :ok
    def dimensions, do: 1

    # The input texts are intentionally unused; the stub returns no embeddings.
    def embed(_texts), do: {:ok, []}
  end
end
# Defined only when not already loaded, so evaluating this file more than once
# does not redefine the module.
unless Code.ensure_loaded?(E2e.TestStubs.TestStubRegisterEmbeddingBackendTraitBridgeGenServer) do
  defmodule E2e.TestStubs.TestStubRegisterEmbeddingBackendTraitBridgeGenServer do
    @moduledoc false
    # GenServer that answers `:trait_call` messages by dispatching to the stub
    # embedding backend and reporting the JSON-encoded result through
    # `Kreuzberg.Native.complete_trait_call/2`. It keeps no state.
    use GenServer

    alias E2e.TestStubs.TestStubRegisterEmbeddingBackendTraitBridge, as: Stub

    def start_link(_opts) do
      GenServer.start_link(__MODULE__, nil)
    end

    @impl true
    def init(_), do: {:ok, nil}

    # Handles `{:trait_call, method, args_json, reply_id}`:
    # decodes the JSON argument map, turns it into a positional argument list,
    # applies the matching stub function, and sends the encoded result back
    # under `reply_id`.
    @impl true
    def handle_info({:trait_call, method_atom, args_json, reply_id}, state) do
      args = Jason.decode!(args_json)
      method_name = to_string(method_atom)
      ordered_args = __alef_ordered_args__(method_name, args)

      # to_existing_atom/1 never creates new atoms from the incoming name.
      result = apply(Stub, String.to_existing_atom(method_name), ordered_args)

      Kreuzberg.Native.complete_trait_call(reply_id, Jason.encode!(result))
      {:noreply, state}
    end

    # Maps a method name plus its decoded argument map to the positional
    # argument list of the stub function. Methods without an explicit clause
    # are accepted only with an empty argument map; anything else raises
    # FunctionClauseError.
    defp __alef_ordered_args__("dimensions", _args), do: []
    defp __alef_ordered_args__("embed", args), do: [args["texts"]]
    defp __alef_ordered_args__("version", _args), do: []
    defp __alef_ordered_args__("initialize", _args), do: []
    defp __alef_ordered_args__("shutdown", _args), do: []
    defp __alef_ordered_args__(_method, args) when map_size(args) == 0, do: []
  end
end
unless Code.ensure_loaded?(E2e.TestStubs.TestStubRegisterOcrBackendTraitBridge) do
  defmodule E2e.TestStubs.TestStubRegisterOcrBackendTraitBridge do
    # Minimal OCR-backend stub for the e2e plugin tests.
    # Every function returns a fixed value and ignores its input.
    def name, do: "test-backend"
    def version, do: "test"
    def initialize, do: :ok
    def shutdown, do: :ok
    def process_image(_image_bytes, _config), do: {:ok, %{}}
    def process_image_file(_path, _config), do: {:ok, %{}}
    def supports_language(_lang), do: false
    def backend_type, do: %{}
    def supported_languages, do: []
    def supports_table_detection, do: false
    def supports_document_processing, do: false
    def process_document(_path, _config), do: {:ok, %{}}
  end
end
unless Code.ensure_loaded?(E2e.TestStubs.TestStubRegisterOcrBackendTraitBridgeGenServer) do
  defmodule E2e.TestStubs.TestStubRegisterOcrBackendTraitBridgeGenServer do
    # Test-only GenServer that receives `{:trait_call, ...}` messages, forwards them to
    # `E2e.TestStubs.TestStubRegisterOcrBackendTraitBridge`, and hands the
    # JSON-encoded result to `Kreuzberg.Native.complete_trait_call/2`.
    # The surrounding `unless` skips the definition when the module is already loaded.
    use GenServer

    # Starts the stub server. The options are ignored and the server keeps no state.
    def start_link(_opts) do
      GenServer.start_link(__MODULE__, nil)
    end

    @impl true
    def init(_), do: {:ok, nil}

    # Decodes the JSON argument map, calls the stub function named by `method_atom`
    # with positionally ordered arguments, and reports the encoded result under `reply_id`.
    @impl true
    def handle_info({:trait_call, method_atom, args_json, reply_id}, state) do
      args = Jason.decode!(args_json)
      method_name = to_string(method_atom)
      ordered_args = __alef_ordered_args__(method_name, args)

      result =
        apply(
          E2e.TestStubs.TestStubRegisterOcrBackendTraitBridge,
          String.to_existing_atom(method_name),
          ordered_args
        )

      result_json = Jason.encode!(result)
      Kreuzberg.Native.complete_trait_call(reply_id, result_json)
      {:noreply, state}
    end

    # Maps the decoded argument map to the positional argument list of each stub function.
    defp __alef_ordered_args__("process_image", args), do: [args["image_bytes"], args["config"]]
    defp __alef_ordered_args__("process_image_file", args), do: [args["path"], args["config"]]
    defp __alef_ordered_args__("supports_language", args), do: [args["lang"]]
    defp __alef_ordered_args__("backend_type", _args), do: []
    defp __alef_ordered_args__("supported_languages", _args), do: []
    defp __alef_ordered_args__("supports_table_detection", _args), do: []
    defp __alef_ordered_args__("supports_document_processing", _args), do: []
    defp __alef_ordered_args__("process_document", args), do: [args["_path"], args["_config"]]
    defp __alef_ordered_args__("version", _args), do: []
    defp __alef_ordered_args__("initialize", _args), do: []
    defp __alef_ordered_args__("shutdown", _args), do: []
    # Methods without an explicit clause are accepted only with an empty argument map;
    # anything else matches no clause and raises.
    defp __alef_ordered_args__(_method, args) when map_size(args) == 0, do: []
  end
end
unless Code.ensure_loaded?(E2e.TestStubs.TestStubRegisterPostProcessorTraitBridge) do
  defmodule E2e.TestStubs.TestStubRegisterPostProcessorTraitBridge do
    # Minimal post-processor stub for the e2e plugin tests.
    # Every function returns a fixed value and ignores its input.
    def name, do: "test-processor"
    def version, do: "test"
    def initialize, do: :ok
    def shutdown, do: :ok
    def process(_result, _config), do: {:ok, nil}
    def processing_stage, do: %{}
    def should_process(_result, _config), do: false
    def estimated_duration_ms(_result), do: 0
    def priority, do: 0
  end
end
unless Code.ensure_loaded?(E2e.TestStubs.TestStubRegisterPostProcessorTraitBridgeGenServer) do
  defmodule E2e.TestStubs.TestStubRegisterPostProcessorTraitBridgeGenServer do
    # Test-only GenServer that receives `{:trait_call, ...}` messages, forwards them to
    # `E2e.TestStubs.TestStubRegisterPostProcessorTraitBridge`, and hands the
    # JSON-encoded result to `Kreuzberg.Native.complete_trait_call/2`.
    # The surrounding `unless` skips the definition when the module is already loaded.
    use GenServer

    # Starts the stub server. The options are ignored and the server keeps no state.
    def start_link(_opts) do
      GenServer.start_link(__MODULE__, nil)
    end

    @impl true
    def init(_), do: {:ok, nil}

    # Decodes the JSON argument map, calls the stub function named by `method_atom`
    # with positionally ordered arguments, and reports the encoded result under `reply_id`.
    @impl true
    def handle_info({:trait_call, method_atom, args_json, reply_id}, state) do
      args = Jason.decode!(args_json)
      method_name = to_string(method_atom)
      ordered_args = __alef_ordered_args__(method_name, args)

      result =
        apply(
          E2e.TestStubs.TestStubRegisterPostProcessorTraitBridge,
          String.to_existing_atom(method_name),
          ordered_args
        )

      result_json = Jason.encode!(result)
      Kreuzberg.Native.complete_trait_call(reply_id, result_json)
      {:noreply, state}
    end

    # Maps the decoded argument map to the positional argument list of each stub function.
    defp __alef_ordered_args__("process", args), do: [args["result"], args["config"]]
    defp __alef_ordered_args__("processing_stage", _args), do: []
    defp __alef_ordered_args__("should_process", args), do: [args["_result"], args["_config"]]
    defp __alef_ordered_args__("estimated_duration_ms", args), do: [args["_result"]]
    defp __alef_ordered_args__("priority", _args), do: []
    defp __alef_ordered_args__("version", _args), do: []
    defp __alef_ordered_args__("initialize", _args), do: []
    defp __alef_ordered_args__("shutdown", _args), do: []
    # Methods without an explicit clause are accepted only with an empty argument map;
    # anything else matches no clause and raises.
    defp __alef_ordered_args__(_method, args) when map_size(args) == 0, do: []
  end
end
unless Code.ensure_loaded?(E2e.TestStubs.TestStubRegisterRendererTraitBridge) do
  defmodule E2e.TestStubs.TestStubRegisterRendererTraitBridge do
    # Minimal renderer stub for the e2e plugin tests.
    # Every function returns a fixed value and ignores its input.
    def name, do: "test-renderer"
    def version, do: "test"
    def initialize, do: :ok
    def shutdown, do: :ok

    # The document is ignored; the stub always renders an empty string.
    def render(_doc), do: {:ok, ""}
  end
end
unless Code.ensure_loaded?(E2e.TestStubs.TestStubRegisterRendererTraitBridgeGenServer) do
  defmodule E2e.TestStubs.TestStubRegisterRendererTraitBridgeGenServer do
    # Test-only GenServer that relays `{:trait_call, ...}` messages to
    # `E2e.TestStubs.TestStubRegisterRendererTraitBridge` and passes the JSON-encoded
    # outcome to `Kreuzberg.Native.complete_trait_call/2`.
    use GenServer

    # Starts the stateless stub server; the options are not used.
    def start_link(_opts), do: GenServer.start_link(__MODULE__, nil)

    @impl true
    def init(_), do: {:ok, nil}

    # Handles one trait call: decode the payload, pick the positional arguments,
    # invoke the stub function, and report the encoded result under `reply_id`.
    @impl true
    def handle_info({:trait_call, method, payload, reply_id}, state) do
      decoded = Jason.decode!(payload)
      method_name = to_string(method)
      call_args = __alef_ordered_args__(method_name, decoded)
      function = String.to_existing_atom(method_name)

      encoded =
        E2e.TestStubs.TestStubRegisterRendererTraitBridge
        |> apply(function, call_args)
        |> Jason.encode!()

      Kreuzberg.Native.complete_trait_call(reply_id, encoded)
      {:noreply, state}
    end

    # Positional argument list for each stub function.
    defp __alef_ordered_args__("render", args), do: [args["doc"]]

    defp __alef_ordered_args__(lifecycle, _args)
         when lifecycle in ["version", "initialize", "shutdown"],
         do: []

    # Any other method is accepted only with an empty argument map.
    defp __alef_ordered_args__(_method, args) when map_size(args) == 0, do: []
  end
end
unless Code.ensure_loaded?(E2e.TestStubs.TestStubRegisterValidatorTraitBridge) do
  defmodule E2e.TestStubs.TestStubRegisterValidatorTraitBridge do
    # Minimal validator stub for the e2e plugin tests.
    # Every function returns a fixed value and ignores its input.
    def name, do: "test-validator"
    def version, do: "test"
    def initialize, do: :ok
    def shutdown, do: :ok
    def validate(_result, _config), do: {:ok, nil}
    def should_validate(_result, _config), do: false
    def priority, do: 0
  end
end
unless Code.ensure_loaded?(E2e.TestStubs.TestStubRegisterValidatorTraitBridgeGenServer) do
  defmodule E2e.TestStubs.TestStubRegisterValidatorTraitBridgeGenServer do
    # Test-only GenServer that receives `{:trait_call, ...}` messages, forwards them to
    # `E2e.TestStubs.TestStubRegisterValidatorTraitBridge`, and hands the
    # JSON-encoded result to `Kreuzberg.Native.complete_trait_call/2`.
    # The surrounding `unless` skips the definition when the module is already loaded.
    use GenServer

    # Starts the stub server. The options are ignored and the server keeps no state.
    def start_link(_opts) do
      GenServer.start_link(__MODULE__, nil)
    end

    @impl true
    def init(_), do: {:ok, nil}

    # Decodes the JSON argument map, calls the stub function named by `method_atom`
    # with positionally ordered arguments, and reports the encoded result under `reply_id`.
    @impl true
    def handle_info({:trait_call, method_atom, args_json, reply_id}, state) do
      args = Jason.decode!(args_json)
      method_name = to_string(method_atom)
      ordered_args = __alef_ordered_args__(method_name, args)

      result =
        apply(
          E2e.TestStubs.TestStubRegisterValidatorTraitBridge,
          String.to_existing_atom(method_name),
          ordered_args
        )

      result_json = Jason.encode!(result)
      Kreuzberg.Native.complete_trait_call(reply_id, result_json)
      {:noreply, state}
    end

    # Maps the decoded argument map to the positional argument list of each stub function.
    defp __alef_ordered_args__("validate", args), do: [args["result"], args["config"]]
    defp __alef_ordered_args__("should_validate", args), do: [args["_result"], args["_config"]]
    defp __alef_ordered_args__("priority", _args), do: []
    defp __alef_ordered_args__("version", _args), do: []
    defp __alef_ordered_args__("initialize", _args), do: []
    defp __alef_ordered_args__("shutdown", _args), do: []
    # Methods without an explicit clause are accepted only with an empty argument map;
    # anything else matches no clause and raises.
    defp __alef_ordered_args__(_method, args) when map_size(args) == 0, do: []
  end
end
defmodule E2e.PluginApiTest do
  use ExUnit.Case, async: false

  # Each test passes as long as the Kreuzberg call does not raise. Return values are
  # not inspected, so they are left unbound rather than assigned to an unused variable.

  describe "register_document_extractor_trait_bridge" do
    test "register_document_extractor_trait_bridge" do
      {:ok, pid} = E2e.TestStubs.TestStubRegisterDocumentExtractorTraitBridgeGenServer.start_link(nil)
      Kreuzberg.register_document_extractor(pid, "test-extractor")
    end
  end

  describe "register_embedding_backend_trait_bridge" do
    test "register_embedding_backend_trait_bridge" do
      {:ok, pid} = E2e.TestStubs.TestStubRegisterEmbeddingBackendTraitBridgeGenServer.start_link(nil)
      Kreuzberg.register_embedding_backend(pid, "test-embedding-backend")
    end
  end

  describe "register_ocr_backend_trait_bridge" do
    test "register_ocr_backend_trait_bridge" do
      {:ok, pid} = E2e.TestStubs.TestStubRegisterOcrBackendTraitBridgeGenServer.start_link(nil)
      Kreuzberg.register_ocr_backend(pid, "test-backend")
    end
  end

  describe "register_post_processor_trait_bridge" do
    test "register_post_processor_trait_bridge" do
      {:ok, pid} = E2e.TestStubs.TestStubRegisterPostProcessorTraitBridgeGenServer.start_link(nil)
      Kreuzberg.register_post_processor(pid, "test-processor")
    end
  end

  describe "register_renderer_trait_bridge" do
    test "register_renderer_trait_bridge" do
      {:ok, pid} = E2e.TestStubs.TestStubRegisterRendererTraitBridgeGenServer.start_link(nil)
      Kreuzberg.register_renderer(pid, "test-renderer")
    end
  end

  describe "register_validator_trait_bridge" do
    test "register_validator_trait_bridge" do
      {:ok, pid} = E2e.TestStubs.TestStubRegisterValidatorTraitBridgeGenServer.start_link(nil)
      Kreuzberg.register_validator(pid, "test-validator")
    end
  end

  describe "unregister_document_extractor_after_register" do
    test "unregister_document_extractor_after_register" do
      Kreuzberg.unregister_document_extractor("test-extractor")
    end
  end

  describe "unregister_embedding_backend_after_register" do
    test "unregister_embedding_backend_after_register" do
      Kreuzberg.unregister_embedding_backend("test-embedding-backend")
    end
  end

  describe "unregister_post_processor_after_register" do
    test "unregister_post_processor_after_register" do
      Kreuzberg.unregister_post_processor("test-processor")
    end
  end

  describe "unregister_renderer_after_register" do
    test "unregister_renderer_after_register" do
      Kreuzberg.unregister_renderer("test-renderer")
    end
  end

  describe "unregister_validator_after_register" do
    test "unregister_validator_after_register" do
      Kreuzberg.unregister_validator("test-validator")
    end
  end
end

View File

@@ -0,0 +1,21 @@
# This file is auto-generated by alef — DO NOT EDIT.
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
# To regenerate: alef generate
# To verify freshness: alef verify --exit-code
# Issues & docs: https://github.com/kreuzberg-dev/alef
# E2e tests for category: post_processor_management
defmodule E2e.PostProcessorManagementTest do
  use ExUnit.Case, async: false

  # Both tests pass as long as the call does not raise; the return value is not
  # inspected, so it is left unbound instead of assigned to an unused variable.

  describe "post_processors_clear" do
    test "post_processors_clear" do
      Kreuzberg.clear_post_processors()
    end
  end

  describe "post_processors_list" do
    test "post_processors_list" do
      Kreuzberg.list_post_processors()
    end
  end
end

View File

@@ -0,0 +1,27 @@
# This file is auto-generated by alef — DO NOT EDIT.
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
# To regenerate: alef generate
# To verify freshness: alef verify --exit-code
# Issues & docs: https://github.com/kreuzberg-dev/alef
# E2e tests for category: registry_operations
defmodule E2e.RegistryOperationsTest do
  use ExUnit.Case, async: false

  # Each test only requires an `{:ok, _}` reply; the payload is not inspected, so it
  # is matched with `_` instead of being bound to an unused variable.

  describe "extensions_docx" do
    test "extensions_docx" do
      assert {:ok, _} =
               Kreuzberg.get_extensions_for_mime(
                 "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
               )
    end
  end

  describe "extensions_html" do
    test "extensions_html" do
      assert {:ok, _} = Kreuzberg.get_extensions_for_mime("text/html")
    end
  end

  describe "extensions_pdf" do
    test "extensions_pdf" do
      assert {:ok, _} = Kreuzberg.get_extensions_for_mime("application/pdf")
    end
  end
end

45
e2e/elixir/test/registry_test.exs generated Normal file
View File

@@ -0,0 +1,45 @@
# This file is auto-generated by alef — DO NOT EDIT.
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
# To regenerate: alef generate
# To verify freshness: alef verify --exit-code
# Issues & docs: https://github.com/kreuzberg-dev/alef
# E2e tests for category: registry
defmodule E2e.RegistryTest do
  use ExUnit.Case, async: false

  # Each test passes as long as the listing call does not raise; the return value is
  # not inspected, so it is left unbound instead of assigned to an unused variable.

  describe "list_document_extractors" do
    test "list_document_extractors" do
      Kreuzberg.list_document_extractors()
    end
  end

  describe "list_embedding_backends" do
    test "list_embedding_backends" do
      Kreuzberg.list_embedding_backends()
    end
  end

  describe "list_ocr_backends" do
    test "list_ocr_backends" do
      Kreuzberg.list_ocr_backends()
    end
  end

  describe "list_post_processors" do
    test "list_post_processors" do
      Kreuzberg.list_post_processors()
    end
  end

  describe "list_renderers" do
    test "list_renderers" do
      Kreuzberg.list_renderers()
    end
  end

  describe "list_validators" do
    test "list_validators" do
      Kreuzberg.list_validators()
    end
  end
end

View File

@@ -0,0 +1,21 @@
# This file is auto-generated by alef — DO NOT EDIT.
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
# To regenerate: alef generate
# To verify freshness: alef verify --exit-code
# Issues & docs: https://github.com/kreuzberg-dev/alef
# E2e tests for category: renderer_management
defmodule E2e.RendererManagementTest do
  use ExUnit.Case, async: false

  # Both tests pass as long as the call does not raise; the return value is not
  # inspected, so it is left unbound instead of assigned to an unused variable.

  describe "renderers_clear" do
    test "renderers_clear" do
      Kreuzberg.clear_renderers()
    end
  end

  describe "renderers_list" do
    test "renderers_list" do
      Kreuzberg.list_renderers()
    end
  end
end

118
e2e/elixir/test/smoke_test.exs generated Normal file
View File

@@ -0,0 +1,118 @@
# This file is auto-generated by alef — DO NOT EDIT.
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
# To regenerate: alef generate
# To verify freshness: alef verify --exit-code
# Issues & docs: https://github.com/kreuzberg-dev/alef
# E2e tests for category: smoke
defmodule E2e.SmokeTest do
  use ExUnit.Case, async: false

  # Turns an item into a list of text fragments: a binary is returned as-is, anything
  # else contributes one fragment per known attribute that is present and non-nil.
  defp alef_e2e_item_texts(item) when is_binary(item), do: [item]

  defp alef_e2e_item_texts(item) do
    for attr <- [:kind, :name, :signature, :path, :alias, :text, :source],
        Map.has_key?(item, attr),
        text <- alef_e2e_attr_texts(Map.get(item, attr)),
        do: text
  end

  # Text fragments for a single attribute value.
  defp alef_e2e_attr_texts(nil), do: []

  defp alef_e2e_attr_texts(value) when is_atom(value),
    do: [String.capitalize(to_string(value))]

  defp alef_e2e_attr_texts(value), do: [inspect(value)]

  # Derives a format label: a binary is returned unchanged; otherwise the image format,
  # then "PDF", then "HTML" are tried before falling back to `inspect/1`.
  defp alef_e2e_format_to_string(value) when is_binary(value), do: value

  defp alef_e2e_format_to_string(metadata) do
    cond do
      match?(%{format: fmt} when is_binary(fmt), metadata.image) -> metadata.image.format
      is_map(metadata.pdf) -> "PDF"
      is_map(metadata.html) -> "HTML"
      true -> inspect(metadata)
    end
  end

  # Size check shared by the tests: bytes for binaries, element count for lists,
  # and `String.length/1` for anything else.
  defp alef_e2e_min_size?(content, min) when is_binary(content), do: byte_size(content) >= min
  defp alef_e2e_min_size?(content, min) when is_list(content), do: length(content) >= min
  defp alef_e2e_min_size?(content, min), do: String.length(content) >= min

  describe "ocr_image_png" do
    test "ocr_image_png" do
      content = File.read!("../../test_documents/images/test_hello_world.png")
      {:ok, result} = Kreuzberg.extract_bytes_async(content, "image/png", "{}")
      assert String.trim(result.mime_type) == "image/png"
      assert alef_e2e_min_size?(result.content, 1)
      text = to_string(result.content)
      assert String.contains?(text, ["Hello", "World", "hello", "world"])
    end
  end

  describe "smoke_docx_basic" do
    test "smoke_docx_basic" do
      {:ok, result} =
        Kreuzberg.extract_file_async("../../test_documents/docx/fake.docx",
          mime_type: "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
          config: "{}"
        )

      assert String.trim(result.mime_type) ==
               "application/vnd.openxmlformats-officedocument.wordprocessingml.document"

      assert alef_e2e_min_size?(result.content, 20)
      text = to_string(result.content)
      assert String.contains?(text, ["Lorem", "ipsum", "document", "text"])
    end
  end

  describe "smoke_html_basic" do
    test "smoke_html_basic" do
      {:ok, result} =
        Kreuzberg.extract_file_async("../../test_documents/html/simple_table.html",
          mime_type: "text/html",
          config: "{}"
        )

      assert String.trim(result.mime_type) == "text/html"
      assert alef_e2e_min_size?(result.content, 10)
      text = to_string(result.content)
      assert String.contains?(text, ["Sample Data Table", "Laptop", "Electronics", "Product"])
    end
  end

  describe "smoke_image_png" do
    test "smoke_image_png" do
      {:ok, result} =
        Kreuzberg.extract_file_async("../../test_documents/images/sample.png",
          config: "{\"disable_ocr\":true}"
        )

      assert String.trim(result.mime_type) == "image/png"
    end
  end

  describe "smoke_json_basic" do
    test "smoke_json_basic" do
      {:ok, result} =
        Kreuzberg.extract_file_async("../../test_documents/json/simple.json",
          mime_type: "application/json",
          config: "{}"
        )

      assert String.trim(result.mime_type) == "application/json"
      assert alef_e2e_min_size?(result.content, 5)
    end
  end

  describe "smoke_pdf_basic" do
    test "smoke_pdf_basic" do
      {:ok, result} =
        Kreuzberg.extract_file_async("../../test_documents/pdf/fake_memo.pdf",
          mime_type: "application/pdf",
          config: "{}"
        )

      assert String.trim(result.mime_type) == "application/pdf"
      assert alef_e2e_min_size?(result.content, 50)
      text = to_string(result.content)
      assert String.contains?(text, ["May 5, 2023", "To Whom it May Concern"])
    end
  end

  describe "smoke_txt_basic" do
    test "smoke_txt_basic" do
      {:ok, result} =
        Kreuzberg.extract_file_async("../../test_documents/text/report.txt",
          mime_type: "text/plain",
          config: "{}"
        )

      assert String.trim(result.mime_type) == "text/plain"
      assert alef_e2e_min_size?(result.content, 5)
    end
  end

  describe "smoke_xlsx_basic" do
    test "smoke_xlsx_basic" do
      {:ok, result} =
        Kreuzberg.extract_file_async("../../test_documents/xlsx/stanley_cups.xlsx",
          mime_type: "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
          config: "{}"
        )

      assert String.trim(result.mime_type) ==
               "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"

      assert alef_e2e_min_size?(result.content, 100)
      text = to_string(result.content)

      # Every expected cell value must appear in the extracted text.
      for expected <- ["Team", "Location", "Stanley Cups", "Blues", "Flyers", "Maple Leafs", "STL", "PHI", "TOR"] do
        assert String.contains?(text, expected)
      end

      # skipped: field 'tables' not available on result type
      # skipped: field 'metadata.format.excel.sheet_count' not available on result type
      # skipped: field 'metadata.format.excel.sheet_names' not available on result type
    end
  end
end

1
e2e/elixir/test/test_helper.exs generated Normal file
View File

@@ -0,0 +1 @@
# Start the ExUnit test framework for this e2e suite.
ExUnit.start()

View File

@@ -0,0 +1,21 @@
# This file is auto-generated by alef — DO NOT EDIT.
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
# To regenerate: alef generate
# To verify freshness: alef verify --exit-code
# Issues & docs: https://github.com/kreuzberg-dev/alef
# E2e tests for category: validator_management
defmodule E2e.ValidatorManagementTest do
  use ExUnit.Case, async: false

  # Both tests pass as long as the call does not raise; the return value is not
  # inspected, so it is left unbound instead of assigned to an unused variable.

  describe "validators_clear" do
    test "validators_clear" do
      Kreuzberg.clear_validators()
    end
  end

  describe "validators_list" do
    test "validators_list" do
      Kreuzberg.list_validators()
    end
  end
end