Files
fil/e2e/go/smoke_test.go

197 lines
7.3 KiB
Go
Raw Permalink Normal View History

2026-06-01 23:40:55 +02:00
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// E2e tests for category: smoke
package e2e_test
import (
"encoding/json"
"os"
"strings"
"testing"
"github.com/stretchr/testify/assert"
kreuzberg "github.com/kreuzberg-dev/kreuzberg/v5"
)
// Test_OcrImagePng reads the images/test_hello_world.png fixture into memory,
// passes the bytes to kreuzberg.ExtractBytes with a default ExtractionConfig,
// and checks the reported MIME type, that the content is non-empty, and that
// the content contains at least one of the expected words.
//
// Fixture note: OCR: PNG image extraction with OCR enabled. In WASM this
// exercises the Uint8Array bridge parameter and Promise await in the generated
// OcrBackend bridge.
func Test_OcrImagePng(t *testing.T) {
	imageBytes, readErr := os.ReadFile(`images/test_hello_world.png`)
	if readErr != nil {
		t.Fatalf("read fixture images/test_hello_world.png: %v", readErr)
	}

	result, err := kreuzberg.ExtractBytes(imageBytes, `image/png`, kreuzberg.ExtractionConfig{})
	if err != nil {
		t.Fatalf("call failed: %v", err)
	}

	// The MIME type is compared after trimming surrounding whitespace.
	gotMime := strings.TrimSpace(string(result.MimeType))
	if gotMime != `image/png` {
		t.Errorf("equals mismatch: got %v", result.MimeType)
	}

	assert.GreaterOrEqual(t, len(result.Content), 1, "expected length >= 1")

	// Any single candidate is enough for the check to pass.
	text := string(result.Content)
	matched := false
	for _, candidate := range []string{`Hello`, `World`, `hello`, `world`} {
		if strings.Contains(text, candidate) {
			matched = true
			break
		}
	}
	if !matched {
		t.Errorf("expected to contain at least one of the specified values")
	}
}
// Test_SmokeDocxBasic is the smoke test for a DOCX with formatted text. It
// extracts docx/fake.docx with an explicit MIME type and checks the reported
// MIME type, that len(result.Content) is at least 20, and that the content
// contains at least one of the expected words.
func Test_SmokeDocxBasic(t *testing.T) {
	const docxMime = `application/vnd.openxmlformats-officedocument.wordprocessingml.document`

	// ExtractFile takes the MIME type by pointer, so an addressable copy is needed.
	mimeHint := docxMime
	result, err := kreuzberg.ExtractFile(`docx/fake.docx`, &mimeHint, kreuzberg.ExtractionConfig{})
	if err != nil {
		t.Fatalf("call failed: %v", err)
	}

	gotMime := strings.TrimSpace(string(result.MimeType))
	if gotMime != docxMime {
		t.Errorf("equals mismatch: got %v", result.MimeType)
	}

	assert.GreaterOrEqual(t, len(result.Content), 20, "expected length >= 20")

	// Any single candidate is enough for the check to pass.
	text := string(result.Content)
	matched := false
	for _, candidate := range []string{`Lorem`, `ipsum`, `document`, `text`} {
		if strings.Contains(text, candidate) {
			matched = true
			break
		}
	}
	if !matched {
		t.Errorf("expected to contain at least one of the specified values")
	}
}
// Test_SmokeHtmlBasic is the smoke test for HTML table extraction. It extracts
// html/simple_table.html with an explicit text/html MIME type and checks the
// reported MIME type, that len(result.Content) is at least 10, and that the
// content contains at least one of the expected strings.
func Test_SmokeHtmlBasic(t *testing.T) {
	const htmlMime = `text/html`

	// ExtractFile takes the MIME type by pointer, so an addressable copy is needed.
	mimeHint := htmlMime
	result, err := kreuzberg.ExtractFile(`html/simple_table.html`, &mimeHint, kreuzberg.ExtractionConfig{})
	if err != nil {
		t.Fatalf("call failed: %v", err)
	}

	gotMime := strings.TrimSpace(string(result.MimeType))
	if gotMime != htmlMime {
		t.Errorf("equals mismatch: got %v", result.MimeType)
	}

	assert.GreaterOrEqual(t, len(result.Content), 10, "expected length >= 10")

	// Any single candidate is enough for the check to pass.
	text := string(result.Content)
	matched := false
	for _, candidate := range []string{`Sample Data Table`, `Laptop`, `Electronics`, `Product`} {
		if strings.Contains(text, candidate) {
			matched = true
			break
		}
	}
	if !matched {
		t.Errorf("expected to contain at least one of the specified values")
	}
}
// Test_SmokeImagePng is the smoke test for a PNG image (without OCR, metadata
// only). It decodes a config with disable_ocr set to true, extracts
// images/sample.png with no MIME type hint (nil), and checks only the
// reported MIME type.
func Test_SmokeImagePng(t *testing.T) {
	cfg := kreuzberg.ExtractionConfig{}
	parseErr := json.Unmarshal([]byte(`{"disable_ocr":true}`), &cfg)
	if parseErr != nil {
		t.Fatalf("config parse failed: %v", parseErr)
	}

	result, err := kreuzberg.ExtractFile(`images/sample.png`, nil, cfg)
	if err != nil {
		t.Fatalf("call failed: %v", err)
	}

	// The MIME type is compared after trimming surrounding whitespace.
	gotMime := strings.TrimSpace(string(result.MimeType))
	if gotMime != `image/png` {
		t.Errorf("equals mismatch: got %v", result.MimeType)
	}
}
// Test_SmokeJsonBasic is the smoke test for JSON file extraction. It extracts
// json/simple.json with an explicit application/json MIME type and checks the
// reported MIME type and that len(result.Content) is at least 5.
func Test_SmokeJsonBasic(t *testing.T) {
	const jsonMime = `application/json`

	// ExtractFile takes the MIME type by pointer, so an addressable copy is needed.
	mimeHint := jsonMime
	result, err := kreuzberg.ExtractFile(`json/simple.json`, &mimeHint, kreuzberg.ExtractionConfig{})
	if err != nil {
		t.Fatalf("call failed: %v", err)
	}

	gotMime := strings.TrimSpace(string(result.MimeType))
	if gotMime != jsonMime {
		t.Errorf("equals mismatch: got %v", result.MimeType)
	}

	assert.GreaterOrEqual(t, len(result.Content), 5, "expected length >= 5")
}
// Test_SmokePdfBasic is the smoke test for a PDF with simple text extraction.
// It extracts pdf/fake_memo.pdf with an explicit application/pdf MIME type and
// checks the reported MIME type, that len(result.Content) is at least 50, and
// that the content contains at least one of the expected phrases.
func Test_SmokePdfBasic(t *testing.T) {
	const pdfMime = `application/pdf`

	// ExtractFile takes the MIME type by pointer, so an addressable copy is needed.
	mimeHint := pdfMime
	result, err := kreuzberg.ExtractFile(`pdf/fake_memo.pdf`, &mimeHint, kreuzberg.ExtractionConfig{})
	if err != nil {
		t.Fatalf("call failed: %v", err)
	}

	gotMime := strings.TrimSpace(string(result.MimeType))
	if gotMime != pdfMime {
		t.Errorf("equals mismatch: got %v", result.MimeType)
	}

	assert.GreaterOrEqual(t, len(result.Content), 50, "expected length >= 50")

	// Any single candidate is enough for the check to pass.
	text := string(result.Content)
	matched := false
	for _, candidate := range []string{`May 5, 2023`, `To Whom it May Concern`} {
		if strings.Contains(text, candidate) {
			matched = true
			break
		}
	}
	if !matched {
		t.Errorf("expected to contain at least one of the specified values")
	}
}
// Test_SmokeTxtBasic is the smoke test for a plain text file. It extracts
// text/report.txt with an explicit text/plain MIME type and checks the
// reported MIME type and that len(result.Content) is at least 5.
func Test_SmokeTxtBasic(t *testing.T) {
	const txtMime = `text/plain`

	// ExtractFile takes the MIME type by pointer, so an addressable copy is needed.
	mimeHint := txtMime
	result, err := kreuzberg.ExtractFile(`text/report.txt`, &mimeHint, kreuzberg.ExtractionConfig{})
	if err != nil {
		t.Fatalf("call failed: %v", err)
	}

	gotMime := strings.TrimSpace(string(result.MimeType))
	if gotMime != txtMime {
		t.Errorf("equals mismatch: got %v", result.MimeType)
	}

	assert.GreaterOrEqual(t, len(result.Content), 5, "expected length >= 5")
}
// Test_SmokeXlsxBasic is the smoke test for an XLSX with basic spreadsheet
// data including tables. It extracts xlsx/stanley_cups.xlsx with an explicit
// spreadsheet MIME type and checks the reported MIME type, that
// len(result.Content) is at least 100, and that every expected string is
// present in the content.
func Test_SmokeXlsxBasic(t *testing.T) {
	const xlsxMime = `application/vnd.openxmlformats-officedocument.spreadsheetml.sheet`

	// ExtractFile takes the MIME type by pointer, so an addressable copy is needed.
	mimeHint := xlsxMime
	result, err := kreuzberg.ExtractFile(`xlsx/stanley_cups.xlsx`, &mimeHint, kreuzberg.ExtractionConfig{})
	if err != nil {
		t.Fatalf("call failed: %v", err)
	}

	gotMime := strings.TrimSpace(string(result.MimeType))
	if gotMime != xlsxMime {
		t.Errorf("equals mismatch: got %v", result.MimeType)
	}

	assert.GreaterOrEqual(t, len(result.Content), 100, "expected length >= 100")

	// Unlike an any-of check, every entry is required; each missing entry is
	// reported separately, in this order.
	text := string(result.Content)
	required := []string{
		`Team`,
		`Location`,
		`Stanley Cups`,
		`Blues`,
		`Flyers`,
		`Maple Leafs`,
		`STL`,
		`PHI`,
		`TOR`,
	}
	for _, needle := range required {
		if !strings.Contains(text, needle) {
			t.Errorf("expected to contain %s", needle)
		}
	}

	// skipped: field 'tables' not available on result type
	// skipped: field 'metadata.format.excel.sheet_count' not available on result type
	// skipped: field 'metadata.format.excel.sheet_names' not available on result type
}