Files
fil/e2e/go/format_specific_test.go
Henrik Jess Nielsen b4c07d3693
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s
Nomad changes
2026-06-01 23:40:55 +02:00

87 lines
3.1 KiB
Go
Generated

// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// E2e tests for category: format_specific
package e2e_test
import (
"os"
"strings"
"testing"
"github.com/stretchr/testify/assert"
kreuzberg "github.com/kreuzberg-dev/kreuzberg/v5"
)
// Test_FormatDocxStandalone covers standalone DOCX extraction through
// extract_bytes_sync: it loads the docx/fake.docx fixture into memory, hands
// the bytes to kreuzberg.ExtractBytesSync together with the DOCX MIME type and
// a zero-value ExtractionConfig, and asserts that len(Content) is at least 20.
func Test_FormatDocxStandalone(t *testing.T) {
	const mimeType = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"

	// An unreadable fixture aborts the test before any extraction is attempted.
	data, err := os.ReadFile("docx/fake.docx")
	if err != nil {
		t.Fatalf("read fixture docx/fake.docx: %v", err)
	}

	extracted, err := kreuzberg.ExtractBytesSync(data, mimeType, kreuzberg.ExtractionConfig{})
	if err != nil {
		t.Fatalf("call failed: %v", err)
	}

	assert.GreaterOrEqual(t, len(extracted.Content), 20, "expected length >= 20")
}
// Test_FormatHwpxStandalone covers standalone HWPX extraction through
// extract_bytes_sync: it reads the hwpx/simple.hwpx fixture, extracts it with
// kreuzberg.ExtractBytesSync using the HWPX MIME type and a zero-value
// ExtractionConfig, then checks that len(Content) is at least 20 and that the
// content contains the text "Hello from HWPX".
func Test_FormatHwpxStandalone(t *testing.T) {
	const (
		mimeType = "application/haansofthwpx"
		wantText = "Hello from HWPX"
	)

	// An unreadable fixture aborts the test before any extraction is attempted.
	data, err := os.ReadFile("hwpx/simple.hwpx")
	if err != nil {
		t.Fatalf("read fixture hwpx/simple.hwpx: %v", err)
	}

	extracted, err := kreuzberg.ExtractBytesSync(data, mimeType, kreuzberg.ExtractionConfig{})
	if err != nil {
		t.Fatalf("call failed: %v", err)
	}

	// Both checks are non-fatal, so a short result still reports the
	// substring mismatch as well.
	assert.GreaterOrEqual(t, len(extracted.Content), 20, "expected length >= 20")
	if found := strings.Contains(string(extracted.Content), wantText); !found {
		t.Errorf("expected to contain %s, got %v", wantText, extracted.Content)
	}
}
// Test_FormatPdfText covers standalone PDF text extraction through
// extract_bytes_sync: it reads the pdf/fake_memo.pdf fixture, extracts it with
// kreuzberg.ExtractBytesSync using the application/pdf MIME type and a
// zero-value ExtractionConfig, then checks that len(Content) is at least 50
// and that the content contains at least one of "Mallori" or "May".
func Test_FormatPdfText(t *testing.T) {
	const mimeType = "application/pdf"

	// An unreadable fixture aborts the test before any extraction is attempted.
	data, err := os.ReadFile("pdf/fake_memo.pdf")
	if err != nil {
		t.Fatalf("read fixture pdf/fake_memo.pdf: %v", err)
	}

	extracted, err := kreuzberg.ExtractBytesSync(data, mimeType, kreuzberg.ExtractionConfig{})
	if err != nil {
		t.Fatalf("call failed: %v", err)
	}

	assert.GreaterOrEqual(t, len(extracted.Content), 50, "expected length >= 50")

	// Either marker is sufficient; the test only fails when both are absent.
	text := string(extracted.Content)
	if !strings.Contains(text, "Mallori") && !strings.Contains(text, "May") {
		t.Errorf("expected to contain at least one of the specified values")
	}
}
// Test_FormatPptx covers PPTX presentation extraction through
// extract_file_sync: it calls kreuzberg.ExtractFileSync on pptx/simple.pptx
// with an explicit PPTX MIME type and a zero-value ExtractionConfig. Only the
// returned error is inspected; the extraction result itself is discarded.
func Test_FormatPptx(t *testing.T) {
	// The MIME type is passed by pointer, so it must live in an addressable variable.
	mimeType := "application/vnd.openxmlformats-officedocument.presentationml.presentation"

	if _, err := kreuzberg.ExtractFileSync("pptx/simple.pptx", &mimeType, kreuzberg.ExtractionConfig{}); err != nil {
		t.Fatalf("call failed: %v", err)
	}
}
// Test_FormatXlsx covers XLSX spreadsheet extraction through
// extract_file_sync: it calls kreuzberg.ExtractFileSync on
// xlsx/stanley_cups.xlsx with an explicit XLSX MIME type and a zero-value
// ExtractionConfig. Only the returned error is inspected; the extraction
// result itself is discarded.
func Test_FormatXlsx(t *testing.T) {
	// The MIME type is passed by pointer, so it must live in an addressable variable.
	mimeType := "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"

	if _, err := kreuzberg.ExtractFileSync("xlsx/stanley_cups.xlsx", &mimeType, kreuzberg.ExtractionConfig{}); err != nil {
		t.Fatalf("call failed: %v", err)
	}
}