87 lines
3.1 KiB
Go
Generated
87 lines
3.1 KiB
Go
Generated
// This file is auto-generated by alef — DO NOT EDIT.
|
|
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
|
// To regenerate: alef generate
|
|
// To verify freshness: alef verify --exit-code
|
|
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
|
|
|
// E2e tests for category: format_specific
|
|
package e2e_test
|
|
|
|
import (
|
|
"os"
|
|
"strings"
|
|
"testing"
|
|
|
|
"github.com/stretchr/testify/assert"
|
|
|
|
kreuzberg "github.com/kreuzberg-dev/kreuzberg/v5"
|
|
)
|
|
|
|
func Test_FormatDocxStandalone(t *testing.T) {
|
|
// Standalone DOCX extraction using extract_bytes_sync
|
|
contentBytes, contentBytesErr := os.ReadFile(`docx/fake.docx`)
|
|
if contentBytesErr != nil {
|
|
t.Fatalf("read fixture docx/fake.docx: %v", contentBytesErr)
|
|
}
|
|
result, err := kreuzberg.ExtractBytesSync(contentBytes, `application/vnd.openxmlformats-officedocument.wordprocessingml.document`, kreuzberg.ExtractionConfig{})
|
|
if err != nil {
|
|
t.Fatalf("call failed: %v", err)
|
|
}
|
|
assert.GreaterOrEqual(t, len(result.Content), 20, "expected length >= 20")
|
|
}
|
|
|
|
func Test_FormatHwpxStandalone(t *testing.T) {
|
|
// Standalone HWPX extraction using extract_bytes_sync
|
|
contentBytes, contentBytesErr := os.ReadFile(`hwpx/simple.hwpx`)
|
|
if contentBytesErr != nil {
|
|
t.Fatalf("read fixture hwpx/simple.hwpx: %v", contentBytesErr)
|
|
}
|
|
result, err := kreuzberg.ExtractBytesSync(contentBytes, `application/haansofthwpx`, kreuzberg.ExtractionConfig{})
|
|
if err != nil {
|
|
t.Fatalf("call failed: %v", err)
|
|
}
|
|
assert.GreaterOrEqual(t, len(result.Content), 20, "expected length >= 20")
|
|
if !strings.Contains(string(result.Content), `Hello from HWPX`) {
|
|
t.Errorf("expected to contain %s, got %v", `Hello from HWPX`, result.Content)
|
|
}
|
|
}
|
|
|
|
func Test_FormatPdfText(t *testing.T) {
|
|
// Standalone PDF text extraction using extract_bytes_sync
|
|
contentBytes, contentBytesErr := os.ReadFile(`pdf/fake_memo.pdf`)
|
|
if contentBytesErr != nil {
|
|
t.Fatalf("read fixture pdf/fake_memo.pdf: %v", contentBytesErr)
|
|
}
|
|
result, err := kreuzberg.ExtractBytesSync(contentBytes, `application/pdf`, kreuzberg.ExtractionConfig{})
|
|
if err != nil {
|
|
t.Fatalf("call failed: %v", err)
|
|
}
|
|
assert.GreaterOrEqual(t, len(result.Content), 50, "expected length >= 50")
|
|
{
|
|
found := false
|
|
if strings.Contains(string(result.Content), `Mallori`) { found = true }
|
|
if strings.Contains(string(result.Content), `May`) { found = true }
|
|
if !found {
|
|
t.Errorf("expected to contain at least one of the specified values")
|
|
}
|
|
}
|
|
}
|
|
|
|
func Test_FormatPptx(t *testing.T) {
|
|
// PPTX presentation extraction using extract_file_sync
|
|
mime_typeVal := `application/vnd.openxmlformats-officedocument.presentationml.presentation`
|
|
_, err := kreuzberg.ExtractFileSync(`pptx/simple.pptx`, &mime_typeVal, kreuzberg.ExtractionConfig{})
|
|
if err != nil {
|
|
t.Fatalf("call failed: %v", err)
|
|
}
|
|
}
|
|
|
|
func Test_FormatXlsx(t *testing.T) {
|
|
// XLSX spreadsheet extraction using extract_file_sync
|
|
mime_typeVal := `application/vnd.openxmlformats-officedocument.spreadsheetml.sheet`
|
|
_, err := kreuzberg.ExtractFileSync(`xlsx/stanley_cups.xlsx`, &mime_typeVal, kreuzberg.ExtractionConfig{})
|
|
if err != nil {
|
|
t.Fatalf("call failed: %v", err)
|
|
}
|
|
}
|