This commit is contained in:
86
e2e/go/format_specific_test.go
generated
Normal file
86
e2e/go/format_specific_test.go
generated
Normal file
@@ -0,0 +1,86 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
// E2e tests for category: format_specific
|
||||
package e2e_test
|
||||
|
||||
import (
|
||||
"os"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
|
||||
kreuzberg "github.com/kreuzberg-dev/kreuzberg/v5"
|
||||
)
|
||||
|
||||
func Test_FormatDocxStandalone(t *testing.T) {
|
||||
// Standalone DOCX extraction using extract_bytes_sync
|
||||
contentBytes, contentBytesErr := os.ReadFile(`docx/fake.docx`)
|
||||
if contentBytesErr != nil {
|
||||
t.Fatalf("read fixture docx/fake.docx: %v", contentBytesErr)
|
||||
}
|
||||
result, err := kreuzberg.ExtractBytesSync(contentBytes, `application/vnd.openxmlformats-officedocument.wordprocessingml.document`, kreuzberg.ExtractionConfig{})
|
||||
if err != nil {
|
||||
t.Fatalf("call failed: %v", err)
|
||||
}
|
||||
assert.GreaterOrEqual(t, len(result.Content), 20, "expected length >= 20")
|
||||
}
|
||||
|
||||
func Test_FormatHwpxStandalone(t *testing.T) {
|
||||
// Standalone HWPX extraction using extract_bytes_sync
|
||||
contentBytes, contentBytesErr := os.ReadFile(`hwpx/simple.hwpx`)
|
||||
if contentBytesErr != nil {
|
||||
t.Fatalf("read fixture hwpx/simple.hwpx: %v", contentBytesErr)
|
||||
}
|
||||
result, err := kreuzberg.ExtractBytesSync(contentBytes, `application/haansofthwpx`, kreuzberg.ExtractionConfig{})
|
||||
if err != nil {
|
||||
t.Fatalf("call failed: %v", err)
|
||||
}
|
||||
assert.GreaterOrEqual(t, len(result.Content), 20, "expected length >= 20")
|
||||
if !strings.Contains(string(result.Content), `Hello from HWPX`) {
|
||||
t.Errorf("expected to contain %s, got %v", `Hello from HWPX`, result.Content)
|
||||
}
|
||||
}
|
||||
|
||||
func Test_FormatPdfText(t *testing.T) {
|
||||
// Standalone PDF text extraction using extract_bytes_sync
|
||||
contentBytes, contentBytesErr := os.ReadFile(`pdf/fake_memo.pdf`)
|
||||
if contentBytesErr != nil {
|
||||
t.Fatalf("read fixture pdf/fake_memo.pdf: %v", contentBytesErr)
|
||||
}
|
||||
result, err := kreuzberg.ExtractBytesSync(contentBytes, `application/pdf`, kreuzberg.ExtractionConfig{})
|
||||
if err != nil {
|
||||
t.Fatalf("call failed: %v", err)
|
||||
}
|
||||
assert.GreaterOrEqual(t, len(result.Content), 50, "expected length >= 50")
|
||||
{
|
||||
found := false
|
||||
if strings.Contains(string(result.Content), `Mallori`) { found = true }
|
||||
if strings.Contains(string(result.Content), `May`) { found = true }
|
||||
if !found {
|
||||
t.Errorf("expected to contain at least one of the specified values")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func Test_FormatPptx(t *testing.T) {
|
||||
// PPTX presentation extraction using extract_file_sync
|
||||
mime_typeVal := `application/vnd.openxmlformats-officedocument.presentationml.presentation`
|
||||
_, err := kreuzberg.ExtractFileSync(`pptx/simple.pptx`, &mime_typeVal, kreuzberg.ExtractionConfig{})
|
||||
if err != nil {
|
||||
t.Fatalf("call failed: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func Test_FormatXlsx(t *testing.T) {
|
||||
// XLSX spreadsheet extraction using extract_file_sync
|
||||
mime_typeVal := `application/vnd.openxmlformats-officedocument.spreadsheetml.sheet`
|
||||
_, err := kreuzberg.ExtractFileSync(`xlsx/stanley_cups.xlsx`, &mime_typeVal, kreuzberg.ExtractionConfig{})
|
||||
if err != nil {
|
||||
t.Fatalf("call failed: %v", err)
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user