// This file is auto-generated by alef — DO NOT EDIT. // alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75 // To regenerate: alef generate // To verify freshness: alef verify --exit-code // Issues & docs: https://github.com/kreuzberg-dev/alef // E2e tests for category: format_specific package e2e_test import ( "os" "strings" "testing" "github.com/stretchr/testify/assert" kreuzberg "github.com/kreuzberg-dev/kreuzberg/v5" ) func Test_FormatDocxStandalone(t *testing.T) { // Standalone DOCX extraction using extract_bytes_sync contentBytes, contentBytesErr := os.ReadFile(`docx/fake.docx`) if contentBytesErr != nil { t.Fatalf("read fixture docx/fake.docx: %v", contentBytesErr) } result, err := kreuzberg.ExtractBytesSync(contentBytes, `application/vnd.openxmlformats-officedocument.wordprocessingml.document`, kreuzberg.ExtractionConfig{}) if err != nil { t.Fatalf("call failed: %v", err) } assert.GreaterOrEqual(t, len(result.Content), 20, "expected length >= 20") } func Test_FormatHwpxStandalone(t *testing.T) { // Standalone HWPX extraction using extract_bytes_sync contentBytes, contentBytesErr := os.ReadFile(`hwpx/simple.hwpx`) if contentBytesErr != nil { t.Fatalf("read fixture hwpx/simple.hwpx: %v", contentBytesErr) } result, err := kreuzberg.ExtractBytesSync(contentBytes, `application/haansofthwpx`, kreuzberg.ExtractionConfig{}) if err != nil { t.Fatalf("call failed: %v", err) } assert.GreaterOrEqual(t, len(result.Content), 20, "expected length >= 20") if !strings.Contains(string(result.Content), `Hello from HWPX`) { t.Errorf("expected to contain %s, got %v", `Hello from HWPX`, result.Content) } } func Test_FormatPdfText(t *testing.T) { // Standalone PDF text extraction using extract_bytes_sync contentBytes, contentBytesErr := os.ReadFile(`pdf/fake_memo.pdf`) if contentBytesErr != nil { t.Fatalf("read fixture pdf/fake_memo.pdf: %v", contentBytesErr) } result, err := kreuzberg.ExtractBytesSync(contentBytes, `application/pdf`, kreuzberg.ExtractionConfig{}) if err != nil { t.Fatalf("call failed: %v", err) } assert.GreaterOrEqual(t, len(result.Content), 50, "expected length >= 50") { found := false if strings.Contains(string(result.Content), `Mallori`) { found = true } if strings.Contains(string(result.Content), `May`) { found = true } if !found { t.Errorf("expected to contain at least one of the specified values") } } } func Test_FormatPptx(t *testing.T) { // PPTX presentation extraction using extract_file_sync mime_typeVal := `application/vnd.openxmlformats-officedocument.presentationml.presentation` _, err := kreuzberg.ExtractFileSync(`pptx/simple.pptx`, &mime_typeVal, kreuzberg.ExtractionConfig{}) if err != nil { t.Fatalf("call failed: %v", err) } } func Test_FormatXlsx(t *testing.T) { // XLSX spreadsheet extraction using extract_file_sync mime_typeVal := `application/vnd.openxmlformats-officedocument.spreadsheetml.sheet` _, err := kreuzberg.ExtractFileSync(`xlsx/stanley_cups.xlsx`, &mime_typeVal, kreuzberg.ExtractionConfig{}) if err != nil { t.Fatalf("call failed: %v", err) } }