// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef

using System;
using System.Collections.Generic;
using System.Linq;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Threading.Tasks;
using Xunit;
using Kreuzberg;
using static Kreuzberg.KreuzbergLib;

namespace Kreuzberg
{
    /// <summary>
    /// E2e tests for category: format_specific.
    /// </summary>
    /// <remarks>
    /// Fixture documents are referenced by relative path (for example <c>docx/fake.docx</c>).
    /// Paths handed to <see cref="System.IO.File.ReadAllBytes(string)"/> resolve against the
    /// process working directory, so the test run must start from the fixture root.
    /// </remarks>
    public class FormatSpecificTests
    {
        /// <summary>
        /// JSON options that write enums as snake_case strings and skip default-valued members.
        /// </summary>
        private static readonly JsonSerializerOptions ConfigOptions = new()
        {
            Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
            DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault
        };

        /// <summary>
        /// Standalone DOCX extraction using extract_bytes_sync: the extracted content must be
        /// at least 20 characters long.
        /// </summary>
        [Fact]
        public void Test_FormatDocxStandalone()
        {
            var result = KreuzbergLib.ExtractBytesSync(
                System.IO.File.ReadAllBytes("docx/fake.docx"),
                "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
                null);

            Assert.True(result.Content.Length >= 20, "expected length >= 20");
        }

        /// <summary>
        /// Standalone HWPX extraction using extract_bytes_sync: the extracted content must be
        /// at least 20 characters long and contain "hello from hwpx" in any letter casing.
        /// </summary>
        [Fact]
        public void Test_FormatHwpxStandalone()
        {
            var result = KreuzbergLib.ExtractBytesSync(
                System.IO.File.ReadAllBytes("hwpx/simple.hwpx"),
                "application/haansofthwpx",
                null);

            Assert.True(result.Content.Length >= 20, "expected length >= 20");

            // Ordinal, case-insensitive match: unlike ToLower(), the outcome does not depend on
            // the culture of the machine running the tests.
            Assert.Contains(
                "hello from hwpx",
                result.Content.ToString(),
                StringComparison.OrdinalIgnoreCase);
        }

        /// <summary>
        /// Standalone PDF text extraction using extract_bytes_sync: the extracted content must be
        /// at least 50 characters long and contain "Mallori" or "May".
        /// </summary>
        [Fact]
        public void Test_FormatPdfText()
        {
            var result = KreuzbergLib.ExtractBytesSync(
                System.IO.File.ReadAllBytes("pdf/fake_memo.pdf"),
                "application/pdf",
                null);

            Assert.True(result.Content.Length >= 50, "expected length >= 50");

            // Convert once and reuse for both substring probes.
            var content = result.Content.ToString();
            Assert.True(
                content.Contains("Mallori") || content.Contains("May"),
                "expected to contain at least one of the specified values");
        }

        /// <summary>
        /// PPTX presentation extraction using extract_file_sync. Smoke test: the call must
        /// complete without throwing and hand back a result.
        /// </summary>
        [Fact]
        public void Test_FormatPptx()
        {
            var result = KreuzbergLib.ExtractFileSync(
                "pptx/simple.pptx",
                "application/vnd.openxmlformats-officedocument.presentationml.presentation",
                null);

            // Make the pass condition explicit instead of leaving the local unused.
            Assert.NotNull(result);
        }

        /// <summary>
        /// XLSX spreadsheet extraction using extract_file_sync. Smoke test: the call must
        /// complete without throwing and hand back a result.
        /// </summary>
        [Fact]
        public void Test_FormatXlsx()
        {
            var result = KreuzbergLib.ExtractFileSync(
                "xlsx/stanley_cups.xlsx",
                "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
                null);

            // Make the pass condition explicit instead of leaving the local unused.
            Assert.NotNull(result);
        }
    }
}