74 lines
2.8 KiB
C#
74 lines
2.8 KiB
C#
|
|
// This file is auto-generated by alef — DO NOT EDIT.
|
||
|
|
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||
|
|
// To regenerate: alef generate
|
||
|
|
// To verify freshness: alef verify --exit-code
|
||
|
|
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||
|
|
|
||
|
|
using System;
|
||
|
|
using System.Collections.Generic;
|
||
|
|
using System.Linq;
|
||
|
|
using System.Net.Http;
|
||
|
|
using System.Text;
|
||
|
|
using System.Text.Json;
|
||
|
|
using System.Text.Json.Serialization;
|
||
|
|
using System.Threading.Tasks;
|
||
|
|
using Xunit;
|
||
|
|
using Kreuzberg;
|
||
|
|
using static Kreuzberg.KreuzbergLib;
|
||
|
|
|
||
|
|
namespace Kreuzberg
|
||
|
|
{
|
||
|
|
/// <summary>
/// End-to-end tests for the <c>format_specific</c> category.
/// </summary>
/// <remarks>
/// Each test feeds one fixture document to <c>KreuzbergLib</c> and, where the fixture
/// defines expectations, checks the extracted content. Fixture paths are relative.
/// </remarks>
public class FormatSpecificTests
{
    // Serializer settings: enums are converted as snake_case strings and members holding
    // their default value are skipped when writing. Not referenced by the tests below.
    private static readonly JsonSerializerOptions ConfigOptions = new()
    {
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
    };

    /// <summary>
    /// Extracts a DOCX fixture from an in-memory byte array and expects a content length of at least 20.
    /// </summary>
    [Fact]
    public void Test_FormatDocxStandalone()
    {
        // Standalone DOCX extraction using extract_bytes_sync
        var bytes = System.IO.File.ReadAllBytes("docx/fake.docx");
        var result = KreuzbergLib.ExtractBytesSync(bytes, "application/vnd.openxmlformats-officedocument.wordprocessingml.document", null);

        Assert.True(result.Content.Length >= 20, "expected length >= 20");
    }

    /// <summary>
    /// Extracts an HWPX fixture from an in-memory byte array and expects a content length of at least 20
    /// plus the phrase "hello from hwpx" in any letter case.
    /// </summary>
    [Fact]
    public void Test_FormatHwpxStandalone()
    {
        // Standalone HWPX extraction using extract_bytes_sync
        var bytes = System.IO.File.ReadAllBytes("hwpx/simple.hwpx");
        var result = KreuzbergLib.ExtractBytesSync(bytes, "application/haansofthwpx", null);

        Assert.True(result.Content.Length >= 20, "expected length >= 20");

        // Ordinal, case-insensitive match: unlike ToLower(), the outcome does not depend on
        // the culture of the machine running the tests.
        Assert.Contains("hello from hwpx", result.Content.ToString(), StringComparison.OrdinalIgnoreCase);
    }

    /// <summary>
    /// Extracts a PDF fixture from an in-memory byte array and expects a content length of at least 50
    /// that mentions "Mallori" or "May".
    /// </summary>
    [Fact]
    public void Test_FormatPdfText()
    {
        // Standalone PDF text extraction using extract_bytes_sync
        var bytes = System.IO.File.ReadAllBytes("pdf/fake_memo.pdf");
        var result = KreuzbergLib.ExtractBytesSync(bytes, "application/pdf", null);

        Assert.True(result.Content.Length >= 50, "expected length >= 50");

        // Convert once and reuse for both substring probes.
        var text = result.Content.ToString();
        var mentionsExpected = text.Contains("Mallori", StringComparison.Ordinal)
            || text.Contains("May", StringComparison.Ordinal);
        Assert.True(mentionsExpected, "expected to contain at least one of the specified values");
    }

    /// <summary>
    /// Extracts a PPTX fixture by file path. No content expectations are defined for this fixture,
    /// so the test only requires that extraction completes and yields a result.
    /// </summary>
    [Fact]
    public void Test_FormatPptx()
    {
        // PPTX presentation extraction using extract_file_sync
        var result = KreuzbergLib.ExtractFileSync("pptx/simple.pptx", "application/vnd.openxmlformats-officedocument.presentationml.presentation", null);

        // NOTE(review): assumes the result is a reference type; for a struct this is trivially true.
        Assert.NotNull(result);
    }

    /// <summary>
    /// Extracts an XLSX fixture by file path. No content expectations are defined for this fixture,
    /// so the test only requires that extraction completes and yields a result.
    /// </summary>
    [Fact]
    public void Test_FormatXlsx()
    {
        // XLSX spreadsheet extraction using extract_file_sync
        var result = KreuzbergLib.ExtractFileSync("xlsx/stanley_cups.xlsx", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", null);

        // NOTE(review): assumes the result is a reference type; for a struct this is trivially true.
        Assert.NotNull(result);
    }
}
|
||
|
|
}
|