Files
fil/e2e/csharp/tests/SmokeTests.cs
Henrik Jess Nielsen b4c07d3693
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s
Nomad changes
2026-06-01 23:40:55 +02:00

121 lines
6.1 KiB
C#
Generated

// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Threading.Tasks;
using Xunit;
using Kreuzberg;
using static Kreuzberg.KreuzbergLib;
namespace Kreuzberg
{
/// <summary>
/// E2e tests for category: smoke.
/// Each test runs one extraction through <c>KreuzbergLib</c> against a fixture file
/// (paths are relative to the test working directory) and checks the reported MIME
/// type plus, where applicable, a minimum content length and expected text fragments.
/// </summary>
public class SmokeTests
{
    // Serializer options emitted by the generator; no test in this class references them.
    private static readonly JsonSerializerOptions ConfigOptions = new() { Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) }, DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault };

    /// <summary>
    /// Asserts that <paramref name="content"/> contains at least one of
    /// <paramref name="candidates"/>, using an ordinal, case-sensitive comparison.
    /// </summary>
    /// <param name="content">Extracted text to search.</param>
    /// <param name="candidates">Fragments of which at least one must be present.</param>
    private static void AssertContainsAny(string content, params string[] candidates)
    {
        // The failure message lists the candidates so a failing run shows what was looked for.
        Assert.True(
            candidates.Any(candidate => content.Contains(candidate, StringComparison.Ordinal)),
            $"expected to contain at least one of the specified values: {string.Join(", ", candidates)}");
    }

    /// <summary>Extracts a PNG from an in-memory byte array and checks MIME type, length and text.</summary>
    [Fact]
    public async Task Test_OcrImagePng()
    {
        // OCR: PNG image extraction with OCR enabled. In WASM this exercises the Uint8Array bridge parameter and Promise await in the generated OcrBackend bridge.
        // Read the fixture asynchronously so the async test does not block on file I/O.
        var imageBytes = await System.IO.File.ReadAllBytesAsync("images/test_hello_world.png");
        var result = await KreuzbergLib.ExtractBytesAsync(imageBytes, "image/png", ExtractionConfig.FromJson("{}"));

        // `?.` turns a null MIME type into an assertion failure rather than a NullReferenceException.
        Assert.Equal("image/png", result.MimeType?.Trim());
        Assert.True(result.Content.Length >= 1, "expected length >= 1");
        var content = result.Content.ToString();
        AssertContainsAny(content, "Hello", "World", "hello", "world");
    }

    /// <summary>Extracts a DOCX fixture and checks MIME type, length and text.</summary>
    [Fact]
    public async Task Test_SmokeDocxBasic()
    {
        // Smoke test: DOCX with formatted text
        var result = await KreuzbergLib.ExtractFileAsync("docx/fake.docx", "application/vnd.openxmlformats-officedocument.wordprocessingml.document", new ExtractionConfig());

        Assert.Equal("application/vnd.openxmlformats-officedocument.wordprocessingml.document", result.MimeType?.Trim());
        Assert.True(result.Content.Length >= 20, "expected length >= 20");
        var content = result.Content.ToString();
        AssertContainsAny(content, "Lorem", "ipsum", "document", "text");
    }

    /// <summary>Extracts an HTML table fixture and checks MIME type, length and text.</summary>
    [Fact]
    public async Task Test_SmokeHtmlBasic()
    {
        // Smoke test: HTML table extraction
        var result = await KreuzbergLib.ExtractFileAsync("html/simple_table.html", "text/html", new ExtractionConfig());

        Assert.Equal("text/html", result.MimeType?.Trim());
        Assert.True(result.Content.Length >= 10, "expected length >= 10");
        var content = result.Content.ToString();
        AssertContainsAny(content, "Sample Data Table", "Laptop", "Electronics", "Product");
    }

    /// <summary>Extracts a PNG fixture with OCR disabled and no MIME hint; checks only the reported MIME type.</summary>
    [Fact]
    public async Task Test_SmokeImagePng()
    {
        // Smoke test: PNG image (without OCR, metadata only)
        var result = await KreuzbergLib.ExtractFileAsync("images/sample.png", null, new ExtractionConfig { DisableOcr = true });

        Assert.Equal("image/png", result.MimeType?.Trim());
    }

    /// <summary>Extracts a JSON fixture and checks MIME type and minimum length.</summary>
    [Fact]
    public async Task Test_SmokeJsonBasic()
    {
        // Smoke test: JSON file extraction
        var result = await KreuzbergLib.ExtractFileAsync("json/simple.json", "application/json", new ExtractionConfig());

        Assert.Equal("application/json", result.MimeType?.Trim());
        Assert.True(result.Content.Length >= 5, "expected length >= 5");
    }

    /// <summary>Extracts a PDF fixture and checks MIME type, length and text.</summary>
    [Fact]
    public async Task Test_SmokePdfBasic()
    {
        // Smoke test: PDF with simple text extraction
        var result = await KreuzbergLib.ExtractFileAsync("pdf/fake_memo.pdf", "application/pdf", new ExtractionConfig());

        Assert.Equal("application/pdf", result.MimeType?.Trim());
        Assert.True(result.Content.Length >= 50, "expected length >= 50");
        var content = result.Content.ToString();
        AssertContainsAny(content, "May 5, 2023", "To Whom it May Concern");
    }

    /// <summary>Extracts a plain-text fixture and checks MIME type and minimum length.</summary>
    [Fact]
    public async Task Test_SmokeTxtBasic()
    {
        // Smoke test: Plain text file
        var result = await KreuzbergLib.ExtractFileAsync("text/report.txt", "text/plain", new ExtractionConfig());

        Assert.Equal("text/plain", result.MimeType?.Trim());
        Assert.True(result.Content.Length >= 5, "expected length >= 5");
    }

    /// <summary>Extracts an XLSX fixture and checks MIME type, length and a set of case-insensitive fragments.</summary>
    [Fact]
    public async Task Test_SmokeXlsxBasic()
    {
        // Smoke test: XLSX with basic spreadsheet data including tables
        var result = await KreuzbergLib.ExtractFileAsync("xlsx/stanley_cups.xlsx", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", new ExtractionConfig());

        Assert.Equal("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", result.MimeType?.Trim());
        Assert.True(result.Content.Length >= 100, "expected length >= 100");

        var content = result.Content.ToString();
        string[] expectedFragments =
        {
            "team",
            "location",
            "stanley cups",
            "blues",
            "flyers",
            "maple leafs",
            "stl",
            "phi",
            "tor",
        };

        // Ordinal, case-insensitive matching keeps the result independent of the current
        // culture; a culture-sensitive ToLower() maps 'I' to dotless 'ı' under e.g. tr-TR,
        // which would break the "phi" check against upper-case source text.
        foreach (var fragment in expectedFragments)
        {
            Assert.Contains(fragment, content, StringComparison.OrdinalIgnoreCase);
        }

        // skipped: field 'tables' not available on result type
        // skipped: field 'metadata.format.excel.sheet_count' not available on result type
        // skipped: field 'metadata.format.excel.sheet_names' not available on result type
    }
}
}