This commit is contained in:
20
e2e/csharp/Kreuzberg.E2eTests.csproj
generated
Normal file
20
e2e/csharp/Kreuzberg.E2eTests.csproj
generated
Normal file
@@ -0,0 +1,20 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">

  <!-- Compiler and language settings for the generated C# end-to-end test assembly. -->
  <PropertyGroup>
    <TargetFramework>net10.0</TargetFramework>
    <ImplicitUsings>enable</ImplicitUsings>
    <Nullable>enable</Nullable>
  </PropertyGroup>

  <!-- Project role: a test project that is never packed and emits no generated assembly info. -->
  <PropertyGroup>
    <IsTestProject>true</IsTestProject>
    <IsPackable>false</IsPackable>
    <GenerateAssemblyInfo>false</GenerateAssemblyInfo>
  </PropertyGroup>

  <!-- Test framework and runner packages. -->
  <ItemGroup>
    <PackageReference Include="xunit" Version="2.9.3" />
    <PackageReference Include="xunit.runner.visualstudio" Version="3.1.5" />
    <PackageReference Include="Microsoft.NET.Test.Sdk" Version="18.5.1" />
  </ItemGroup>

  <!-- The C# binding exercised by these tests, referenced from source. -->
  <ItemGroup>
    <ProjectReference Include="../../packages/csharp/Kreuzberg/Kreuzberg.csproj" />
  </ItemGroup>

</Project>
|
||||
42
e2e/csharp/TestSetup.cs
generated
Normal file
42
e2e/csharp/TestSetup.cs
generated
Normal file
@@ -0,0 +1,42 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
using System;
|
||||
using System.IO;
|
||||
using System.Runtime.CompilerServices;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// Process-wide bootstrap for the e2e suite: positions the working directory
/// so that relative fixture paths used by the tests can be resolved.
/// </summary>
internal static class TestSetup
{
    /// <summary>
    /// Walks upward from <see cref="AppContext.BaseDirectory"/> looking for the
    /// repository root.
    /// </summary>
    /// <remarks>
    /// At each level a <c>test_documents/</c> directory wins: the current
    /// directory is switched to it so that fixture paths such as
    /// <c>"docx/fake.docx"</c> resolve relative to it. If the level instead
    /// holds an <c>alef.toml</c> file or a <c>fixtures/</c> directory, the
    /// search stops there and the working directory is left untouched. When no
    /// marker is found before the filesystem root, nothing is changed.
    /// </remarks>
    [ModuleInitializer]
    internal static void Init()
    {
        for (var dir = new DirectoryInfo(AppContext.BaseDirectory); dir is not null; dir = dir.Parent)
        {
            var documentsCandidate = Path.Combine(dir.FullName, "test_documents");
            if (Directory.Exists(documentsCandidate))
            {
                Directory.SetCurrentDirectory(documentsCandidate);
                return;
            }

            // Repos without document fixtures: a marker still identifies the
            // root, so stop walking here without changing directory.
            var hasRootMarker = File.Exists(Path.Combine(dir.FullName, "alef.toml"))
                || Directory.Exists(Path.Combine(dir.FullName, "fixtures"));
            if (hasRootMarker)
            {
                return;
            }
        }
    }
}
|
||||
58
e2e/csharp/tests/AsyncTests.cs
generated
Normal file
58
e2e/csharp/tests/AsyncTests.cs
generated
Normal file
@@ -0,0 +1,58 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Net.Http;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
using System.Threading.Tasks;
|
||||
using Xunit;
|
||||
using Kreuzberg;
|
||||
using static Kreuzberg.KreuzbergLib;
|
||||
|
||||
namespace Kreuzberg
{
    /// <summary>E2e tests for category: async.</summary>
    public class AsyncTests
    {
        // Serializer settings emitted into every generated suite (snake_case enum
        // names, default values omitted on write). No test in this class reads it.
        private static readonly JsonSerializerOptions ConfigOptions = new()
        {
            Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
            DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
        };

        [Fact]
        public async Task Test_AsyncExtractBytes()
        {
            // Async extract_bytes call on PDF document
            var pdfBytes = await System.IO.File.ReadAllBytesAsync("pdf/fake_memo.pdf");

            var result = await KreuzbergLib.ExtractBytesAsync(pdfBytes, "application/pdf", null);

            Assert.Equal("application/pdf", result.MimeType!.Trim());
            Assert.True(result.Content.Length >= 50, "expected length >= 50");
        }

        [Fact]
        public async Task Test_AsyncExtractBytesEmptyMime()
        {
            // extract_bytes empty MIME async
            // The fixture is read before the assertion so that only the extraction
            // call itself runs inside the delegate checked for KreuzbergException.
            var textBytes = await System.IO.File.ReadAllBytesAsync("text/plain.txt");

            await Assert.ThrowsAnyAsync<KreuzbergException>(async () =>
            {
                await KreuzbergLib.ExtractBytesAsync(textBytes, "", ExtractionConfig.FromJson("{}"));
            });
        }

        [Fact]
        public async Task Test_AsyncExtractBytesInvalidMime()
        {
            // extract_bytes unsupported MIME async
            // Fixture read hoisted out of the asserted delegate (see the empty-MIME test).
            var textBytes = await System.IO.File.ReadAllBytesAsync("text/plain.txt");

            await Assert.ThrowsAnyAsync<KreuzbergException>(async () =>
            {
                await KreuzbergLib.ExtractBytesAsync(textBytes, "application/x-nonexistent", ExtractionConfig.FromJson("{}"));
            });
        }
    }
}
|
||||
110
e2e/csharp/tests/BatchTests.cs
generated
Normal file
110
e2e/csharp/tests/BatchTests.cs
generated
Normal file
@@ -0,0 +1,110 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Net.Http;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
using System.Threading.Tasks;
|
||||
using Xunit;
|
||||
using Kreuzberg;
|
||||
using static Kreuzberg.KreuzbergLib;
|
||||
|
||||
namespace Kreuzberg
{
    /// <summary>E2e tests for category: batch.</summary>
    /// <remarks>
    /// Tests that contain no assertion are smoke tests: they pass as long as the
    /// batch call returns without throwing.
    /// </remarks>
    public class BatchTests
    {
        // Serializer settings emitted into every generated suite (snake_case enum
        // names, default values omitted on write). No test in this class reads it.
        private static readonly JsonSerializerOptions ConfigOptions = new()
        {
            Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
            DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
        };

        [Fact]
        public void Test_BatchBytesInvalidMime()
        {
            // batch_extract_bytes_sync invalid MIME
            var items = new List<BatchBytesItem>
            {
                new BatchBytesItem { Content = Encoding.UTF8.GetBytes("Hello"), MimeType = "application/x-nonexistent" },
            };

            _ = KreuzbergLib.BatchExtractBytesSync(items, null);
        }

        [Fact]
        public async Task Test_BatchExtractBytesHappy()
        {
            // batch_extract_bytes: happy path with mixed inputs
            var items = new List<BatchBytesItem>
            {
                new BatchBytesItem { Content = Encoding.UTF8.GetBytes("Hello, world!"), MimeType = "text/plain" },
                new BatchBytesItem { Content = Encoding.UTF8.GetBytes("<html><body>Test</body></html>"), MimeType = "text/html" },
            };

            var result = await KreuzbergLib.BatchExtractBytesAsync(items, null);

            Assert.True(result.Count >= 1, "expected at least 1 elements");
        }

        [Fact]
        public async Task Test_BatchExtractBytesMixedFormat()
        {
            // batch_extract_bytes: handles unsupported MIME gracefully
            var items = new List<BatchBytesItem>
            {
                new BatchBytesItem { Content = Encoding.UTF8.GetBytes("PDF placeholder"), MimeType = "application/x-unknown" },
            };

            _ = await KreuzbergLib.BatchExtractBytesAsync(items, null);
        }

        [Fact]
        public void Test_BatchExtractBytesSyncEmptyList()
        {
            // batch_extract_bytes_sync: empty batch
            var result = KreuzbergLib.BatchExtractBytesSync(new List<BatchBytesItem>(), null);

            Assert.Empty(result);
        }

        [Fact]
        public void Test_BatchExtractBytesSyncInvalidMime()
        {
            // batch_extract_bytes_sync: unsupported MIME
            var items = new List<BatchBytesItem>
            {
                new BatchBytesItem { Content = Encoding.UTF8.GetBytes("data"), MimeType = "application/x-unknown" },
            };

            _ = KreuzbergLib.BatchExtractBytesSync(items, null);
        }

        [Fact]
        public async Task Test_BatchFileAsyncBasic()
        {
            // Extract text from multiple files asynchronously
            var items = new List<BatchFileItem>
            {
                new BatchFileItem { Path = "pdf/fake_memo.pdf" },
                new BatchFileItem { Path = "text/fake_text.txt" },
            };

            _ = await KreuzbergLib.BatchExtractFilesAsync(items, null);
        }

        [Fact]
        public async Task Test_BatchFileAsyncNotFound()
        {
            // batch_extract_file async nonexistent
            var items = new List<BatchFileItem>
            {
                new BatchFileItem { Path = "/nonexistent/a.pdf" },
            };

            _ = await KreuzbergLib.BatchExtractFilesAsync(items, null);
        }

        [Fact]
        public void Test_BatchFileNotFound()
        {
            // batch_extract_file_sync nonexistent
            var items = new List<BatchFileItem>
            {
                new BatchFileItem { Path = "/nonexistent/a.pdf" },
                new BatchFileItem { Path = "/nonexistent/b.txt" },
            };

            _ = KreuzbergLib.BatchExtractFilesSync(items, null);
        }

        [Fact]
        public void Test_BatchFilePartial()
        {
            // batch_extract_file_sync mixed
            var items = new List<BatchFileItem>
            {
                new BatchFileItem { Path = "text/plain.txt" },
                new BatchFileItem { Path = "/nonexistent/missing.pdf" },
            };

            _ = KreuzbergLib.BatchExtractFilesSync(items, null);
        }

        [Fact]
        public void Test_BatchFileSyncBasic()
        {
            // Extract text from multiple files synchronously
            var items = new List<BatchFileItem>
            {
                new BatchFileItem { Path = "pdf/fake_memo.pdf" },
                new BatchFileItem { Path = "text/fake_text.txt" },
            };

            _ = KreuzbergLib.BatchExtractFilesSync(items, null);
        }
    }
}
|
||||
40
e2e/csharp/tests/CodeTests.cs
generated
Normal file
40
e2e/csharp/tests/CodeTests.cs
generated
Normal file
@@ -0,0 +1,40 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Net.Http;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
using System.Threading.Tasks;
|
||||
using Xunit;
|
||||
using Kreuzberg;
|
||||
using static Kreuzberg.KreuzbergLib;
|
||||
|
||||
namespace Kreuzberg
{
    /// <summary>E2e tests for category: code.</summary>
    public class CodeTests
    {
        // Serializer settings emitted into every generated suite (snake_case enum
        // names, default values omitted on write). No test in this class reads it.
        private static readonly JsonSerializerOptions ConfigOptions = new()
        {
            Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
            DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
        };

        [Fact]
        public void Test_CodeShebangDetection()
        {
            // Test language detection from shebang line; the script is loaded
            // through the file-path API with an explicit source-code MIME type.
            var result = KreuzbergLib.ExtractFileSync("code/script.sh", "text/x-source-code", null);

            Assert.Equal("text/x-source-code", result.MimeType!.Trim());
            Assert.True(result.Content.Length >= 10, "expected length >= 10");

            // Case-insensitive ordinal matching: lower-casing with the current
            // culture can remap letters (e.g. 'I' under Turkish cultures) and
            // make a plain ASCII needle miss.
            var content = result.Content.ToString();
            Assert.Contains("build", content, StringComparison.OrdinalIgnoreCase);
            Assert.Contains("clean", content, StringComparison.OrdinalIgnoreCase);
        }
    }
}
|
||||
203
e2e/csharp/tests/ContractTests.cs
generated
Normal file
203
e2e/csharp/tests/ContractTests.cs
generated
Normal file
@@ -0,0 +1,203 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Net.Http;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
using System.Threading.Tasks;
|
||||
using Xunit;
|
||||
using Kreuzberg;
|
||||
using static Kreuzberg.KreuzbergLib;
|
||||
|
||||
namespace Kreuzberg
{
    /// <summary>E2e tests for category: contract.</summary>
    /// <remarks>
    /// NOTE(review): several tests are named and described after the batch or
    /// bytes APIs (batch_extract_bytes, batch_extract_file, extract_bytes) but
    /// the emitted call is <c>ExtractFileAsync</c> — confirm the fixture-to-call
    /// mapping in the generator.
    /// </remarks>
    public class ContractTests
    {
        // Serializer settings emitted into every generated suite (snake_case enum
        // names, default values omitted on write). No test in this class reads it.
        private static readonly JsonSerializerOptions ConfigOptions = new()
        {
            Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
            DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
        };

        [Fact]
        public async Task Test_ApiBatchBytesAsync()
        {
            // Tests async batch bytes extraction API (batch_extract_bytes)
            var result = await KreuzbergLib.ExtractFileAsync("pdf/fake_memo.pdf", null, null);

            Assert.Equal("application/pdf", result.MimeType!.Trim());
            Assert.True(result.Content.Length >= 10, "expected length >= 10");
            var content = result.Content.ToString();
            Assert.True(content.Contains("May 5, 2023") || content.Contains("Mallori"), "expected to contain at least one of the specified values");
        }

        [Fact]
        public async Task Test_ApiBatchBytesWithConfigsAsync()
        {
            // Tests async batch bytes extraction with per-file configs (batch_extract_bytes with file_configs parameter)
            var config = new ExtractionConfig { OutputFormat = OutputFormat.Markdown };

            var result = await KreuzbergLib.ExtractFileAsync("pdf/fake_memo.pdf", null, config);

            Assert.Equal("application/pdf", result.MimeType!.Trim());
            Assert.True(result.Content.Length >= 10, "expected length >= 10");
            // skipped: field 'metadata.output_format' not available on result type
        }

        [Fact]
        public async Task Test_ApiBatchFileAsync()
        {
            // Tests async batch file extraction API (batch_extract_file)
            var result = await KreuzbergLib.ExtractFileAsync("pdf/fake_memo.pdf", null, null);

            Assert.Equal("application/pdf", result.MimeType!.Trim());
            Assert.True(result.Content.Length >= 10, "expected length >= 10");
            var content = result.Content.ToString();
            Assert.True(content.Contains("May 5, 2023") || content.Contains("Mallori"), "expected to contain at least one of the specified values");
        }

        [Fact]
        public async Task Test_ApiBatchFileWithConfigsAsync()
        {
            // Tests async batch file extraction with per-file configs (batch_extract_files with file_configs parameter)
            var config = new ExtractionConfig { OutputFormat = OutputFormat.Markdown };

            var result = await KreuzbergLib.ExtractFileAsync("pdf/fake_memo.pdf", null, config);

            Assert.Equal("application/pdf", result.MimeType!.Trim());
            Assert.True(result.Content.Length >= 10, "expected length >= 10");
            // skipped: field 'metadata.output_format' not available on result type
        }

        [Fact]
        public async Task Test_ApiExtractBytesAsync()
        {
            // Tests async bytes extraction API (extract_bytes)
            var result = await KreuzbergLib.ExtractFileAsync("pdf/fake_memo.pdf", null, null);

            Assert.Equal("application/pdf", result.MimeType!.Trim());
            Assert.True(result.Content.Length >= 10, "expected length >= 10");
            var content = result.Content.ToString();
            Assert.True(content.Contains("May 5, 2023") || content.Contains("Mallori"), "expected to contain at least one of the specified values");
        }

        [Fact]
        public async Task Test_ApiExtractFileAsync()
        {
            // Tests async file extraction API (extract_file)
            var result = await KreuzbergLib.ExtractFileAsync("pdf/fake_memo.pdf", null, null);

            Assert.Equal("application/pdf", result.MimeType!.Trim());
            Assert.True(result.Content.Length >= 10, "expected length >= 10");
            var content = result.Content.ToString();
            Assert.True(content.Contains("May 5, 2023") || content.Contains("Mallori"), "expected to contain at least one of the specified values");
        }

        [Fact]
        public void Test_ConfigChunkingPrependHeadingContext()
        {
            // Tests markdown chunker prepends heading hierarchy to chunk content
            var config = ExtractionConfig.FromJson("{\"chunking\":{\"chunker_type\":\"markdown\",\"max_chars\":300,\"max_overlap\":50,\"prepend_heading_context\":true}}");

            var result = KreuzbergLib.ExtractFileSync("markdown/extraction_test.md", null, config);

            Assert.True(result.Content.Length >= 10, "expected length >= 10");

            // skipped: field 'chunks' not available on result type
            // NOTE(review): the assertions below do read result.Chunks, so the
            // skip note above looks stale — confirm against the generator.
            var chunks = result.Chunks ?? new();

            // This first check used to be fused onto the end of the comment line
            // above, which silently disabled it.
            Assert.True(chunks.All(c => !string.IsNullOrEmpty(c.Content)));
            Assert.True(chunks.All(c => c.Metadata?.HeadingContext != null));
            Assert.True(chunks.FirstOrDefault()?.Metadata?.HeadingContext != null);
        }

        [Fact]
        public void Test_ConfigDocumentStructureWithHeadings()
        {
            // Tests document structure with DOCX heading-driven nesting
            var config = ExtractionConfig.FromJson("{\"include_document_structure\":true}");

            var result = KreuzbergLib.ExtractFileSync("docx/fake.docx", null, config);

            Assert.Equal("application/vnd.openxmlformats-officedocument.wordprocessingml.document", result.MimeType!.Trim());
            // skipped: field 'document' not available on result type
            // skipped: field 'document.nodes' not available on result type
        }

        [Fact]
        public void Test_ConfigElementTypes()
        {
            // Tests element-based result format with element type assertions on DOCX
            var config = ExtractionConfig.FromJson("{\"result_format\":\"element_based\"}");

            var result = KreuzbergLib.ExtractFileSync("docx/unit_test_headers.docx", null, config);

            Assert.Contains("application/vnd.openxmlformats-officedocument.wordprocessingml.document", result.MimeType);
            // skipped: field 'elements' not available on result type
        }

        [Fact]
        public void Test_ConfigExtractionTimeout()
        {
            // Tests that extraction_timeout_secs config field is accepted and does not affect fast extractions
            var config = ExtractionConfig.FromJson("{\"extraction_timeout_secs\":300}");

            var result = KreuzbergLib.ExtractFileSync("pdf/fake_memo.pdf", null, config);

            Assert.Equal("application/pdf", result.MimeType!.Trim());
            Assert.True(result.Content.Length >= 10, "expected length >= 10");
        }

        [Fact]
        public void Test_ConfigKeywords()
        {
            // Tests keyword extraction via YAKE algorithm
            var config = ExtractionConfig.FromJson("{\"keywords\":{\"algorithm\":\"yake\",\"max_keywords\":10}}");

            var result = KreuzbergLib.ExtractFileSync("pdf/fake_memo.pdf", null, config);

            Assert.Equal("application/pdf", result.MimeType!.Trim());
            Assert.True(result.Content.Length >= 10, "expected length >= 10");
            // skipped (two assertions): field 'keywords' not available on C# ExtractionResult
        }

        [Fact]
        public void Test_ConfigPages()
        {
            // Tests page extraction and page marker configuration
            var config = ExtractionConfig.FromJson("{\"pages\":{\"extract_pages\":true,\"insert_page_markers\":true}}");

            var result = KreuzbergLib.ExtractFileSync("pdf/fake_memo.pdf", null, config);

            Assert.Equal("application/pdf", result.MimeType!.Trim());
            Assert.True(result.Content.Length >= 10, "expected length >= 10");
            Assert.Contains("PAGE", result.Content.ToString());
        }

        [Fact]
        public void Test_ConfigQualityEnabled()
        {
            // Tests quality scoring produces a score value in [0.0, 1.0]
            var config = ExtractionConfig.FromJson("{\"enable_quality_processing\":true}");

            var result = KreuzbergLib.ExtractFileSync("pdf/fake_memo.pdf", null, config);

            Assert.Equal("application/pdf", result.MimeType!.Trim());
            Assert.True(result.Content.Length >= 10, "expected length >= 10");
            // skipped (three assertions): field 'quality_score' not available on result type
        }

        [Fact]
        public void Test_ConfigSecurityLimits()
        {
            // Tests archive extraction with custom security limits
            var config = ExtractionConfig.FromJson("{\"security_limits\":{\"max_archive_size\":104857600,\"max_compression_ratio\":50,\"max_files_in_archive\":100}}");

            var result = KreuzbergLib.ExtractFileSync("archives/documents.zip", null, config);

            var mimeType = result.MimeType!;
            Assert.True(mimeType.Contains("application/zip") || mimeType.Contains("application/x-zip-compressed"), "expected to contain at least one of the specified values");
            Assert.True(result.Content.Length >= 10, "expected length >= 10");
        }

        [Fact]
        public void Test_ConfigTreeSitter()
        {
            // Tests tree-sitter configuration round-trip
            var config = ExtractionConfig.FromJson("{\"tree_sitter\":{\"groups\":[\"web\"],\"languages\":[\"python\",\"rust\"],\"process\":{\"comments\":false,\"diagnostics\":false,\"docstrings\":false,\"exports\":true,\"imports\":true,\"structure\":true,\"symbols\":false}}}");

            var result = KreuzbergLib.ExtractFileSync("code/hello.py", null, config);

            Assert.Equal("text/x-source-code", result.MimeType!.Trim());
            Assert.True(result.Content.Length >= 5, "expected length >= 5");
        }

        [Fact]
        public void Test_OutputFormatBytesMarkdown()
        {
            // Tests markdown output format via bytes extraction API
            var pdfBytes = System.IO.File.ReadAllBytes("pdf/fake_memo.pdf");
            var config = new ExtractionConfig { OutputFormat = OutputFormat.Markdown };

            var result = KreuzbergLib.ExtractBytesSync(pdfBytes, "application/pdf", config);

            Assert.Equal("application/pdf", result.MimeType!.Trim());
            Assert.True(result.Content.Length >= 10, "expected length >= 10");
            // skipped: field 'metadata.output_format' not available on result type
        }

        [Fact]
        public void Test_OutputFormatMarkdown()
        {
            // Tests Markdown output format
            var config = ExtractionConfig.FromJson("{\"output_format\":\"markdown\"}");

            var result = KreuzbergLib.ExtractFileSync("pdf/fake_memo.pdf", null, config);

            Assert.Equal("application/pdf", result.MimeType!.Trim());
            Assert.True(result.Content.Length >= 10, "expected length >= 10");
            // skipped: field 'metadata.output_format' not available on result type
        }
    }
}
|
||||
62
e2e/csharp/tests/DetectionTests.cs
generated
Normal file
62
e2e/csharp/tests/DetectionTests.cs
generated
Normal file
@@ -0,0 +1,62 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Net.Http;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
using System.Threading.Tasks;
|
||||
using Xunit;
|
||||
using Kreuzberg;
|
||||
using static Kreuzberg.KreuzbergLib;
|
||||
|
||||
namespace Kreuzberg
{
    /// <summary>E2e tests for category: detection.</summary>
    /// <remarks>
    /// The byte-detection tests carry no assertion on the detected value; they
    /// pass as long as the call returns without throwing.
    /// </remarks>
    public class DetectionTests
    {
        // Serializer settings emitted into every generated suite (snake_case enum
        // names, default values omitted on write). No test in this class reads it.
        private static readonly JsonSerializerOptions ConfigOptions = new()
        {
            Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
            DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
        };

        [Fact]
        public void Test_DetectMimeBytesHtml()
        {
            // Detect HTML MIME from bytes
            var htmlBytes = System.IO.File.ReadAllBytes("html/html.html");

            _ = KreuzbergLib.DetectMimeTypeFromBytes(htmlBytes);
        }

        [Fact]
        public void Test_DetectMimeBytesPdf()
        {
            // Detect PDF MIME type from bytes
            var pdfBytes = System.IO.File.ReadAllBytes("pdf/fake_memo.pdf");

            _ = KreuzbergLib.DetectMimeTypeFromBytes(pdfBytes);
        }

        [Fact]
        public void Test_DetectMimeBytesPng()
        {
            // Detect PNG MIME type from bytes
            var pngBytes = System.IO.File.ReadAllBytes("images/test_hello_world.png");

            _ = KreuzbergLib.DetectMimeTypeFromBytes(pngBytes);
        }

        [Fact]
        public void Test_GetExtensionsUnknownMime()
        {
            // get_extensions unknown MIME
            const string unknownMime = "application/x-totally-unknown";

            Assert.ThrowsAny<KreuzbergException>(() =>
            {
                KreuzbergLib.GetExtensionsForMime(unknownMime);
            });
        }
    }
}
|
||||
43
e2e/csharp/tests/DocumentExtractorManagementTests.cs
generated
Normal file
43
e2e/csharp/tests/DocumentExtractorManagementTests.cs
generated
Normal file
@@ -0,0 +1,43 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Net.Http;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
using System.Threading.Tasks;
|
||||
using Xunit;
|
||||
using Kreuzberg;
|
||||
using static Kreuzberg.KreuzbergLib;
|
||||
|
||||
namespace Kreuzberg
{
    /// <summary>E2e tests for category: document_extractor_management.</summary>
    public class DocumentExtractorManagementTests
    {
        // Serializer settings emitted into every generated suite (snake_case enum
        // names, default values omitted on write). No test in this class reads it.
        private static readonly JsonSerializerOptions ConfigOptions = new()
        {
            Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
            DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
        };

        [Fact]
        public void Test_DocumentExtractorsClear()
        {
            // Clear all document extractors and verify list is empty
            // NOTE(review): only the clear call is made; nothing here checks
            // that the list is empty afterwards — confirm whether the generator
            // should emit that assertion.
            KreuzbergLib.ClearDocumentExtractors();
        }

        [Fact]
        public void Test_ExtractorsList()
        {
            // List all registered document extractors
            // Smoke test: the returned list is not inspected.
            _ = KreuzbergLib.ListDocumentExtractors();
        }
    }
}
|
||||
54
e2e/csharp/tests/EmbedAsyncPendingTests.cs
generated
Normal file
54
e2e/csharp/tests/EmbedAsyncPendingTests.cs
generated
Normal file
@@ -0,0 +1,54 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Net.Http;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
using System.Threading.Tasks;
|
||||
using Xunit;
|
||||
using Kreuzberg;
|
||||
using static Kreuzberg.KreuzbergLib;
|
||||
|
||||
namespace Kreuzberg
{
    /// <summary>E2e tests for category: embed_async_pending.</summary>
    public class EmbedAsyncPendingTests
    {
        // Serializer settings used to build typed config values from JSON:
        // enum names are read/written in snake_case and default values are
        // omitted on write.
        private static readonly JsonSerializerOptions ConfigOptions = new()
        {
            Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
            DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
        };

        [Fact]
        public async Task Test_EmbedTextsAsyncEmptyInput()
        {
            // embed_texts_async: empty text list
            var result = await KreuzbergLib.EmbedTextsAsync(new List<string>(), null);

            Assert.Empty(result);
        }

        [Fact]
        public async Task Test_EmbedTextsAsyncHappy()
        {
            // embed_texts_async: basic async embedding
            // Plain literals: the inputs are ordinary strings, so no JSON
            // round trip is needed to construct them.
            var texts = new List<string> { "First", "Second" };

            var result = await KreuzbergLib.EmbedTextsAsync(texts, null);

            Assert.True(result.Count >= 2);
        }

        [Fact]
        public async Task Test_EmbedTextsAsyncPresetSwitch()
        {
            // embed_texts_async: preset override
            // Smoke test: passes as long as the call completes without throwing.
            var texts = new List<string> { "Text" };
            var config = new EmbeddingConfig
            {
                Model = JsonSerializer.Deserialize<EmbeddingModelType>("{\"name\":\"balanced\",\"type\":\"preset\"}", ConfigOptions)!,
            };

            _ = await KreuzbergLib.EmbedTextsAsync(texts, config);
        }
    }
}
|
||||
36
e2e/csharp/tests/EmbedExtraTests.cs
generated
Normal file
36
e2e/csharp/tests/EmbedExtraTests.cs
generated
Normal file
@@ -0,0 +1,36 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Net.Http;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
using System.Threading.Tasks;
|
||||
using Xunit;
|
||||
using Kreuzberg;
|
||||
using static Kreuzberg.KreuzbergLib;
|
||||
|
||||
namespace Kreuzberg
{
    /// <summary>E2e tests for category: embed_extra.</summary>
    public class EmbedExtraTests
    {
        // Serializer settings used to build typed config values from JSON:
        // enum names are read/written in snake_case and default values are
        // omitted on write.
        private static readonly JsonSerializerOptions ConfigOptions = new()
        {
            Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
            DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
        };

        [Fact]
        public void Test_EmbedTextsBatch()
        {
            // Batch embed texts
            // Smoke test: passes as long as the call returns without throwing.
            // The inputs are ordinary strings, so they are written as literals
            // rather than deserialized from JSON.
            var texts = new List<string> { "Hello", "World" };
            var config = new EmbeddingConfig
            {
                Model = JsonSerializer.Deserialize<EmbeddingModelType>("{\"name\":\"balanced\",\"type\":\"preset\"}", ConfigOptions)!,
            };

            _ = KreuzbergLib.EmbedTexts(texts, config);
        }
    }
}
|
||||
43
e2e/csharp/tests/EmbeddingBackendManagementTests.cs
generated
Normal file
43
e2e/csharp/tests/EmbeddingBackendManagementTests.cs
generated
Normal file
@@ -0,0 +1,43 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Net.Http;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
using System.Threading.Tasks;
|
||||
using Xunit;
|
||||
using Kreuzberg;
|
||||
using static Kreuzberg.KreuzbergLib;
|
||||
|
||||
namespace Kreuzberg
|
||||
{
|
||||
/// <summary>
/// End-to-end tests for the <c>embedding_backend_management</c> fixture category.
/// </summary>
public class EmbeddingBackendManagementTests
{
    // Fixture serializer settings (snake_case enum names, defaults omitted on write).
    // Not referenced by any test in this class.
    private static readonly JsonSerializerOptions ConfigOptions = new()
    {
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
    };

    /// <summary>
    /// Calls <c>KreuzbergLib.ClearEmbeddingBackends()</c>. The fixture description
    /// ("Clear all embedding backends and verify list is empty") mentions a
    /// verification step, but no list check is performed here: the test passes
    /// when the call does not throw.
    /// </summary>
    [Fact]
    public void Test_EmbeddingBackendsClear() => KreuzbergLib.ClearEmbeddingBackends();

    /// <summary>
    /// Lists all registered embedding backends. The returned value is discarded;
    /// only the absence of an exception is checked.
    /// </summary>
    [Fact]
    public void Test_EmbeddingBackendsList()
    {
        _ = KreuzbergLib.ListEmbeddingBackends();
    }
}
|
||||
}
|
||||
71
e2e/csharp/tests/EmbeddingsTests.cs
generated
Normal file
71
e2e/csharp/tests/EmbeddingsTests.cs
generated
Normal file
@@ -0,0 +1,71 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Net.Http;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
using System.Threading.Tasks;
|
||||
using Xunit;
|
||||
using Kreuzberg;
|
||||
using static Kreuzberg.KreuzbergLib;
|
||||
|
||||
namespace Kreuzberg
|
||||
{
|
||||
/// <summary>
/// End-to-end tests for the <c>embeddings</c> fixture category.
/// </summary>
public class EmbeddingsTests
{
    // Serializer settings used to materialise fixture values from their JSON form:
    // enums are mapped through snake_case names, and default-valued members are
    // omitted when writing.
    private static readonly JsonSerializerOptions ConfigOptions = new()
    {
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
    };

    /// <summary>
    /// embed_texts with the "multilingual" preset: embeds two texts and expects
    /// the result to report a <c>Count</c> of at least 2.
    /// </summary>
    [Fact]
    public void Test_EmbedTextsDifferentPreset()
    {
        var texts = new List<string>
        {
            JsonSerializer.Deserialize<string>("\"Hello world\"", ConfigOptions)!,
            JsonSerializer.Deserialize<string>("\"Test\"", ConfigOptions)!,
        };
        var model = JsonSerializer.Deserialize<EmbeddingModelType>("{\"name\":\"multilingual\",\"type\":\"preset\"}", ConfigOptions)!;

        var embeddings = KreuzbergLib.EmbedTexts(texts, new EmbeddingConfig { Model = model });

        Assert.True(embeddings.Count >= 2);
    }

    /// <summary>
    /// get_embedding_preset with a known preset name ("balanced"). The returned
    /// value is discarded; only the absence of an exception is checked.
    /// </summary>
    [Fact]
    public void Test_GetEmbeddingPresetKnown()
    {
        _ = KreuzbergLib.GetEmbeddingPreset("balanced");
    }

    /// <summary>
    /// get_embedding_preset nominal case. Issues the same call as
    /// <see cref="Test_GetEmbeddingPresetKnown"/> and likewise asserts nothing
    /// beyond "does not throw".
    /// </summary>
    [Fact]
    public void Test_GetEmbeddingPresetNominal()
    {
        _ = KreuzbergLib.GetEmbeddingPreset("balanced");
    }

    /// <summary>
    /// get_embedding_preset with an unknown preset name. Passes when the result is
    /// null or its string form is empty.
    /// </summary>
    [Fact]
    public void Test_GetEmbeddingPresetUnknown()
    {
        var preset = KreuzbergLib.GetEmbeddingPreset("nonexistent-xyz");

        // NOTE(review): the fixture description says the lookup "fails", yet this
        // checks for an empty string form rather than an exception — confirm that
        // an unknown preset is reported as null and not via a throw.
        var rendered = preset?.ToString();
        Assert.True(string.IsNullOrEmpty(rendered));
    }

    /// <summary>
    /// list_embedding_presets sanity check: at least one preset is returned.
    /// </summary>
    [Fact]
    public void Test_ListEmbeddingPresetsSanity()
    {
        var presets = KreuzbergLib.ListEmbeddingPresets();

        Assert.NotEmpty(presets);
    }
}
|
||||
}
|
||||
76
e2e/csharp/tests/ErrorTests.cs
generated
Normal file
76
e2e/csharp/tests/ErrorTests.cs
generated
Normal file
@@ -0,0 +1,76 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Net.Http;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
using System.Threading.Tasks;
|
||||
using Xunit;
|
||||
using Kreuzberg;
|
||||
using static Kreuzberg.KreuzbergLib;
|
||||
|
||||
namespace Kreuzberg
|
||||
{
|
||||
/// <summary>
/// End-to-end tests for the <c>error</c> fixture category. Fixture paths are
/// relative, so they resolve against the process's current working directory.
/// </summary>
public class ErrorTests
{
    // Fixture serializer settings (snake_case enum names, defaults omitted on write).
    // Not referenced by any test in this class.
    private static readonly JsonSerializerOptions ConfigOptions = new()
    {
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
    };

    /// <summary>
    /// Graceful handling of empty bytes: extracting "text/empty.txt" as
    /// "text/plain" must not throw. The extraction result itself is discarded.
    /// </summary>
    [Fact]
    public void Test_ErrorEmptyBytes()
    {
        var emptyPayload = System.IO.File.ReadAllBytes("text/empty.txt");

        _ = KreuzbergLib.ExtractBytesSync(emptyPayload, "text/plain", new ExtractionConfig());
    }

    /// <summary>
    /// Extracting with an empty MIME type must raise a <c>KreuzbergException</c>
    /// (or a type derived from it).
    /// </summary>
    [Fact]
    public void Test_ErrorEmptyMime()
    {
        // The file read stays inside the delegate, as in the generated original.
        Action extractWithEmptyMime = () =>
        {
            var payload = System.IO.File.ReadAllBytes("text/plain.txt");
            KreuzbergLib.ExtractBytesSync(payload, "", new ExtractionConfig());
        };

        Assert.ThrowsAny<KreuzbergException>(extractWithEmptyMime);
    }

    /// <summary>
    /// extract_bytes with both <c>DisableOcr</c> and <c>ForceOcr</c> set must raise
    /// a <c>KreuzbergException</c> (or a type derived from it).
    /// </summary>
    [Fact]
    public void Test_ErrorExtractBytesConflictingOcr()
    {
        Action extractWithConflictingOcr = () =>
        {
            var payload = System.IO.File.ReadAllBytes("text/fake_text.txt");
            var conflicting = new ExtractionConfig { DisableOcr = true, ForceOcr = true };
            KreuzbergLib.ExtractBytesSync(payload, "text/plain", conflicting);
        };

        Assert.ThrowsAny<KreuzbergException>(extractWithConflictingOcr);
    }

    /// <summary>
    /// Extracting with a malformed MIME type ("not-a-mime") must raise a
    /// <c>KreuzbergException</c> (or a type derived from it).
    /// </summary>
    [Fact]
    public void Test_ErrorInvalidMimeFormat()
    {
        Action extractWithMalformedMime = () =>
        {
            var payload = System.IO.File.ReadAllBytes("text/plain.txt");
            KreuzbergLib.ExtractBytesSync(payload, "not-a-mime", new ExtractionConfig());
        };

        Assert.ThrowsAny<KreuzbergException>(extractWithMalformedMime);
    }

    /// <summary>
    /// Extracting with an unsupported MIME type ("application/x-nonexistent") must
    /// raise a <c>KreuzbergException</c> (or a type derived from it).
    /// </summary>
    [Fact]
    public void Test_ErrorUnsupportedMime()
    {
        Action extractWithUnsupportedMime = () =>
        {
            var payload = System.IO.File.ReadAllBytes("text/plain.txt");
            KreuzbergLib.ExtractBytesSync(payload, "application/x-nonexistent", new ExtractionConfig());
        };

        Assert.ThrowsAny<KreuzbergException>(extractWithUnsupportedMime);
    }
}
|
||||
}
|
||||
73
e2e/csharp/tests/FormatSpecificTests.cs
generated
Normal file
73
e2e/csharp/tests/FormatSpecificTests.cs
generated
Normal file
@@ -0,0 +1,73 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Net.Http;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
using System.Threading.Tasks;
|
||||
using Xunit;
|
||||
using Kreuzberg;
|
||||
using static Kreuzberg.KreuzbergLib;
|
||||
|
||||
namespace Kreuzberg
|
||||
{
|
||||
/// <summary>
/// End-to-end tests for the <c>format_specific</c> fixture category. Fixture
/// paths are relative, so they resolve against the process's current working
/// directory.
/// </summary>
public class FormatSpecificTests
{
    // Fixture serializer settings (snake_case enum names, defaults omitted on write).
    // Not referenced by any test in this class.
    private static readonly JsonSerializerOptions ConfigOptions = new()
    {
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
    };

    /// <summary>
    /// Standalone DOCX extraction via <c>ExtractBytesSync</c> with a null config;
    /// expects at least 20 characters of content.
    /// </summary>
    [Fact]
    public void Test_FormatDocxStandalone()
    {
        var payload = System.IO.File.ReadAllBytes("docx/fake.docx");

        var result = KreuzbergLib.ExtractBytesSync(payload, "application/vnd.openxmlformats-officedocument.wordprocessingml.document", null);

        Assert.True(result.Content.Length >= 20, "expected length >= 20");
    }

    /// <summary>
    /// Standalone HWPX extraction via <c>ExtractBytesSync</c> with a null config;
    /// expects at least 20 characters of content containing "hello from hwpx"
    /// in any letter case.
    /// </summary>
    [Fact]
    public void Test_FormatHwpxStandalone()
    {
        var payload = System.IO.File.ReadAllBytes("hwpx/simple.hwpx");

        var result = KreuzbergLib.ExtractBytesSync(payload, "application/haansofthwpx", null);

        Assert.True(result.Content.Length >= 20, "expected length >= 20");

        // Case-insensitive match done with an ordinal comparison instead of
        // ToLower(), whose result depends on the current thread culture.
        Assert.Contains("hello from hwpx", result.Content.ToString(), StringComparison.OrdinalIgnoreCase);
    }

    /// <summary>
    /// Standalone PDF text extraction via <c>ExtractBytesSync</c> with a null
    /// config; expects at least 50 characters of content mentioning "Mallori"
    /// or "May".
    /// </summary>
    [Fact]
    public void Test_FormatPdfText()
    {
        var payload = System.IO.File.ReadAllBytes("pdf/fake_memo.pdf");

        var result = KreuzbergLib.ExtractBytesSync(payload, "application/pdf", null);

        Assert.True(result.Content.Length >= 50, "expected length >= 50");

        var text = result.Content.ToString();
        Assert.True(text.Contains("Mallori") || text.Contains("May"), "expected to contain at least one of the specified values");
    }

    /// <summary>
    /// PPTX extraction via <c>ExtractFileSync</c> with a null config. The result
    /// is discarded; only the absence of an exception is checked.
    /// </summary>
    [Fact]
    public void Test_FormatPptx()
    {
        _ = KreuzbergLib.ExtractFileSync("pptx/simple.pptx", "application/vnd.openxmlformats-officedocument.presentationml.presentation", null);
    }

    /// <summary>
    /// XLSX extraction via <c>ExtractFileSync</c> with a null config. The result
    /// is discarded; only the absence of an exception is checked.
    /// </summary>
    [Fact]
    public void Test_FormatXlsx()
    {
        _ = KreuzbergLib.ExtractFileSync("xlsx/stanley_cups.xlsx", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", null);
    }
}
|
||||
}
|
||||
55
e2e/csharp/tests/MimeUtilitiesTests.cs
generated
Normal file
55
e2e/csharp/tests/MimeUtilitiesTests.cs
generated
Normal file
@@ -0,0 +1,55 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Net.Http;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
using System.Threading.Tasks;
|
||||
using Xunit;
|
||||
using Kreuzberg;
|
||||
using static Kreuzberg.KreuzbergLib;
|
||||
|
||||
namespace Kreuzberg
|
||||
{
|
||||
/// <summary>
/// End-to-end tests for the <c>mime_utilities</c> fixture category. Fixture
/// paths are relative, so they resolve against the process's current working
/// directory.
/// </summary>
/// <remarks>
/// Substring checks are case-insensitive and use
/// <see cref="StringComparison.OrdinalIgnoreCase"/> rather than lower-casing the
/// actual value, so the outcome does not depend on the current thread culture.
/// </remarks>
public class MimeUtilitiesTests
{
    // Fixture serializer settings (snake_case enum names, defaults omitted on write).
    // Not referenced by any test in this class.
    private static readonly JsonSerializerOptions ConfigOptions = new()
    {
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
    };

    /// <summary>
    /// Detects the MIME type from the bytes of a PDF fixture; the string form of
    /// the result must contain "pdf".
    /// </summary>
    [Fact]
    public void Test_MimeDetectBytes()
    {
        var payload = System.IO.File.ReadAllBytes("pdf/fake_memo.pdf");

        var detected = KreuzbergLib.DetectMimeTypeFromBytes(payload);

        Assert.Contains("pdf", detected.ToString(), StringComparison.OrdinalIgnoreCase);
    }

    /// <summary>
    /// Detects the MIME type from the bytes of a PNG fixture; the string form of
    /// the result must contain "png".
    /// </summary>
    [Fact]
    public void Test_MimeDetectImage()
    {
        var payload = System.IO.File.ReadAllBytes("images/test_hello_world.png");

        var detected = KreuzbergLib.DetectMimeTypeFromBytes(payload);

        Assert.Contains("png", detected.ToString(), StringComparison.OrdinalIgnoreCase);
    }

    /// <summary>
    /// Looks up the file extensions for "application/pdf"; the JSON-serialized
    /// result must contain "pdf".
    /// </summary>
    [Fact]
    public void Test_MimeGetExtensions()
    {
        var extensions = KreuzbergLib.GetExtensionsForMime("application/pdf");

        // The result is serialized with default options so the check works on its
        // textual form, as in the generated original.
        Assert.Contains("pdf", JsonSerializer.Serialize(extensions), StringComparison.OrdinalIgnoreCase);
    }
}
|
||||
}
|
||||
50
e2e/csharp/tests/OcrBackendManagementTests.cs
generated
Normal file
50
e2e/csharp/tests/OcrBackendManagementTests.cs
generated
Normal file
@@ -0,0 +1,50 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Net.Http;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
using System.Threading.Tasks;
|
||||
using Xunit;
|
||||
using Kreuzberg;
|
||||
using static Kreuzberg.KreuzbergLib;
|
||||
|
||||
namespace Kreuzberg
|
||||
{
|
||||
/// <summary>
/// End-to-end tests for the <c>ocr_backend_management</c> fixture category.
/// </summary>
public class OcrBackendManagementTests
{
    // Fixture serializer settings (snake_case enum names, defaults omitted on write).
    // Not referenced by any test in this class.
    private static readonly JsonSerializerOptions ConfigOptions = new()
    {
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
    };

    /// <summary>
    /// Calls <c>KreuzbergLib.ClearOcrBackends()</c>. The fixture description
    /// ("Clear all OCR backends and verify list is empty") mentions a verification
    /// step, but no list check is performed here: the test passes when the call
    /// does not throw.
    /// </summary>
    [Fact]
    public void Test_OcrBackendsClear() => KreuzbergLib.ClearOcrBackends();

    /// <summary>
    /// Lists all registered OCR backends. The returned value is discarded; only
    /// the absence of an exception is checked.
    /// </summary>
    [Fact]
    public void Test_OcrBackendsList()
    {
        _ = KreuzbergLib.ListOcrBackends();
    }

    /// <summary>
    /// Unregistering an OCR backend name that this test never registered
    /// ("nonexistent-backend-xyz") must complete without throwing.
    /// </summary>
    [Fact]
    public void Test_OcrBackendsUnregister() => KreuzbergLib.UnregisterOcrBackend("nonexistent-backend-xyz");
}
|
||||
}
|
||||
48
e2e/csharp/tests/PdfTests.cs
generated
Normal file
48
e2e/csharp/tests/PdfTests.cs
generated
Normal file
@@ -0,0 +1,48 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Net.Http;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
using System.Threading.Tasks;
|
||||
using Xunit;
|
||||
using Kreuzberg;
|
||||
using static Kreuzberg.KreuzbergLib;
|
||||
|
||||
namespace Kreuzberg
|
||||
{
|
||||
/// <summary>
/// End-to-end tests for the <c>pdf</c> fixture category. Fixture paths are
/// relative, so they resolve against the process's current working directory.
/// </summary>
public class PdfTests
{
    // Fixture serializer settings (snake_case enum names, defaults omitted on write).
    // Not referenced by any test in this class.
    private static readonly JsonSerializerOptions ConfigOptions = new()
    {
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
    };

    /// <summary>
    /// render_pdf_page_to_png for the first page (index 0) of the memo fixture;
    /// the rendered output must be non-null and non-empty.
    /// </summary>
    [Fact]
    public void Test_RenderPdfPageFirst()
    {
        var pdfBytes = System.IO.File.ReadAllBytes("pdf/fake_memo.pdf");

        var png = KreuzbergLib.RenderPdfPageToPng(pdfBytes, 0, null, null);

        Assert.NotNull(png);

        // The generator skips the fixture's 'min_length' assertion for byte[]
        // results. A non-empty check is the weakest condition any positive
        // min_length implies, so an empty render no longer passes silently.
        // NOTE(review): the fixture's actual threshold is not visible here —
        // tighten this if the generator gains byte[] length support.
        Assert.NotEmpty(png);
    }

    /// <summary>
    /// render_pdf_page_to_png with a page index that is out of range (999) must
    /// raise a <c>KreuzbergException</c> (or a type derived from it).
    /// </summary>
    [Fact]
    public void Test_RenderPdfPageOutOfRange()
    {
        // The file read stays inside the delegate, as in the generated original.
        Action renderMissingPage = () =>
        {
            var pdfBytes = System.IO.File.ReadAllBytes("pdf/fake_memo.pdf");
            KreuzbergLib.RenderPdfPageToPng(pdfBytes, 999, null, null);
        };

        Assert.ThrowsAny<KreuzbergException>(renderMissingPage);
    }
}
|
||||
}
|
||||
234
e2e/csharp/tests/PluginApiTests.cs
generated
Normal file
234
e2e/csharp/tests/PluginApiTests.cs
generated
Normal file
@@ -0,0 +1,234 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Net.Http;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
using System.Threading.Tasks;
|
||||
using Xunit;
|
||||
using Kreuzberg;
|
||||
using static Kreuzberg.KreuzbergLib;
|
||||
|
||||
namespace Kreuzberg
|
||||
{
|
||||
/// <summary>
/// End-to-end tests for the <c>plugin_api</c> fixture category: registering
/// managed plugin implementations through the generated bridge types, and
/// unregistering plugins by name.
/// </summary>
/// <remarks>
/// NOTE(review): the stubs registered here are never unregistered by this class.
/// If the registries are process-wide, the stubs stay visible to other test
/// classes in the same run — confirm that this is intended.
/// </remarks>
public class PluginApiTests
{
    // Fixture serializer settings (snake_case enum names, defaults omitted on write).
    // Not referenced by any test in this class.
    private static readonly JsonSerializerOptions ConfigOptions = new()
    {
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
    };

    /// <summary>
    /// register_document_extractor through the trait bridge: wraps a stub
    /// extractor and registers it. Passes when neither call throws.
    /// </summary>
    [Fact]
    public void Test_RegisterDocumentExtractorTraitBridge()
    {
        var bridged = DocumentExtractorBridge.Register(new TestStub_RegisterDocumentExtractorTraitBridge());
        KreuzbergLib.RegisterDocumentExtractor(bridged);
    }

    /// <summary>
    /// register_embedding_backend through the trait bridge: wraps a stub backend
    /// and registers it. Passes when neither call throws.
    /// </summary>
    [Fact]
    public void Test_RegisterEmbeddingBackendTraitBridge()
    {
        var bridged = EmbeddingBackendBridge.Register(new TestStub_RegisterEmbeddingBackendTraitBridge());
        KreuzbergLib.RegisterEmbeddingBackend(bridged);
    }

    /// <summary>
    /// register_ocr_backend through the trait bridge: wraps a stub backend and
    /// registers it. Passes when neither call throws.
    /// </summary>
    [Fact]
    public void Test_RegisterOcrBackendTraitBridge()
    {
        var bridged = OcrBackendBridge.Register(new TestStub_RegisterOcrBackendTraitBridge());
        KreuzbergLib.RegisterOcrBackend(bridged);
    }

    /// <summary>
    /// register_post_processor through the trait bridge: wraps a stub processor
    /// and registers it. Passes when neither call throws.
    /// </summary>
    [Fact]
    public void Test_RegisterPostProcessorTraitBridge()
    {
        var bridged = PostProcessorBridge.Register(new TestStub_RegisterPostProcessorTraitBridge());
        KreuzbergLib.RegisterPostProcessor(bridged);
    }

    /// <summary>
    /// register_renderer through the trait bridge: wraps a stub renderer and
    /// registers it. Passes when neither call throws.
    /// </summary>
    [Fact]
    public void Test_RegisterRendererTraitBridge()
    {
        var bridged = RendererBridge.Register(new TestStub_RegisterRendererTraitBridge());
        KreuzbergLib.RegisterRenderer(bridged);
    }

    /// <summary>
    /// register_validator through the trait bridge: wraps a stub validator and
    /// registers it. Passes when neither call throws.
    /// </summary>
    [Fact]
    public void Test_RegisterValidatorTraitBridge()
    {
        var bridged = ValidatorBridge.Register(new TestStub_RegisterValidatorTraitBridge());
        KreuzbergLib.RegisterValidator(bridged);
    }

    /// <summary>
    /// unregister_document_extractor for the name "test-extractor". Despite the
    /// "AfterRegister" suffix, this method performs no registration itself, and
    /// no stub in this class uses that name.
    /// </summary>
    [Fact]
    public void Test_UnregisterDocumentExtractorAfterRegister() => KreuzbergLib.UnregisterDocumentExtractor("test-extractor");

    /// <summary>
    /// unregister_embedding_backend for the name "test-embedding-backend". This
    /// method performs no registration itself, and no stub in this class uses
    /// that name.
    /// </summary>
    [Fact]
    public void Test_UnregisterEmbeddingBackendAfterRegister() => KreuzbergLib.UnregisterEmbeddingBackend("test-embedding-backend");

    /// <summary>
    /// unregister_post_processor for the name "test-processor". This method
    /// performs no registration itself, and no stub in this class uses that name.
    /// </summary>
    [Fact]
    public void Test_UnregisterPostProcessorAfterRegister() => KreuzbergLib.UnregisterPostProcessor("test-processor");

    /// <summary>
    /// unregister_renderer for the name "test-renderer". This method performs no
    /// registration itself, and no stub in this class uses that name.
    /// </summary>
    [Fact]
    public void Test_UnregisterRendererAfterRegister() => KreuzbergLib.UnregisterRenderer("test-renderer");

    /// <summary>
    /// unregister_validator for the name "test-validator". This method performs
    /// no registration itself, and no stub in this class uses that name.
    /// </summary>
    [Fact]
    public void Test_UnregisterValidatorAfterRegister() => KreuzbergLib.UnregisterValidator("test-validator");

    /// <summary>
    /// Inert document extractor: declares no MIME types, declines every input
    /// and returns empty strings.
    /// </summary>
    private class TestStub_RegisterDocumentExtractorTraitBridge : IDocumentExtractor
    {
        public string Name => "register_document_extractor_trait_bridge";
        public string Version => "1.0.0";

        public string ExtractBytes(byte[] content, string mimeType, ExtractionConfig config) => "";
        public string ExtractFile(string path, string mimeType, ExtractionConfig config) => "";
        public List<string> SupportedMimeTypes() => [];
        public int Priority() => 0;
        public bool CanHandle(string path, string mimeType) => false;
        public void Initialize() { }
        public void Shutdown() { }
        public string Description() => "";
        public string Author() => "";
    }

    /// <summary>
    /// Inert embedding backend: reports zero dimensions and returns no vectors.
    /// </summary>
    private class TestStub_RegisterEmbeddingBackendTraitBridge : IEmbeddingBackend
    {
        public string Name => "register_embedding_backend_trait_bridge";
        public string Version => "1.0.0";

        public ulong Dimensions() => 0;
        public List<List<float>> Embed(List<string> texts) => [];
        public void Initialize() { }
        public void Shutdown() { }
        public string Description() => "";
        public string Author() => "";
    }

    /// <summary>
    /// Inert OCR backend: supports no languages or optional capabilities and
    /// returns a default-constructed <c>ExtractionResult</c> from every
    /// processing call.
    /// </summary>
    private class TestStub_RegisterOcrBackendTraitBridge : IOcrBackend
    {
        public string Name => "register_ocr_backend_trait_bridge";
        public string Version => "1.0.0";

        public ExtractionResult ProcessImage(byte[] imageBytes, OcrConfig config) => new ExtractionResult();
        public ExtractionResult ProcessImageFile(string path, OcrConfig config) => new ExtractionResult();
        public bool SupportsLanguage(string lang) => false;
        public OcrBackendType BackendType() => OcrBackendType.Tesseract;
        public List<string> SupportedLanguages() => [];
        public bool SupportsTableDetection() => false;
        public bool SupportsDocumentProcessing() => false;
        public ExtractionResult ProcessDocument(string path, OcrConfig config) => new ExtractionResult();
        public void Initialize() { }
        public void Shutdown() { }
        public string Description() => "";
        public string Author() => "";
    }

    /// <summary>
    /// Inert post-processor: never opts in to processing and does nothing when
    /// invoked.
    /// </summary>
    private class TestStub_RegisterPostProcessorTraitBridge : IPostProcessor
    {
        public string Name => "register_post_processor_trait_bridge";
        public string Version => "1.0.0";

        public void Process(ExtractionResult result, ExtractionConfig config) { }

        // The enum is qualified with its namespace because this class declares a
        // method that is also called ProcessingStage. In expression context the
        // bare name binds to that method group, so "ProcessingStage.Early" is
        // rejected with CS0119.
        public ProcessingStage ProcessingStage() => global::Kreuzberg.ProcessingStage.Early;

        public bool ShouldProcess(ExtractionResult result, ExtractionConfig config) => false;
        public ulong EstimatedDurationMs(ExtractionResult result) => 0;
        public int Priority() => 0;
        public void Initialize() { }
        public void Shutdown() { }
        public string Description() => "";
        public string Author() => "";
    }

    /// <summary>
    /// Inert renderer: renders every document to an empty string.
    /// </summary>
    private class TestStub_RegisterRendererTraitBridge : IRenderer
    {
        public string Name => "register_renderer_trait_bridge";
        public string Version => "1.0.0";

        public string Render(string doc) => "";
        public void Initialize() { }
        public void Shutdown() { }
        public string Description() => "";
        public string Author() => "";
    }

    /// <summary>
    /// Inert validator: never opts in to validation and does nothing when invoked.
    /// </summary>
    private class TestStub_RegisterValidatorTraitBridge : IValidator
    {
        public string Name => "register_validator_trait_bridge";
        public string Version => "1.0.0";

        public void Validate(ExtractionResult result, ExtractionConfig config) { }
        public bool ShouldValidate(ExtractionResult result, ExtractionConfig config) => false;
        public int Priority() => 0;
        public void Initialize() { }
        public void Shutdown() { }
        public string Description() => "";
        public string Author() => "";
    }
}
|
||||
}
|
||||
43
e2e/csharp/tests/PostProcessorManagementTests.cs
generated
Normal file
43
e2e/csharp/tests/PostProcessorManagementTests.cs
generated
Normal file
@@ -0,0 +1,43 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Net.Http;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
using System.Threading.Tasks;
|
||||
using Xunit;
|
||||
using Kreuzberg;
|
||||
using static Kreuzberg.KreuzbergLib;
|
||||
|
||||
namespace Kreuzberg
|
||||
{
|
||||
/// <summary>
/// End-to-end tests for the <c>post_processor_management</c> fixture category.
/// </summary>
public class PostProcessorManagementTests
{
    // Fixture serializer settings (snake_case enum names, defaults omitted on write).
    // Not referenced by any test in this class.
    private static readonly JsonSerializerOptions ConfigOptions = new()
    {
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
    };

    /// <summary>
    /// Calls <c>KreuzbergLib.ClearPostProcessors()</c>. The fixture description
    /// ("Clear all post-processors and verify list is empty") mentions a
    /// verification step, but no list check is performed here: the test passes
    /// when the call does not throw.
    /// </summary>
    [Fact]
    public void Test_PostProcessorsClear() => KreuzbergLib.ClearPostProcessors();

    /// <summary>
    /// Lists all registered post-processors. The returned value is discarded;
    /// only the absence of an exception is checked.
    /// </summary>
    [Fact]
    public void Test_PostProcessorsList()
    {
        _ = KreuzbergLib.ListPostProcessors();
    }
}
|
||||
}
|
||||
52
e2e/csharp/tests/RegistryOperationsTests.cs
generated
Normal file
52
e2e/csharp/tests/RegistryOperationsTests.cs
generated
Normal file
@@ -0,0 +1,52 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Net.Http;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
using System.Threading.Tasks;
|
||||
using Xunit;
|
||||
using Kreuzberg;
|
||||
using static Kreuzberg.KreuzbergLib;
|
||||
|
||||
namespace Kreuzberg
|
||||
{
|
||||
/// <summary>
/// End-to-end tests for the <c>registry_operations</c> fixture category. Each
/// test looks up the file extensions for one MIME type and discards the result,
/// so only the absence of an exception is checked.
/// </summary>
public class RegistryOperationsTests
{
    // Fixture serializer settings (snake_case enum names, defaults omitted on write).
    // Not referenced by any test in this class.
    private static readonly JsonSerializerOptions ConfigOptions = new()
    {
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
    };

    /// <summary>Gets the file extensions for the DOCX MIME type.</summary>
    [Fact]
    public void Test_ExtensionsDocx()
    {
        _ = KreuzbergLib.GetExtensionsForMime("application/vnd.openxmlformats-officedocument.wordprocessingml.document");
    }

    /// <summary>Gets the file extensions for the HTML MIME type.</summary>
    [Fact]
    public void Test_ExtensionsHtml()
    {
        _ = KreuzbergLib.GetExtensionsForMime("text/html");
    }

    /// <summary>Gets the file extensions for the PDF MIME type.</summary>
    [Fact]
    public void Test_ExtensionsPdf()
    {
        _ = KreuzbergLib.GetExtensionsForMime("application/pdf");
    }
}
|
||||
}
|
||||
76
e2e/csharp/tests/RegistryTests.cs
generated
Normal file
76
e2e/csharp/tests/RegistryTests.cs
generated
Normal file
@@ -0,0 +1,76 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Net.Http;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
using System.Threading.Tasks;
|
||||
using Xunit;
|
||||
using Kreuzberg;
|
||||
using static Kreuzberg.KreuzbergLib;
|
||||
|
||||
namespace Kreuzberg
|
||||
{
|
||||
/// <summary>
/// End-to-end tests for the <c>registry</c> fixture category. Each test calls
/// one of the <c>List*</c> registry functions and discards the result, so only
/// the absence of an exception is checked.
/// </summary>
public class RegistryTests
{
    // Fixture serializer settings (snake_case enum names, defaults omitted on write).
    // Not referenced by any test in this class.
    private static readonly JsonSerializerOptions ConfigOptions = new()
    {
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
    };

    /// <summary>Lists the registered document extractors.</summary>
    [Fact]
    public void Test_ListDocumentExtractors()
    {
        _ = KreuzbergLib.ListDocumentExtractors();
    }

    /// <summary>Lists the registered embedding backends.</summary>
    [Fact]
    public void Test_ListEmbeddingBackends()
    {
        _ = KreuzbergLib.ListEmbeddingBackends();
    }

    /// <summary>Lists the registered OCR backends.</summary>
    [Fact]
    public void Test_ListOcrBackends()
    {
        _ = KreuzbergLib.ListOcrBackends();
    }

    /// <summary>Lists the registered post-processors.</summary>
    [Fact]
    public void Test_ListPostProcessors()
    {
        _ = KreuzbergLib.ListPostProcessors();
    }

    /// <summary>Lists the registered renderers.</summary>
    [Fact]
    public void Test_ListRenderers()
    {
        _ = KreuzbergLib.ListRenderers();
    }

    /// <summary>Lists the registered validators.</summary>
    [Fact]
    public void Test_ListValidators()
    {
        _ = KreuzbergLib.ListValidators();
    }
}
|
||||
}
|
||||
43
e2e/csharp/tests/RendererManagementTests.cs
generated
Normal file
43
e2e/csharp/tests/RendererManagementTests.cs
generated
Normal file
@@ -0,0 +1,43 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Net.Http;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
using System.Threading.Tasks;
|
||||
using Xunit;
|
||||
using Kreuzberg;
|
||||
using static Kreuzberg.KreuzbergLib;
|
||||
|
||||
namespace Kreuzberg
|
||||
{
|
||||
/// <summary>
/// End-to-end tests for the <c>renderer_management</c> fixture category.
/// </summary>
public class RendererManagementTests
{
    // Fixture serializer settings (snake_case enum names, defaults omitted on write).
    // Not referenced by any test in this class.
    private static readonly JsonSerializerOptions ConfigOptions = new()
    {
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
    };

    /// <summary>
    /// Calls <c>KreuzbergLib.ClearRenderers()</c>. The fixture description
    /// ("Clear all renderers and verify list is empty") mentions a verification
    /// step, but no list check is performed here: the test passes when the call
    /// does not throw.
    /// </summary>
    [Fact]
    public void Test_RenderersClear() => KreuzbergLib.ClearRenderers();

    /// <summary>
    /// Lists all registered renderers. The returned value is discarded; only the
    /// absence of an exception is checked.
    /// </summary>
    [Fact]
    public void Test_RenderersList()
    {
        _ = KreuzbergLib.ListRenderers();
    }
}
|
||||
}
|
||||
120
e2e/csharp/tests/SmokeTests.cs
generated
Normal file
120
e2e/csharp/tests/SmokeTests.cs
generated
Normal file
@@ -0,0 +1,120 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Net.Http;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
using System.Threading.Tasks;
|
||||
using Xunit;
|
||||
using Kreuzberg;
|
||||
using static Kreuzberg.KreuzbergLib;
|
||||
|
||||
namespace Kreuzberg
|
||||
{
|
||||
/// <summary>E2e tests for category: smoke.</summary>
|
||||
public class SmokeTests
|
||||
{
|
||||
private static readonly JsonSerializerOptions ConfigOptions = new() { Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) }, DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault };
|
||||
|
||||
/// <summary>
/// OCR smoke test: extracts a PNG fixture asynchronously with a config parsed
/// from "{}". Expects the "image/png" MIME type, non-empty content, and at
/// least one of "Hello", "World", "hello" or "world" in that content.
/// (Per the fixture description, the same case exercises the Uint8Array bridge
/// parameter and Promise await in the WASM binding.)
/// </summary>
[Fact]
public async Task Test_OcrImagePng()
{
    var imageBytes = System.IO.File.ReadAllBytes("images/test_hello_world.png");

    var result = await KreuzbergLib.ExtractBytesAsync(imageBytes, "image/png", ExtractionConfig.FromJson("{}"));

    Assert.Equal("image/png", result.MimeType!.Trim());
    Assert.True(result.Content.Length >= 1, "expected length >= 1");

    // Any one of these tokens is enough; matching is case-sensitive.
    string[] expectedAny = { "Hello", "World", "hello", "world" };
    Assert.True(expectedAny.Any(token => result.Content.ToString().Contains(token)), "expected to contain at least one of the specified values");
}
|
||||
|
||||
[Fact]
|
||||
public async Task Test_SmokeDocxBasic()
|
||||
{
|
||||
// Smoke test: DOCX with formatted text
|
||||
var result = await KreuzbergLib.ExtractFileAsync("docx/fake.docx", "application/vnd.openxmlformats-officedocument.wordprocessingml.document", new ExtractionConfig());
|
||||
Assert.Equal("application/vnd.openxmlformats-officedocument.wordprocessingml.document", result.MimeType!.Trim());
|
||||
Assert.True(result.Content.Length >= 20, "expected length >= 20");
|
||||
Assert.True(result.Content.ToString().Contains("Lorem") || result.Content.ToString().Contains("ipsum") || result.Content.ToString().Contains("document") || result.Content.ToString().Contains("text"), "expected to contain at least one of the specified values");
|
||||
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task Test_SmokeHtmlBasic()
|
||||
{
|
||||
// Smoke test: HTML table extraction
|
||||
var result = await KreuzbergLib.ExtractFileAsync("html/simple_table.html", "text/html", new ExtractionConfig());
|
||||
Assert.Equal("text/html", result.MimeType!.Trim());
|
||||
Assert.True(result.Content.Length >= 10, "expected length >= 10");
|
||||
Assert.True(result.Content.ToString().Contains("Sample Data Table") || result.Content.ToString().Contains("Laptop") || result.Content.ToString().Contains("Electronics") || result.Content.ToString().Contains("Product"), "expected to contain at least one of the specified values");
|
||||
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task Test_SmokeImagePng()
|
||||
{
|
||||
// Smoke test: PNG image (without OCR, metadata only)
|
||||
var result = await KreuzbergLib.ExtractFileAsync("images/sample.png", null, new ExtractionConfig { DisableOcr = true });
|
||||
Assert.Equal("image/png", result.MimeType!.Trim());
|
||||
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task Test_SmokeJsonBasic()
|
||||
{
|
||||
// Smoke test: JSON file extraction
|
||||
var result = await KreuzbergLib.ExtractFileAsync("json/simple.json", "application/json", new ExtractionConfig());
|
||||
Assert.Equal("application/json", result.MimeType!.Trim());
|
||||
Assert.True(result.Content.Length >= 5, "expected length >= 5");
|
||||
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task Test_SmokePdfBasic()
|
||||
{
|
||||
// Smoke test: PDF with simple text extraction
|
||||
var result = await KreuzbergLib.ExtractFileAsync("pdf/fake_memo.pdf", "application/pdf", new ExtractionConfig());
|
||||
Assert.Equal("application/pdf", result.MimeType!.Trim());
|
||||
Assert.True(result.Content.Length >= 50, "expected length >= 50");
|
||||
Assert.True(result.Content.ToString().Contains("May 5, 2023") || result.Content.ToString().Contains("To Whom it May Concern"), "expected to contain at least one of the specified values");
|
||||
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task Test_SmokeTxtBasic()
|
||||
{
|
||||
// Smoke test: Plain text file
|
||||
var result = await KreuzbergLib.ExtractFileAsync("text/report.txt", "text/plain", new ExtractionConfig());
|
||||
Assert.Equal("text/plain", result.MimeType!.Trim());
|
||||
Assert.True(result.Content.Length >= 5, "expected length >= 5");
|
||||
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task Test_SmokeXlsxBasic()
|
||||
{
|
||||
// Smoke test: XLSX with basic spreadsheet data including tables
|
||||
var result = await KreuzbergLib.ExtractFileAsync("xlsx/stanley_cups.xlsx", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", new ExtractionConfig());
|
||||
Assert.Equal("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", result.MimeType!.Trim());
|
||||
Assert.True(result.Content.Length >= 100, "expected length >= 100");
|
||||
Assert.Contains("team", result.Content.ToString().ToLower());
|
||||
Assert.Contains("location", result.Content.ToString().ToLower());
|
||||
Assert.Contains("stanley cups", result.Content.ToString().ToLower());
|
||||
Assert.Contains("blues", result.Content.ToString().ToLower());
|
||||
Assert.Contains("flyers", result.Content.ToString().ToLower());
|
||||
Assert.Contains("maple leafs", result.Content.ToString().ToLower());
|
||||
Assert.Contains("stl", result.Content.ToString().ToLower());
|
||||
Assert.Contains("phi", result.Content.ToString().ToLower());
|
||||
Assert.Contains("tor", result.Content.ToString().ToLower());
|
||||
// skipped: field 'tables' not available on result type // skipped: field 'metadata.format.excel.sheet_count' not available on result type // skipped: field 'metadata.format.excel.sheet_names' not available on result type
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
43
e2e/csharp/tests/ValidatorManagementTests.cs
generated
Normal file
43
e2e/csharp/tests/ValidatorManagementTests.cs
generated
Normal file
@@ -0,0 +1,43 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Net.Http;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
using System.Threading.Tasks;
|
||||
using Xunit;
|
||||
using Kreuzberg;
|
||||
using static Kreuzberg.KreuzbergLib;
|
||||
|
||||
namespace Kreuzberg
|
||||
{
|
||||
/// <summary>
/// E2e tests for category: validator_management.
/// </summary>
/// <remarks>
/// Exercises the static validator-management entry points exposed on <c>KreuzbergLib</c>
/// (<c>ClearValidators</c> and <c>ListValidators</c>).
/// </remarks>
public class ValidatorManagementTests
{
    // Serializer settings: enums are written as snake_case strings and default-valued
    // members are omitted. No test in this class currently reads this field.
    private static readonly JsonSerializerOptions ConfigOptions = new() { Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) }, DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault };

    /// <summary>
    /// Clears all validators and verifies that the validator list is empty afterwards.
    /// </summary>
    [Fact]
    public void Test_ValidatorsClear()
    {
        // Clear all validators and verify list is empty
        KreuzbergLib.ClearValidators();

        // The scenario description promises a verification step; without it the test
        // could only fail if ClearValidators() threw.
        Assert.Empty(KreuzbergLib.ListValidators());
    }

    /// <summary>
    /// Lists all registered validators and checks that a list object is returned.
    /// </summary>
    [Fact]
    public void Test_ValidatorsList()
    {
        // List all registered validators
        var result = KreuzbergLib.ListValidators();

        // An empty list is acceptable here; only a missing result is a failure.
        Assert.NotNull(result);
    }
}
|
||||
}
|
||||
Reference in New Issue
Block a user