Nomad changes
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s

This commit is contained in:
Henrik Jess Nielsen
2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions

58
e2e/csharp/tests/AsyncTests.cs generated Normal file
View File

@@ -0,0 +1,58 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Threading.Tasks;
using Xunit;
using Kreuzberg;
using static Kreuzberg.KreuzbergLib;
namespace Kreuzberg
{
/// <summary>End-to-end tests for the <c>async</c> fixture category.</summary>
public class AsyncTests
{
    // Serializer options emitted for every generated test class: enums are written as
    // snake_case strings and default-valued members are omitted on write.
    private static readonly JsonSerializerOptions ConfigOptions = new()
    {
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
    };

    /// <summary>Async extract_bytes call on a PDF document.</summary>
    [Fact]
    public async Task Test_AsyncExtractBytes()
    {
        byte[] pdfBytes = System.IO.File.ReadAllBytes("pdf/fake_memo.pdf");

        var extraction = await KreuzbergLib.ExtractBytesAsync(pdfBytes, "application/pdf", null);

        Assert.Equal("application/pdf", extraction.MimeType!.Trim());
        Assert.True(extraction.Content.Length >= 50, "expected length >= 50");
    }

    /// <summary>extract_bytes with an empty MIME string must raise a <c>KreuzbergException</c>.</summary>
    [Fact]
    public async Task Test_AsyncExtractBytesEmptyMime()
    {
        // The fixture read stays inside the delegate so that any failure is observed by the assertion.
        Func<Task> extractWithEmptyMime = async () =>
            await KreuzbergLib.ExtractBytesAsync(System.IO.File.ReadAllBytes("text/plain.txt"), "", ExtractionConfig.FromJson("{}"));

        await Assert.ThrowsAnyAsync<KreuzbergException>(extractWithEmptyMime);
    }

    /// <summary>extract_bytes with an unsupported MIME type must raise a <c>KreuzbergException</c>.</summary>
    [Fact]
    public async Task Test_AsyncExtractBytesInvalidMime()
    {
        // The fixture read stays inside the delegate so that any failure is observed by the assertion.
        Func<Task> extractWithUnknownMime = async () =>
            await KreuzbergLib.ExtractBytesAsync(System.IO.File.ReadAllBytes("text/plain.txt"), "application/x-nonexistent", ExtractionConfig.FromJson("{}"));

        await Assert.ThrowsAnyAsync<KreuzbergException>(extractWithUnknownMime);
    }
}
}

110
e2e/csharp/tests/BatchTests.cs generated Normal file
View File

@@ -0,0 +1,110 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Threading.Tasks;
using Xunit;
using Kreuzberg;
using static Kreuzberg.KreuzbergLib;
namespace Kreuzberg
{
/// <summary>End-to-end tests for the <c>batch</c> fixture category.</summary>
public class BatchTests
{
    // Serializer options emitted for every generated test class: enums are written as
    // snake_case strings and default-valued members are omitted on write.
    private static readonly JsonSerializerOptions ConfigOptions = new()
    {
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
    };

    /// <summary>batch_extract_bytes_sync with an invalid MIME type; only checks that the call returns.</summary>
    [Fact]
    public void Test_BatchBytesInvalidMime()
    {
        var items = new List<BatchBytesItem>
        {
            new BatchBytesItem { Content = System.Text.Encoding.ASCII.GetBytes("Hello"), MimeType = "application/x-nonexistent" },
        };

        _ = KreuzbergLib.BatchExtractBytesSync(items, null);
    }

    /// <summary>batch_extract_bytes happy path with one plain-text and one HTML payload.</summary>
    [Fact]
    public async Task Test_BatchExtractBytesHappy()
    {
        var items = new List<BatchBytesItem>
        {
            new BatchBytesItem { Content = System.Text.Encoding.ASCII.GetBytes("Hello, world!"), MimeType = "text/plain" },
            new BatchBytesItem { Content = System.Text.Encoding.ASCII.GetBytes("<html><body>Test</body></html>"), MimeType = "text/html" },
        };

        var extractions = await KreuzbergLib.BatchExtractBytesAsync(items, null);

        Assert.True(extractions.Count >= 1, "expected at least 1 elements");
    }

    /// <summary>batch_extract_bytes with an unsupported MIME type; only checks that the call returns.</summary>
    [Fact]
    public async Task Test_BatchExtractBytesMixedFormat()
    {
        var items = new List<BatchBytesItem>
        {
            new BatchBytesItem { Content = System.Text.Encoding.ASCII.GetBytes("PDF placeholder"), MimeType = "application/x-unknown" },
        };

        _ = await KreuzbergLib.BatchExtractBytesAsync(items, null);
    }

    /// <summary>batch_extract_bytes_sync with an empty batch yields an empty result.</summary>
    [Fact]
    public void Test_BatchExtractBytesSyncEmptyList()
    {
        var noItems = new List<BatchBytesItem>();

        var extractions = KreuzbergLib.BatchExtractBytesSync(noItems, null);

        Assert.Equal(0, extractions.Count);
    }

    /// <summary>batch_extract_bytes_sync with an unsupported MIME type; only checks that the call returns.</summary>
    [Fact]
    public void Test_BatchExtractBytesSyncInvalidMime()
    {
        var items = new List<BatchBytesItem>
        {
            new BatchBytesItem { Content = System.Text.Encoding.ASCII.GetBytes("data"), MimeType = "application/x-unknown" },
        };

        _ = KreuzbergLib.BatchExtractBytesSync(items, null);
    }

    /// <summary>Extracts text from multiple files asynchronously; only checks that the call returns.</summary>
    [Fact]
    public async Task Test_BatchFileAsyncBasic()
    {
        var files = new List<BatchFileItem>
        {
            new BatchFileItem { Path = "pdf/fake_memo.pdf" },
            new BatchFileItem { Path = "text/fake_text.txt" },
        };

        _ = await KreuzbergLib.BatchExtractFilesAsync(files, null);
    }

    /// <summary>Async batch file extraction over a nonexistent path; only checks that the call returns.</summary>
    [Fact]
    public async Task Test_BatchFileAsyncNotFound()
    {
        var files = new List<BatchFileItem>
        {
            new BatchFileItem { Path = "/nonexistent/a.pdf" },
        };

        _ = await KreuzbergLib.BatchExtractFilesAsync(files, null);
    }

    /// <summary>Sync batch file extraction over nonexistent paths; only checks that the call returns.</summary>
    [Fact]
    public void Test_BatchFileNotFound()
    {
        var files = new List<BatchFileItem>
        {
            new BatchFileItem { Path = "/nonexistent/a.pdf" },
            new BatchFileItem { Path = "/nonexistent/b.txt" },
        };

        _ = KreuzbergLib.BatchExtractFilesSync(files, null);
    }

    /// <summary>Sync batch file extraction mixing an existing fixture with a missing path; only checks that the call returns.</summary>
    [Fact]
    public void Test_BatchFilePartial()
    {
        var files = new List<BatchFileItem>
        {
            new BatchFileItem { Path = "text/plain.txt" },
            new BatchFileItem { Path = "/nonexistent/missing.pdf" },
        };

        _ = KreuzbergLib.BatchExtractFilesSync(files, null);
    }

    /// <summary>Extracts text from multiple files synchronously; only checks that the call returns.</summary>
    [Fact]
    public void Test_BatchFileSyncBasic()
    {
        var files = new List<BatchFileItem>
        {
            new BatchFileItem { Path = "pdf/fake_memo.pdf" },
            new BatchFileItem { Path = "text/fake_text.txt" },
        };

        _ = KreuzbergLib.BatchExtractFilesSync(files, null);
    }
}
}

40
e2e/csharp/tests/CodeTests.cs generated Normal file
View File

@@ -0,0 +1,40 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Threading.Tasks;
using Xunit;
using Kreuzberg;
using static Kreuzberg.KreuzbergLib;
namespace Kreuzberg
{
/// <summary>E2e tests for category: code.</summary>
public class CodeTests
{
    // Serializer options emitted for every generated test class: enums are written as
    // snake_case strings and default-valued members are omitted on write.
    private static readonly JsonSerializerOptions ConfigOptions = new() { Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) }, DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault };

    /// <summary>
    /// Extracts the shell-script fixture through the file API with an explicit
    /// <c>text/x-source-code</c> MIME type and checks the reported MIME type and content.
    /// </summary>
    [Fact]
    public void Test_CodeShebangDetection()
    {
        // Test language detection from shebang line (the fixture is passed by path, not as bytes)
        var result = KreuzbergLib.ExtractFileSync("code/script.sh", "text/x-source-code", null);
        Assert.Equal("text/x-source-code", result.MimeType!.Trim());
        Assert.True(result.Content.Length >= 10, "expected length >= 10");

        // Lower-case with the invariant culture: a culture-sensitive ToLower() maps 'I' to a
        // dotless 'ı' under Turkish locales, which would make the "build" check locale-dependent.
        var loweredContent = result.Content.ToString().ToLowerInvariant();
        Assert.Contains("build", loweredContent);
        Assert.Contains("clean", loweredContent);
    }
}
}

203
e2e/csharp/tests/ContractTests.cs generated Normal file
View File

@@ -0,0 +1,203 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Threading.Tasks;
using Xunit;
using Kreuzberg;
using static Kreuzberg.KreuzbergLib;
namespace Kreuzberg
{
/// <summary>E2e tests for category: contract.</summary>
public class ContractTests
{
    // Serializer options emitted for every generated test class: enums are written as
    // snake_case strings and default-valued members are omitted on write.
    private static readonly JsonSerializerOptions ConfigOptions = new() { Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) }, DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault };

    /// <summary>Extracts the PDF memo fixture asynchronously and checks MIME type and content.</summary>
    [Fact]
    public async Task Test_ApiBatchBytesAsync()
    {
        // Tests async batch bytes extraction API (batch_extract_bytes)
        // NOTE(review): the call below is the single-file ExtractFileAsync, not a batch/bytes API — confirm the fixture mapping.
        var result = await KreuzbergLib.ExtractFileAsync("pdf/fake_memo.pdf", null, null);
        Assert.Equal("application/pdf", result.MimeType!.Trim());
        Assert.True(result.Content.Length >= 10, "expected length >= 10");
        var content = result.Content.ToString();
        Assert.True(content.Contains("May 5, 2023") || content.Contains("Mallori"), "expected to contain at least one of the specified values");
    }

    /// <summary>Extracts the PDF memo fixture asynchronously with Markdown output and checks MIME type and content length.</summary>
    [Fact]
    public async Task Test_ApiBatchBytesWithConfigsAsync()
    {
        // Tests async batch bytes extraction with per-file configs (batch_extract_bytes with file_configs parameter)
        // NOTE(review): the call below is the single-file ExtractFileAsync, not a batch/bytes API — confirm the fixture mapping.
        var config = new ExtractionConfig { OutputFormat = OutputFormat.Markdown };
        var result = await KreuzbergLib.ExtractFileAsync("pdf/fake_memo.pdf", null, config);
        Assert.Equal("application/pdf", result.MimeType!.Trim());
        Assert.True(result.Content.Length >= 10, "expected length >= 10");
        // skipped: field 'metadata.output_format' not available on result type
    }

    /// <summary>Extracts the PDF memo fixture asynchronously and checks MIME type and content.</summary>
    [Fact]
    public async Task Test_ApiBatchFileAsync()
    {
        // Tests async batch file extraction API (batch_extract_file)
        // NOTE(review): the call below is the single-file ExtractFileAsync, not a batch API — confirm the fixture mapping.
        var result = await KreuzbergLib.ExtractFileAsync("pdf/fake_memo.pdf", null, null);
        Assert.Equal("application/pdf", result.MimeType!.Trim());
        Assert.True(result.Content.Length >= 10, "expected length >= 10");
        var content = result.Content.ToString();
        Assert.True(content.Contains("May 5, 2023") || content.Contains("Mallori"), "expected to contain at least one of the specified values");
    }

    /// <summary>Extracts the PDF memo fixture asynchronously with Markdown output and checks MIME type and content length.</summary>
    [Fact]
    public async Task Test_ApiBatchFileWithConfigsAsync()
    {
        // Tests async batch file extraction with per-file configs (batch_extract_files with file_configs parameter)
        // NOTE(review): the call below is the single-file ExtractFileAsync, not a batch API — confirm the fixture mapping.
        var config = new ExtractionConfig { OutputFormat = OutputFormat.Markdown };
        var result = await KreuzbergLib.ExtractFileAsync("pdf/fake_memo.pdf", null, config);
        Assert.Equal("application/pdf", result.MimeType!.Trim());
        Assert.True(result.Content.Length >= 10, "expected length >= 10");
        // skipped: field 'metadata.output_format' not available on result type
    }

    /// <summary>Extracts the PDF memo fixture asynchronously and checks MIME type and content.</summary>
    [Fact]
    public async Task Test_ApiExtractBytesAsync()
    {
        // Tests async bytes extraction API (extract_bytes)
        // NOTE(review): the call below is the path-based ExtractFileAsync, not a bytes API — confirm the fixture mapping.
        var result = await KreuzbergLib.ExtractFileAsync("pdf/fake_memo.pdf", null, null);
        Assert.Equal("application/pdf", result.MimeType!.Trim());
        Assert.True(result.Content.Length >= 10, "expected length >= 10");
        var content = result.Content.ToString();
        Assert.True(content.Contains("May 5, 2023") || content.Contains("Mallori"), "expected to contain at least one of the specified values");
    }

    /// <summary>Extracts the PDF memo fixture asynchronously and checks MIME type and content.</summary>
    [Fact]
    public async Task Test_ApiExtractFileAsync()
    {
        // Tests async file extraction API (extract_file)
        var result = await KreuzbergLib.ExtractFileAsync("pdf/fake_memo.pdf", null, null);
        Assert.Equal("application/pdf", result.MimeType!.Trim());
        Assert.True(result.Content.Length >= 10, "expected length >= 10");
        var content = result.Content.ToString();
        Assert.True(content.Contains("May 5, 2023") || content.Contains("Mallori"), "expected to contain at least one of the specified values");
    }

    /// <summary>Extracts a Markdown fixture with the markdown chunker and checks each chunk's content and heading context.</summary>
    [Fact]
    public void Test_ConfigChunkingPrependHeadingContext()
    {
        // Tests markdown chunker prepends heading hierarchy to chunk content
        var config = ExtractionConfig.FromJson("{\"chunking\":{\"chunker_type\":\"markdown\",\"max_chars\":300,\"max_overlap\":50,\"prepend_heading_context\":true}}");
        var result = KreuzbergLib.ExtractFileSync("markdown/extraction_test.md", null, config);
        Assert.True(result.Content.Length >= 10, "expected length >= 10");
        // skipped: field 'chunks' not available on result type
        // The content assertion below used to sit on the same line as the skip comment above
        // (missing line break), so it was commented out and never executed.
        var chunks = result.Chunks ?? new();
        Assert.True(chunks.All(c => !string.IsNullOrEmpty(c.Content)));
        Assert.True(chunks.All(c => c.Metadata?.HeadingContext != null));
        Assert.True(chunks.FirstOrDefault()?.Metadata?.HeadingContext != null);
    }

    /// <summary>Extracts a DOCX fixture with document structure enabled and checks the reported MIME type.</summary>
    [Fact]
    public void Test_ConfigDocumentStructureWithHeadings()
    {
        // Tests document structure with DOCX heading-driven nesting
        var config = ExtractionConfig.FromJson("{\"include_document_structure\":true}");
        var result = KreuzbergLib.ExtractFileSync("docx/fake.docx", null, config);
        Assert.Equal("application/vnd.openxmlformats-officedocument.wordprocessingml.document", result.MimeType!.Trim());
        // skipped: field 'document' not available on result type
        // skipped: field 'document.nodes' not available on result type
    }

    /// <summary>Extracts a DOCX fixture with the element-based result format and checks the reported MIME type.</summary>
    [Fact]
    public void Test_ConfigElementTypes()
    {
        // Tests element-based result format with element type assertions on DOCX
        var config = ExtractionConfig.FromJson("{\"result_format\":\"element_based\"}");
        var result = KreuzbergLib.ExtractFileSync("docx/unit_test_headers.docx", null, config);
        Assert.True(result.MimeType.ToString().Contains("application/vnd.openxmlformats-officedocument.wordprocessingml.document"), "expected to contain at least one of the specified values");
        // skipped: field 'elements' not available on result type
    }

    /// <summary>Extracts the PDF memo fixture with an extraction timeout configured and checks MIME type and content length.</summary>
    [Fact]
    public void Test_ConfigExtractionTimeout()
    {
        // Tests that extraction_timeout_secs config field is accepted and does not affect fast extractions
        var config = ExtractionConfig.FromJson("{\"extraction_timeout_secs\":300}");
        var result = KreuzbergLib.ExtractFileSync("pdf/fake_memo.pdf", null, config);
        Assert.Equal("application/pdf", result.MimeType!.Trim());
        Assert.True(result.Content.Length >= 10, "expected length >= 10");
    }

    /// <summary>Extracts the PDF memo fixture with YAKE keyword extraction configured and checks MIME type and content length.</summary>
    [Fact]
    public void Test_ConfigKeywords()
    {
        // Tests keyword extraction via YAKE algorithm
        var config = ExtractionConfig.FromJson("{\"keywords\":{\"algorithm\":\"yake\",\"max_keywords\":10}}");
        var result = KreuzbergLib.ExtractFileSync("pdf/fake_memo.pdf", null, config);
        Assert.Equal("application/pdf", result.MimeType!.Trim());
        Assert.True(result.Content.Length >= 10, "expected length >= 10");
        // skipped: field 'keywords' not available on C# ExtractionResult
        // skipped: field 'keywords' not available on C# ExtractionResult
    }

    /// <summary>Extracts the PDF memo fixture with page markers enabled and checks that the content contains "PAGE".</summary>
    [Fact]
    public void Test_ConfigPages()
    {
        // Tests page extraction and page marker configuration
        var config = ExtractionConfig.FromJson("{\"pages\":{\"extract_pages\":true,\"insert_page_markers\":true}}");
        var result = KreuzbergLib.ExtractFileSync("pdf/fake_memo.pdf", null, config);
        Assert.Equal("application/pdf", result.MimeType!.Trim());
        Assert.True(result.Content.Length >= 10, "expected length >= 10");
        Assert.True(result.Content.ToString().Contains("PAGE"), "expected to contain at least one of the specified values");
    }

    /// <summary>Extracts the PDF memo fixture with quality processing enabled and checks MIME type and content length.</summary>
    [Fact]
    public void Test_ConfigQualityEnabled()
    {
        // Tests quality scoring produces a score value in [0.0, 1.0]
        // NOTE(review): the score itself is not asserted here — all quality_score checks are skipped below.
        var config = ExtractionConfig.FromJson("{\"enable_quality_processing\":true}");
        var result = KreuzbergLib.ExtractFileSync("pdf/fake_memo.pdf", null, config);
        Assert.Equal("application/pdf", result.MimeType!.Trim());
        Assert.True(result.Content.Length >= 10, "expected length >= 10");
        // skipped: field 'quality_score' not available on result type
        // skipped: field 'quality_score' not available on result type
        // skipped: field 'quality_score' not available on result type
    }

    /// <summary>Extracts a ZIP fixture with custom security limits and checks MIME type and content length.</summary>
    [Fact]
    public void Test_ConfigSecurityLimits()
    {
        // Tests archive extraction with custom security limits
        var config = ExtractionConfig.FromJson("{\"security_limits\":{\"max_archive_size\":104857600,\"max_compression_ratio\":50,\"max_files_in_archive\":100}}");
        var result = KreuzbergLib.ExtractFileSync("archives/documents.zip", null, config);
        Assert.True(result.MimeType.ToString().Contains("application/zip") || result.MimeType.ToString().Contains("application/x-zip-compressed"), "expected to contain at least one of the specified values");
        Assert.True(result.Content.Length >= 10, "expected length >= 10");
    }

    /// <summary>Extracts a Python source fixture with a tree-sitter configuration and checks MIME type and content length.</summary>
    [Fact]
    public void Test_ConfigTreeSitter()
    {
        // Tests tree-sitter configuration round-trip
        var config = ExtractionConfig.FromJson("{\"tree_sitter\":{\"groups\":[\"web\"],\"languages\":[\"python\",\"rust\"],\"process\":{\"comments\":false,\"diagnostics\":false,\"docstrings\":false,\"exports\":true,\"imports\":true,\"structure\":true,\"symbols\":false}}}");
        var result = KreuzbergLib.ExtractFileSync("code/hello.py", null, config);
        Assert.Equal("text/x-source-code", result.MimeType!.Trim());
        Assert.True(result.Content.Length >= 5, "expected length >= 5");
    }

    /// <summary>Extracts the PDF memo fixture from bytes with Markdown output and checks MIME type and content length.</summary>
    [Fact]
    public void Test_OutputFormatBytesMarkdown()
    {
        // Tests markdown output format via bytes extraction API
        var config = new ExtractionConfig { OutputFormat = OutputFormat.Markdown };
        var result = KreuzbergLib.ExtractBytesSync(System.IO.File.ReadAllBytes("pdf/fake_memo.pdf"), "application/pdf", config);
        Assert.Equal("application/pdf", result.MimeType!.Trim());
        Assert.True(result.Content.Length >= 10, "expected length >= 10");
        // skipped: field 'metadata.output_format' not available on result type
    }

    /// <summary>Extracts the PDF memo fixture with a JSON-configured Markdown output format and checks MIME type and content length.</summary>
    [Fact]
    public void Test_OutputFormatMarkdown()
    {
        // Tests Markdown output format
        var config = ExtractionConfig.FromJson("{\"output_format\":\"markdown\"}");
        var result = KreuzbergLib.ExtractFileSync("pdf/fake_memo.pdf", null, config);
        Assert.Equal("application/pdf", result.MimeType!.Trim());
        Assert.True(result.Content.Length >= 10, "expected length >= 10");
        // skipped: field 'metadata.output_format' not available on result type
    }
}
}

62
e2e/csharp/tests/DetectionTests.cs generated Normal file
View File

@@ -0,0 +1,62 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Threading.Tasks;
using Xunit;
using Kreuzberg;
using static Kreuzberg.KreuzbergLib;
namespace Kreuzberg
{
/// <summary>End-to-end tests for the <c>detection</c> fixture category.</summary>
public class DetectionTests
{
    // Serializer options emitted for every generated test class: enums are written as
    // snake_case strings and default-valued members are omitted on write.
    private static readonly JsonSerializerOptions ConfigOptions = new()
    {
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
    };

    /// <summary>Runs MIME detection over the HTML fixture bytes; only checks that the call returns.</summary>
    [Fact]
    public void Test_DetectMimeBytesHtml()
    {
        byte[] htmlBytes = System.IO.File.ReadAllBytes("html/html.html");

        _ = KreuzbergLib.DetectMimeTypeFromBytes(htmlBytes);
    }

    /// <summary>Runs MIME detection over the PDF fixture bytes; only checks that the call returns.</summary>
    [Fact]
    public void Test_DetectMimeBytesPdf()
    {
        byte[] pdfBytes = System.IO.File.ReadAllBytes("pdf/fake_memo.pdf");

        _ = KreuzbergLib.DetectMimeTypeFromBytes(pdfBytes);
    }

    /// <summary>Runs MIME detection over the PNG fixture bytes; only checks that the call returns.</summary>
    [Fact]
    public void Test_DetectMimeBytesPng()
    {
        byte[] pngBytes = System.IO.File.ReadAllBytes("images/test_hello_world.png");

        _ = KreuzbergLib.DetectMimeTypeFromBytes(pngBytes);
    }

    /// <summary>Looking up extensions for an unknown MIME type must raise a <c>KreuzbergException</c>.</summary>
    [Fact]
    public void Test_GetExtensionsUnknownMime()
    {
        Action lookupUnknownMime = () => KreuzbergLib.GetExtensionsForMime("application/x-totally-unknown");

        Assert.ThrowsAny<KreuzbergException>(lookupUnknownMime);
    }
}
}

View File

@@ -0,0 +1,43 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Threading.Tasks;
using Xunit;
using Kreuzberg;
using static Kreuzberg.KreuzbergLib;
namespace Kreuzberg
{
/// <summary>End-to-end tests for the <c>document_extractor_management</c> fixture category.</summary>
public class DocumentExtractorManagementTests
{
    // Serializer options emitted for every generated test class: enums are written as
    // snake_case strings and default-valued members are omitted on write.
    private static readonly JsonSerializerOptions ConfigOptions = new()
    {
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
    };

    /// <summary>Clears all document extractors; passes when the call does not throw (nothing is asserted afterwards).</summary>
    [Fact]
    public void Test_DocumentExtractorsClear() => KreuzbergLib.ClearDocumentExtractors();

    /// <summary>Lists all registered document extractors; passes when the call does not throw.</summary>
    [Fact]
    public void Test_ExtractorsList()
    {
        _ = KreuzbergLib.ListDocumentExtractors();
    }
}
}

View File

@@ -0,0 +1,54 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Threading.Tasks;
using Xunit;
using Kreuzberg;
using static Kreuzberg.KreuzbergLib;
namespace Kreuzberg
{
/// <summary>End-to-end tests for the <c>embed_async_pending</c> fixture category.</summary>
public class EmbedAsyncPendingTests
{
    // Serializer options used below to materialise fixture values from their JSON form:
    // enums are snake_case strings and default-valued members are omitted on write.
    private static readonly JsonSerializerOptions ConfigOptions = new()
    {
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
    };

    /// <summary>embed_texts_async with an empty text list yields an empty result.</summary>
    [Fact]
    public async Task Test_EmbedTextsAsyncEmptyInput()
    {
        var noTexts = new List<string>();

        var embeddings = await KreuzbergLib.EmbedTextsAsync(noTexts, null);

        Assert.True(embeddings.Count == 0);
    }

    /// <summary>embed_texts_async with two texts yields at least two results.</summary>
    [Fact]
    public async Task Test_EmbedTextsAsyncHappy()
    {
        string first = JsonSerializer.Deserialize<string>("\"First\"", ConfigOptions)!;
        string second = JsonSerializer.Deserialize<string>("\"Second\"", ConfigOptions)!;
        var texts = new List<string> { first, second };

        var embeddings = await KreuzbergLib.EmbedTextsAsync(texts, null);

        Assert.True(embeddings.Count >= 2);
    }

    /// <summary>embed_texts_async with an explicit "balanced" preset model; only checks that the call returns.</summary>
    [Fact]
    public async Task Test_EmbedTextsAsyncPresetSwitch()
    {
        string text = JsonSerializer.Deserialize<string>("\"Text\"", ConfigOptions)!;
        var texts = new List<string> { text };
        var config = new EmbeddingConfig
        {
            Model = JsonSerializer.Deserialize<EmbeddingModelType>("{\"name\":\"balanced\",\"type\":\"preset\"}", ConfigOptions)!,
        };

        _ = await KreuzbergLib.EmbedTextsAsync(texts, config);
    }
}
}

36
e2e/csharp/tests/EmbedExtraTests.cs generated Normal file
View File

@@ -0,0 +1,36 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Threading.Tasks;
using Xunit;
using Kreuzberg;
using static Kreuzberg.KreuzbergLib;
namespace Kreuzberg
{
/// <summary>End-to-end tests for the <c>embed_extra</c> fixture category.</summary>
public class EmbedExtraTests
{
    // Serializer options used below to materialise fixture values from their JSON form:
    // enums are snake_case strings and default-valued members are omitted on write.
    private static readonly JsonSerializerOptions ConfigOptions = new()
    {
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
    };

    /// <summary>Embeds two texts with the "balanced" preset model; only checks that the call returns.</summary>
    [Fact]
    public void Test_EmbedTextsBatch()
    {
        string hello = JsonSerializer.Deserialize<string>("\"Hello\"", ConfigOptions)!;
        string world = JsonSerializer.Deserialize<string>("\"World\"", ConfigOptions)!;
        var texts = new List<string> { hello, world };
        var config = new EmbeddingConfig
        {
            Model = JsonSerializer.Deserialize<EmbeddingModelType>("{\"name\":\"balanced\",\"type\":\"preset\"}", ConfigOptions)!,
        };

        _ = KreuzbergLib.EmbedTexts(texts, config);
    }
}
}

View File

@@ -0,0 +1,43 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Threading.Tasks;
using Xunit;
using Kreuzberg;
using static Kreuzberg.KreuzbergLib;
namespace Kreuzberg
{
/// <summary>End-to-end tests for the <c>embedding_backend_management</c> fixture category.</summary>
public class EmbeddingBackendManagementTests
{
    // Serializer options emitted for every generated test class: enums are written as
    // snake_case strings and default-valued members are omitted on write.
    private static readonly JsonSerializerOptions ConfigOptions = new()
    {
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
    };

    /// <summary>Clears all embedding backends; passes when the call does not throw (nothing is asserted afterwards).</summary>
    [Fact]
    public void Test_EmbeddingBackendsClear() => KreuzbergLib.ClearEmbeddingBackends();

    /// <summary>Lists all registered embedding backends; passes when the call does not throw.</summary>
    [Fact]
    public void Test_EmbeddingBackendsList()
    {
        _ = KreuzbergLib.ListEmbeddingBackends();
    }
}
}

71
e2e/csharp/tests/EmbeddingsTests.cs generated Normal file
View File

@@ -0,0 +1,71 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Threading.Tasks;
using Xunit;
using Kreuzberg;
using static Kreuzberg.KreuzbergLib;
namespace Kreuzberg
{
/// <summary>End-to-end tests for the <c>embeddings</c> fixture category.</summary>
public class EmbeddingsTests
{
    // Serializer options used below to materialise fixture values from their JSON form:
    // enums are snake_case strings and default-valued members are omitted on write.
    private static readonly JsonSerializerOptions ConfigOptions = new()
    {
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
    };

    /// <summary>embed_texts with the "multilingual" preset yields at least two results for two inputs.</summary>
    [Fact]
    public void Test_EmbedTextsDifferentPreset()
    {
        string helloWorld = JsonSerializer.Deserialize<string>("\"Hello world\"", ConfigOptions)!;
        string test = JsonSerializer.Deserialize<string>("\"Test\"", ConfigOptions)!;
        var texts = new List<string> { helloWorld, test };
        var config = new EmbeddingConfig
        {
            Model = JsonSerializer.Deserialize<EmbeddingModelType>("{\"name\":\"multilingual\",\"type\":\"preset\"}", ConfigOptions)!,
        };

        var embeddings = KreuzbergLib.EmbedTexts(texts, config);

        Assert.True(embeddings.Count >= 2);
    }

    /// <summary>get_embedding_preset for a known preset; only checks that the call returns.</summary>
    [Fact]
    public void Test_GetEmbeddingPresetKnown()
    {
        _ = KreuzbergLib.GetEmbeddingPreset("balanced");
    }

    /// <summary>get_embedding_preset nominal case; only checks that the call returns.</summary>
    [Fact]
    public void Test_GetEmbeddingPresetNominal()
    {
        _ = KreuzbergLib.GetEmbeddingPreset("balanced");
    }

    /// <summary>get_embedding_preset for an unknown name returns a value that is null or stringifies to empty.</summary>
    [Fact]
    public void Test_GetEmbeddingPresetUnknown()
    {
        var preset = KreuzbergLib.GetEmbeddingPreset("nonexistent-xyz");

        var presetText = preset?.ToString();
        Assert.True(string.IsNullOrEmpty(presetText));
    }

    /// <summary>list_embedding_presets returns at least one preset.</summary>
    [Fact]
    public void Test_ListEmbeddingPresetsSanity()
    {
        var presets = KreuzbergLib.ListEmbeddingPresets();

        Assert.NotEmpty(presets);
    }
}
}

76
e2e/csharp/tests/ErrorTests.cs generated Normal file
View File

@@ -0,0 +1,76 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Threading.Tasks;
using Xunit;
using Kreuzberg;
using static Kreuzberg.KreuzbergLib;
namespace Kreuzberg
{
/// <summary>End-to-end tests for the <c>error</c> fixture category.</summary>
public class ErrorTests
{
    // Serializer options emitted for every generated test class: enums are written as
    // snake_case strings and default-valued members are omitted on write.
    private static readonly JsonSerializerOptions ConfigOptions = new()
    {
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
    };

    /// <summary>Extracting the empty text fixture must not throw.</summary>
    [Fact]
    public void Test_ErrorEmptyBytes()
    {
        byte[] emptyBytes = System.IO.File.ReadAllBytes("text/empty.txt");

        _ = KreuzbergLib.ExtractBytesSync(emptyBytes, "text/plain", new ExtractionConfig());
    }

    /// <summary>Extracting with an empty MIME type must raise a <c>KreuzbergException</c>.</summary>
    [Fact]
    public void Test_ErrorEmptyMime()
    {
        // The fixture read stays inside the delegate so that any failure is observed by the assertion.
        Action extractWithEmptyMime = () =>
            KreuzbergLib.ExtractBytesSync(System.IO.File.ReadAllBytes("text/plain.txt"), "", new ExtractionConfig());

        Assert.ThrowsAny<KreuzbergException>(extractWithEmptyMime);
    }

    /// <summary>Setting both DisableOcr and ForceOcr must raise a <c>KreuzbergException</c>.</summary>
    [Fact]
    public void Test_ErrorExtractBytesConflictingOcr()
    {
        // The fixture read stays inside the delegate so that any failure is observed by the assertion.
        Action extractWithConflictingOcr = () =>
            KreuzbergLib.ExtractBytesSync(System.IO.File.ReadAllBytes("text/fake_text.txt"), "text/plain", new ExtractionConfig { DisableOcr = true, ForceOcr = true });

        Assert.ThrowsAny<KreuzbergException>(extractWithConflictingOcr);
    }

    /// <summary>Extracting with a malformed MIME type must raise a <c>KreuzbergException</c>.</summary>
    [Fact]
    public void Test_ErrorInvalidMimeFormat()
    {
        // The fixture read stays inside the delegate so that any failure is observed by the assertion.
        Action extractWithMalformedMime = () =>
            KreuzbergLib.ExtractBytesSync(System.IO.File.ReadAllBytes("text/plain.txt"), "not-a-mime", new ExtractionConfig());

        Assert.ThrowsAny<KreuzbergException>(extractWithMalformedMime);
    }

    /// <summary>Extracting with an unsupported MIME type must raise a <c>KreuzbergException</c>.</summary>
    [Fact]
    public void Test_ErrorUnsupportedMime()
    {
        // The fixture read stays inside the delegate so that any failure is observed by the assertion.
        Action extractWithUnsupportedMime = () =>
            KreuzbergLib.ExtractBytesSync(System.IO.File.ReadAllBytes("text/plain.txt"), "application/x-nonexistent", new ExtractionConfig());

        Assert.ThrowsAny<KreuzbergException>(extractWithUnsupportedMime);
    }
}
}

73
e2e/csharp/tests/FormatSpecificTests.cs generated Normal file
View File

@@ -0,0 +1,73 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Threading.Tasks;
using Xunit;
using Kreuzberg;
using static Kreuzberg.KreuzbergLib;
namespace Kreuzberg
{
/// <summary>E2e tests for category: format_specific.</summary>
public class FormatSpecificTests
{
    // Serializer options emitted for every generated test class: enums are written as
    // snake_case strings and default-valued members are omitted on write.
    private static readonly JsonSerializerOptions ConfigOptions = new() { Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) }, DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault };

    /// <summary>Extracts the DOCX fixture from bytes and checks the content length.</summary>
    [Fact]
    public void Test_FormatDocxStandalone()
    {
        // Standalone DOCX extraction using extract_bytes_sync
        var result = KreuzbergLib.ExtractBytesSync(System.IO.File.ReadAllBytes("docx/fake.docx"), "application/vnd.openxmlformats-officedocument.wordprocessingml.document", null);
        Assert.True(result.Content.Length >= 20, "expected length >= 20");
    }

    /// <summary>Extracts the HWPX fixture from bytes and checks the content length and expected text.</summary>
    [Fact]
    public void Test_FormatHwpxStandalone()
    {
        // Standalone HWPX extraction using extract_bytes_sync
        var result = KreuzbergLib.ExtractBytesSync(System.IO.File.ReadAllBytes("hwpx/simple.hwpx"), "application/haansofthwpx", null);
        Assert.True(result.Content.Length >= 20, "expected length >= 20");
        // Invariant lower-casing keeps the case-insensitive check independent of the machine's culture.
        Assert.Contains("hello from hwpx", result.Content.ToString().ToLowerInvariant());
    }

    /// <summary>Extracts the PDF memo fixture from bytes and checks the content length and expected text.</summary>
    [Fact]
    public void Test_FormatPdfText()
    {
        // Standalone PDF text extraction using extract_bytes_sync
        var result = KreuzbergLib.ExtractBytesSync(System.IO.File.ReadAllBytes("pdf/fake_memo.pdf"), "application/pdf", null);
        Assert.True(result.Content.Length >= 50, "expected length >= 50");
        var content = result.Content.ToString();
        Assert.True(content.Contains("Mallori") || content.Contains("May"), "expected to contain at least one of the specified values");
    }

    /// <summary>Extracts the PPTX fixture by path; only checks that the call returns.</summary>
    [Fact]
    public void Test_FormatPptx()
    {
        // PPTX presentation extraction using extract_file_sync
        _ = KreuzbergLib.ExtractFileSync("pptx/simple.pptx", "application/vnd.openxmlformats-officedocument.presentationml.presentation", null);
    }

    /// <summary>Extracts the XLSX fixture by path; only checks that the call returns.</summary>
    [Fact]
    public void Test_FormatXlsx()
    {
        // XLSX spreadsheet extraction using extract_file_sync
        _ = KreuzbergLib.ExtractFileSync("xlsx/stanley_cups.xlsx", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", null);
    }
}
}

55
e2e/csharp/tests/MimeUtilitiesTests.cs generated Normal file
View File

@@ -0,0 +1,55 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Threading.Tasks;
using Xunit;
using Kreuzberg;
using static Kreuzberg.KreuzbergLib;
namespace Kreuzberg
{
/// <summary>
/// E2e tests for category: mime_utilities.
/// Covers MIME detection from raw bytes and the MIME-to-extension lookup.
/// </summary>
public class MimeUtilitiesTests
{
    // Serializer settings emitted by the generator: enums are written as snake_case
    // strings and default-valued members are omitted. No test in this class references it.
    private static readonly JsonSerializerOptions ConfigOptions = new() { Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) }, DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault };

    /// <summary>Detect MIME type from file bytes.</summary>
    [Fact]
    public void Test_MimeDetectBytes()
    {
        var result = KreuzbergLib.DetectMimeTypeFromBytes(System.IO.File.ReadAllBytes("pdf/fake_memo.pdf"));

        // Ordinal, case-insensitive: avoids the culture-dependent lowercasing of ToLower().
        Assert.Contains("pdf", result.ToString(), StringComparison.OrdinalIgnoreCase);
    }

    /// <summary>Detect MIME type from PNG image bytes.</summary>
    [Fact]
    public void Test_MimeDetectImage()
    {
        var result = KreuzbergLib.DetectMimeTypeFromBytes(System.IO.File.ReadAllBytes("images/test_hello_world.png"));

        Assert.Contains("png", result.ToString(), StringComparison.OrdinalIgnoreCase);
    }

    /// <summary>Get file extensions for a MIME type.</summary>
    [Fact]
    public void Test_MimeGetExtensions()
    {
        var result = KreuzbergLib.GetExtensionsForMime("application/pdf");

        // The result is serialized with default JSON options and searched as text.
        var serialized = JsonSerializer.Serialize(result);
        Assert.Contains("pdf", serialized, StringComparison.OrdinalIgnoreCase);
    }
}
}

View File

@@ -0,0 +1,50 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Threading.Tasks;
using Xunit;
using Kreuzberg;
using static Kreuzberg.KreuzbergLib;
namespace Kreuzberg
{
/// <summary>
/// E2e tests for category: ocr_backend_management.
/// None of these tests assert on a result; each passes when the call it makes
/// completes without throwing.
/// </summary>
public class OcrBackendManagementTests
{
    // Generator-emitted serializer settings (snake_case enum strings, defaults omitted).
    // Not referenced by any test in this class.
    private static readonly JsonSerializerOptions ConfigOptions = new()
    {
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
    };

    /// <summary>Clears all OCR backends. The resulting list is not inspected.</summary>
    [Fact]
    public void Test_OcrBackendsClear() => KreuzbergLib.ClearOcrBackends();

    /// <summary>Lists all registered OCR backends and discards the result.</summary>
    [Fact]
    public void Test_OcrBackendsList() => KreuzbergLib.ListOcrBackends();

    /// <summary>Unregisters an OCR backend name that is not registered by this class.</summary>
    [Fact]
    public void Test_OcrBackendsUnregister() => KreuzbergLib.UnregisterOcrBackend("nonexistent-backend-xyz");
}
}

48
e2e/csharp/tests/PdfTests.cs generated Normal file
View File

@@ -0,0 +1,48 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Threading.Tasks;
using Xunit;
using Kreuzberg;
using static Kreuzberg.KreuzbergLib;
namespace Kreuzberg
{
/// <summary>
/// E2e tests for category: pdf.
/// Exercises render_pdf_page_to_png for a valid page index and for one that is out of range.
/// </summary>
public class PdfTests
{
    // Serializer settings emitted by the generator: enums are written as snake_case
    // strings and default-valued members are omitted. No test in this class references it.
    private static readonly JsonSerializerOptions ConfigOptions = new() { Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) }, DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault };

    /// <summary>render_pdf_page_to_png: first page.</summary>
    [Fact]
    public void Test_RenderPdfPageFirst()
    {
        var pdfBytes = System.IO.File.ReadAllBytes("pdf/fake_memo.pdf");

        var result = KreuzbergLib.RenderPdfPageToPng(pdfBytes, 0, null, null);

        Assert.NotNull(result);
        // skipped: assertion type 'min_length' not supported on byte[] result
        // The fixture's exact minimum is not available here, so only require a non-empty image.
        Assert.NotEmpty(result);
    }

    /// <summary>render_pdf_page_to_png: page out of range.</summary>
    [Fact]
    public void Test_RenderPdfPageOutOfRange()
    {
        // Read the fixture outside the asserted delegate so that only the render call
        // is expected to throw KreuzbergException.
        var pdfBytes = System.IO.File.ReadAllBytes("pdf/fake_memo.pdf");

        Assert.ThrowsAny<KreuzbergException>(() =>
        {
            KreuzbergLib.RenderPdfPageToPng(pdfBytes, 999, null, null);
        });
    }
}
}

234
e2e/csharp/tests/PluginApiTests.cs generated Normal file
View File

@@ -0,0 +1,234 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Threading.Tasks;
using Xunit;
using Kreuzberg;
using static Kreuzberg.KreuzbergLib;
namespace Kreuzberg
{
/// <summary>
/// E2e tests for category: plugin_api.
/// The register tests wrap a do-nothing stub in the matching bridge and pass it to the
/// corresponding Register* call; the unregister tests call Unregister* with a fixed name.
/// No test asserts on a result, so each passes when its call completes without throwing.
/// </summary>
public class PluginApiTests
{
    // Serializer settings emitted by the generator: enums are written as snake_case
    // strings and default-valued members are omitted. No test in this class references it.
    private static readonly JsonSerializerOptions ConfigOptions = new() { Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) }, DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault };

    [Fact]
    public void Test_RegisterDocumentExtractorTraitBridge()
    {
        // register_document_extractor: trait bridge
        KreuzbergLib.RegisterDocumentExtractor(DocumentExtractorBridge.Register(new TestStub_RegisterDocumentExtractorTraitBridge()));
    }

    [Fact]
    public void Test_RegisterEmbeddingBackendTraitBridge()
    {
        // register_embedding_backend: trait bridge
        KreuzbergLib.RegisterEmbeddingBackend(EmbeddingBackendBridge.Register(new TestStub_RegisterEmbeddingBackendTraitBridge()));
    }

    [Fact]
    public void Test_RegisterOcrBackendTraitBridge()
    {
        // register_ocr_backend: trait bridge
        KreuzbergLib.RegisterOcrBackend(OcrBackendBridge.Register(new TestStub_RegisterOcrBackendTraitBridge()));
    }

    [Fact]
    public void Test_RegisterPostProcessorTraitBridge()
    {
        // register_post_processor: trait bridge
        KreuzbergLib.RegisterPostProcessor(PostProcessorBridge.Register(new TestStub_RegisterPostProcessorTraitBridge()));
    }

    [Fact]
    public void Test_RegisterRendererTraitBridge()
    {
        // register_renderer: trait bridge
        KreuzbergLib.RegisterRenderer(RendererBridge.Register(new TestStub_RegisterRendererTraitBridge()));
    }

    [Fact]
    public void Test_RegisterValidatorTraitBridge()
    {
        // register_validator: trait bridge
        KreuzbergLib.RegisterValidator(ValidatorBridge.Register(new TestStub_RegisterValidatorTraitBridge()));
    }

    // The names passed to the Unregister* calls below differ from the Name values of the
    // stubs in this class, and no test here registers anything under those names.

    [Fact]
    public void Test_UnregisterDocumentExtractorAfterRegister()
    {
        // unregister_document_extractor
        KreuzbergLib.UnregisterDocumentExtractor("test-extractor");
    }

    [Fact]
    public void Test_UnregisterEmbeddingBackendAfterRegister()
    {
        // unregister_embedding_backend
        KreuzbergLib.UnregisterEmbeddingBackend("test-embedding-backend");
    }

    [Fact]
    public void Test_UnregisterPostProcessorAfterRegister()
    {
        // unregister_post_processor
        KreuzbergLib.UnregisterPostProcessor("test-processor");
    }

    [Fact]
    public void Test_UnregisterRendererAfterRegister()
    {
        // unregister_renderer
        KreuzbergLib.UnregisterRenderer("test-renderer");
    }

    [Fact]
    public void Test_UnregisterValidatorAfterRegister()
    {
        // unregister_validator
        KreuzbergLib.UnregisterValidator("test-validator");
    }

    /// <summary>Document extractor stub: every member returns an empty or default value.</summary>
    private sealed class TestStub_RegisterDocumentExtractorTraitBridge : IDocumentExtractor
    {
        public string Name => "register_document_extractor_trait_bridge";
        public string Version => "1.0.0";
        public string ExtractBytes(byte[] content, string mimeType, ExtractionConfig config)
            => "";
        public string ExtractFile(string path, string mimeType, ExtractionConfig config)
            => "";
        public List<string> SupportedMimeTypes()
            => [];
        public int Priority()
            => 0;
        public bool CanHandle(string path, string mimeType)
            => false;
        public void Initialize() { }
        public void Shutdown() { }
        public string Description()
            => "";
        public string Author()
            => "";
    }

    /// <summary>Embedding backend stub: zero dimensions and an empty embedding list.</summary>
    private sealed class TestStub_RegisterEmbeddingBackendTraitBridge : IEmbeddingBackend
    {
        public string Name => "register_embedding_backend_trait_bridge";
        public string Version => "1.0.0";
        public ulong Dimensions()
            => 0;
        public List<List<float>> Embed(List<string> texts)
            => [];
        public void Initialize() { }
        public void Shutdown() { }
        public string Description()
            => "";
        public string Author()
            => "";
    }

    /// <summary>OCR backend stub: returns a fresh default ExtractionResult and reports no capabilities.</summary>
    private sealed class TestStub_RegisterOcrBackendTraitBridge : IOcrBackend
    {
        public string Name => "register_ocr_backend_trait_bridge";
        public string Version => "1.0.0";
        public ExtractionResult ProcessImage(byte[] imageBytes, OcrConfig config)
            => new ExtractionResult();
        public ExtractionResult ProcessImageFile(string path, OcrConfig config)
            => new ExtractionResult();
        public bool SupportsLanguage(string lang)
            => false;
        public OcrBackendType BackendType()
            => OcrBackendType.Tesseract;
        public List<string> SupportedLanguages()
            => [];
        public bool SupportsTableDetection()
            => false;
        public bool SupportsDocumentProcessing()
            => false;
        public ExtractionResult ProcessDocument(string path, OcrConfig config)
            => new ExtractionResult();
        public void Initialize() { }
        public void Shutdown() { }
        public string Description()
            => "";
        public string Author()
            => "";
    }

    /// <summary>Post-processor stub: processing is a no-op and ShouldProcess always returns false.</summary>
    private sealed class TestStub_RegisterPostProcessorTraitBridge : IPostProcessor
    {
        public string Name => "register_post_processor_trait_bridge";
        public string Version => "1.0.0";
        public void Process(ExtractionResult result, ExtractionConfig config) { }

        // Inside this class the simple name ProcessingStage binds to this method (a method
        // group), and a method group cannot be the left-hand side of a member access
        // (CS0119). The enum is therefore referenced by its fully qualified name.
        // NOTE(review): assumes the enum is declared in the Kreuzberg namespace — confirm.
        public ProcessingStage ProcessingStage()
            => global::Kreuzberg.ProcessingStage.Early;
        public bool ShouldProcess(ExtractionResult result, ExtractionConfig config)
            => false;
        public ulong EstimatedDurationMs(ExtractionResult result)
            => 0;
        public int Priority()
            => 0;
        public void Initialize() { }
        public void Shutdown() { }
        public string Description()
            => "";
        public string Author()
            => "";
    }

    /// <summary>Renderer stub: renders every document to an empty string.</summary>
    private sealed class TestStub_RegisterRendererTraitBridge : IRenderer
    {
        public string Name => "register_renderer_trait_bridge";
        public string Version => "1.0.0";
        public string Render(string doc)
            => "";
        public void Initialize() { }
        public void Shutdown() { }
        public string Description()
            => "";
        public string Author()
            => "";
    }

    /// <summary>Validator stub: validation is a no-op and ShouldValidate always returns false.</summary>
    private sealed class TestStub_RegisterValidatorTraitBridge : IValidator
    {
        public string Name => "register_validator_trait_bridge";
        public string Version => "1.0.0";
        public void Validate(ExtractionResult result, ExtractionConfig config) { }
        public bool ShouldValidate(ExtractionResult result, ExtractionConfig config)
            => false;
        public int Priority()
            => 0;
        public void Initialize() { }
        public void Shutdown() { }
        public string Description()
            => "";
        public string Author()
            => "";
    }
}
}

View File

@@ -0,0 +1,43 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Threading.Tasks;
using Xunit;
using Kreuzberg;
using static Kreuzberg.KreuzbergLib;
namespace Kreuzberg
{
/// <summary>
/// E2e tests for category: post_processor_management.
/// Neither test asserts on a result; each passes when its call completes without throwing.
/// </summary>
public class PostProcessorManagementTests
{
    // Generator-emitted serializer settings (snake_case enum strings, defaults omitted).
    // Not referenced by any test in this class.
    private static readonly JsonSerializerOptions ConfigOptions = new()
    {
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
    };

    /// <summary>Clears all post-processors. The resulting list is not inspected.</summary>
    [Fact]
    public void Test_PostProcessorsClear() => KreuzbergLib.ClearPostProcessors();

    /// <summary>Lists all registered post-processors and discards the result.</summary>
    [Fact]
    public void Test_PostProcessorsList() => KreuzbergLib.ListPostProcessors();
}
}

View File

@@ -0,0 +1,52 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Threading.Tasks;
using Xunit;
using Kreuzberg;
using static Kreuzberg.KreuzbergLib;
namespace Kreuzberg
{
/// <summary>
/// E2e tests for category: registry_operations.
/// Each test looks up the file extensions for one MIME type and discards the result,
/// so it passes when the lookup completes without throwing.
/// </summary>
public class RegistryOperationsTests
{
    // Generator-emitted serializer settings (snake_case enum strings, defaults omitted).
    // Not referenced by any test in this class.
    private static readonly JsonSerializerOptions ConfigOptions = new()
    {
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
    };

    /// <summary>Get file extensions for DOCX MIME type.</summary>
    [Fact]
    public void Test_ExtensionsDocx()
        => KreuzbergLib.GetExtensionsForMime("application/vnd.openxmlformats-officedocument.wordprocessingml.document");

    /// <summary>Get file extensions for HTML MIME type.</summary>
    [Fact]
    public void Test_ExtensionsHtml()
        => KreuzbergLib.GetExtensionsForMime("text/html");

    /// <summary>Get file extensions for PDF MIME type.</summary>
    [Fact]
    public void Test_ExtensionsPdf()
        => KreuzbergLib.GetExtensionsForMime("application/pdf");
}
}

76
e2e/csharp/tests/RegistryTests.cs generated Normal file
View File

@@ -0,0 +1,76 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Threading.Tasks;
using Xunit;
using Kreuzberg;
using static Kreuzberg.KreuzbergLib;
namespace Kreuzberg
{
/// <summary>
/// E2e tests for category: registry.
/// Each test calls one List* registry function and discards the result,
/// so it passes when the call completes without throwing.
/// </summary>
public class RegistryTests
{
    // Generator-emitted serializer settings (snake_case enum strings, defaults omitted).
    // Not referenced by any test in this class.
    private static readonly JsonSerializerOptions ConfigOptions = new()
    {
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
    };

    /// <summary>List document extractors.</summary>
    [Fact]
    public void Test_ListDocumentExtractors() => KreuzbergLib.ListDocumentExtractors();

    /// <summary>List embedding backends.</summary>
    [Fact]
    public void Test_ListEmbeddingBackends() => KreuzbergLib.ListEmbeddingBackends();

    /// <summary>List OCR backends.</summary>
    [Fact]
    public void Test_ListOcrBackends() => KreuzbergLib.ListOcrBackends();

    /// <summary>List post-processors.</summary>
    [Fact]
    public void Test_ListPostProcessors() => KreuzbergLib.ListPostProcessors();

    /// <summary>List renderers.</summary>
    [Fact]
    public void Test_ListRenderers() => KreuzbergLib.ListRenderers();

    /// <summary>List validators.</summary>
    [Fact]
    public void Test_ListValidators() => KreuzbergLib.ListValidators();
}
}

View File

@@ -0,0 +1,43 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Threading.Tasks;
using Xunit;
using Kreuzberg;
using static Kreuzberg.KreuzbergLib;
namespace Kreuzberg
{
/// <summary>
/// E2e tests for category: renderer_management.
/// Neither test asserts on a result; each passes when its call completes without throwing.
/// </summary>
public class RendererManagementTests
{
    // Generator-emitted serializer settings (snake_case enum strings, defaults omitted).
    // Not referenced by any test in this class.
    private static readonly JsonSerializerOptions ConfigOptions = new()
    {
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
    };

    /// <summary>Clears all renderers. The resulting list is not inspected.</summary>
    [Fact]
    public void Test_RenderersClear() => KreuzbergLib.ClearRenderers();

    /// <summary>Lists all registered renderers and discards the result.</summary>
    [Fact]
    public void Test_RenderersList() => KreuzbergLib.ListRenderers();
}
}

120
e2e/csharp/tests/SmokeTests.cs generated Normal file
View File

@@ -0,0 +1,120 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Threading.Tasks;
using Xunit;
using Kreuzberg;
using static Kreuzberg.KreuzbergLib;
namespace Kreuzberg
{
/// <summary>
/// E2e tests for category: smoke.
/// Each test runs one fixture through the asynchronous extraction API and checks the
/// reported MIME type and, where the fixture defines them, content expectations.
/// </summary>
public class SmokeTests
{
    // Serializer settings emitted by the generator: enums are written as snake_case
    // strings and default-valued members are omitted. No test in this class references it.
    private static readonly JsonSerializerOptions ConfigOptions = new() { Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) }, DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault };

    [Fact]
    public async Task Test_OcrImagePng()
    {
        // OCR: PNG image extraction with OCR enabled. In WASM this exercises the Uint8Array bridge parameter and Promise await in the generated OcrBackend bridge.
        var result = await KreuzbergLib.ExtractBytesAsync(System.IO.File.ReadAllBytes("images/test_hello_world.png"), "image/png", ExtractionConfig.FromJson("{}"));

        Assert.Equal("image/png", result.MimeType!.Trim());
        Assert.True(result.Content.Length >= 1, "expected length >= 1");

        // Any one of the markers is enough; the checks are case-sensitive.
        var content = result.Content.ToString();
        Assert.True(content.Contains("Hello") || content.Contains("World") || content.Contains("hello") || content.Contains("world"), "expected to contain at least one of the specified values");
    }

    [Fact]
    public async Task Test_SmokeDocxBasic()
    {
        // Smoke test: DOCX with formatted text
        var result = await KreuzbergLib.ExtractFileAsync("docx/fake.docx", "application/vnd.openxmlformats-officedocument.wordprocessingml.document", new ExtractionConfig());

        Assert.Equal("application/vnd.openxmlformats-officedocument.wordprocessingml.document", result.MimeType!.Trim());
        Assert.True(result.Content.Length >= 20, "expected length >= 20");

        var content = result.Content.ToString();
        Assert.True(content.Contains("Lorem") || content.Contains("ipsum") || content.Contains("document") || content.Contains("text"), "expected to contain at least one of the specified values");
    }

    [Fact]
    public async Task Test_SmokeHtmlBasic()
    {
        // Smoke test: HTML table extraction
        var result = await KreuzbergLib.ExtractFileAsync("html/simple_table.html", "text/html", new ExtractionConfig());

        Assert.Equal("text/html", result.MimeType!.Trim());
        Assert.True(result.Content.Length >= 10, "expected length >= 10");

        var content = result.Content.ToString();
        Assert.True(content.Contains("Sample Data Table") || content.Contains("Laptop") || content.Contains("Electronics") || content.Contains("Product"), "expected to contain at least one of the specified values");
    }

    [Fact]
    public async Task Test_SmokeImagePng()
    {
        // Smoke test: PNG image (without OCR, metadata only)
        // No MIME type is supplied to the call; the assertion checks the one reported on the result.
        var result = await KreuzbergLib.ExtractFileAsync("images/sample.png", null, new ExtractionConfig { DisableOcr = true });

        Assert.Equal("image/png", result.MimeType!.Trim());
    }

    [Fact]
    public async Task Test_SmokeJsonBasic()
    {
        // Smoke test: JSON file extraction
        var result = await KreuzbergLib.ExtractFileAsync("json/simple.json", "application/json", new ExtractionConfig());

        Assert.Equal("application/json", result.MimeType!.Trim());
        Assert.True(result.Content.Length >= 5, "expected length >= 5");
    }

    [Fact]
    public async Task Test_SmokePdfBasic()
    {
        // Smoke test: PDF with simple text extraction
        var result = await KreuzbergLib.ExtractFileAsync("pdf/fake_memo.pdf", "application/pdf", new ExtractionConfig());

        Assert.Equal("application/pdf", result.MimeType!.Trim());
        Assert.True(result.Content.Length >= 50, "expected length >= 50");

        var content = result.Content.ToString();
        Assert.True(content.Contains("May 5, 2023") || content.Contains("To Whom it May Concern"), "expected to contain at least one of the specified values");
    }

    [Fact]
    public async Task Test_SmokeTxtBasic()
    {
        // Smoke test: Plain text file
        var result = await KreuzbergLib.ExtractFileAsync("text/report.txt", "text/plain", new ExtractionConfig());

        Assert.Equal("text/plain", result.MimeType!.Trim());
        Assert.True(result.Content.Length >= 5, "expected length >= 5");
    }

    [Fact]
    public async Task Test_SmokeXlsxBasic()
    {
        // Smoke test: XLSX with basic spreadsheet data including tables
        var result = await KreuzbergLib.ExtractFileAsync("xlsx/stanley_cups.xlsx", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", new ExtractionConfig());

        Assert.Equal("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", result.MimeType!.Trim());
        Assert.True(result.Content.Length >= 100, "expected length >= 100");

        // Every fragment must appear, ignoring case. The comparison is ordinal; the previous
        // form lowercased the content with the current culture, so the outcome could vary
        // with the machine's locale.
        var content = result.Content.ToString();
        string[] expectedFragments =
        {
            "team",
            "location",
            "stanley cups",
            "blues",
            "flyers",
            "maple leafs",
            "stl",
            "phi",
            "tor",
        };
        foreach (var fragment in expectedFragments)
        {
            Assert.Contains(fragment, content, StringComparison.OrdinalIgnoreCase);
        }

        // skipped: field 'tables' not available on result type
        // skipped: field 'metadata.format.excel.sheet_count' not available on result type
        // skipped: field 'metadata.format.excel.sheet_names' not available on result type
    }
}
}

View File

@@ -0,0 +1,43 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Threading.Tasks;
using Xunit;
using Kreuzberg;
using static Kreuzberg.KreuzbergLib;
namespace Kreuzberg
{
/// <summary>
/// E2e tests for category: validator_management.
/// Neither test asserts on a result; each passes when its call completes without throwing.
/// </summary>
public class ValidatorManagementTests
{
    // Generator-emitted serializer settings (snake_case enum strings, defaults omitted).
    // Not referenced by any test in this class.
    private static readonly JsonSerializerOptions ConfigOptions = new()
    {
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
    };

    /// <summary>Clears all validators. The resulting list is not inspected.</summary>
    [Fact]
    public void Test_ValidatorsClear() => KreuzbergLib.ClearValidators();

    /// <summary>Lists all registered validators and discards the result.</summary>
    [Fact]
    public void Test_ValidatorsList() => KreuzbergLib.ListValidators();
}
}