Files
fil/e2e/csharp/tests/ContractTests.cs
Henrik Jess Nielsen b4c07d3693
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s
Nomad changes
2026-06-01 23:40:55 +02:00

204 lines
11 KiB
C#
Generated

// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Threading.Tasks;
using Xunit;
using Kreuzberg;
using static Kreuzberg.KreuzbergLib;
namespace Kreuzberg
{
/// <summary>E2e tests for category: contract.</summary>
/// <remarks>
/// Each test runs one fixture document through <c>KreuzbergLib</c> and checks the reported
/// MIME type and/or the extracted content. Fixture paths are relative; presumably they are
/// resolved against the test runner's working directory (TODO confirm).
/// </remarks>
public class ContractTests
{
    /// <summary>
    /// JSON serializer options with a snake_case string-enum converter that omit
    /// default-valued members when writing. Not referenced by any test in this class.
    /// </summary>
    private static readonly JsonSerializerOptions ConfigOptions = new()
    {
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
    };

    /// <summary>
    /// Asserts that <paramref name="actual"/> contains at least one of
    /// <paramref name="candidates"/> as an ordinal (case-sensitive) substring.
    /// </summary>
    /// <param name="actual">Text to search.</param>
    /// <param name="candidates">Substrings of which at least one must be present.</param>
    private static void AssertContainsAny(string actual, params string[] candidates)
    {
        Assert.True(
            Array.Exists(candidates, candidate => actual.Contains(candidate, StringComparison.Ordinal)),
            "expected to contain at least one of the specified values");
    }

    /// <summary>Tests async batch bytes extraction API (batch_extract_bytes).</summary>
    /// <remarks>
    /// NOTE(review): the body calls <c>ExtractFileAsync</c> with a single path, so no batch
    /// or bytes entry point is exercised here.
    /// </remarks>
    [Fact]
    public async Task Test_ApiBatchBytesAsync()
    {
        var result = await KreuzbergLib.ExtractFileAsync("pdf/fake_memo.pdf", null, null);

        Assert.Equal("application/pdf", result.MimeType!.Trim());
        Assert.True(result.Content.Length >= 10, "expected length >= 10");
        AssertContainsAny(result.Content.ToString(), "May 5, 2023", "Mallori");
    }

    /// <summary>
    /// Tests async batch bytes extraction with per-file configs
    /// (batch_extract_bytes with file_configs parameter).
    /// </summary>
    /// <remarks>
    /// NOTE(review): the body calls <c>ExtractFileAsync</c> with a single path and a single
    /// Markdown-output config; no batch or bytes entry point is exercised here.
    /// </remarks>
    [Fact]
    public async Task Test_ApiBatchBytesWithConfigsAsync()
    {
        var config = new ExtractionConfig { OutputFormat = OutputFormat.Markdown };
        var result = await KreuzbergLib.ExtractFileAsync("pdf/fake_memo.pdf", null, config);

        Assert.Equal("application/pdf", result.MimeType!.Trim());
        Assert.True(result.Content.Length >= 10, "expected length >= 10");
        // skipped: field 'metadata.output_format' not available on result type
    }

    /// <summary>Tests async batch file extraction API (batch_extract_file).</summary>
    /// <remarks>
    /// NOTE(review): the body calls <c>ExtractFileAsync</c> with a single path, so no batch
    /// entry point is exercised here.
    /// </remarks>
    [Fact]
    public async Task Test_ApiBatchFileAsync()
    {
        var result = await KreuzbergLib.ExtractFileAsync("pdf/fake_memo.pdf", null, null);

        Assert.Equal("application/pdf", result.MimeType!.Trim());
        Assert.True(result.Content.Length >= 10, "expected length >= 10");
        AssertContainsAny(result.Content.ToString(), "May 5, 2023", "Mallori");
    }

    /// <summary>
    /// Tests async batch file extraction with per-file configs
    /// (batch_extract_files with file_configs parameter).
    /// </summary>
    /// <remarks>
    /// NOTE(review): the body calls <c>ExtractFileAsync</c> with a single path and a single
    /// Markdown-output config; no batch entry point is exercised here.
    /// </remarks>
    [Fact]
    public async Task Test_ApiBatchFileWithConfigsAsync()
    {
        var config = new ExtractionConfig { OutputFormat = OutputFormat.Markdown };
        var result = await KreuzbergLib.ExtractFileAsync("pdf/fake_memo.pdf", null, config);

        Assert.Equal("application/pdf", result.MimeType!.Trim());
        Assert.True(result.Content.Length >= 10, "expected length >= 10");
        // skipped: field 'metadata.output_format' not available on result type
    }

    /// <summary>Tests async bytes extraction API (extract_bytes).</summary>
    /// <remarks>
    /// NOTE(review): the body calls <c>ExtractFileAsync</c> with a path rather than a
    /// bytes-based entry point.
    /// </remarks>
    [Fact]
    public async Task Test_ApiExtractBytesAsync()
    {
        var result = await KreuzbergLib.ExtractFileAsync("pdf/fake_memo.pdf", null, null);

        Assert.Equal("application/pdf", result.MimeType!.Trim());
        Assert.True(result.Content.Length >= 10, "expected length >= 10");
        AssertContainsAny(result.Content.ToString(), "May 5, 2023", "Mallori");
    }

    /// <summary>Tests async file extraction API (extract_file).</summary>
    [Fact]
    public async Task Test_ApiExtractFileAsync()
    {
        var result = await KreuzbergLib.ExtractFileAsync("pdf/fake_memo.pdf", null, null);

        Assert.Equal("application/pdf", result.MimeType!.Trim());
        Assert.True(result.Content.Length >= 10, "expected length >= 10");
        AssertContainsAny(result.Content.ToString(), "May 5, 2023", "Mallori");
    }

    /// <summary>Tests markdown chunker prepends heading hierarchy to chunk content.</summary>
    [Fact]
    public void Test_ConfigChunkingPrependHeadingContext()
    {
        var config = ExtractionConfig.FromJson("{\"chunking\":{\"chunker_type\":\"markdown\",\"max_chars\":300,\"max_overlap\":50,\"prepend_heading_context\":true}}");
        var result = KreuzbergLib.ExtractFileSync("markdown/extraction_test.md", null, config);

        Assert.True(result.Content.Length >= 10, "expected length >= 10");
        // skipped: field 'chunks' not available on result type

        // The skip marker above used to share a line with the first assertion below, which
        // turned that assertion into comment text so it never ran. It is restored here.
        // NOTE(review): this file is generated; the missing newline after skip markers should
        // also be fixed in the generator so that regeneration does not reintroduce it.
        var chunks = result.Chunks ?? new();
        Assert.True(chunks.All(c => !string.IsNullOrEmpty(c.Content)));
        Assert.True(chunks.All(c => c.Metadata?.HeadingContext != null));
        // Fails when there are no chunks at all, so the two All(...) checks cannot pass vacuously.
        Assert.True(chunks.FirstOrDefault()?.Metadata?.HeadingContext != null);
    }

    /// <summary>Tests document structure with DOCX heading-driven nesting.</summary>
    /// <remarks>
    /// Only the MIME type is asserted; the document-structure assertions were skipped.
    /// </remarks>
    [Fact]
    public void Test_ConfigDocumentStructureWithHeadings()
    {
        var config = ExtractionConfig.FromJson("{\"include_document_structure\":true}");
        var result = KreuzbergLib.ExtractFileSync("docx/fake.docx", null, config);

        Assert.Equal("application/vnd.openxmlformats-officedocument.wordprocessingml.document", result.MimeType!.Trim());
        // skipped: field 'document' not available on result type
        // skipped: field 'document.nodes' not available on result type
    }

    /// <summary>Tests element-based result format with element type assertions on DOCX.</summary>
    /// <remarks>
    /// Only the MIME type is asserted; the element assertions were skipped.
    /// </remarks>
    [Fact]
    public void Test_ConfigElementTypes()
    {
        var config = ExtractionConfig.FromJson("{\"result_format\":\"element_based\"}");
        var result = KreuzbergLib.ExtractFileSync("docx/unit_test_headers.docx", null, config);

        AssertContainsAny(result.MimeType!, "application/vnd.openxmlformats-officedocument.wordprocessingml.document");
        // skipped: field 'elements' not available on result type
    }

    /// <summary>
    /// Tests that extraction_timeout_secs config field is accepted and does not affect
    /// fast extractions.
    /// </summary>
    [Fact]
    public void Test_ConfigExtractionTimeout()
    {
        var config = ExtractionConfig.FromJson("{\"extraction_timeout_secs\":300}");
        var result = KreuzbergLib.ExtractFileSync("pdf/fake_memo.pdf", null, config);

        Assert.Equal("application/pdf", result.MimeType!.Trim());
        Assert.True(result.Content.Length >= 10, "expected length >= 10");
    }

    /// <summary>Tests keyword extraction via YAKE algorithm.</summary>
    /// <remarks>
    /// Only MIME type and content length are asserted; both keyword assertions were skipped.
    /// </remarks>
    [Fact]
    public void Test_ConfigKeywords()
    {
        var config = ExtractionConfig.FromJson("{\"keywords\":{\"algorithm\":\"yake\",\"max_keywords\":10}}");
        var result = KreuzbergLib.ExtractFileSync("pdf/fake_memo.pdf", null, config);

        Assert.Equal("application/pdf", result.MimeType!.Trim());
        Assert.True(result.Content.Length >= 10, "expected length >= 10");
        // skipped: field 'keywords' not available on C# ExtractionResult
        // skipped: field 'keywords' not available on C# ExtractionResult
    }

    /// <summary>Tests page extraction and page marker configuration.</summary>
    [Fact]
    public void Test_ConfigPages()
    {
        var config = ExtractionConfig.FromJson("{\"pages\":{\"extract_pages\":true,\"insert_page_markers\":true}}");
        var result = KreuzbergLib.ExtractFileSync("pdf/fake_memo.pdf", null, config);

        Assert.Equal("application/pdf", result.MimeType!.Trim());
        Assert.True(result.Content.Length >= 10, "expected length >= 10");
        AssertContainsAny(result.Content.ToString(), "PAGE");
    }

    /// <summary>Tests quality scoring produces a score value in [0.0, 1.0].</summary>
    /// <remarks>
    /// Only MIME type and content length are asserted; all three quality_score assertions
    /// were skipped.
    /// </remarks>
    [Fact]
    public void Test_ConfigQualityEnabled()
    {
        var config = ExtractionConfig.FromJson("{\"enable_quality_processing\":true}");
        var result = KreuzbergLib.ExtractFileSync("pdf/fake_memo.pdf", null, config);

        Assert.Equal("application/pdf", result.MimeType!.Trim());
        Assert.True(result.Content.Length >= 10, "expected length >= 10");
        // skipped: field 'quality_score' not available on result type
        // skipped: field 'quality_score' not available on result type
        // skipped: field 'quality_score' not available on result type
    }

    /// <summary>Tests archive extraction with custom security limits.</summary>
    [Fact]
    public void Test_ConfigSecurityLimits()
    {
        var config = ExtractionConfig.FromJson("{\"security_limits\":{\"max_archive_size\":104857600,\"max_compression_ratio\":50,\"max_files_in_archive\":100}}");
        var result = KreuzbergLib.ExtractFileSync("archives/documents.zip", null, config);

        AssertContainsAny(result.MimeType!, "application/zip", "application/x-zip-compressed");
        Assert.True(result.Content.Length >= 10, "expected length >= 10");
    }

    /// <summary>Tests tree-sitter configuration round-trip.</summary>
    [Fact]
    public void Test_ConfigTreeSitter()
    {
        var config = ExtractionConfig.FromJson("{\"tree_sitter\":{\"groups\":[\"web\"],\"languages\":[\"python\",\"rust\"],\"process\":{\"comments\":false,\"diagnostics\":false,\"docstrings\":false,\"exports\":true,\"imports\":true,\"structure\":true,\"symbols\":false}}}");
        var result = KreuzbergLib.ExtractFileSync("code/hello.py", null, config);

        Assert.Equal("text/x-source-code", result.MimeType!.Trim());
        Assert.True(result.Content.Length >= 5, "expected length >= 5");
    }

    /// <summary>Tests markdown output format via bytes extraction API.</summary>
    [Fact]
    public void Test_OutputFormatBytesMarkdown()
    {
        var payload = System.IO.File.ReadAllBytes("pdf/fake_memo.pdf");
        var config = new ExtractionConfig { OutputFormat = OutputFormat.Markdown };
        var result = KreuzbergLib.ExtractBytesSync(payload, "application/pdf", config);

        Assert.Equal("application/pdf", result.MimeType!.Trim());
        Assert.True(result.Content.Length >= 10, "expected length >= 10");
        // skipped: field 'metadata.output_format' not available on result type
    }

    /// <summary>Tests Markdown output format.</summary>
    [Fact]
    public void Test_OutputFormatMarkdown()
    {
        var config = ExtractionConfig.FromJson("{\"output_format\":\"markdown\"}");
        var result = KreuzbergLib.ExtractFileSync("pdf/fake_memo.pdf", null, config);

        Assert.Equal("application/pdf", result.MimeType!.Trim());
        Assert.True(result.Content.Length >= 10, "expected length >= 10");
        // skipped: field 'metadata.output_format' not available on result type
    }
}
}