Nomad changes
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s

This commit is contained in:
Henrik Jess Nielsen
2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions

20
e2e/csharp/Kreuzberg.E2eTests.csproj generated Normal file
View File

@@ -0,0 +1,20 @@
<Project Sdk="Microsoft.NET.Sdk">
<!-- End-to-end xUnit test project; built for tests only and never packed. -->
<PropertyGroup>
<TargetFramework>net10.0</TargetFramework>
<Nullable>enable</Nullable>
<ImplicitUsings>enable</ImplicitUsings>
<IsPackable>false</IsPackable>
<IsTestProject>true</IsTestProject>
<!-- NOTE(review): assembly-info generation is switched off; presumably to avoid duplicate attributes, confirm. -->
<GenerateAssemblyInfo>false</GenerateAssemblyInfo>
</PropertyGroup>
<!-- Test host, xUnit framework and the Visual Studio/dotnet-test runner adapter. -->
<ItemGroup>
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="18.5.1" />
<PackageReference Include="xunit" Version="2.9.3" />
<PackageReference Include="xunit.runner.visualstudio" Version="3.1.5" />
</ItemGroup>
<!-- The Kreuzberg C# package that the tests in this project call into. -->
<ItemGroup>
<ProjectReference Include="../../packages/csharp/Kreuzberg/Kreuzberg.csproj" />
</ItemGroup>
</Project>

42
e2e/csharp/TestSetup.cs generated Normal file
View File

@@ -0,0 +1,42 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
using System;
using System.IO;
using System.Runtime.CompilerServices;
namespace Kreuzberg;
/// <summary>
/// Test bootstrap that points the process working directory at the shared
/// <c>test_documents/</c> fixture directory when one can be located.
/// </summary>
internal static class TestSetup
{
    /// <summary>
    /// Module initializer: walks up from <see cref="AppContext.BaseDirectory"/>
    /// one directory at a time.
    /// <list type="bullet">
    /// <item>If the directory contains <c>test_documents/</c>, the current
    /// directory is changed to it (so fixture paths like "docx/fake.docx"
    /// resolve relative to it) and the walk stops.</item>
    /// <item>Otherwise, if the directory contains <c>alef.toml</c> or
    /// <c>fixtures/</c>, the walk stops without changing the current
    /// directory (web-crawler-style repos with no document fixtures).</item>
    /// </list>
    /// If the filesystem root is reached without a match, nothing is changed.
    /// </summary>
    [ModuleInitializer]
    internal static void Init()
    {
        for (DirectoryInfo? dir = new DirectoryInfo(AppContext.BaseDirectory); dir is not null; dir = dir.Parent)
        {
            var documentsCandidate = Path.Combine(dir.FullName, "test_documents");
            if (Directory.Exists(documentsCandidate))
            {
                Directory.SetCurrentDirectory(documentsCandidate);
                return;
            }

            // Repo-root markers: stop here so that a test_documents/ directory
            // belonging to some outer, unrelated tree is never picked up.
            var hasAlefToml = File.Exists(Path.Combine(dir.FullName, "alef.toml"));
            if (hasAlefToml || Directory.Exists(Path.Combine(dir.FullName, "fixtures")))
            {
                return;
            }
        }
    }
}

58
e2e/csharp/tests/AsyncTests.cs generated Normal file
View File

@@ -0,0 +1,58 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Threading.Tasks;
using Xunit;
using Kreuzberg;
using static Kreuzberg.KreuzbergLib;
namespace Kreuzberg
{
    /// <summary>E2e tests for category: async.</summary>
    public class AsyncTests
    {
        private static readonly JsonSerializerOptions ConfigOptions = new() { Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) }, DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault };

        /// <summary>Async extract_bytes call on a PDF document.</summary>
        [Fact]
        public async Task Test_AsyncExtractBytes()
        {
            // Read the fixture asynchronously so the async test never blocks on file I/O.
            var pdfBytes = await System.IO.File.ReadAllBytesAsync("pdf/fake_memo.pdf");

            var result = await KreuzbergLib.ExtractBytesAsync(pdfBytes, "application/pdf", null);

            Assert.Equal("application/pdf", result.MimeType!.Trim());
            Assert.True(result.Content.Length >= 50, "expected length >= 50");
        }

        /// <summary>extract_bytes with an empty MIME type must throw <c>KreuzbergException</c>.</summary>
        [Fact]
        public async Task Test_AsyncExtractBytesEmptyMime()
        {
            // The fixture is loaded outside the asserted delegate so that only the
            // extraction call itself is covered by the exception assertion.
            var textBytes = await System.IO.File.ReadAllBytesAsync("text/plain.txt");

            await Assert.ThrowsAnyAsync<KreuzbergException>(async () =>
            {
                await KreuzbergLib.ExtractBytesAsync(textBytes, "", ExtractionConfig.FromJson("{}"));
            });
        }

        /// <summary>extract_bytes with an unsupported MIME type must throw <c>KreuzbergException</c>.</summary>
        [Fact]
        public async Task Test_AsyncExtractBytesInvalidMime()
        {
            var textBytes = await System.IO.File.ReadAllBytesAsync("text/plain.txt");

            await Assert.ThrowsAnyAsync<KreuzbergException>(async () =>
            {
                await KreuzbergLib.ExtractBytesAsync(textBytes, "application/x-nonexistent", ExtractionConfig.FromJson("{}"));
            });
        }
    }
}

110
e2e/csharp/tests/BatchTests.cs generated Normal file
View File

@@ -0,0 +1,110 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Threading.Tasks;
using Xunit;
using Kreuzberg;
using static Kreuzberg.KreuzbergLib;
namespace Kreuzberg
{
    /// <summary>E2e tests for category: batch.</summary>
    public class BatchTests
    {
        private static readonly JsonSerializerOptions ConfigOptions = new() { Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) }, DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault };

        /// <summary>batch_extract_bytes_sync invalid MIME; only checks that the call returns.</summary>
        [Fact]
        public void Test_BatchBytesInvalidMime()
        {
            var items = new List<BatchBytesItem>
            {
                new BatchBytesItem { Content = "Hello"u8.ToArray(), MimeType = "application/x-nonexistent" },
            };

            _ = KreuzbergLib.BatchExtractBytesSync(items, null);
        }

        /// <summary>batch_extract_bytes: happy path with mixed inputs.</summary>
        [Fact]
        public async Task Test_BatchExtractBytesHappy()
        {
            var items = new List<BatchBytesItem>
            {
                new BatchBytesItem { Content = "Hello, world!"u8.ToArray(), MimeType = "text/plain" },
                new BatchBytesItem { Content = "<html><body>Test</body></html>"u8.ToArray(), MimeType = "text/html" },
            };

            var result = await KreuzbergLib.BatchExtractBytesAsync(items, null);

            Assert.True(result.Count >= 1, "expected at least 1 elements");
        }

        /// <summary>batch_extract_bytes: handles unsupported MIME gracefully (no assertion on the result).</summary>
        [Fact]
        public async Task Test_BatchExtractBytesMixedFormat()
        {
            var items = new List<BatchBytesItem>
            {
                new BatchBytesItem { Content = "PDF placeholder"u8.ToArray(), MimeType = "application/x-unknown" },
            };

            await KreuzbergLib.BatchExtractBytesAsync(items, null);
        }

        /// <summary>batch_extract_bytes_sync: an empty batch yields an empty result.</summary>
        [Fact]
        public void Test_BatchExtractBytesSyncEmptyList()
        {
            var noItems = new List<BatchBytesItem>();

            var result = KreuzbergLib.BatchExtractBytesSync(noItems, null);

            Assert.Equal(0, result.Count);
        }

        /// <summary>batch_extract_bytes_sync: unsupported MIME; only checks that the call returns.</summary>
        [Fact]
        public void Test_BatchExtractBytesSyncInvalidMime()
        {
            var items = new List<BatchBytesItem>
            {
                new BatchBytesItem { Content = "data"u8.ToArray(), MimeType = "application/x-unknown" },
            };

            _ = KreuzbergLib.BatchExtractBytesSync(items, null);
        }

        /// <summary>Extract text from multiple files asynchronously.</summary>
        [Fact]
        public async Task Test_BatchFileAsyncBasic()
        {
            var files = new List<BatchFileItem>
            {
                new BatchFileItem { Path = "pdf/fake_memo.pdf" },
                new BatchFileItem { Path = "text/fake_text.txt" },
            };

            await KreuzbergLib.BatchExtractFilesAsync(files, null);
        }

        /// <summary>batch_extract_file async with a nonexistent path.</summary>
        [Fact]
        public async Task Test_BatchFileAsyncNotFound()
        {
            var files = new List<BatchFileItem>
            {
                new BatchFileItem { Path = "/nonexistent/a.pdf" },
            };

            await KreuzbergLib.BatchExtractFilesAsync(files, null);
        }

        /// <summary>batch_extract_file_sync with nonexistent paths.</summary>
        [Fact]
        public void Test_BatchFileNotFound()
        {
            var files = new List<BatchFileItem>
            {
                new BatchFileItem { Path = "/nonexistent/a.pdf" },
                new BatchFileItem { Path = "/nonexistent/b.txt" },
            };

            _ = KreuzbergLib.BatchExtractFilesSync(files, null);
        }

        /// <summary>batch_extract_file_sync with one existing and one missing file.</summary>
        [Fact]
        public void Test_BatchFilePartial()
        {
            var files = new List<BatchFileItem>
            {
                new BatchFileItem { Path = "text/plain.txt" },
                new BatchFileItem { Path = "/nonexistent/missing.pdf" },
            };

            _ = KreuzbergLib.BatchExtractFilesSync(files, null);
        }

        /// <summary>Extract text from multiple files synchronously.</summary>
        [Fact]
        public void Test_BatchFileSyncBasic()
        {
            var files = new List<BatchFileItem>
            {
                new BatchFileItem { Path = "pdf/fake_memo.pdf" },
                new BatchFileItem { Path = "text/fake_text.txt" },
            };

            _ = KreuzbergLib.BatchExtractFilesSync(files, null);
        }
    }
}

40
e2e/csharp/tests/CodeTests.cs generated Normal file
View File

@@ -0,0 +1,40 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Threading.Tasks;
using Xunit;
using Kreuzberg;
using static Kreuzberg.KreuzbergLib;
namespace Kreuzberg
{
    /// <summary>E2e tests for category: code.</summary>
    public class CodeTests
    {
        private static readonly JsonSerializerOptions ConfigOptions = new() { Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) }, DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault };

        /// <summary>
        /// Extracts a shell script through the file API with the source-code MIME
        /// type and checks that the expected words appear in the content.
        /// </summary>
        [Fact]
        public void Test_CodeShebangDetection()
        {
            var result = KreuzbergLib.ExtractFileSync("code/script.sh", "text/x-source-code", null);

            Assert.Equal("text/x-source-code", result.MimeType!.Trim());
            Assert.True(result.Content.Length >= 10, "expected length >= 10");

            // Ordinal, case-insensitive matching: lower-casing with the current
            // culture (e.g. tr-TR maps 'I' to dotless 'ı') could make "BUILD"
            // fail to match "build" depending on the machine's locale.
            var content = result.Content.ToString();
            Assert.Contains("build", content, StringComparison.OrdinalIgnoreCase);
            Assert.Contains("clean", content, StringComparison.OrdinalIgnoreCase);
        }
    }
}

203
e2e/csharp/tests/ContractTests.cs generated Normal file
View File

@@ -0,0 +1,203 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Threading.Tasks;
using Xunit;
using Kreuzberg;
using static Kreuzberg.KreuzbergLib;
namespace Kreuzberg
{
    /// <summary>
    /// E2e tests for category: contract.
    /// Several tests are named after batch/bytes APIs but, in this binding,
    /// exercise <c>KreuzbergLib.ExtractFileAsync</c> on the same PDF fixture.
    /// </summary>
    public class ContractTests
    {
        private static readonly JsonSerializerOptions ConfigOptions = new() { Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) }, DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault };

        [Fact]
        public async Task Test_ApiBatchBytesAsync()
        {
            // Tests async batch bytes extraction API (batch_extract_bytes)
            var result = await KreuzbergLib.ExtractFileAsync("pdf/fake_memo.pdf", null, null);
            Assert.Equal("application/pdf", result.MimeType!.Trim());
            Assert.True(result.Content.Length >= 10, "expected length >= 10");
            Assert.True(result.Content.ToString().Contains("May 5, 2023") || result.Content.ToString().Contains("Mallori"), "expected to contain at least one of the specified values");
        }

        [Fact]
        public async Task Test_ApiBatchBytesWithConfigsAsync()
        {
            // Tests async batch bytes extraction with per-file configs (batch_extract_bytes with file_configs parameter)
            var result = await KreuzbergLib.ExtractFileAsync("pdf/fake_memo.pdf", null, new ExtractionConfig { OutputFormat = OutputFormat.Markdown });
            Assert.Equal("application/pdf", result.MimeType!.Trim());
            Assert.True(result.Content.Length >= 10, "expected length >= 10");
            // skipped: field 'metadata.output_format' not available on result type
        }

        [Fact]
        public async Task Test_ApiBatchFileAsync()
        {
            // Tests async batch file extraction API (batch_extract_file)
            var result = await KreuzbergLib.ExtractFileAsync("pdf/fake_memo.pdf", null, null);
            Assert.Equal("application/pdf", result.MimeType!.Trim());
            Assert.True(result.Content.Length >= 10, "expected length >= 10");
            Assert.True(result.Content.ToString().Contains("May 5, 2023") || result.Content.ToString().Contains("Mallori"), "expected to contain at least one of the specified values");
        }

        [Fact]
        public async Task Test_ApiBatchFileWithConfigsAsync()
        {
            // Tests async batch file extraction with per-file configs (batch_extract_files with file_configs parameter)
            var result = await KreuzbergLib.ExtractFileAsync("pdf/fake_memo.pdf", null, new ExtractionConfig { OutputFormat = OutputFormat.Markdown });
            Assert.Equal("application/pdf", result.MimeType!.Trim());
            Assert.True(result.Content.Length >= 10, "expected length >= 10");
            // skipped: field 'metadata.output_format' not available on result type
        }

        [Fact]
        public async Task Test_ApiExtractBytesAsync()
        {
            // Tests async bytes extraction API (extract_bytes)
            var result = await KreuzbergLib.ExtractFileAsync("pdf/fake_memo.pdf", null, null);
            Assert.Equal("application/pdf", result.MimeType!.Trim());
            Assert.True(result.Content.Length >= 10, "expected length >= 10");
            Assert.True(result.Content.ToString().Contains("May 5, 2023") || result.Content.ToString().Contains("Mallori"), "expected to contain at least one of the specified values");
        }

        [Fact]
        public async Task Test_ApiExtractFileAsync()
        {
            // Tests async file extraction API (extract_file)
            var result = await KreuzbergLib.ExtractFileAsync("pdf/fake_memo.pdf", null, null);
            Assert.Equal("application/pdf", result.MimeType!.Trim());
            Assert.True(result.Content.Length >= 10, "expected length >= 10");
            Assert.True(result.Content.ToString().Contains("May 5, 2023") || result.Content.ToString().Contains("Mallori"), "expected to contain at least one of the specified values");
        }

        [Fact]
        public void Test_ConfigChunkingPrependHeadingContext()
        {
            // Tests markdown chunker prepends heading hierarchy to chunk content
            var result = KreuzbergLib.ExtractFileSync("markdown/extraction_test.md", null, ExtractionConfig.FromJson("{\"chunking\":{\"chunker_type\":\"markdown\",\"max_chars\":300,\"max_overlap\":50,\"prepend_heading_context\":true}}"));
            Assert.True(result.Content.Length >= 10, "expected length >= 10");
            // This assertion used to be fused onto the end of a "// skipped" comment
            // line and therefore never ran; it is restored as a real statement.
            Assert.True((result.Chunks ?? new()).All(c => !string.IsNullOrEmpty(c.Content)));
            Assert.True((result.Chunks ?? new()).All(c => c.Metadata?.HeadingContext != null));
            // All(...) is vacuously true for an empty list; this line is what
            // requires at least one chunk to be present.
            Assert.True((result.Chunks ?? new()).FirstOrDefault()?.Metadata?.HeadingContext != null);
        }

        [Fact]
        public void Test_ConfigDocumentStructureWithHeadings()
        {
            // Tests document structure with DOCX heading-driven nesting
            var result = KreuzbergLib.ExtractFileSync("docx/fake.docx", null, ExtractionConfig.FromJson("{\"include_document_structure\":true}"));
            Assert.Equal("application/vnd.openxmlformats-officedocument.wordprocessingml.document", result.MimeType!.Trim());
            // skipped: field 'document' not available on result type
            // skipped: field 'document.nodes' not available on result type
        }

        [Fact]
        public void Test_ConfigElementTypes()
        {
            // Tests element-based result format with element type assertions on DOCX
            var result = KreuzbergLib.ExtractFileSync("docx/unit_test_headers.docx", null, ExtractionConfig.FromJson("{\"result_format\":\"element_based\"}"));
            Assert.True(result.MimeType!.ToString().Contains("application/vnd.openxmlformats-officedocument.wordprocessingml.document"), "expected to contain at least one of the specified values");
            // skipped: field 'elements' not available on result type
        }

        [Fact]
        public void Test_ConfigExtractionTimeout()
        {
            // Tests that extraction_timeout_secs config field is accepted and does not affect fast extractions
            var result = KreuzbergLib.ExtractFileSync("pdf/fake_memo.pdf", null, ExtractionConfig.FromJson("{\"extraction_timeout_secs\":300}"));
            Assert.Equal("application/pdf", result.MimeType!.Trim());
            Assert.True(result.Content.Length >= 10, "expected length >= 10");
        }

        [Fact]
        public void Test_ConfigKeywords()
        {
            // Tests keyword extraction via YAKE algorithm
            var result = KreuzbergLib.ExtractFileSync("pdf/fake_memo.pdf", null, ExtractionConfig.FromJson("{\"keywords\":{\"algorithm\":\"yake\",\"max_keywords\":10}}"));
            Assert.Equal("application/pdf", result.MimeType!.Trim());
            Assert.True(result.Content.Length >= 10, "expected length >= 10");
            // skipped: field 'keywords' not available on C# ExtractionResult
        }

        [Fact]
        public void Test_ConfigPages()
        {
            // Tests page extraction and page marker configuration
            var result = KreuzbergLib.ExtractFileSync("pdf/fake_memo.pdf", null, ExtractionConfig.FromJson("{\"pages\":{\"extract_pages\":true,\"insert_page_markers\":true}}"));
            Assert.Equal("application/pdf", result.MimeType!.Trim());
            Assert.True(result.Content.Length >= 10, "expected length >= 10");
            Assert.True(result.Content.ToString().Contains("PAGE"), "expected to contain at least one of the specified values");
        }

        [Fact]
        public void Test_ConfigQualityEnabled()
        {
            // Tests quality scoring produces a score value in [0.0, 1.0]
            var result = KreuzbergLib.ExtractFileSync("pdf/fake_memo.pdf", null, ExtractionConfig.FromJson("{\"enable_quality_processing\":true}"));
            Assert.Equal("application/pdf", result.MimeType!.Trim());
            Assert.True(result.Content.Length >= 10, "expected length >= 10");
            // skipped: field 'quality_score' not available on result type
        }

        [Fact]
        public void Test_ConfigSecurityLimits()
        {
            // Tests archive extraction with custom security limits
            var result = KreuzbergLib.ExtractFileSync("archives/documents.zip", null, ExtractionConfig.FromJson("{\"security_limits\":{\"max_archive_size\":104857600,\"max_compression_ratio\":50,\"max_files_in_archive\":100}}"));
            Assert.True(result.MimeType!.ToString().Contains("application/zip") || result.MimeType!.ToString().Contains("application/x-zip-compressed"), "expected to contain at least one of the specified values");
            Assert.True(result.Content.Length >= 10, "expected length >= 10");
        }

        [Fact]
        public void Test_ConfigTreeSitter()
        {
            // Tests tree-sitter configuration round-trip
            var result = KreuzbergLib.ExtractFileSync("code/hello.py", null, ExtractionConfig.FromJson("{\"tree_sitter\":{\"groups\":[\"web\"],\"languages\":[\"python\",\"rust\"],\"process\":{\"comments\":false,\"diagnostics\":false,\"docstrings\":false,\"exports\":true,\"imports\":true,\"structure\":true,\"symbols\":false}}}"));
            Assert.Equal("text/x-source-code", result.MimeType!.Trim());
            Assert.True(result.Content.Length >= 5, "expected length >= 5");
        }

        [Fact]
        public void Test_OutputFormatBytesMarkdown()
        {
            // Tests markdown output format via bytes extraction API
            var result = KreuzbergLib.ExtractBytesSync(System.IO.File.ReadAllBytes("pdf/fake_memo.pdf"), "application/pdf", new ExtractionConfig { OutputFormat = OutputFormat.Markdown });
            Assert.Equal("application/pdf", result.MimeType!.Trim());
            Assert.True(result.Content.Length >= 10, "expected length >= 10");
            // skipped: field 'metadata.output_format' not available on result type
        }

        [Fact]
        public void Test_OutputFormatMarkdown()
        {
            // Tests Markdown output format
            var result = KreuzbergLib.ExtractFileSync("pdf/fake_memo.pdf", null, ExtractionConfig.FromJson("{\"output_format\":\"markdown\"}"));
            Assert.Equal("application/pdf", result.MimeType!.Trim());
            Assert.True(result.Content.Length >= 10, "expected length >= 10");
            // skipped: field 'metadata.output_format' not available on result type
        }
    }
}

62
e2e/csharp/tests/DetectionTests.cs generated Normal file
View File

@@ -0,0 +1,62 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Threading.Tasks;
using Xunit;
using Kreuzberg;
using static Kreuzberg.KreuzbergLib;
namespace Kreuzberg
{
    /// <summary>E2e tests for category: detection.</summary>
    public class DetectionTests
    {
        private static readonly JsonSerializerOptions ConfigOptions = new() { Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) }, DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault };

        /// <summary>Detect HTML MIME from bytes (only checks that detection returns).</summary>
        [Fact]
        public void Test_DetectMimeBytesHtml()
        {
            var htmlBytes = System.IO.File.ReadAllBytes("html/html.html");

            _ = KreuzbergLib.DetectMimeTypeFromBytes(htmlBytes);
        }

        /// <summary>Detect PDF MIME type from bytes (only checks that detection returns).</summary>
        [Fact]
        public void Test_DetectMimeBytesPdf()
        {
            var pdfBytes = System.IO.File.ReadAllBytes("pdf/fake_memo.pdf");

            _ = KreuzbergLib.DetectMimeTypeFromBytes(pdfBytes);
        }

        /// <summary>Detect PNG MIME type from bytes (only checks that detection returns).</summary>
        [Fact]
        public void Test_DetectMimeBytesPng()
        {
            var pngBytes = System.IO.File.ReadAllBytes("images/test_hello_world.png");

            _ = KreuzbergLib.DetectMimeTypeFromBytes(pngBytes);
        }

        /// <summary>get_extensions for an unknown MIME type must throw <c>KreuzbergException</c>.</summary>
        [Fact]
        public void Test_GetExtensionsUnknownMime()
        {
            const string unknownMime = "application/x-totally-unknown";

            Assert.ThrowsAny<KreuzbergException>(() =>
            {
                KreuzbergLib.GetExtensionsForMime(unknownMime);
            });
        }
    }
}

View File

@@ -0,0 +1,43 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Threading.Tasks;
using Xunit;
using Kreuzberg;
using static Kreuzberg.KreuzbergLib;
namespace Kreuzberg
{
    /// <summary>E2e tests for category: document_extractor_management.</summary>
    public class DocumentExtractorManagementTests
    {
        private static readonly JsonSerializerOptions ConfigOptions = new() { Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) }, DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault };

        /// <summary>
        /// Clears all document extractors. Passes as long as the call does not
        /// throw; the resulting list is not inspected here.
        /// </summary>
        [Fact]
        public void Test_DocumentExtractorsClear()
            => KreuzbergLib.ClearDocumentExtractors();

        /// <summary>Lists all registered document extractors; the result is not inspected.</summary>
        [Fact]
        public void Test_ExtractorsList()
        {
            _ = KreuzbergLib.ListDocumentExtractors();
        }
    }
}

View File

@@ -0,0 +1,54 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Threading.Tasks;
using Xunit;
using Kreuzberg;
using static Kreuzberg.KreuzbergLib;
namespace Kreuzberg
{
    /// <summary>E2e tests for category: embed_async_pending.</summary>
    public class EmbedAsyncPendingTests
    {
        private static readonly JsonSerializerOptions ConfigOptions = new() { Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) }, DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault };

        /// <summary>embed_texts_async: an empty text list yields an empty result.</summary>
        [Fact]
        public async Task Test_EmbedTextsAsyncEmptyInput()
        {
            var noTexts = new List<string>();

            var result = await KreuzbergLib.EmbedTextsAsync(noTexts, null);

            Assert.True(result.Count == 0);
        }

        /// <summary>embed_texts_async: basic async embedding of two texts.</summary>
        [Fact]
        public async Task Test_EmbedTextsAsyncHappy()
        {
            var texts = new List<string> { "First", "Second" };

            var result = await KreuzbergLib.EmbedTextsAsync(texts, null);

            Assert.True(result.Count >= 2);
        }

        /// <summary>embed_texts_async: preset override (no assertion on the result).</summary>
        [Fact]
        public async Task Test_EmbedTextsAsyncPresetSwitch()
        {
            var texts = new List<string> { "Text" };
            var model = JsonSerializer.Deserialize<EmbeddingModelType>("{\"name\":\"balanced\",\"type\":\"preset\"}", ConfigOptions)!;

            await KreuzbergLib.EmbedTextsAsync(texts, new EmbeddingConfig { Model = model });
        }
    }
}

36
e2e/csharp/tests/EmbedExtraTests.cs generated Normal file
View File

@@ -0,0 +1,36 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Threading.Tasks;
using Xunit;
using Kreuzberg;
using static Kreuzberg.KreuzbergLib;
namespace Kreuzberg
{
    /// <summary>E2e tests for category: embed_extra.</summary>
    public class EmbedExtraTests
    {
        private static readonly JsonSerializerOptions ConfigOptions = new() { Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) }, DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault };

        /// <summary>Batch embed two texts with the "balanced" preset (no assertion on the result).</summary>
        [Fact]
        public void Test_EmbedTextsBatch()
        {
            var texts = new List<string> { "Hello", "World" };
            var model = JsonSerializer.Deserialize<EmbeddingModelType>("{\"name\":\"balanced\",\"type\":\"preset\"}", ConfigOptions)!;

            _ = KreuzbergLib.EmbedTexts(texts, new EmbeddingConfig { Model = model });
        }
    }
}

View File

@@ -0,0 +1,43 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Threading.Tasks;
using Xunit;
using Kreuzberg;
using static Kreuzberg.KreuzbergLib;
namespace Kreuzberg
{
    /// <summary>E2e tests for category: embedding_backend_management.</summary>
    public class EmbeddingBackendManagementTests
    {
        private static readonly JsonSerializerOptions ConfigOptions = new() { Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) }, DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault };

        /// <summary>
        /// Clears all embedding backends. Passes as long as the call does not
        /// throw; the resulting list is not inspected here.
        /// </summary>
        [Fact]
        public void Test_EmbeddingBackendsClear()
            => KreuzbergLib.ClearEmbeddingBackends();

        /// <summary>Lists all registered embedding backends; the result is not inspected.</summary>
        [Fact]
        public void Test_EmbeddingBackendsList()
        {
            _ = KreuzbergLib.ListEmbeddingBackends();
        }
    }
}

71
e2e/csharp/tests/EmbeddingsTests.cs generated Normal file
View File

@@ -0,0 +1,71 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Threading.Tasks;
using Xunit;
using Kreuzberg;
using static Kreuzberg.KreuzbergLib;
namespace Kreuzberg
{
    /// <summary>E2e tests for category: embeddings.</summary>
    public class EmbeddingsTests
    {
        private static readonly JsonSerializerOptions ConfigOptions = new() { Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) }, DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault };

        /// <summary>embed_texts: multilingual preset returns at least one entry per input.</summary>
        [Fact]
        public void Test_EmbedTextsDifferentPreset()
        {
            var texts = new List<string> { "Hello world", "Test" };
            var model = JsonSerializer.Deserialize<EmbeddingModelType>("{\"name\":\"multilingual\",\"type\":\"preset\"}", ConfigOptions)!;

            var result = KreuzbergLib.EmbedTexts(texts, new EmbeddingConfig { Model = model });

            Assert.True(result.Count >= 2);
        }

        /// <summary>get_embedding_preset: known preset (result is not inspected).</summary>
        [Fact]
        public void Test_GetEmbeddingPresetKnown()
        {
            _ = KreuzbergLib.GetEmbeddingPreset("balanced");
        }

        /// <summary>get_embedding_preset: nominal case (result is not inspected).</summary>
        [Fact]
        public void Test_GetEmbeddingPresetNominal()
        {
            _ = KreuzbergLib.GetEmbeddingPreset("balanced");
        }

        /// <summary>get_embedding_preset: an unknown preset yields a null or empty-rendering result.</summary>
        [Fact]
        public void Test_GetEmbeddingPresetUnknown()
        {
            var result = KreuzbergLib.GetEmbeddingPreset("nonexistent-xyz");

            var rendered = result?.ToString();
            Assert.True(string.IsNullOrEmpty(rendered));
        }

        /// <summary>list_embedding_presets: returns at least one preset.</summary>
        [Fact]
        public void Test_ListEmbeddingPresetsSanity()
        {
            var presets = KreuzbergLib.ListEmbeddingPresets();

            Assert.NotEmpty(presets);
        }
    }
}

76
e2e/csharp/tests/ErrorTests.cs generated Normal file
View File

@@ -0,0 +1,76 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Threading.Tasks;
using Xunit;
using Kreuzberg;
using static Kreuzberg.KreuzbergLib;
namespace Kreuzberg
{
    /// <summary>E2e tests for category: error.</summary>
    public class ErrorTests
    {
        private static readonly JsonSerializerOptions ConfigOptions = new() { Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) }, DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault };

        /// <summary>Graceful handling of empty bytes (should not error).</summary>
        [Fact]
        public void Test_ErrorEmptyBytes()
        {
            var emptyBytes = System.IO.File.ReadAllBytes("text/empty.txt");

            _ = KreuzbergLib.ExtractBytesSync(emptyBytes, "text/plain", new ExtractionConfig());
        }

        /// <summary>Extracting with an empty MIME type must throw <c>KreuzbergException</c>.</summary>
        [Fact]
        public void Test_ErrorEmptyMime()
        {
            Assert.ThrowsAny<KreuzbergException>(() =>
            {
                var payload = System.IO.File.ReadAllBytes("text/plain.txt");
                KreuzbergLib.ExtractBytesSync(payload, "", new ExtractionConfig());
            });
        }

        /// <summary>extract_bytes with both force-OCR and disable-OCR set must throw <c>KreuzbergException</c>.</summary>
        [Fact]
        public void Test_ErrorExtractBytesConflictingOcr()
        {
            Assert.ThrowsAny<KreuzbergException>(() =>
            {
                var payload = System.IO.File.ReadAllBytes("text/fake_text.txt");
                var conflicting = new ExtractionConfig { DisableOcr = true, ForceOcr = true };
                KreuzbergLib.ExtractBytesSync(payload, "text/plain", conflicting);
            });
        }

        /// <summary>Extracting with a malformed MIME type must throw <c>KreuzbergException</c>.</summary>
        [Fact]
        public void Test_ErrorInvalidMimeFormat()
        {
            Assert.ThrowsAny<KreuzbergException>(() =>
            {
                var payload = System.IO.File.ReadAllBytes("text/plain.txt");
                KreuzbergLib.ExtractBytesSync(payload, "not-a-mime", new ExtractionConfig());
            });
        }

        /// <summary>Extracting with an unsupported MIME type must throw <c>KreuzbergException</c>.</summary>
        [Fact]
        public void Test_ErrorUnsupportedMime()
        {
            Assert.ThrowsAny<KreuzbergException>(() =>
            {
                var payload = System.IO.File.ReadAllBytes("text/plain.txt");
                KreuzbergLib.ExtractBytesSync(payload, "application/x-nonexistent", new ExtractionConfig());
            });
        }
    }
}

73
e2e/csharp/tests/FormatSpecificTests.cs generated Normal file
View File

@@ -0,0 +1,73 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Threading.Tasks;
using Xunit;
using Kreuzberg;
using static Kreuzberg.KreuzbergLib;
namespace Kreuzberg
{
    /// <summary>
    /// End-to-end tests for the <c>format_specific</c> fixture category. Each test extracts
    /// one fixture document through the synchronous API, passing <c>null</c> as the
    /// configuration argument. Fixture paths are relative to the current working directory.
    /// </summary>
    public class FormatSpecificTests
    {
        // Generator-emitted serializer settings; no test in this class reads them.
        private static readonly JsonSerializerOptions ConfigOptions = new() { Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) }, DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault };

        /// <summary>DOCX bytes extracted with <c>ExtractBytesSync</c> must yield at least 20 characters of content.</summary>
        [Fact]
        public void Test_FormatDocxStandalone()
        {
            var result = KreuzbergLib.ExtractBytesSync(System.IO.File.ReadAllBytes("docx/fake.docx"), "application/vnd.openxmlformats-officedocument.wordprocessingml.document", null);

            Assert.True(result.Content.Length >= 20, "expected length >= 20");
        }

        /// <summary>
        /// HWPX bytes extracted with <c>ExtractBytesSync</c> must yield at least 20 characters
        /// and contain "hello from hwpx", ignoring case.
        /// </summary>
        [Fact]
        public void Test_FormatHwpxStandalone()
        {
            var result = KreuzbergLib.ExtractBytesSync(System.IO.File.ReadAllBytes("hwpx/simple.hwpx"), "application/haansofthwpx", null);

            Assert.True(result.Content.Length >= 20, "expected length >= 20");
            // Ordinal, case-insensitive match: lowercasing with the current culture first
            // would make the outcome depend on the machine's locale.
            Assert.Contains("hello from hwpx", result.Content.ToString(), StringComparison.OrdinalIgnoreCase);
        }

        /// <summary>
        /// PDF bytes extracted with <c>ExtractBytesSync</c> must yield at least 50 characters
        /// and contain "Mallori" or "May" (case-sensitive).
        /// </summary>
        [Fact]
        public void Test_FormatPdfText()
        {
            var result = KreuzbergLib.ExtractBytesSync(System.IO.File.ReadAllBytes("pdf/fake_memo.pdf"), "application/pdf", null);
            var text = result.Content.ToString();

            Assert.True(result.Content.Length >= 50, "expected length >= 50");
            Assert.True(text.Contains("Mallori") || text.Contains("May"), "expected to contain at least one of the specified values");
        }

        /// <summary>Extracts a PPTX file with <c>ExtractFileSync</c>; passes when the call does not throw.</summary>
        [Fact]
        public void Test_FormatPptx()
        {
            _ = KreuzbergLib.ExtractFileSync("pptx/simple.pptx", "application/vnd.openxmlformats-officedocument.presentationml.presentation", null);
        }

        /// <summary>Extracts an XLSX file with <c>ExtractFileSync</c>; passes when the call does not throw.</summary>
        [Fact]
        public void Test_FormatXlsx()
        {
            _ = KreuzbergLib.ExtractFileSync("xlsx/stanley_cups.xlsx", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", null);
        }
    }
}

55
e2e/csharp/tests/MimeUtilitiesTests.cs generated Normal file
View File

@@ -0,0 +1,55 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Threading.Tasks;
using Xunit;
using Kreuzberg;
using static Kreuzberg.KreuzbergLib;
namespace Kreuzberg
{
    /// <summary>
    /// End-to-end tests for the <c>mime_utilities</c> fixture category: MIME detection from
    /// raw bytes and the MIME-to-extension lookup. Fixture paths are relative to the current
    /// working directory.
    /// </summary>
    public class MimeUtilitiesTests
    {
        // Generator-emitted serializer settings; no test in this class reads them.
        private static readonly JsonSerializerOptions ConfigOptions = new() { Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) }, DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault };

        /// <summary>The MIME type detected for the PDF fixture's bytes must mention "pdf", ignoring case.</summary>
        [Fact]
        public void Test_MimeDetectBytes()
        {
            var result = KreuzbergLib.DetectMimeTypeFromBytes(System.IO.File.ReadAllBytes("pdf/fake_memo.pdf"));

            // Ordinal, case-insensitive comparison instead of a culture-sensitive ToLower().
            Assert.Contains("pdf", result.ToString(), StringComparison.OrdinalIgnoreCase);
        }

        /// <summary>The MIME type detected for the PNG fixture's bytes must mention "png", ignoring case.</summary>
        [Fact]
        public void Test_MimeDetectImage()
        {
            var result = KreuzbergLib.DetectMimeTypeFromBytes(System.IO.File.ReadAllBytes("images/test_hello_world.png"));

            Assert.Contains("png", result.ToString(), StringComparison.OrdinalIgnoreCase);
        }

        /// <summary>
        /// The extensions returned for <c>application/pdf</c>, serialized to JSON, must
        /// mention "pdf", ignoring case.
        /// </summary>
        [Fact]
        public void Test_MimeGetExtensions()
        {
            var result = KreuzbergLib.GetExtensionsForMime("application/pdf");

            Assert.Contains("pdf", JsonSerializer.Serialize(result), StringComparison.OrdinalIgnoreCase);
        }
    }
}

View File

@@ -0,0 +1,50 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Threading.Tasks;
using Xunit;
using Kreuzberg;
using static Kreuzberg.KreuzbergLib;
namespace Kreuzberg
{
    /// <summary>
    /// End-to-end tests for the <c>ocr_backend_management</c> fixture category. Each test
    /// makes a single registry call and passes when that call does not throw; no test
    /// inspects the registry contents afterwards.
    /// </summary>
    public class OcrBackendManagementTests
    {
        // Generator-emitted serializer settings; no test in this class reads them.
        private static readonly JsonSerializerOptions ConfigOptions = new JsonSerializerOptions
        {
            DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
            Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
        };

        /// <summary>Calls <c>KreuzbergLib.ClearOcrBackends</c>.</summary>
        [Fact]
        public void Test_OcrBackendsClear()
        {
            KreuzbergLib.ClearOcrBackends();
        }

        /// <summary>Calls <c>KreuzbergLib.ListOcrBackends</c> and discards the returned value.</summary>
        [Fact]
        public void Test_OcrBackendsList()
        {
            _ = KreuzbergLib.ListOcrBackends();
        }

        /// <summary>
        /// Calls <c>KreuzbergLib.UnregisterOcrBackend</c> with the name
        /// "nonexistent-backend-xyz"; the call is expected to complete without throwing.
        /// </summary>
        [Fact]
        public void Test_OcrBackendsUnregister()
        {
            const string unknownBackend = "nonexistent-backend-xyz";
            KreuzbergLib.UnregisterOcrBackend(unknownBackend);
        }
    }
}

48
e2e/csharp/tests/PdfTests.cs generated Normal file
View File

@@ -0,0 +1,48 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Threading.Tasks;
using Xunit;
using Kreuzberg;
using static Kreuzberg.KreuzbergLib;
namespace Kreuzberg
{
    /// <summary>
    /// End-to-end tests for the <c>pdf</c> fixture category, covering
    /// <c>KreuzbergLib.RenderPdfPageToPng</c>. Fixture paths are relative to the current
    /// working directory.
    /// </summary>
    public class PdfTests
    {
        // Generator-emitted serializer settings; no test in this class reads them.
        private static readonly JsonSerializerOptions ConfigOptions = new() { Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) }, DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault };

        /// <summary>Rendering page index 0 of the memo fixture must return a non-null, non-empty buffer.</summary>
        [Fact]
        public void Test_RenderPdfPageFirst()
        {
            var result = KreuzbergLib.RenderPdfPageToPng(System.IO.File.ReadAllBytes("pdf/fake_memo.pdf"), 0, null, null);

            Assert.NotNull(result);
            // The fixture's 'min_length' assertion is not emitted for byte[] results, so at
            // least make sure an empty buffer cannot pass as a successful render.
            Assert.NotEmpty(result);
        }

        /// <summary>Requesting page index 999 of the memo fixture must raise a <c>KreuzbergException</c>.</summary>
        [Fact]
        public void Test_RenderPdfPageOutOfRange()
        {
            Assert.ThrowsAny<KreuzbergException>(() =>
            {
                KreuzbergLib.RenderPdfPageToPng(System.IO.File.ReadAllBytes("pdf/fake_memo.pdf"), 999, null, null);
            });
        }
    }
}

234
e2e/csharp/tests/PluginApiTests.cs generated Normal file
View File

@@ -0,0 +1,234 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Threading.Tasks;
using Xunit;
using Kreuzberg;
using static Kreuzberg.KreuzbergLib;
namespace Kreuzberg
{
    /// <summary>
    /// End-to-end tests for the <c>plugin_api</c> fixture category. The register tests wrap
    /// a do-nothing stub in the matching <c>*Bridge.Register</c> call and hand the result to
    /// <c>KreuzbergLib</c>; the unregister tests call the matching <c>Unregister*</c> method
    /// with a fixed name. Every test passes when its call does not throw.
    /// </summary>
    public class PluginApiTests
    {
        // Generator-emitted serializer settings; no test in this class reads them.
        private static readonly JsonSerializerOptions ConfigOptions = new() { Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) }, DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault };

        // Captured here on purpose. Inside the post-processor stub the simple name
        // `ProcessingStage` binds to the stub's own ProcessingStage() method, so writing
        // `ProcessingStage.Early` there is a member access on a method group (CS0119).
        // At this level the name still binds to the type.
        private static readonly ProcessingStage EarlyStage = ProcessingStage.Early;

        /// <summary>Registers a stub document extractor through <c>DocumentExtractorBridge</c>.</summary>
        [Fact]
        public void Test_RegisterDocumentExtractorTraitBridge()
        {
            KreuzbergLib.RegisterDocumentExtractor(DocumentExtractorBridge.Register(new TestStub_RegisterDocumentExtractorTraitBridge()));
        }

        /// <summary>Registers a stub embedding backend through <c>EmbeddingBackendBridge</c>.</summary>
        [Fact]
        public void Test_RegisterEmbeddingBackendTraitBridge()
        {
            KreuzbergLib.RegisterEmbeddingBackend(EmbeddingBackendBridge.Register(new TestStub_RegisterEmbeddingBackendTraitBridge()));
        }

        /// <summary>Registers a stub OCR backend through <c>OcrBackendBridge</c>.</summary>
        [Fact]
        public void Test_RegisterOcrBackendTraitBridge()
        {
            KreuzbergLib.RegisterOcrBackend(OcrBackendBridge.Register(new TestStub_RegisterOcrBackendTraitBridge()));
        }

        /// <summary>Registers a stub post-processor through <c>PostProcessorBridge</c>.</summary>
        [Fact]
        public void Test_RegisterPostProcessorTraitBridge()
        {
            KreuzbergLib.RegisterPostProcessor(PostProcessorBridge.Register(new TestStub_RegisterPostProcessorTraitBridge()));
        }

        /// <summary>Registers a stub renderer through <c>RendererBridge</c>.</summary>
        [Fact]
        public void Test_RegisterRendererTraitBridge()
        {
            KreuzbergLib.RegisterRenderer(RendererBridge.Register(new TestStub_RegisterRendererTraitBridge()));
        }

        /// <summary>Registers a stub validator through <c>ValidatorBridge</c>.</summary>
        [Fact]
        public void Test_RegisterValidatorTraitBridge()
        {
            KreuzbergLib.RegisterValidator(ValidatorBridge.Register(new TestStub_RegisterValidatorTraitBridge()));
        }

        // NOTE(review): the "AfterRegister" tests below do not register anything under the
        // names they unregister, and none of the stubs in this class uses those names.
        // Presumably they exercise unregistering an unknown name - confirm against the fixtures.

        /// <summary>Calls <c>UnregisterDocumentExtractor</c> with the name "test-extractor".</summary>
        [Fact]
        public void Test_UnregisterDocumentExtractorAfterRegister()
        {
            KreuzbergLib.UnregisterDocumentExtractor("test-extractor");
        }

        /// <summary>Calls <c>UnregisterEmbeddingBackend</c> with the name "test-embedding-backend".</summary>
        [Fact]
        public void Test_UnregisterEmbeddingBackendAfterRegister()
        {
            KreuzbergLib.UnregisterEmbeddingBackend("test-embedding-backend");
        }

        /// <summary>Calls <c>UnregisterPostProcessor</c> with the name "test-processor".</summary>
        [Fact]
        public void Test_UnregisterPostProcessorAfterRegister()
        {
            KreuzbergLib.UnregisterPostProcessor("test-processor");
        }

        /// <summary>Calls <c>UnregisterRenderer</c> with the name "test-renderer".</summary>
        [Fact]
        public void Test_UnregisterRendererAfterRegister()
        {
            KreuzbergLib.UnregisterRenderer("test-renderer");
        }

        /// <summary>Calls <c>UnregisterValidator</c> with the name "test-validator".</summary>
        [Fact]
        public void Test_UnregisterValidatorAfterRegister()
        {
            KreuzbergLib.UnregisterValidator("test-validator");
        }

        /// <summary>Do-nothing <c>IDocumentExtractor</c>: every member returns an empty or default value.</summary>
        private class TestStub_RegisterDocumentExtractorTraitBridge : IDocumentExtractor
        {
            public string Name => "register_document_extractor_trait_bridge";
            public string Version => "1.0.0";
            public string ExtractBytes(byte[] content, string mimeType, ExtractionConfig config) => "";
            public string ExtractFile(string path, string mimeType, ExtractionConfig config) => "";
            public List<string> SupportedMimeTypes() => [];
            public int Priority() => 0;
            public bool CanHandle(string path, string mimeType) => false;
            public void Initialize() { }
            public void Shutdown() { }
            public string Description() => "";
            public string Author() => "";
        }

        /// <summary>Do-nothing <c>IEmbeddingBackend</c>: reports zero dimensions and returns no embeddings.</summary>
        private class TestStub_RegisterEmbeddingBackendTraitBridge : IEmbeddingBackend
        {
            public string Name => "register_embedding_backend_trait_bridge";
            public string Version => "1.0.0";
            public ulong Dimensions() => 0;
            public List<List<float>> Embed(List<string> texts) => [];
            public void Initialize() { }
            public void Shutdown() { }
            public string Description() => "";
            public string Author() => "";
        }

        /// <summary>Do-nothing <c>IOcrBackend</c>: returns fresh <c>ExtractionResult</c> instances and declines every capability query.</summary>
        private class TestStub_RegisterOcrBackendTraitBridge : IOcrBackend
        {
            public string Name => "register_ocr_backend_trait_bridge";
            public string Version => "1.0.0";
            public ExtractionResult ProcessImage(byte[] imageBytes, OcrConfig config) => new ExtractionResult();
            public ExtractionResult ProcessImageFile(string path, OcrConfig config) => new ExtractionResult();
            public bool SupportsLanguage(string lang) => false;
            public OcrBackendType BackendType() => OcrBackendType.Tesseract;
            public List<string> SupportedLanguages() => [];
            public bool SupportsTableDetection() => false;
            public bool SupportsDocumentProcessing() => false;
            public ExtractionResult ProcessDocument(string path, OcrConfig config) => new ExtractionResult();
            public void Initialize() { }
            public void Shutdown() { }
            public string Description() => "";
            public string Author() => "";
        }

        /// <summary>Do-nothing <c>IPostProcessor</c>: reports the early stage and never asks to process.</summary>
        private class TestStub_RegisterPostProcessorTraitBridge : IPostProcessor
        {
            public string Name => "register_post_processor_trait_bridge";
            public string Version => "1.0.0";
            public void Process(ExtractionResult result, ExtractionConfig config) { }

            // Returns the value captured by the outer class; see the comment on EarlyStage.
            public ProcessingStage ProcessingStage() => EarlyStage;
            public bool ShouldProcess(ExtractionResult result, ExtractionConfig config) => false;
            public ulong EstimatedDurationMs(ExtractionResult result) => 0;
            public int Priority() => 0;
            public void Initialize() { }
            public void Shutdown() { }
            public string Description() => "";
            public string Author() => "";
        }

        /// <summary>Do-nothing <c>IRenderer</c>: renders every document to an empty string.</summary>
        private class TestStub_RegisterRendererTraitBridge : IRenderer
        {
            public string Name => "register_renderer_trait_bridge";
            public string Version => "1.0.0";
            public string Render(string doc) => "";
            public void Initialize() { }
            public void Shutdown() { }
            public string Description() => "";
            public string Author() => "";
        }

        /// <summary>Do-nothing <c>IValidator</c>: never asks to validate and its validation is a no-op.</summary>
        private class TestStub_RegisterValidatorTraitBridge : IValidator
        {
            public string Name => "register_validator_trait_bridge";
            public string Version => "1.0.0";
            public void Validate(ExtractionResult result, ExtractionConfig config) { }
            public bool ShouldValidate(ExtractionResult result, ExtractionConfig config) => false;
            public int Priority() => 0;
            public void Initialize() { }
            public void Shutdown() { }
            public string Description() => "";
            public string Author() => "";
        }
    }
}

View File

@@ -0,0 +1,43 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Threading.Tasks;
using Xunit;
using Kreuzberg;
using static Kreuzberg.KreuzbergLib;
namespace Kreuzberg
{
    /// <summary>
    /// End-to-end tests for the <c>post_processor_management</c> fixture category. Each test
    /// makes one registry call and passes when it does not throw; the registry contents are
    /// not inspected.
    /// </summary>
    public class PostProcessorManagementTests
    {
        // Generator-emitted serializer settings; no test in this class reads them.
        private static readonly JsonSerializerOptions ConfigOptions = new JsonSerializerOptions
        {
            DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
            Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
        };

        /// <summary>Calls <c>KreuzbergLib.ClearPostProcessors</c>.</summary>
        [Fact]
        public void Test_PostProcessorsClear()
        {
            KreuzbergLib.ClearPostProcessors();
        }

        /// <summary>Calls <c>KreuzbergLib.ListPostProcessors</c> and discards the returned value.</summary>
        [Fact]
        public void Test_PostProcessorsList()
        {
            _ = KreuzbergLib.ListPostProcessors();
        }
    }
}

View File

@@ -0,0 +1,52 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Threading.Tasks;
using Xunit;
using Kreuzberg;
using static Kreuzberg.KreuzbergLib;
namespace Kreuzberg
{
    /// <summary>
    /// End-to-end tests for the <c>registry_operations</c> fixture category. Each test asks
    /// <c>KreuzbergLib.GetExtensionsForMime</c> for one MIME type and passes when the call
    /// does not throw; the returned value is discarded.
    /// </summary>
    public class RegistryOperationsTests
    {
        // Generator-emitted serializer settings; no test in this class reads them.
        private static readonly JsonSerializerOptions ConfigOptions = new JsonSerializerOptions
        {
            DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
            Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
        };

        /// <summary>Looks up the extensions for the DOCX MIME type.</summary>
        [Fact]
        public void Test_ExtensionsDocx()
        {
            const string docxMime = "application/vnd.openxmlformats-officedocument.wordprocessingml.document";
            _ = KreuzbergLib.GetExtensionsForMime(docxMime);
        }

        /// <summary>Looks up the extensions for <c>text/html</c>.</summary>
        [Fact]
        public void Test_ExtensionsHtml()
        {
            const string htmlMime = "text/html";
            _ = KreuzbergLib.GetExtensionsForMime(htmlMime);
        }

        /// <summary>Looks up the extensions for <c>application/pdf</c>.</summary>
        [Fact]
        public void Test_ExtensionsPdf()
        {
            const string pdfMime = "application/pdf";
            _ = KreuzbergLib.GetExtensionsForMime(pdfMime);
        }
    }
}

76
e2e/csharp/tests/RegistryTests.cs generated Normal file
View File

@@ -0,0 +1,76 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Threading.Tasks;
using Xunit;
using Kreuzberg;
using static Kreuzberg.KreuzbergLib;
namespace Kreuzberg
{
    /// <summary>
    /// End-to-end tests for the <c>registry</c> fixture category. Each test calls one
    /// <c>KreuzbergLib.List*</c> method and passes when the call does not throw; the
    /// returned value is discarded.
    /// </summary>
    public class RegistryTests
    {
        // Generator-emitted serializer settings; no test in this class reads them.
        private static readonly JsonSerializerOptions ConfigOptions = new JsonSerializerOptions
        {
            DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
            Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
        };

        /// <summary>Calls <c>KreuzbergLib.ListDocumentExtractors</c>.</summary>
        [Fact]
        public void Test_ListDocumentExtractors()
        {
            _ = KreuzbergLib.ListDocumentExtractors();
        }

        /// <summary>Calls <c>KreuzbergLib.ListEmbeddingBackends</c>.</summary>
        [Fact]
        public void Test_ListEmbeddingBackends()
        {
            _ = KreuzbergLib.ListEmbeddingBackends();
        }

        /// <summary>Calls <c>KreuzbergLib.ListOcrBackends</c>.</summary>
        [Fact]
        public void Test_ListOcrBackends()
        {
            _ = KreuzbergLib.ListOcrBackends();
        }

        /// <summary>Calls <c>KreuzbergLib.ListPostProcessors</c>.</summary>
        [Fact]
        public void Test_ListPostProcessors()
        {
            _ = KreuzbergLib.ListPostProcessors();
        }

        /// <summary>Calls <c>KreuzbergLib.ListRenderers</c>.</summary>
        [Fact]
        public void Test_ListRenderers()
        {
            _ = KreuzbergLib.ListRenderers();
        }

        /// <summary>Calls <c>KreuzbergLib.ListValidators</c>.</summary>
        [Fact]
        public void Test_ListValidators()
        {
            _ = KreuzbergLib.ListValidators();
        }
    }
}

View File

@@ -0,0 +1,43 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Threading.Tasks;
using Xunit;
using Kreuzberg;
using static Kreuzberg.KreuzbergLib;
namespace Kreuzberg
{
    /// <summary>
    /// End-to-end tests for the <c>renderer_management</c> fixture category. Each test makes
    /// one registry call and passes when it does not throw; the registry contents are not
    /// inspected.
    /// </summary>
    public class RendererManagementTests
    {
        // Generator-emitted serializer settings; no test in this class reads them.
        private static readonly JsonSerializerOptions ConfigOptions = new JsonSerializerOptions
        {
            DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
            Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
        };

        /// <summary>Calls <c>KreuzbergLib.ClearRenderers</c>.</summary>
        [Fact]
        public void Test_RenderersClear()
        {
            KreuzbergLib.ClearRenderers();
        }

        /// <summary>Calls <c>KreuzbergLib.ListRenderers</c> and discards the returned value.</summary>
        [Fact]
        public void Test_RenderersList()
        {
            _ = KreuzbergLib.ListRenderers();
        }
    }
}

120
e2e/csharp/tests/SmokeTests.cs generated Normal file
View File

@@ -0,0 +1,120 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Threading.Tasks;
using Xunit;
using Kreuzberg;
using static Kreuzberg.KreuzbergLib;
namespace Kreuzberg
{
    /// <summary>
    /// End-to-end smoke tests: one representative document per format is pushed through the
    /// asynchronous extraction API, then the reported MIME type, a minimum content length and
    /// (where the fixture provides them) expected text fragments are checked. Fixture paths
    /// are relative to the current working directory.
    /// </summary>
    public class SmokeTests
    {
        // Generator-emitted serializer settings; no test in this class reads them.
        private static readonly JsonSerializerOptions ConfigOptions = new() { Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) }, DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault };

        /// <summary>Fails unless <paramref name="text"/> contains at least one candidate (ordinal, case-sensitive).</summary>
        private static void AssertContainsAny(string? text, params string[] candidates)
        {
            Assert.True(
                text is not null && candidates.Any(candidate => text.Contains(candidate, StringComparison.Ordinal)),
                "expected to contain at least one of the specified values");
        }

        /// <summary>Fails unless <paramref name="text"/> contains every fragment, ignoring case.</summary>
        private static void AssertContainsAllIgnoreCase(string? text, params string[] fragments)
        {
            foreach (var fragment in fragments)
            {
                // Ordinal comparison on purpose: lowercasing with the current culture first
                // would, for example, turn "PHI" into "phı" under a Turkish culture.
                Assert.Contains(fragment, text, StringComparison.OrdinalIgnoreCase);
            }
        }

        /// <summary>PNG bytes extracted with a config parsed from "{}" must report image/png and contain a greeting word.</summary>
        [Fact]
        public async Task Test_OcrImagePng()
        {
            // OCR: PNG image extraction with OCR enabled. In WASM this exercises the Uint8Array bridge parameter and Promise await in the generated OcrBackend bridge.
            var result = await KreuzbergLib.ExtractBytesAsync(System.IO.File.ReadAllBytes("images/test_hello_world.png"), "image/png", ExtractionConfig.FromJson("{}"));

            // `?.` so that a missing MIME type fails the assertion instead of throwing.
            Assert.Equal("image/png", result.MimeType?.Trim());
            Assert.True(result.Content.Length >= 1, "expected length >= 1");
            AssertContainsAny(result.Content.ToString(), "Hello", "World", "hello", "world");
        }

        /// <summary>The DOCX fixture must report the DOCX MIME type, at least 20 characters and a known word.</summary>
        [Fact]
        public async Task Test_SmokeDocxBasic()
        {
            var result = await KreuzbergLib.ExtractFileAsync("docx/fake.docx", "application/vnd.openxmlformats-officedocument.wordprocessingml.document", new ExtractionConfig());

            Assert.Equal("application/vnd.openxmlformats-officedocument.wordprocessingml.document", result.MimeType?.Trim());
            Assert.True(result.Content.Length >= 20, "expected length >= 20");
            AssertContainsAny(result.Content.ToString(), "Lorem", "ipsum", "document", "text");
        }

        /// <summary>The HTML table fixture must report text/html, at least 10 characters and a known table term.</summary>
        [Fact]
        public async Task Test_SmokeHtmlBasic()
        {
            var result = await KreuzbergLib.ExtractFileAsync("html/simple_table.html", "text/html", new ExtractionConfig());

            Assert.Equal("text/html", result.MimeType?.Trim());
            Assert.True(result.Content.Length >= 10, "expected length >= 10");
            AssertContainsAny(result.Content.ToString(), "Sample Data Table", "Laptop", "Electronics", "Product");
        }

        /// <summary>A PNG extracted with OCR disabled and no MIME hint must still report image/png.</summary>
        [Fact]
        public async Task Test_SmokeImagePng()
        {
            var result = await KreuzbergLib.ExtractFileAsync("images/sample.png", null, new ExtractionConfig { DisableOcr = true });

            Assert.Equal("image/png", result.MimeType?.Trim());
        }

        /// <summary>The JSON fixture must report application/json and at least 5 characters.</summary>
        [Fact]
        public async Task Test_SmokeJsonBasic()
        {
            var result = await KreuzbergLib.ExtractFileAsync("json/simple.json", "application/json", new ExtractionConfig());

            Assert.Equal("application/json", result.MimeType?.Trim());
            Assert.True(result.Content.Length >= 5, "expected length >= 5");
        }

        /// <summary>The memo PDF must report application/pdf, at least 50 characters and a known phrase.</summary>
        [Fact]
        public async Task Test_SmokePdfBasic()
        {
            var result = await KreuzbergLib.ExtractFileAsync("pdf/fake_memo.pdf", "application/pdf", new ExtractionConfig());

            Assert.Equal("application/pdf", result.MimeType?.Trim());
            Assert.True(result.Content.Length >= 50, "expected length >= 50");
            AssertContainsAny(result.Content.ToString(), "May 5, 2023", "To Whom it May Concern");
        }

        /// <summary>The plain-text fixture must report text/plain and at least 5 characters.</summary>
        [Fact]
        public async Task Test_SmokeTxtBasic()
        {
            var result = await KreuzbergLib.ExtractFileAsync("text/report.txt", "text/plain", new ExtractionConfig());

            Assert.Equal("text/plain", result.MimeType?.Trim());
            Assert.True(result.Content.Length >= 5, "expected length >= 5");
        }

        /// <summary>The spreadsheet fixture must report the XLSX MIME type, at least 100 characters and every expected cell fragment.</summary>
        [Fact]
        public async Task Test_SmokeXlsxBasic()
        {
            var result = await KreuzbergLib.ExtractFileAsync("xlsx/stanley_cups.xlsx", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", new ExtractionConfig());

            Assert.Equal("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", result.MimeType?.Trim());
            Assert.True(result.Content.Length >= 100, "expected length >= 100");
            AssertContainsAllIgnoreCase(
                result.Content.ToString(),
                "team", "location", "stanley cups", "blues", "flyers", "maple leafs", "stl", "phi", "tor");
            // skipped: field 'tables' not available on result type
            // skipped: field 'metadata.format.excel.sheet_count' not available on result type
            // skipped: field 'metadata.format.excel.sheet_names' not available on result type
        }
    }
}

View File

@@ -0,0 +1,43 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Threading.Tasks;
using Xunit;
using Kreuzberg;
using static Kreuzberg.KreuzbergLib;
namespace Kreuzberg
{
    /// <summary>
    /// End-to-end tests for the <c>validator_management</c> fixture category. Each test makes
    /// one registry call and passes when it does not throw; the registry contents are not
    /// inspected.
    /// </summary>
    public class ValidatorManagementTests
    {
        // Generator-emitted serializer settings; no test in this class reads them.
        private static readonly JsonSerializerOptions ConfigOptions = new JsonSerializerOptions
        {
            DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
            Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
        };

        /// <summary>Calls <c>KreuzbergLib.ClearValidators</c>.</summary>
        [Fact]
        public void Test_ValidatorsClear()
        {
            KreuzbergLib.ClearValidators();
        }

        /// <summary>Calls <c>KreuzbergLib.ListValidators</c> and discards the returned value.</summary>
        [Fact]
        public void Test_ValidatorsList()
        {
            _ = KreuzbergLib.ListValidators();
        }
    }
}

4
e2e/dart/dart_test.yaml generated Normal file
View File

@@ -0,0 +1,4 @@
# Generated by alef — DO NOT EDIT.
# Run test files sequentially to avoid overwhelming the SUT with
# concurrent keep-alive connections.
concurrency: 1

14
e2e/dart/pubspec.yaml generated Normal file
View File

@@ -0,0 +1,14 @@
name: e2e_dart
version: 0.1.0
publish_to: none
environment:
sdk: ">=3.11.0 <4.0.0"
dependencies:
kreuzberg:
path: ../../packages/dart
dev_dependencies:
test: ^1.25.0
http: ^1.2.0

68
e2e/dart/test/async_test.dart generated Normal file
View File

@@ -0,0 +1,68 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// ignore_for_file: unused_local_variable
import 'package:test/test.dart';
import 'dart:io';
import 'package:kreuzberg/kreuzberg.dart';
import 'package:kreuzberg/src/kreuzberg_bridge_generated/frb_generated.dart' show RustLib;
// E2e tests for category: async
/// Converts [value] to the text form compared against fixture expectations.
///
/// Returns '' for `null`, the snake_case variant name for enums, and
/// `toString()` for everything else.
String _alefE2eText(Object? value) {
  if (value == null) return '';
  // Real Dart enums expose the variant through `name`; no string parsing needed.
  if (value is Enum) return _camelToSnake(value.name);
  final str = value.toString();
  // Strings and primitives are never enums. Returning them untouched keeps
  // values such as 'report.txt' or 3.14 from being cut down to the part after
  // the dot.
  if (value is String || value is num || value is bool) return str;
  // Fallback for enum-like objects whose toString() is 'TypeName.variantName'.
  final parts = str.split('.');
  if (parts.length == 2) {
    // Convert camelCase variant names to snake_case for serde compatibility.
    // E.g. 'toolCalls' -> 'tool_calls', 'stop' -> 'stop'.
    return _camelToSnake(parts[1]);
  }
  return str;
}
/// Rewrites a camelCase identifier in snake_case: every ASCII capital letter
/// is lower-cased and, unless it is the very first character, prefixed with
/// an underscore. All other characters are copied unchanged.
String _camelToSnake(String camel) {
  final asciiCapital = RegExp(r'[A-Z]');
  return camel.replaceAllMapped(asciiCapital, (match) {
    final lowered = match[0]!.toLowerCase();
    return match.start > 0 ? '_$lowered' : lowered;
  });
}
/// Registers the e2e tests for the `async` category.
void main() {
  // Initialise the Rust bridge once, then switch into the fixtures directory
  // (FIXTURES_DIR, or ../../test_documents) when it exists so that the
  // relative fixture paths below resolve.
  setUpAll(() async {
    await RustLib.init();
    final fixturesPath = Platform.environment['FIXTURES_DIR'] ?? '../../test_documents';
    final fixturesDir = Directory(fixturesPath);
    if (fixturesDir.existsSync()) {
      Directory.current = fixturesDir;
    }
  });

  test('Async extract_bytes call on PDF document', () async {
    final pdfBytes = File('pdf/fake_memo.pdf').readAsBytesSync();
    final result = await KreuzbergBridge.extractBytes(pdfBytes, 'application/pdf');
    expect(result.mimeType.toString().trim(), equals('application/pdf'));
    expect(result.content.length, greaterThanOrEqualTo(50));
  });

  test('extract_bytes empty MIME async', () async {
    final textBytes = File('text/plain.txt').readAsBytesSync();
    await expectLater(KreuzbergBridge.extractBytes(textBytes, ''), throwsA(anything));
  });

  test('extract_bytes unsupported MIME async', () async {
    final textBytes = File('text/plain.txt').readAsBytesSync();
    await expectLater(KreuzbergBridge.extractBytes(textBytes, 'application/x-nonexistent'), throwsA(anything));
  });
}

71
e2e/dart/test/batch_test.dart generated Normal file
View File

@@ -0,0 +1,71 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// ignore_for_file: unused_local_variable
import 'dart:convert';
import 'dart:io';

import 'package:kreuzberg/kreuzberg.dart';
import 'package:kreuzberg/src/kreuzberg_bridge_generated/frb_generated.dart' show RustLib;
import 'package:test/test.dart';
// E2e tests for category: batch
/// Converts [value] to the text form compared against fixture expectations.
///
/// Returns '' for `null`, the snake_case variant name for enums, and
/// `toString()` for everything else.
String _alefE2eText(Object? value) {
  if (value == null) return '';
  // Real Dart enums expose the variant through `name`; no string parsing needed.
  if (value is Enum) return _camelToSnake(value.name);
  final str = value.toString();
  // Strings and primitives are never enums. Returning them untouched keeps
  // values such as 'report.txt' or 3.14 from being cut down to the part after
  // the dot.
  if (value is String || value is num || value is bool) return str;
  // Fallback for enum-like objects whose toString() is 'TypeName.variantName'.
  final parts = str.split('.');
  if (parts.length == 2) {
    // Convert camelCase variant names to snake_case for serde compatibility.
    // E.g. 'toolCalls' -> 'tool_calls', 'stop' -> 'stop'.
    return _camelToSnake(parts[1]);
  }
  return str;
}
/// Rewrites a camelCase identifier in snake_case: every ASCII capital letter
/// is lower-cased and, unless it is the very first character, prefixed with
/// an underscore. All other characters are copied unchanged.
String _camelToSnake(String camel) {
  final asciiCapital = RegExp(r'[A-Z]');
  return camel.replaceAllMapped(asciiCapital, (match) {
    final lowered = match[0]!.toLowerCase();
    return match.start > 0 ? '_$lowered' : lowered;
  });
}
/// Registers the e2e tests for the `batch` category.
///
/// Every test passes when its batch call completes without throwing; the
/// returned results are not inspected.
void main() {
  setUpAll(() async {
    await RustLib.init();
    // The batch items below use fixture-relative paths such as
    // 'pdf/fake_memo.pdf'. Switch into the fixtures directory (FIXTURES_DIR,
    // or ../../test_documents) when it exists so those paths point at real
    // files instead of depending on whatever the working directory happens
    // to be.
    final fixturesPath = Platform.environment['FIXTURES_DIR'] ?? '../../test_documents';
    final fixturesDir = Directory(fixturesPath);
    if (fixturesDir.existsSync()) Directory.current = fixturesDir;
  });

  test('Extract text from multiple files asynchronously', () async {
    final result = await KreuzbergBridge.batchExtractFiles([BatchFileItem(path: 'pdf/fake_memo.pdf'), BatchFileItem(path: 'text/fake_text.txt')]);
  });

  test('batch_extract_file async nonexistent', () async {
    final result = await KreuzbergBridge.batchExtractFiles([BatchFileItem(path: '/nonexistent/a.pdf')]);
  });

  test('batch_extract_file_sync nonexistent', () async {
    final result = await KreuzbergBridge.batchExtractFilesSync([BatchFileItem(path: '/nonexistent/a.pdf'), BatchFileItem(path: '/nonexistent/b.txt')]);
  });

  test('batch_extract_file_sync mixed', () async {
    final result = await KreuzbergBridge.batchExtractFilesSync([BatchFileItem(path: 'text/plain.txt'), BatchFileItem(path: '/nonexistent/missing.pdf')]);
  });

  test('Extract text from multiple files synchronously', () async {
    final result = await KreuzbergBridge.batchExtractFilesSync([BatchFileItem(path: 'pdf/fake_memo.pdf'), BatchFileItem(path: 'text/fake_text.txt')]);
  });
}

62
e2e/dart/test/code_test.dart generated Normal file
View File

@@ -0,0 +1,62 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// ignore_for_file: unused_local_variable
import 'package:test/test.dart';
import 'dart:io';
import 'package:kreuzberg/kreuzberg.dart';
import 'package:kreuzberg/src/kreuzberg_bridge_generated/frb_generated.dart' show RustLib;
// E2e tests for category: code
/// Converts [value] to the text form compared against fixture expectations.
///
/// Returns '' for `null`, the snake_case variant name for enums, and
/// `toString()` for everything else.
String _alefE2eText(Object? value) {
  if (value == null) return '';
  // Real Dart enums expose the variant through `name`; no string parsing needed.
  if (value is Enum) return _camelToSnake(value.name);
  final str = value.toString();
  // Strings and primitives are never enums. Returning them untouched keeps
  // values such as 'report.txt' or 3.14 from being cut down to the part after
  // the dot.
  if (value is String || value is num || value is bool) return str;
  // Fallback for enum-like objects whose toString() is 'TypeName.variantName'.
  final parts = str.split('.');
  if (parts.length == 2) {
    // Convert camelCase variant names to snake_case for serde compatibility.
    // E.g. 'toolCalls' -> 'tool_calls', 'stop' -> 'stop'.
    return _camelToSnake(parts[1]);
  }
  return str;
}
/// Rewrites a camelCase identifier in snake_case: every ASCII capital letter
/// is lower-cased and, unless it is the very first character, prefixed with
/// an underscore. All other characters are copied unchanged.
String _camelToSnake(String camel) {
  final asciiCapital = RegExp(r'[A-Z]');
  return camel.replaceAllMapped(asciiCapital, (match) {
    final lowered = match[0]!.toLowerCase();
    return match.start > 0 ? '_$lowered' : lowered;
  });
}
/// Registers the e2e tests for the `code` category.
void main() {
  // Initialise RustLib once, then switch the working directory to the fixture
  // directory (when it exists) so relative fixture paths resolve.
  setUpAll(() async {
    await RustLib.init();
    final fixtureRoot = Directory(Platform.environment['FIXTURES_DIR'] ?? '../../test_documents');
    if (fixtureRoot.existsSync()) {
      Directory.current = fixtureRoot;
    }
  });

  test('Test language detection from shebang line via bytes input', () async {
    const sourceCodeMime = 'text/x-source-code';
    final extraction = await KreuzbergBridge.extractFileSync('code/script.sh', sourceCodeMime);
    expect(extraction.mimeType.toString().trim(), equals(sourceCodeMime));
    expect(extraction.content.length, greaterThanOrEqualTo(10));
    for (final needle in ['build', 'clean']) {
      expect(extraction.content, contains(needle));
    }
  });
}

184
e2e/dart/test/contract_test.dart generated Normal file
View File

@@ -0,0 +1,184 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// ignore_for_file: unused_local_variable
import 'package:test/test.dart';
import 'dart:io';
import 'package:kreuzberg/kreuzberg.dart';
import 'package:kreuzberg/src/kreuzberg_bridge_generated/frb_generated.dart' show RustLib;
// E2e tests for category: contract
/// Renders [value] as the text form that e2e assertions compare against.
///
/// `null` becomes the empty string. Strings, numbers and booleans are returned
/// as their `toString()` unchanged, so dotted text such as `'file.txt'` or
/// `1.5` is never mistaken for an enum. Any other value whose `toString()`
/// contains exactly one `.` is treated as `EnumName.variantName` and reduced
/// to the variant name converted to snake_case.
String _alefE2eText(Object? value) {
  if (value == null) return '';
  final str = value.toString();
  // Plain scalars are never enums; returning early keeps a single '.' in
  // their text from being parsed as 'EnumName.variantName'.
  if (value is String || value is num || value is bool) return str;
  // Enum.toString() returns 'EnumName.variantName'. Extract the variant name.
  final parts = str.split('.');
  if (parts.length == 2) {
    // Convert camelCase variant names to snake_case for serde compatibility.
    // E.g. 'toolCalls' -> 'tool_calls', 'stop' -> 'stop'.
    return _camelToSnake(parts[1]);
  }
  return str;
}
/// Converts a camelCase identifier to snake_case.
///
/// Every ASCII uppercase letter is lowercased and, unless it is the first
/// character, prefixed with an underscore; all other characters are copied
/// through unchanged (e.g. 'toolCalls' -> 'tool_calls', 'stop' -> 'stop').
String _camelToSnake(String camel) {
  return camel.replaceAllMapped(RegExp(r'[A-Z]'), (match) {
    final lowered = match[0]!.toLowerCase();
    return match.start == 0 ? lowered : '_$lowered';
  });
}
/// Registers the e2e tests for the `contract` category.
void main() {
  // Fixture path, MIME types and config payload reused by the tests below.
  const memoPdf = 'pdf/fake_memo.pdf';
  const pdfMime = 'application/pdf';
  const docxMime = 'application/vnd.openxmlformats-officedocument.wordprocessingml.document';
  const markdownOutputJson = '{"output_format":"markdown"}';

  // Explicit ExtractionConfig used by several tests; only the
  // document-structure flag differs between them.
  ExtractionConfig explicitConfig({required bool includeDocumentStructure}) {
    return ExtractionConfig(
      useCache: true,
      enableQualityProcessing: true,
      forceOcr: false,
      disableOcr: false,
      resultFormat: ResultFormat.unified,
      outputFormat: OutputFormat.plain(),
      includeDocumentStructure: includeDocumentStructure,
      useLayoutForMarkdown: false,
      maxArchiveDepth: 3,
    );
  }

  // Shared body: extractBytes on the memo PDF without a config.
  Future<void> memoViaExtractBytes() async {
    final extraction = await KreuzbergBridge.extractBytes(File(memoPdf).readAsBytesSync(), pdfMime);
    expect(extraction.mimeType.toString().trim(), equals(pdfMime));
    expect(extraction.content.length, greaterThanOrEqualTo(10));
    expect(extraction.content.contains('May 5, 2023') || extraction.content.contains('Mallori'), isTrue);
  }

  // Shared body: extractBytes on the memo PDF with the markdown output config.
  Future<void> memoViaExtractBytesMarkdown() async {
    final config = await createExtractionConfigFromJson(json: markdownOutputJson);
    final extraction = await KreuzbergBridge.extractBytes(File(memoPdf).readAsBytesSync(), pdfMime, config);
    expect(extraction.mimeType.toString().trim(), equals(pdfMime));
    expect(extraction.content.length, greaterThanOrEqualTo(10));
    // skipped: field 'metadata.output_format' not available on dart result type
  }

  // Shared body: extractBytesSync on the memo PDF with the explicit config
  // (document structure disabled).
  Future<void> memoViaExplicitConfig() async {
    final extraction = await KreuzbergBridge.extractBytesSync(File(memoPdf).readAsBytesSync(), pdfMime, explicitConfig(includeDocumentStructure: false));
    expect(extraction.mimeType.toString().trim(), equals(pdfMime));
    expect(extraction.content.length, greaterThanOrEqualTo(10));
  }

  // Shared body: extractBytesSync on the memo PDF with the markdown output config.
  Future<void> memoViaExtractBytesSyncMarkdown() async {
    final config = await createExtractionConfigFromJson(json: markdownOutputJson);
    final extraction = await KreuzbergBridge.extractBytesSync(File(memoPdf).readAsBytesSync(), pdfMime, config);
    expect(extraction.mimeType.toString().trim(), equals(pdfMime));
    expect(extraction.content.length, greaterThanOrEqualTo(10));
    // skipped: field 'metadata.output_format' not available on dart result type
  }

  // Initialise RustLib once, then switch the working directory to the fixture
  // directory (when it exists) so relative fixture paths resolve.
  setUpAll(() async {
    await RustLib.init();
    final fixtureRoot = Directory(Platform.environment['FIXTURES_DIR'] ?? '../../test_documents');
    if (fixtureRoot.existsSync()) {
      Directory.current = fixtureRoot;
    }
  });

  test('Tests async batch bytes extraction API (batch_extract_bytes)', memoViaExtractBytes);
  test('Tests async batch bytes extraction with per-file configs (batch_extract_bytes with file_configs parameter)', memoViaExtractBytesMarkdown);
  test('Tests async batch file extraction API (batch_extract_file)', memoViaExtractBytes);
  test('Tests async batch file extraction with per-file configs (batch_extract_files with file_configs parameter)', memoViaExtractBytesMarkdown);
  test('Tests async bytes extraction API (extract_bytes)', memoViaExtractBytes);
  test('Tests async file extraction API (extract_file)', memoViaExtractBytes);

  test('Tests markdown chunker prepends heading hierarchy to chunk content', () async {
    final chunking = await createExtractionConfigFromJson(json: '{"chunking":{"chunker_type":"markdown","max_chars":300,"max_overlap":50,"prepend_heading_context":true}}');
    final extraction = await KreuzbergBridge.extractBytesSync(File('markdown/extraction_test.md').readAsBytesSync(), 'text/markdown', chunking);
    expect(extraction.content.length, greaterThanOrEqualTo(10));
    // skipped: field 'chunks' not available on dart result type
    // skipped: field 'chunks_have_content' not available on dart result type
    // skipped: field 'chunks_have_heading_context' not available on dart result type
    // skipped: field 'first_chunk_starts_with_heading' not available on dart result type
  });

  test('Tests document structure with DOCX heading-driven nesting', () async {
    final extraction = await KreuzbergBridge.extractBytesSync(File('docx/fake.docx').readAsBytesSync(), docxMime, explicitConfig(includeDocumentStructure: true));
    expect(extraction.mimeType.toString().trim(), equals(docxMime));
    // skipped: field 'document' not available on dart result type
    // skipped: field 'document.nodes' not available on dart result type
  });

  test('Tests element-based result format with element type assertions on DOCX', () async {
    final elementBased = await createExtractionConfigFromJson(json: '{"result_format":"element_based"}');
    final extraction = await KreuzbergBridge.extractBytesSync(File('docx/unit_test_headers.docx').readAsBytesSync(), docxMime, elementBased);
    expect(extraction.mimeType.contains(docxMime), isTrue);
    // skipped: field 'elements' not available on dart result type
  });

  test('Tests that extraction_timeout_secs config field is accepted and does not affect fast extractions', memoViaExplicitConfig);

  test('Tests keyword extraction via YAKE algorithm', () async {
    final keywords = await createExtractionConfigFromJson(json: '{"keywords":{"algorithm":"yake","max_keywords":10}}');
    final extraction = await KreuzbergBridge.extractBytesSync(File(memoPdf).readAsBytesSync(), pdfMime, keywords);
    expect(extraction.mimeType.toString().trim(), equals(pdfMime));
    expect(extraction.content.length, greaterThanOrEqualTo(10));
    // skipped: field 'keywords' not available on dart result type
    // skipped: field 'keywords' not available on dart result type
  });

  test('Tests page extraction and page marker configuration', () async {
    final pages = await createExtractionConfigFromJson(json: '{"pages":{"extract_pages":true,"insert_page_markers":true}}');
    final extraction = await KreuzbergBridge.extractBytesSync(File(memoPdf).readAsBytesSync(), pdfMime, pages);
    expect(extraction.mimeType.toString().trim(), equals(pdfMime));
    expect(extraction.content.length, greaterThanOrEqualTo(10));
    expect(extraction.content.contains('PAGE'), isTrue);
  });

  // skipped for the next test: field 'quality_score' not available on dart result type
  test('Tests quality scoring produces a score value in [0.0, 1.0]', memoViaExplicitConfig);

  test('Tests archive extraction with custom security limits', () async {
    final limits = await createExtractionConfigFromJson(json: '{"security_limits":{"max_archive_size":104857600,"max_compression_ratio":50,"max_files_in_archive":100}}');
    final extraction = await KreuzbergBridge.extractBytesSync(File('archives/documents.zip').readAsBytesSync(), 'application/zip', limits);
    final mime = extraction.mimeType;
    expect(mime.contains('application/zip') || mime.contains('application/x-zip-compressed'), isTrue);
    expect(extraction.content.length, greaterThanOrEqualTo(10));
  });

  test('Tests tree-sitter configuration round-trip', () async {
    const sourceCodeMime = 'text/x-source-code';
    final treeSitter = await createExtractionConfigFromJson(json: '{"tree_sitter":{"groups":["web"],"languages":["python","rust"],"process":{"comments":false,"diagnostics":false,"docstrings":false,"exports":true,"imports":true,"structure":true,"symbols":false}}}');
    final extraction = await KreuzbergBridge.extractFileSync('code/hello.py', sourceCodeMime, treeSitter);
    expect(extraction.mimeType.toString().trim(), equals(sourceCodeMime));
    expect(extraction.content.length, greaterThanOrEqualTo(5));
  });

  test('Tests markdown output format via bytes extraction API', memoViaExtractBytesSyncMarkdown);
  test('Tests Markdown output format', memoViaExtractBytesSyncMarkdown);
}

70
e2e/dart/test/detection_test.dart generated Normal file
View File

@@ -0,0 +1,70 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// ignore_for_file: unused_local_variable
import 'package:test/test.dart';
import 'dart:io';
import 'package:kreuzberg/kreuzberg.dart';
import 'package:kreuzberg/src/kreuzberg_bridge_generated/frb_generated.dart' show RustLib;
// E2e tests for category: detection
/// Renders [value] as the text form that e2e assertions compare against.
///
/// `null` becomes the empty string. Strings, numbers and booleans are returned
/// as their `toString()` unchanged, so dotted text such as `'file.txt'` or
/// `1.5` is never mistaken for an enum. Any other value whose `toString()`
/// contains exactly one `.` is treated as `EnumName.variantName` and reduced
/// to the variant name converted to snake_case.
String _alefE2eText(Object? value) {
  if (value == null) return '';
  final str = value.toString();
  // Plain scalars are never enums; returning early keeps a single '.' in
  // their text from being parsed as 'EnumName.variantName'.
  if (value is String || value is num || value is bool) return str;
  // Enum.toString() returns 'EnumName.variantName'. Extract the variant name.
  final parts = str.split('.');
  if (parts.length == 2) {
    // Convert camelCase variant names to snake_case for serde compatibility.
    // E.g. 'toolCalls' -> 'tool_calls', 'stop' -> 'stop'.
    return _camelToSnake(parts[1]);
  }
  return str;
}
/// Converts a camelCase identifier to snake_case.
///
/// Every ASCII uppercase letter is lowercased and, unless it is the first
/// character, prefixed with an underscore; all other characters are copied
/// through unchanged (e.g. 'toolCalls' -> 'tool_calls', 'stop' -> 'stop').
String _camelToSnake(String camel) {
  return camel.replaceAllMapped(RegExp(r'[A-Z]'), (match) {
    final lowered = match[0]!.toLowerCase();
    return match.start == 0 ? lowered : '_$lowered';
  });
}
/// Registers the e2e tests for the `detection` category.
void main() {
  // Initialise RustLib once, then switch the working directory to the fixture
  // directory (when it exists) so relative fixture paths resolve.
  setUpAll(() async {
    await RustLib.init();
    final fixtureRoot = Directory(Platform.environment['FIXTURES_DIR'] ?? '../../test_documents');
    if (fixtureRoot.existsSync()) {
      Directory.current = fixtureRoot;
    }
  });

  // Test name -> fixture whose bytes are handed to MIME detection. No
  // assertion is made on the detected value; the call only has to complete.
  const byteDetectionCases = <String, String>{
    'Detect HTML MIME from bytes': 'html/html.html',
    'Detect PDF MIME type from bytes': 'pdf/fake_memo.pdf',
    'Detect PNG MIME type from bytes': 'images/test_hello_world.png',
  };
  byteDetectionCases.forEach((description, fixturePath) {
    test(description, () async {
      final detected = await KreuzbergBridge.detectMimeTypeFromBytes(File(fixturePath).readAsBytesSync());
    });
  });

  test('get_extensions unknown MIME', () async {
    await expectLater(KreuzbergBridge.getExtensionsForMime('application/x-totally-unknown'), throwsA(anything));
  });
}

View File

@@ -0,0 +1,58 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// ignore_for_file: unused_local_variable
import 'package:test/test.dart';
import 'package:kreuzberg/kreuzberg.dart';
import 'package:kreuzberg/src/kreuzberg_bridge_generated/frb_generated.dart' show RustLib;
// E2e tests for category: document_extractor_management
/// Renders [value] as the text form that e2e assertions compare against.
///
/// `null` becomes the empty string. Strings, numbers and booleans are returned
/// as their `toString()` unchanged, so dotted text such as `'file.txt'` or
/// `1.5` is never mistaken for an enum. Any other value whose `toString()`
/// contains exactly one `.` is treated as `EnumName.variantName` and reduced
/// to the variant name converted to snake_case.
String _alefE2eText(Object? value) {
  if (value == null) return '';
  final str = value.toString();
  // Plain scalars are never enums; returning early keeps a single '.' in
  // their text from being parsed as 'EnumName.variantName'.
  if (value is String || value is num || value is bool) return str;
  // Enum.toString() returns 'EnumName.variantName'. Extract the variant name.
  final parts = str.split('.');
  if (parts.length == 2) {
    // Convert camelCase variant names to snake_case for serde compatibility.
    // E.g. 'toolCalls' -> 'tool_calls', 'stop' -> 'stop'.
    return _camelToSnake(parts[1]);
  }
  return str;
}
/// Converts a camelCase identifier to snake_case.
///
/// Every ASCII uppercase letter is lowercased and, unless it is the first
/// character, prefixed with an underscore; all other characters are copied
/// through unchanged (e.g. 'toolCalls' -> 'tool_calls', 'stop' -> 'stop').
String _camelToSnake(String camel) {
  return camel.replaceAllMapped(RegExp(r'[A-Z]'), (match) {
    final lowered = match[0]!.toLowerCase();
    return match.start == 0 ? lowered : '_$lowered';
  });
}
/// Registers the e2e tests for the `document_extractor_management` category.
void main() {
  // Initialise RustLib once before any bridge call.
  setUpAll(() async {
    await RustLib.init();
  });

  test('Clear all document extractors and verify list is empty', () async {
    final result = await KreuzbergBridge.clearDocumentExtractors();
    // The test title promises a check that nothing is left registered, so
    // read the registry back after clearing instead of stopping at the call.
    // NOTE(review): assumes listDocumentExtractors() returns a collection —
    // confirm against the bridge signature.
    expect(await KreuzbergBridge.listDocumentExtractors(), isEmpty);
  });

  test('List all registered document extractors', () async {
    final result = await KreuzbergBridge.listDocumentExtractors();
  });
}

View File

@@ -0,0 +1,68 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// ignore_for_file: unused_local_variable
import 'package:test/test.dart';
import 'package:kreuzberg/kreuzberg.dart';
import 'package:kreuzberg/src/kreuzberg_bridge_generated/frb_generated.dart' show RustLib;
import 'dart:convert';
// E2e tests for category: embed_async_pending
/// Renders [value] as the text form that e2e assertions compare against.
///
/// `null` becomes the empty string. Strings, numbers and booleans are returned
/// as their `toString()` unchanged, so dotted text such as `'file.txt'` or
/// `1.5` is never mistaken for an enum. Any other value whose `toString()`
/// contains exactly one `.` is treated as `EnumName.variantName` and reduced
/// to the variant name converted to snake_case.
String _alefE2eText(Object? value) {
  if (value == null) return '';
  final str = value.toString();
  // Plain scalars are never enums; returning early keeps a single '.' in
  // their text from being parsed as 'EnumName.variantName'.
  if (value is String || value is num || value is bool) return str;
  // Enum.toString() returns 'EnumName.variantName'. Extract the variant name.
  final parts = str.split('.');
  if (parts.length == 2) {
    // Convert camelCase variant names to snake_case for serde compatibility.
    // E.g. 'toolCalls' -> 'tool_calls', 'stop' -> 'stop'.
    return _camelToSnake(parts[1]);
  }
  return str;
}
/// Converts a camelCase identifier to snake_case.
///
/// Every ASCII uppercase letter is lowercased and, unless it is the first
/// character, prefixed with an underscore; all other characters are copied
/// through unchanged (e.g. 'toolCalls' -> 'tool_calls', 'stop' -> 'stop').
String _camelToSnake(String camel) {
  return camel.replaceAllMapped(RegExp(r'[A-Z]'), (match) {
    final lowered = match[0]!.toLowerCase();
    return match.start == 0 ? lowered : '_$lowered';
  });
}
/// Registers the e2e tests for the `embed_async_pending` category.
void main() {
  // Initialise RustLib once before any bridge call.
  setUpAll(() => RustLib.init());

  test('embed_texts_async: empty text list', () async {
    final defaults = await createEmbeddingConfigFromJson(json: '{}');
    final vectors = await KreuzbergBridge.embedTextsAsync(<String>[], defaults);
    expect(vectors.length, equals(0));
  });

  test('embed_texts_async: basic async embedding', () async {
    final defaults = await createEmbeddingConfigFromJson(json: '{}');
    final inputs = <String>['First', 'Second'];
    final vectors = await KreuzbergBridge.embedTextsAsync(inputs, defaults);
    expect(vectors.length, greaterThanOrEqualTo(2));
  });

  // No assertion here: the call only has to complete with the preset config.
  test('embed_texts_async: preset override', () async {
    final balanced = await createEmbeddingConfigFromJson(json: '{"model":{"name":"balanced","type":"preset"}}');
    final vectors = await KreuzbergBridge.embedTextsAsync(<String>['Text'], balanced);
  });
}

56
e2e/dart/test/embed_extra_test.dart generated Normal file
View File

@@ -0,0 +1,56 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// ignore_for_file: unused_local_variable
import 'package:test/test.dart';
import 'package:kreuzberg/kreuzberg.dart';
import 'package:kreuzberg/src/kreuzberg_bridge_generated/frb_generated.dart' show RustLib;
import 'dart:convert';
// E2e tests for category: embed_extra
/// Renders [value] as the text form that e2e assertions compare against.
///
/// `null` becomes the empty string. Strings, numbers and booleans are returned
/// as their `toString()` unchanged, so dotted text such as `'file.txt'` or
/// `1.5` is never mistaken for an enum. Any other value whose `toString()`
/// contains exactly one `.` is treated as `EnumName.variantName` and reduced
/// to the variant name converted to snake_case.
String _alefE2eText(Object? value) {
  if (value == null) return '';
  final str = value.toString();
  // Plain scalars are never enums; returning early keeps a single '.' in
  // their text from being parsed as 'EnumName.variantName'.
  if (value is String || value is num || value is bool) return str;
  // Enum.toString() returns 'EnumName.variantName'. Extract the variant name.
  final parts = str.split('.');
  if (parts.length == 2) {
    // Convert camelCase variant names to snake_case for serde compatibility.
    // E.g. 'toolCalls' -> 'tool_calls', 'stop' -> 'stop'.
    return _camelToSnake(parts[1]);
  }
  return str;
}
/// Converts a camelCase identifier to snake_case.
///
/// Every ASCII uppercase letter is lowercased and, unless it is the first
/// character, prefixed with an underscore; all other characters are copied
/// through unchanged (e.g. 'toolCalls' -> 'tool_calls', 'stop' -> 'stop').
String _camelToSnake(String camel) {
  return camel.replaceAllMapped(RegExp(r'[A-Z]'), (match) {
    final lowered = match[0]!.toLowerCase();
    return match.start == 0 ? lowered : '_$lowered';
  });
}
/// Registers the e2e tests for the `embed_extra` category.
void main() {
  // Initialise RustLib once before any bridge call.
  setUpAll(() => RustLib.init());

  // No assertion here: the batch call only has to complete.
  test('Batch embed texts', () async {
    final balanced = await createEmbeddingConfigFromJson(json: '{"model":{"name":"balanced","type":"preset"}}');
    final inputs = <String>['Hello', 'World'];
    final vectors = await KreuzbergBridge.embedTexts(inputs, balanced);
  });
}

View File

@@ -0,0 +1,58 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// ignore_for_file: unused_local_variable
import 'package:test/test.dart';
import 'package:kreuzberg/kreuzberg.dart';
import 'package:kreuzberg/src/kreuzberg_bridge_generated/frb_generated.dart' show RustLib;
// E2e tests for category: embedding_backend_management
/// Renders [value] as the text form that e2e assertions compare against.
///
/// `null` becomes the empty string. Strings, numbers and booleans are returned
/// as their `toString()` unchanged, so dotted text such as `'file.txt'` or
/// `1.5` is never mistaken for an enum. Any other value whose `toString()`
/// contains exactly one `.` is treated as `EnumName.variantName` and reduced
/// to the variant name converted to snake_case.
String _alefE2eText(Object? value) {
  if (value == null) return '';
  final str = value.toString();
  // Plain scalars are never enums; returning early keeps a single '.' in
  // their text from being parsed as 'EnumName.variantName'.
  if (value is String || value is num || value is bool) return str;
  // Enum.toString() returns 'EnumName.variantName'. Extract the variant name.
  final parts = str.split('.');
  if (parts.length == 2) {
    // Convert camelCase variant names to snake_case for serde compatibility.
    // E.g. 'toolCalls' -> 'tool_calls', 'stop' -> 'stop'.
    return _camelToSnake(parts[1]);
  }
  return str;
}
/// Converts a camelCase identifier to snake_case.
///
/// Every ASCII uppercase letter is lowercased and, unless it is the first
/// character, prefixed with an underscore; all other characters are copied
/// through unchanged (e.g. 'toolCalls' -> 'tool_calls', 'stop' -> 'stop').
String _camelToSnake(String camel) {
  return camel.replaceAllMapped(RegExp(r'[A-Z]'), (match) {
    final lowered = match[0]!.toLowerCase();
    return match.start == 0 ? lowered : '_$lowered';
  });
}
/// Registers the e2e tests for the `embedding_backend_management` category.
void main() {
  // Initialise RustLib once before any bridge call.
  setUpAll(() async {
    await RustLib.init();
  });

  test('Clear all embedding backends and verify list is empty', () async {
    final result = await KreuzbergBridge.clearEmbeddingBackends();
    // The test title promises a check that nothing is left registered, so
    // read the registry back after clearing instead of stopping at the call.
    // NOTE(review): assumes listEmbeddingBackends() returns a collection —
    // confirm against the bridge signature.
    expect(await KreuzbergBridge.listEmbeddingBackends(), isEmpty);
  });

  test('List all registered embedding backends', () async {
    final result = await KreuzbergBridge.listEmbeddingBackends();
  });
}

75
e2e/dart/test/embeddings_test.dart generated Normal file
View File

@@ -0,0 +1,75 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// ignore_for_file: unused_local_variable
import 'package:test/test.dart';
import 'package:kreuzberg/kreuzberg.dart';
import 'package:kreuzberg/src/kreuzberg_bridge_generated/frb_generated.dart' show RustLib;
import 'dart:convert';
// E2e tests for category: embeddings
/// Renders [value] as the text form that e2e assertions compare against.
///
/// `null` becomes the empty string. Strings, numbers and booleans are returned
/// as their `toString()` unchanged, so dotted text such as `'file.txt'` or
/// `1.5` is never mistaken for an enum. Any other value whose `toString()`
/// contains exactly one `.` is treated as `EnumName.variantName` and reduced
/// to the variant name converted to snake_case.
String _alefE2eText(Object? value) {
  if (value == null) return '';
  final str = value.toString();
  // Plain scalars are never enums; returning early keeps a single '.' in
  // their text from being parsed as 'EnumName.variantName'.
  if (value is String || value is num || value is bool) return str;
  // Enum.toString() returns 'EnumName.variantName'. Extract the variant name.
  final parts = str.split('.');
  if (parts.length == 2) {
    // Convert camelCase variant names to snake_case for serde compatibility.
    // E.g. 'toolCalls' -> 'tool_calls', 'stop' -> 'stop'.
    return _camelToSnake(parts[1]);
  }
  return str;
}
/// Converts a camelCase identifier to snake_case.
///
/// Every ASCII uppercase letter is lowercased and, unless it is the first
/// character, prefixed with an underscore; all other characters are copied
/// through unchanged (e.g. 'toolCalls' -> 'tool_calls', 'stop' -> 'stop').
String _camelToSnake(String camel) {
  return camel.replaceAllMapped(RegExp(r'[A-Z]'), (match) {
    final lowered = match[0]!.toLowerCase();
    return match.start == 0 ? lowered : '_$lowered';
  });
}
/// Registers the e2e tests for the `embeddings` category.
void main() {
  // Initialise RustLib once before any bridge call.
  setUpAll(() async {
    await RustLib.init();
  });

  test('embed_texts: multilingual preset', () async {
    final _config = await createEmbeddingConfigFromJson(json: '{"model":{"name":"multilingual","type":"preset"}}');
    final result = await KreuzbergBridge.embedTexts(<String>['Hello world', 'Test'], _config);
    expect(result.length, greaterThanOrEqualTo(2));
  });

  test('get_embedding_preset: known preset', () async {
    final result = await KreuzbergBridge.getEmbeddingPreset('balanced');
  });

  test('get_embedding_preset: nominal case', () async {
    final result = await KreuzbergBridge.getEmbeddingPreset('balanced');
  });

  test('get_embedding_preset: unknown preset fails', () async {
    final result = await KreuzbergBridge.getEmbeddingPreset('nonexistent-xyz');
    expect(result, anyOf(isNull, isEmpty));
  });

  test('list_embedding_presets: returns at least one', () async {
    final result = await KreuzbergBridge.listEmbeddingPresets();
    expect(result, isNotNull);
    // isNotNull alone does not check the "at least one" promised by the title.
    // NOTE(review): assumes listEmbeddingPresets() returns a collection —
    // confirm against the bridge signature.
    expect(result, isNotEmpty);
  });
}

74
e2e/dart/test/error_test.dart generated Normal file
View File

@@ -0,0 +1,74 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// ignore_for_file: unused_local_variable
import 'package:test/test.dart';
import 'dart:io';
import 'package:kreuzberg/kreuzberg.dart';
import 'package:kreuzberg/src/kreuzberg_bridge_generated/frb_generated.dart' show RustLib;
// E2e tests for category: error
/// Renders [value] as the text form that e2e assertions compare against.
///
/// `null` becomes the empty string. Strings, numbers and booleans are returned
/// as their `toString()` unchanged, so dotted text such as `'file.txt'` or
/// `1.5` is never mistaken for an enum. Any other value whose `toString()`
/// contains exactly one `.` is treated as `EnumName.variantName` and reduced
/// to the variant name converted to snake_case.
String _alefE2eText(Object? value) {
  if (value == null) return '';
  final str = value.toString();
  // Plain scalars are never enums; returning early keeps a single '.' in
  // their text from being parsed as 'EnumName.variantName'.
  if (value is String || value is num || value is bool) return str;
  // Enum.toString() returns 'EnumName.variantName'. Extract the variant name.
  final parts = str.split('.');
  if (parts.length == 2) {
    // Convert camelCase variant names to snake_case for serde compatibility.
    // E.g. 'toolCalls' -> 'tool_calls', 'stop' -> 'stop'.
    return _camelToSnake(parts[1]);
  }
  return str;
}
/// Converts a camelCase identifier to snake_case.
///
/// Every ASCII uppercase letter is lowercased and, unless it is the first
/// character, prefixed with an underscore; all other characters are copied
/// through unchanged (e.g. 'toolCalls' -> 'tool_calls', 'stop' -> 'stop').
String _camelToSnake(String camel) {
  return camel.replaceAllMapped(RegExp(r'[A-Z]'), (match) {
    final lowered = match[0]!.toLowerCase();
    return match.start == 0 ? lowered : '_$lowered';
  });
}
/// Registers the e2e tests for the `error` category.
void main() {
  // Fixture used as valid input by the MIME-type error cases.
  const plainTextFixture = 'text/plain.txt';

  // Initialise RustLib once, then switch the working directory to the fixture
  // directory (when it exists) so relative fixture paths resolve.
  setUpAll(() async {
    await RustLib.init();
    final fixtureRoot = Directory(Platform.environment['FIXTURES_DIR'] ?? '../../test_documents');
    if (fixtureRoot.existsSync()) {
      Directory.current = fixtureRoot;
    }
  });

  // No assertion here: the call only has to complete without throwing.
  test('Graceful handling of empty bytes (should not error)', () async {
    final emptyBytes = File('text/empty.txt').readAsBytesSync();
    final extraction = await KreuzbergBridge.extractBytesSync(emptyBytes, 'text/plain');
  });

  test('Error when extracting with empty MIME type', () async {
    final bytes = File(plainTextFixture).readAsBytesSync();
    await expectLater(KreuzbergBridge.extractBytesSync(bytes, ''), throwsA(anything));
  });

  test('extract_bytes force+disable OCR', () async {
    final bytes = File('text/fake_text.txt').readAsBytesSync();
    // forceOcr and disableOcr are both set; the call is expected to throw.
    final conflicting = ExtractionConfig(
      useCache: true,
      enableQualityProcessing: true,
      forceOcr: true,
      disableOcr: true,
      resultFormat: ResultFormat.unified,
      outputFormat: OutputFormat.plain(),
      includeDocumentStructure: false,
      useLayoutForMarkdown: false,
      maxArchiveDepth: 3,
    );
    await expectLater(KreuzbergBridge.extractBytesSync(bytes, 'text/plain', conflicting), throwsA(anything));
  });

  test('Error when extracting with invalid MIME type format', () async {
    final bytes = File(plainTextFixture).readAsBytesSync();
    await expectLater(KreuzbergBridge.extractBytesSync(bytes, 'not-a-mime'), throwsA(anything));
  });

  test('Error when extracting with unsupported MIME type', () async {
    final bytes = File(plainTextFixture).readAsBytesSync();
    await expectLater(KreuzbergBridge.extractBytesSync(bytes, 'application/x-nonexistent'), throwsA(anything));
  });
}

79
e2e/dart/test/format_specific_test.dart generated Normal file
View File

@@ -0,0 +1,79 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// ignore_for_file: unused_local_variable
import 'package:test/test.dart';
import 'dart:io';
import 'package:kreuzberg/kreuzberg.dart';
import 'package:kreuzberg/src/kreuzberg_bridge_generated/frb_generated.dart' show RustLib;
// E2e tests for category: format_specific
/// Renders [value] as the text form that e2e assertions compare against.
///
/// `null` becomes the empty string. Strings, numbers and booleans are returned
/// as their `toString()` unchanged, so dotted text such as `'file.txt'` or
/// `1.5` is never mistaken for an enum. Any other value whose `toString()`
/// contains exactly one `.` is treated as `EnumName.variantName` and reduced
/// to the variant name converted to snake_case.
String _alefE2eText(Object? value) {
  if (value == null) return '';
  final str = value.toString();
  // Plain scalars are never enums; returning early keeps a single '.' in
  // their text from being parsed as 'EnumName.variantName'.
  if (value is String || value is num || value is bool) return str;
  // Enum.toString() returns 'EnumName.variantName'. Extract the variant name.
  final parts = str.split('.');
  if (parts.length == 2) {
    // Convert camelCase variant names to snake_case for serde compatibility.
    // E.g. 'toolCalls' -> 'tool_calls', 'stop' -> 'stop'.
    return _camelToSnake(parts[1]);
  }
  return str;
}
/// Converts a camelCase identifier to snake_case.
///
/// Every ASCII uppercase letter is lowercased and, unless it is the first
/// character, prefixed with an underscore; all other characters are copied
/// through unchanged (e.g. 'toolCalls' -> 'tool_calls', 'stop' -> 'stop').
String _camelToSnake(String camel) {
  return camel.replaceAllMapped(RegExp(r'[A-Z]'), (match) {
    final lowered = match[0]!.toLowerCase();
    return match.start == 0 ? lowered : '_$lowered';
  });
}
/// Registers the e2e tests for the `format_specific` category.
void main() {
  // Initialise RustLib once, then switch the working directory to the fixture
  // directory (when it exists) so relative fixture paths resolve.
  setUpAll(() async {
    await RustLib.init();
    final fixtureRoot = Directory(Platform.environment['FIXTURES_DIR'] ?? '../../test_documents');
    if (fixtureRoot.existsSync()) {
      Directory.current = fixtureRoot;
    }
  });

  test('Standalone DOCX extraction using extract_bytes_sync', () async {
    final docxBytes = File('docx/fake.docx').readAsBytesSync();
    final extraction = await KreuzbergBridge.extractBytesSync(docxBytes, 'application/vnd.openxmlformats-officedocument.wordprocessingml.document');
    expect(extraction.content.length, greaterThanOrEqualTo(20));
  });

  test('Standalone HWPX extraction using extract_bytes_sync', () async {
    final hwpxBytes = File('hwpx/simple.hwpx').readAsBytesSync();
    final extraction = await KreuzbergBridge.extractBytesSync(hwpxBytes, 'application/haansofthwpx');
    expect(extraction.content.length, greaterThanOrEqualTo(20));
    expect(extraction.content, contains('Hello from HWPX'));
  });

  test('Standalone PDF text extraction using extract_bytes_sync', () async {
    final pdfBytes = File('pdf/fake_memo.pdf').readAsBytesSync();
    final extraction = await KreuzbergBridge.extractBytesSync(pdfBytes, 'application/pdf');
    expect(extraction.content.length, greaterThanOrEqualTo(50));
    final text = extraction.content;
    expect(text.contains('Mallori') || text.contains('May'), isTrue);
  });

  // The two tests below make no assertion; the call only has to complete.
  test('PPTX presentation extraction using extract_file_sync', () async {
    final pptxBytes = File('pptx/simple.pptx').readAsBytesSync();
    final extraction = await KreuzbergBridge.extractBytesSync(pptxBytes, 'application/vnd.openxmlformats-officedocument.presentationml.presentation');
  });

  test('XLSX spreadsheet extraction using extract_file_sync', () async {
    final xlsxBytes = File('xlsx/stanley_cups.xlsx').readAsBytesSync();
    final extraction = await KreuzbergBridge.extractBytesSync(xlsxBytes, 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet');
  });
}

69
e2e/dart/test/mime_utilities_test.dart generated Normal file
View File

@@ -0,0 +1,69 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// ignore_for_file: unused_local_variable
import 'package:test/test.dart';
import 'dart:io';
import 'package:kreuzberg/kreuzberg.dart';
import 'package:kreuzberg/src/kreuzberg_bridge_generated/frb_generated.dart' show RustLib;
// E2e tests for category: mime_utilities
/// Renders [value] as the text form that e2e assertions compare against.
///
/// `null` becomes the empty string. Strings, numbers and booleans are returned
/// as their `toString()` unchanged, so dotted text such as `'file.txt'` or
/// `1.5` is never mistaken for an enum. Any other value whose `toString()`
/// contains exactly one `.` is treated as `EnumName.variantName` and reduced
/// to the variant name converted to snake_case.
String _alefE2eText(Object? value) {
  if (value == null) return '';
  final str = value.toString();
  // Plain scalars are never enums; returning early keeps a single '.' in
  // their text from being parsed as 'EnumName.variantName'.
  if (value is String || value is num || value is bool) return str;
  // Enum.toString() returns 'EnumName.variantName'. Extract the variant name.
  final parts = str.split('.');
  if (parts.length == 2) {
    // Convert camelCase variant names to snake_case for serde compatibility.
    // E.g. 'toolCalls' -> 'tool_calls', 'stop' -> 'stop'.
    return _camelToSnake(parts[1]);
  }
  return str;
}
/// Converts a camelCase identifier to snake_case.
///
/// Every ASCII uppercase letter is lowercased and, unless it is the first
/// character, prefixed with an underscore; all other characters are copied
/// through unchanged (e.g. 'toolCalls' -> 'tool_calls', 'stop' -> 'stop').
String _camelToSnake(String camel) {
  return camel.replaceAllMapped(RegExp(r'[A-Z]'), (match) {
    final lowered = match[0]!.toLowerCase();
    return match.start == 0 ? lowered : '_$lowered';
  });
}
/// Runs the `mime_utilities` e2e tests against the Kreuzberg bridge.
void main() {
  setUpAll(() async {
    await RustLib.init();
    // Fixture paths below are relative, so switch into the fixtures directory
    // when it exists (overridable through the FIXTURES_DIR variable).
    final fixturesDir = Directory(
      Platform.environment['FIXTURES_DIR'] ?? '../../test_documents',
    );
    if (fixturesDir.existsSync()) {
      Directory.current = fixturesDir;
    }
  });

  test('Detect MIME type from file bytes', () async {
    final pdfBytes = File('pdf/fake_memo.pdf').readAsBytesSync();
    final result = await KreuzbergBridge.detectMimeTypeFromBytes(pdfBytes);
    expect(result, contains('pdf'));
  });

  test('Detect MIME type from PNG image bytes', () async {
    final pngBytes = File('images/test_hello_world.png').readAsBytesSync();
    final result = await KreuzbergBridge.detectMimeTypeFromBytes(pngBytes);
    expect(result, contains('png'));
  });

  test('Get file extensions for a MIME type', () async {
    final result = await KreuzbergBridge.getExtensionsForMime(
      'application/pdf',
    );
    expect(result, contains('pdf'));
  });
}

16
e2e/dart/test/minimal_test.dart generated Normal file
View File

@@ -0,0 +1,16 @@
import 'package:test/test.dart';
import 'dart:typed_data';
import 'package:kreuzberg/kreuzberg.dart';
import 'package:kreuzberg/src/kreuzberg_bridge_generated/frb_generated.dart' show RustLib;
/// Minimal sanity check: extracts plain text through the bridge and prints
/// the first five characters of the extracted content.
void main() {
  setUpAll(() async {
    await RustLib.init();
  });

  test('text extraction works', () async {
    final bytes = Uint8List.fromList('Hello world'.codeUnits);
    final extraction = await KreuzbergBridge.extractBytesSync(
      bytes,
      'text/plain',
    );
    // substring throws if fewer than five characters were extracted.
    final preview = extraction.content.substring(0, 5);
    print('Text: $preview');
  });
}

View File

@@ -0,0 +1,62 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// ignore_for_file: unused_local_variable
import 'package:test/test.dart';
import 'package:kreuzberg/kreuzberg.dart';
import 'package:kreuzberg/src/kreuzberg_bridge_generated/frb_generated.dart' show RustLib;
// E2e tests for category: ocr_backend_management
/// Converts an e2e result value to the text form used in assertions.
///
/// * `null` becomes the empty string.
/// * Strings, numbers and booleans are returned as their `toString()` value,
///   so scalars that merely contain a dot (`'application/vnd.ms-excel'`,
///   `1.5`) are not mistaken for enum names.
/// * Any other value whose `toString()` has the form `'EnumName.variantName'`
///   (exactly one dot) is reduced to the variant name in snake_case.
/// * Everything else falls back to `toString()`.
String _alefE2eText(Object? value) {
  if (value == null) return '';
  final str = value.toString();
  // Scalars are never enums; return them verbatim.
  if (value is String || value is num || value is bool) return str;
  // Enum.toString() returns 'EnumName.variantName'. Extract the variant name.
  final parts = str.split('.');
  if (parts.length == 2) {
    // Convert camelCase variant names to snake_case for serde compatibility.
    // E.g. 'toolCalls' -> 'tool_calls', 'stop' -> 'stop'.
    return _camelToSnake(parts[1]);
  }
  return str;
}

/// Converts a camelCase identifier to snake_case.
///
/// Every ASCII uppercase letter is lower-cased and, unless it is the first
/// character, preceded by an underscore: `'toolCalls'` -> `'tool_calls'`.
String _camelToSnake(String camel) {
  // One pass with a single RegExp instead of building one per character.
  return camel.replaceAllMapped(RegExp(r'[A-Z]'), (match) {
    final lower = match.group(0)!.toLowerCase();
    return match.start > 0 ? '_$lower' : lower;
  });
}
/// Runs the `ocr_backend_management` e2e tests.
///
/// Each test only checks that the bridge call completes without throwing;
/// the returned values are not inspected.
void main() {
  setUpAll(() async {
    await RustLib.init();
  });

  test('Clear all OCR backends and verify list is empty', () async {
    await KreuzbergBridge.clearOcrBackends();
  });

  test('List all registered OCR backends', () async {
    await KreuzbergBridge.listOcrBackends();
  });

  test('Unregister nonexistent OCR backend gracefully', () async {
    await KreuzbergBridge.unregisterOcrBackend('nonexistent-backend-xyz');
  });
}

63
e2e/dart/test/pdf_test.dart generated Normal file
View File

@@ -0,0 +1,63 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// ignore_for_file: unused_local_variable
import 'package:test/test.dart';
import 'dart:io';
import 'package:kreuzberg/kreuzberg.dart';
import 'package:kreuzberg/src/kreuzberg_bridge_generated/frb_generated.dart' show RustLib;
// E2e tests for category: pdf
/// Converts an e2e result value to the text form used in assertions.
///
/// * `null` becomes the empty string.
/// * Strings, numbers and booleans are returned as their `toString()` value,
///   so scalars that merely contain a dot (`'application/vnd.ms-excel'`,
///   `1.5`) are not mistaken for enum names.
/// * Any other value whose `toString()` has the form `'EnumName.variantName'`
///   (exactly one dot) is reduced to the variant name in snake_case.
/// * Everything else falls back to `toString()`.
String _alefE2eText(Object? value) {
  if (value == null) return '';
  final str = value.toString();
  // Scalars are never enums; return them verbatim.
  if (value is String || value is num || value is bool) return str;
  // Enum.toString() returns 'EnumName.variantName'. Extract the variant name.
  final parts = str.split('.');
  if (parts.length == 2) {
    // Convert camelCase variant names to snake_case for serde compatibility.
    // E.g. 'toolCalls' -> 'tool_calls', 'stop' -> 'stop'.
    return _camelToSnake(parts[1]);
  }
  return str;
}

/// Converts a camelCase identifier to snake_case.
///
/// Every ASCII uppercase letter is lower-cased and, unless it is the first
/// character, preceded by an underscore: `'toolCalls'` -> `'tool_calls'`.
String _camelToSnake(String camel) {
  // One pass with a single RegExp instead of building one per character.
  return camel.replaceAllMapped(RegExp(r'[A-Z]'), (match) {
    final lower = match.group(0)!.toLowerCase();
    return match.start > 0 ? '_$lower' : lower;
  });
}
/// Runs the `pdf` e2e tests (page rendering to PNG).
void main() {
  setUpAll(() async {
    await RustLib.init();
    // Fixture paths below are relative, so switch into the fixtures directory
    // when it exists (overridable through the FIXTURES_DIR variable).
    final fixturesDir = Directory(
      Platform.environment['FIXTURES_DIR'] ?? '../../test_documents',
    );
    if (fixturesDir.existsSync()) {
      Directory.current = fixturesDir;
    }
  });

  test('render_pdf_page_to_png: first page', () async {
    final pdfBytes = File('pdf/fake_memo.pdf').readAsBytesSync();
    final result = await KreuzbergBridge.renderPdfPageToPng(pdfBytes, 0);
    expect(result.length, greaterThanOrEqualTo(100));
  });

  test('render_pdf_page_to_png: page out of range', () async {
    final pdfBytes = File('pdf/fake_memo.pdf').readAsBytesSync();
    // Page index 999 is expected to make the returned future fail.
    await expectLater(
      KreuzbergBridge.renderPdfPageToPng(pdfBytes, 999),
      throwsA(anything),
    );
  });
}

209
e2e/dart/test/plugin_api_test.dart generated Normal file
View File

@@ -0,0 +1,209 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// ignore_for_file: unused_local_variable
import 'package:test/test.dart';
import 'dart:typed_data';
import 'package:kreuzberg/kreuzberg.dart';
import 'package:kreuzberg/kreuzberg.dart' show DocumentExtractor;
import 'package:kreuzberg/kreuzberg.dart' show OcrBackend;
import 'package:kreuzberg/kreuzberg.dart' show PostProcessor;
import 'package:kreuzberg/kreuzberg.dart' show Renderer;
import 'package:kreuzberg/kreuzberg.dart' show Validator;
import 'package:kreuzberg/kreuzberg.dart' show EmbeddingBackend;
import 'package:kreuzberg/src/kreuzberg_bridge_generated/frb_generated.dart' show RustLib;
// E2e tests for category: plugin_api
/// Converts an e2e result value to the text form used in assertions.
///
/// * `null` becomes the empty string.
/// * Strings, numbers and booleans are returned as their `toString()` value,
///   so scalars that merely contain a dot (`'application/vnd.ms-excel'`,
///   `1.5`) are not mistaken for enum names.
/// * Any other value whose `toString()` has the form `'EnumName.variantName'`
///   (exactly one dot) is reduced to the variant name in snake_case.
/// * Everything else falls back to `toString()`.
String _alefE2eText(Object? value) {
  if (value == null) return '';
  final str = value.toString();
  // Scalars are never enums; return them verbatim.
  if (value is String || value is num || value is bool) return str;
  // Enum.toString() returns 'EnumName.variantName'. Extract the variant name.
  final parts = str.split('.');
  if (parts.length == 2) {
    // Convert camelCase variant names to snake_case for serde compatibility.
    // E.g. 'toolCalls' -> 'tool_calls', 'stop' -> 'stop'.
    return _camelToSnake(parts[1]);
  }
  return str;
}

/// Converts a camelCase identifier to snake_case.
///
/// Every ASCII uppercase letter is lower-cased and, unless it is the first
/// character, preceded by an underscore: `'toolCalls'` -> `'tool_calls'`.
String _camelToSnake(String camel) {
  // One pass with a single RegExp instead of building one per character.
  return camel.replaceAllMapped(RegExp(r'[A-Z]'), (match) {
    final lower = match.group(0)!.toLowerCase();
    return match.start > 0 ? '_$lower' : lower;
  });
}
/// Stub [DocumentExtractor] with fixed placeholder behaviour.
///
/// Both extraction methods fail with [UnimplementedError]; the remaining
/// members return constant values.
class TestStubRegisterDocumentExtractorTraitBridge extends DocumentExtractor {
  String get name {
    return 'register_document_extractor_trait_bridge';
  }

  Future<InternalDocumentBridge> extractBytes(
    Uint8List content,
    String mimeType,
    ExtractionConfig config,
  ) async {
    throw UnimplementedError();
  }

  Future<InternalDocumentBridge> extractFile(
    String path,
    String mimeType,
    ExtractionConfig config,
  ) async {
    throw UnimplementedError();
  }

  Future<List<String>> supportedMimeTypes() async {
    return [];
  }

  Future<int> priority() async {
    return 1;
  }

  Future<bool> canHandle(String path, String mimeType) async {
    return false;
  }
}
/// Shared stub instance that the wrapper callbacks delegate to.
final _documentExtractorStub = TestStubRegisterDocumentExtractorTraitBridge();

/// Builds a [DocumentExtractorDartImpl] whose callbacks forward to the shared
/// stub instance.
Future<DocumentExtractorDartImpl>
    _createTestStubRegisterDocumentExtractorTraitBridgeWrapper() async {
  return await createDocumentExtractorDartImpl(
    pluginName: 'register_document_extractor_trait_bridge',
    pluginVersion: '0.0.1',
    extractBytes: (Uint8List content, String mimeType, ExtractionConfig config) {
      return _documentExtractorStub.extractBytes(content, mimeType, config);
    },
    extractFile: (String path, String mimeType, ExtractionConfig config) {
      return _documentExtractorStub.extractFile(path, mimeType, config);
    },
    supportedMimeTypes: () {
      return _documentExtractorStub.supportedMimeTypes();
    },
    priority: () {
      return _documentExtractorStub.priority();
    },
    canHandle: (String path, String mimeType) {
      return _documentExtractorStub.canHandle(path, mimeType);
    },
  );
}
/// Stub [EmbeddingBackend] that reports one dimension and returns no
/// embeddings.
class TestStubRegisterEmbeddingBackendTraitBridge extends EmbeddingBackend {
  String get name {
    return 'register_embedding_backend_trait_bridge';
  }

  Future<int> dimensions() async {
    return 1;
  }

  Future<List<Float64List>> embed(List<String> texts) async {
    return [];
  }
}
/// Shared stub instance that the wrapper callbacks delegate to.
final _embeddingBackendStub = TestStubRegisterEmbeddingBackendTraitBridge();

/// Builds an [EmbeddingBackendDartImpl] whose callbacks forward to the shared
/// stub instance.
Future<EmbeddingBackendDartImpl>
    _createTestStubRegisterEmbeddingBackendTraitBridgeWrapper() async {
  return await createEmbeddingBackendDartImpl(
    pluginName: 'register_embedding_backend_trait_bridge',
    pluginVersion: '0.0.1',
    dimensions: () {
      return _embeddingBackendStub.dimensions();
    },
    embed: (List<String> texts) {
      return _embeddingBackendStub.embed(texts);
    },
  );
}
/// Stub [OcrBackend] with fixed placeholder behaviour.
///
/// The three processing methods fail with [UnimplementedError]; capability
/// queries return `false` or an empty list, and the backend type is reported
/// as [OcrBackendType.tesseract].
class TestStubRegisterOcrBackendTraitBridge extends OcrBackend {
  String get name {
    return 'register_ocr_backend_trait_bridge';
  }

  Future<ExtractionResult> processImage(
    Uint8List imageBytes,
    OcrConfig config,
  ) async {
    throw UnimplementedError();
  }

  Future<ExtractionResult> processImageFile(
    String path,
    OcrConfig config,
  ) async {
    throw UnimplementedError();
  }

  Future<bool> supportsLanguage(String lang) async {
    return false;
  }

  Future<OcrBackendType> backendType() async {
    return OcrBackendType.tesseract;
  }

  Future<List<String>> supportedLanguages() async {
    return [];
  }

  Future<bool> supportsTableDetection() async {
    return false;
  }

  Future<bool> supportsDocumentProcessing() async {
    return false;
  }

  Future<ExtractionResult> processDocument(
    String path,
    OcrConfig config,
  ) async {
    throw UnimplementedError();
  }
}
/// Shared stub instance that the wrapper callbacks delegate to.
final _ocrBackendStub = TestStubRegisterOcrBackendTraitBridge();

/// Builds an [OcrBackendDartImpl] whose callbacks forward to the shared stub
/// instance.
Future<OcrBackendDartImpl>
    _createTestStubRegisterOcrBackendTraitBridgeWrapper() async {
  return await createOcrBackendDartImpl(
    pluginName: 'register_ocr_backend_trait_bridge',
    pluginVersion: '0.0.1',
    processImage: (Uint8List imageBytes, OcrConfig config) {
      return _ocrBackendStub.processImage(imageBytes, config);
    },
    processImageFile: (String path, OcrConfig config) {
      return _ocrBackendStub.processImageFile(path, config);
    },
    supportsLanguage: (String lang) {
      return _ocrBackendStub.supportsLanguage(lang);
    },
    backendType: () {
      return _ocrBackendStub.backendType();
    },
    supportedLanguages: () {
      return _ocrBackendStub.supportedLanguages();
    },
    supportsTableDetection: () {
      return _ocrBackendStub.supportsTableDetection();
    },
    supportsDocumentProcessing: () {
      return _ocrBackendStub.supportsDocumentProcessing();
    },
    processDocument: (String path, OcrConfig config) {
      return _ocrBackendStub.processDocument(path, config);
    },
  );
}
/// Stub [PostProcessor] with fixed placeholder behaviour.
///
/// `process` does nothing, `shouldProcess` always answers `false`, and the
/// stage, duration estimate and priority are constants.
class TestStubRegisterPostProcessorTraitBridge extends PostProcessor {
  String get name {
    return 'register_post_processor_trait_bridge';
  }

  Future<void> process(ExtractionResult result, ExtractionConfig config) async {
    return null;
  }

  Future<ProcessingStage> processingStage() async {
    return ProcessingStage.early;
  }

  Future<bool> shouldProcess(
    ExtractionResult result,
    ExtractionConfig config,
  ) async {
    return false;
  }

  Future<int> estimatedDurationMs(ExtractionResult result) async {
    return 1;
  }

  Future<int> priority() async {
    return 1;
  }
}
/// Shared stub instance that the wrapper callbacks delegate to.
final _postProcessorStub = TestStubRegisterPostProcessorTraitBridge();

/// Builds a [PostProcessorDartImpl] whose callbacks forward to the shared stub
/// instance.
Future<PostProcessorDartImpl>
    _createTestStubRegisterPostProcessorTraitBridgeWrapper() async {
  return await createPostProcessorDartImpl(
    pluginName: 'register_post_processor_trait_bridge',
    pluginVersion: '0.0.1',
    process: (ExtractionResult result, ExtractionConfig config) {
      return _postProcessorStub.process(result, config);
    },
    processingStage: () {
      return _postProcessorStub.processingStage();
    },
    shouldProcess: (ExtractionResult result, ExtractionConfig config) {
      return _postProcessorStub.shouldProcess(result, config);
    },
    estimatedDurationMs: (ExtractionResult result) {
      return _postProcessorStub.estimatedDurationMs(result);
    },
    priority: () {
      return _postProcessorStub.priority();
    },
  );
}
/// Stub [Renderer] that renders every document to the empty string.
class TestStubRegisterRendererTraitBridge extends Renderer {
  String get name {
    return 'register_renderer_trait_bridge';
  }

  Future<String> render(InternalDocumentBridge doc) async {
    return '';
  }
}
/// Shared stub instance that the wrapper callback delegates to.
final _rendererStub = TestStubRegisterRendererTraitBridge();

/// Builds a [RendererDartImpl] whose `render` callback forwards to the shared
/// stub instance.
Future<RendererDartImpl>
    _createTestStubRegisterRendererTraitBridgeWrapper() async {
  return await createRendererDartImpl(
    pluginName: 'register_renderer_trait_bridge',
    pluginVersion: '0.0.1',
    render: (InternalDocumentBridge doc) {
      return _rendererStub.render(doc);
    },
  );
}
/// Stub [Validator] with fixed placeholder behaviour.
///
/// `validate` does nothing, `shouldValidate` always answers `false`, and the
/// priority is the constant 1.
class TestStubRegisterValidatorTraitBridge extends Validator {
  String get name {
    return 'register_validator_trait_bridge';
  }

  Future<void> validate(ExtractionResult result, ExtractionConfig config) async {
    return null;
  }

  Future<bool> shouldValidate(
    ExtractionResult result,
    ExtractionConfig config,
  ) async {
    return false;
  }

  Future<int> priority() async {
    return 1;
  }
}
/// Shared stub instance that the wrapper callbacks delegate to.
final _validatorStub = TestStubRegisterValidatorTraitBridge();

/// Builds a [ValidatorDartImpl] whose callbacks forward to the shared stub
/// instance.
Future<ValidatorDartImpl>
    _createTestStubRegisterValidatorTraitBridgeWrapper() async {
  return await createValidatorDartImpl(
    pluginName: 'register_validator_trait_bridge',
    pluginVersion: '0.0.1',
    validate: (ExtractionResult result, ExtractionConfig config) {
      return _validatorStub.validate(result, config);
    },
    shouldValidate: (ExtractionResult result, ExtractionConfig config) {
      return _validatorStub.shouldValidate(result, config);
    },
    priority: () {
      return _validatorStub.priority();
    },
  );
}
/// Runs the `plugin_api` e2e tests.
///
/// Each test only checks that the register/unregister call completes without
/// throwing; the returned values are not inspected.
void main() {
  setUpAll(() async {
    await RustLib.init();
  });

  test('register_document_extractor: trait bridge', () async {
    final plugin =
        await _createTestStubRegisterDocumentExtractorTraitBridgeWrapper();
    await KreuzbergBridge.registerDocumentExtractor(plugin);
  });

  test('register_embedding_backend: trait bridge', () async {
    final plugin =
        await _createTestStubRegisterEmbeddingBackendTraitBridgeWrapper();
    await KreuzbergBridge.registerEmbeddingBackend(plugin);
  });

  test('register_ocr_backend: trait bridge', () async {
    final plugin = await _createTestStubRegisterOcrBackendTraitBridgeWrapper();
    await KreuzbergBridge.registerOcrBackend(plugin);
  });

  test('register_post_processor: trait bridge', () async {
    final plugin =
        await _createTestStubRegisterPostProcessorTraitBridgeWrapper();
    await KreuzbergBridge.registerPostProcessor(plugin);
  });

  test('register_renderer: trait bridge', () async {
    final plugin = await _createTestStubRegisterRendererTraitBridgeWrapper();
    await KreuzbergBridge.registerRenderer(plugin);
  });

  test('register_validator: trait bridge', () async {
    final plugin = await _createTestStubRegisterValidatorTraitBridgeWrapper();
    await KreuzbergBridge.registerValidator(plugin);
  });

  test('unregister_document_extractor', () async {
    await KreuzbergBridge.unregisterDocumentExtractor('test-extractor');
  });

  test('unregister_embedding_backend', () async {
    await KreuzbergBridge.unregisterEmbeddingBackend('test-embedding-backend');
  });

  test('unregister_post_processor', () async {
    await KreuzbergBridge.unregisterPostProcessor('test-processor');
  });

  test('unregister_renderer', () async {
    await KreuzbergBridge.unregisterRenderer('test-renderer');
  });

  test('unregister_validator', () async {
    await KreuzbergBridge.unregisterValidator('test-validator');
  });
}

View File

@@ -0,0 +1,58 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// ignore_for_file: unused_local_variable
import 'package:test/test.dart';
import 'package:kreuzberg/kreuzberg.dart';
import 'package:kreuzberg/src/kreuzberg_bridge_generated/frb_generated.dart' show RustLib;
// E2e tests for category: post_processor_management
/// Converts an e2e result value to the text form used in assertions.
///
/// * `null` becomes the empty string.
/// * Strings, numbers and booleans are returned as their `toString()` value,
///   so scalars that merely contain a dot (`'application/vnd.ms-excel'`,
///   `1.5`) are not mistaken for enum names.
/// * Any other value whose `toString()` has the form `'EnumName.variantName'`
///   (exactly one dot) is reduced to the variant name in snake_case.
/// * Everything else falls back to `toString()`.
String _alefE2eText(Object? value) {
  if (value == null) return '';
  final str = value.toString();
  // Scalars are never enums; return them verbatim.
  if (value is String || value is num || value is bool) return str;
  // Enum.toString() returns 'EnumName.variantName'. Extract the variant name.
  final parts = str.split('.');
  if (parts.length == 2) {
    // Convert camelCase variant names to snake_case for serde compatibility.
    // E.g. 'toolCalls' -> 'tool_calls', 'stop' -> 'stop'.
    return _camelToSnake(parts[1]);
  }
  return str;
}

/// Converts a camelCase identifier to snake_case.
///
/// Every ASCII uppercase letter is lower-cased and, unless it is the first
/// character, preceded by an underscore: `'toolCalls'` -> `'tool_calls'`.
String _camelToSnake(String camel) {
  // One pass with a single RegExp instead of building one per character.
  return camel.replaceAllMapped(RegExp(r'[A-Z]'), (match) {
    final lower = match.group(0)!.toLowerCase();
    return match.start > 0 ? '_$lower' : lower;
  });
}
/// Runs the `post_processor_management` e2e tests.
///
/// Each test only checks that the bridge call completes without throwing;
/// the returned values are not inspected.
void main() {
  setUpAll(() async {
    await RustLib.init();
  });

  test('Clear all post-processors and verify list is empty', () async {
    await KreuzbergBridge.clearPostProcessors();
  });

  test('List all registered post-processors', () async {
    await KreuzbergBridge.listPostProcessors();
  });
}

View File

@@ -0,0 +1,62 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// ignore_for_file: unused_local_variable
import 'package:test/test.dart';
import 'package:kreuzberg/kreuzberg.dart';
import 'package:kreuzberg/src/kreuzberg_bridge_generated/frb_generated.dart' show RustLib;
// E2e tests for category: registry_operations
/// Converts an e2e result value to the text form used in assertions.
///
/// * `null` becomes the empty string.
/// * Strings, numbers and booleans are returned as their `toString()` value,
///   so scalars that merely contain a dot (`'application/vnd.ms-excel'`,
///   `1.5`) are not mistaken for enum names.
/// * Any other value whose `toString()` has the form `'EnumName.variantName'`
///   (exactly one dot) is reduced to the variant name in snake_case.
/// * Everything else falls back to `toString()`.
String _alefE2eText(Object? value) {
  if (value == null) return '';
  final str = value.toString();
  // Scalars are never enums; return them verbatim.
  if (value is String || value is num || value is bool) return str;
  // Enum.toString() returns 'EnumName.variantName'. Extract the variant name.
  final parts = str.split('.');
  if (parts.length == 2) {
    // Convert camelCase variant names to snake_case for serde compatibility.
    // E.g. 'toolCalls' -> 'tool_calls', 'stop' -> 'stop'.
    return _camelToSnake(parts[1]);
  }
  return str;
}

/// Converts a camelCase identifier to snake_case.
///
/// Every ASCII uppercase letter is lower-cased and, unless it is the first
/// character, preceded by an underscore: `'toolCalls'` -> `'tool_calls'`.
String _camelToSnake(String camel) {
  // One pass with a single RegExp instead of building one per character.
  return camel.replaceAllMapped(RegExp(r'[A-Z]'), (match) {
    final lower = match.group(0)!.toLowerCase();
    return match.start > 0 ? '_$lower' : lower;
  });
}
/// Runs the `registry_operations` e2e tests.
///
/// Each test only checks that the extension lookup completes without
/// throwing; the returned values are not inspected.
void main() {
  setUpAll(() async {
    await RustLib.init();
  });

  test('Get file extensions for DOCX MIME type', () async {
    await KreuzbergBridge.getExtensionsForMime(
      'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
    );
  });

  test('Get file extensions for HTML MIME type', () async {
    await KreuzbergBridge.getExtensionsForMime('text/html');
  });

  test('Get file extensions for PDF MIME type', () async {
    await KreuzbergBridge.getExtensionsForMime('application/pdf');
  });
}

74
e2e/dart/test/registry_test.dart generated Normal file
View File

@@ -0,0 +1,74 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// ignore_for_file: unused_local_variable
import 'package:test/test.dart';
import 'package:kreuzberg/kreuzberg.dart';
import 'package:kreuzberg/src/kreuzberg_bridge_generated/frb_generated.dart' show RustLib;
// E2e tests for category: registry
/// Converts an e2e result value to the text form used in assertions.
///
/// * `null` becomes the empty string.
/// * Strings, numbers and booleans are returned as their `toString()` value,
///   so scalars that merely contain a dot (`'application/vnd.ms-excel'`,
///   `1.5`) are not mistaken for enum names.
/// * Any other value whose `toString()` has the form `'EnumName.variantName'`
///   (exactly one dot) is reduced to the variant name in snake_case.
/// * Everything else falls back to `toString()`.
String _alefE2eText(Object? value) {
  if (value == null) return '';
  final str = value.toString();
  // Scalars are never enums; return them verbatim.
  if (value is String || value is num || value is bool) return str;
  // Enum.toString() returns 'EnumName.variantName'. Extract the variant name.
  final parts = str.split('.');
  if (parts.length == 2) {
    // Convert camelCase variant names to snake_case for serde compatibility.
    // E.g. 'toolCalls' -> 'tool_calls', 'stop' -> 'stop'.
    return _camelToSnake(parts[1]);
  }
  return str;
}

/// Converts a camelCase identifier to snake_case.
///
/// Every ASCII uppercase letter is lower-cased and, unless it is the first
/// character, preceded by an underscore: `'toolCalls'` -> `'tool_calls'`.
String _camelToSnake(String camel) {
  // One pass with a single RegExp instead of building one per character.
  return camel.replaceAllMapped(RegExp(r'[A-Z]'), (match) {
    final lower = match.group(0)!.toLowerCase();
    return match.start > 0 ? '_$lower' : lower;
  });
}
/// Runs the `registry` e2e tests.
///
/// Each test only checks that the listing call completes without throwing;
/// the returned values are not inspected.
void main() {
  setUpAll(() async {
    await RustLib.init();
  });

  test('List document extractors', () async {
    await KreuzbergBridge.listDocumentExtractors();
  });

  test('List embedding backends', () async {
    await KreuzbergBridge.listEmbeddingBackends();
  });

  test('List OCR backends', () async {
    await KreuzbergBridge.listOcrBackends();
  });

  test('List post-processors', () async {
    await KreuzbergBridge.listPostProcessors();
  });

  test('List renderers', () async {
    await KreuzbergBridge.listRenderers();
  });

  test('List validators', () async {
    await KreuzbergBridge.listValidators();
  });
}

View File

@@ -0,0 +1,58 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// ignore_for_file: unused_local_variable
import 'package:test/test.dart';
import 'package:kreuzberg/kreuzberg.dart';
import 'package:kreuzberg/src/kreuzberg_bridge_generated/frb_generated.dart' show RustLib;
// E2e tests for category: renderer_management
/// Converts an e2e result value to the text form used in assertions.
///
/// * `null` becomes the empty string.
/// * Strings, numbers and booleans are returned as their `toString()` value,
///   so scalars that merely contain a dot (`'application/vnd.ms-excel'`,
///   `1.5`) are not mistaken for enum names.
/// * Any other value whose `toString()` has the form `'EnumName.variantName'`
///   (exactly one dot) is reduced to the variant name in snake_case.
/// * Everything else falls back to `toString()`.
String _alefE2eText(Object? value) {
  if (value == null) return '';
  final str = value.toString();
  // Scalars are never enums; return them verbatim.
  if (value is String || value is num || value is bool) return str;
  // Enum.toString() returns 'EnumName.variantName'. Extract the variant name.
  final parts = str.split('.');
  if (parts.length == 2) {
    // Convert camelCase variant names to snake_case for serde compatibility.
    // E.g. 'toolCalls' -> 'tool_calls', 'stop' -> 'stop'.
    return _camelToSnake(parts[1]);
  }
  return str;
}

/// Converts a camelCase identifier to snake_case.
///
/// Every ASCII uppercase letter is lower-cased and, unless it is the first
/// character, preceded by an underscore: `'toolCalls'` -> `'tool_calls'`.
String _camelToSnake(String camel) {
  // One pass with a single RegExp instead of building one per character.
  return camel.replaceAllMapped(RegExp(r'[A-Z]'), (match) {
    final lower = match.group(0)!.toLowerCase();
    return match.start > 0 ? '_$lower' : lower;
  });
}
/// Runs the `renderer_management` e2e tests.
///
/// Each test only checks that the bridge call completes without throwing;
/// the returned values are not inspected.
void main() {
  setUpAll(() async {
    await RustLib.init();
  });

  test('Clear all renderers and verify list is empty', () async {
    await KreuzbergBridge.clearRenderers();
  });

  test('List all registered renderers', () async {
    await KreuzbergBridge.listRenderers();
  });
}

117
e2e/dart/test/smoke_test.dart generated Normal file
View File

@@ -0,0 +1,117 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// ignore_for_file: unused_local_variable
import 'package:test/test.dart';
import 'dart:io';
import 'package:kreuzberg/kreuzberg.dart';
import 'package:kreuzberg/src/kreuzberg_bridge_generated/frb_generated.dart' show RustLib;
// E2e tests for category: smoke
/// Converts an e2e result value to the text form used in assertions.
///
/// * `null` becomes the empty string.
/// * Strings, numbers and booleans are returned as their `toString()` value,
///   so scalars that merely contain a dot (`'application/vnd.ms-excel'`,
///   `1.5`) are not mistaken for enum names.
/// * Any other value whose `toString()` has the form `'EnumName.variantName'`
///   (exactly one dot) is reduced to the variant name in snake_case.
/// * Everything else falls back to `toString()`.
String _alefE2eText(Object? value) {
  if (value == null) return '';
  final str = value.toString();
  // Scalars are never enums; return them verbatim.
  if (value is String || value is num || value is bool) return str;
  // Enum.toString() returns 'EnumName.variantName'. Extract the variant name.
  final parts = str.split('.');
  if (parts.length == 2) {
    // Convert camelCase variant names to snake_case for serde compatibility.
    // E.g. 'toolCalls' -> 'tool_calls', 'stop' -> 'stop'.
    return _camelToSnake(parts[1]);
  }
  return str;
}

/// Converts a camelCase identifier to snake_case.
///
/// Every ASCII uppercase letter is lower-cased and, unless it is the first
/// character, preceded by an underscore: `'toolCalls'` -> `'tool_calls'`.
String _camelToSnake(String camel) {
  // One pass with a single RegExp instead of building one per character.
  return camel.replaceAllMapped(RegExp(r'[A-Z]'), (match) {
    final lower = match.group(0)!.toLowerCase();
    return match.start > 0 ? '_$lower' : lower;
  });
}
/// Runs the `smoke` e2e tests: one extraction per fixture format, checking the
/// reported MIME type, a minimum content length and expected substrings.
void main() {
  setUpAll(() async {
    await RustLib.init();
    // Fixture paths below are relative, so switch into the fixtures directory
    // when it exists (overridable through the FIXTURES_DIR variable).
    final fixturesDir = Directory(
      Platform.environment['FIXTURES_DIR'] ?? '../../test_documents',
    );
    if (fixturesDir.existsSync()) {
      Directory.current = fixturesDir;
    }
  });

  test('OCR: PNG image extraction with OCR enabled. In WASM this exercises the Uint8Array bridge parameter and Promise await in the generated OcrBackend bridge.', () async {
    final bytes = File('images/test_hello_world.png').readAsBytesSync();
    final result = await KreuzbergBridge.extractBytes(bytes, 'image/png');
    expect(result.mimeType.toString().trim(), equals('image/png'));
    expect(result.content.length, greaterThanOrEqualTo(1));
    final anyOf = ['Hello', 'World', 'hello', 'world'];
    expect(anyOf.any((needle) => result.content.contains(needle)), isTrue);
  });

  test('Smoke test: DOCX with formatted text', () async {
    final bytes = File('docx/fake.docx').readAsBytesSync();
    final result = await KreuzbergBridge.extractBytes(
      bytes,
      'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
    );
    expect(
      result.mimeType.toString().trim(),
      equals('application/vnd.openxmlformats-officedocument.wordprocessingml.document'),
    );
    expect(result.content.length, greaterThanOrEqualTo(20));
    final anyOf = ['Lorem', 'ipsum', 'document', 'text'];
    expect(anyOf.any((needle) => result.content.contains(needle)), isTrue);
  });

  test('Smoke test: HTML table extraction', () async {
    final bytes = File('html/simple_table.html').readAsBytesSync();
    final result = await KreuzbergBridge.extractBytes(bytes, 'text/html');
    expect(result.mimeType.toString().trim(), equals('text/html'));
    expect(result.content.length, greaterThanOrEqualTo(10));
    final anyOf = ['Sample Data Table', 'Laptop', 'Electronics', 'Product'];
    expect(anyOf.any((needle) => result.content.contains(needle)), isTrue);
  });

  test('Smoke test: PNG image (without OCR, metadata only)', () async {
    final bytes = File('images/sample.png').readAsBytesSync();
    final config = ExtractionConfig(
      useCache: true,
      enableQualityProcessing: true,
      forceOcr: false,
      disableOcr: true,
      resultFormat: ResultFormat.unified,
      outputFormat: OutputFormat.plain(),
      includeDocumentStructure: false,
      useLayoutForMarkdown: false,
      maxArchiveDepth: 3,
    );
    final result = await KreuzbergBridge.extractBytes(
      bytes,
      'image/png',
      config,
    );
    expect(result.mimeType.toString().trim(), equals('image/png'));
  });

  test('Smoke test: JSON file extraction', () async {
    final bytes = File('json/simple.json').readAsBytesSync();
    final result = await KreuzbergBridge.extractBytes(
      bytes,
      'application/json',
    );
    expect(result.mimeType.toString().trim(), equals('application/json'));
    expect(result.content.length, greaterThanOrEqualTo(5));
  });

  test('Smoke test: PDF with simple text extraction', () async {
    final bytes = File('pdf/fake_memo.pdf').readAsBytesSync();
    final result = await KreuzbergBridge.extractBytes(bytes, 'application/pdf');
    expect(result.mimeType.toString().trim(), equals('application/pdf'));
    expect(result.content.length, greaterThanOrEqualTo(50));
    final anyOf = ['May 5, 2023', 'To Whom it May Concern'];
    expect(anyOf.any((needle) => result.content.contains(needle)), isTrue);
  });

  test('Smoke test: Plain text file', () async {
    final bytes = File('text/report.txt').readAsBytesSync();
    final result = await KreuzbergBridge.extractBytes(bytes, 'text/plain');
    expect(result.mimeType.toString().trim(), equals('text/plain'));
    expect(result.content.length, greaterThanOrEqualTo(5));
  });

  test('Smoke test: XLSX with basic spreadsheet data including tables', () async {
    final bytes = File('xlsx/stanley_cups.xlsx').readAsBytesSync();
    final result = await KreuzbergBridge.extractBytes(
      bytes,
      'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
    );
    expect(
      result.mimeType.toString().trim(),
      equals('application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'),
    );
    expect(result.content.length, greaterThanOrEqualTo(100));
    // Every one of these substrings must appear, checked in this order.
    final allOf = [
      'Team',
      'Location',
      'Stanley Cups',
      'Blues',
      'Flyers',
      'Maple Leafs',
      'STL',
      'PHI',
      'TOR',
    ];
    for (final expected in allOf) {
      expect(result.content, contains(expected));
    }
    // skipped: field 'tables' not available on dart result type
    // skipped: field 'metadata.format.excel.sheet_count' not available on dart result type
    // skipped: field 'metadata.format.excel.sheet_names' not available on dart result type
  });
}

View File

@@ -0,0 +1,58 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// ignore_for_file: unused_local_variable
import 'package:test/test.dart';
import 'package:kreuzberg/kreuzberg.dart';
import 'package:kreuzberg/src/kreuzberg_bridge_generated/frb_generated.dart' show RustLib;
// E2e tests for category: validator_management
/// Converts an e2e result value to the text form used in assertions.
///
/// * `null` becomes the empty string.
/// * Strings, numbers and booleans are returned as their `toString()` value,
///   so scalars that merely contain a dot (`'application/vnd.ms-excel'`,
///   `1.5`) are not mistaken for enum names.
/// * Any other value whose `toString()` has the form `'EnumName.variantName'`
///   (exactly one dot) is reduced to the variant name in snake_case.
/// * Everything else falls back to `toString()`.
String _alefE2eText(Object? value) {
  if (value == null) return '';
  final str = value.toString();
  // Scalars are never enums; return them verbatim.
  if (value is String || value is num || value is bool) return str;
  // Enum.toString() returns 'EnumName.variantName'. Extract the variant name.
  final parts = str.split('.');
  if (parts.length == 2) {
    // Convert camelCase variant names to snake_case for serde compatibility.
    // E.g. 'toolCalls' -> 'tool_calls', 'stop' -> 'stop'.
    return _camelToSnake(parts[1]);
  }
  return str;
}

/// Converts a camelCase identifier to snake_case.
///
/// Every ASCII uppercase letter is lower-cased and, unless it is the first
/// character, preceded by an underscore: `'toolCalls'` -> `'tool_calls'`.
String _camelToSnake(String camel) {
  // One pass with a single RegExp instead of building one per character.
  return camel.replaceAllMapped(RegExp(r'[A-Z]'), (match) {
    final lower = match.group(0)!.toLowerCase();
    return match.start > 0 ? '_$lower' : lower;
  });
}
/// Runs the `validator_management` e2e tests.
///
/// Each test only checks that the bridge call completes without throwing;
/// the returned values are not inspected.
void main() {
  setUpAll(() async {
    await RustLib.init();
  });

  test('Clear all validators and verify list is empty', () async {
    await KreuzbergBridge.clearValidators();
  });

  test('List all registered validators', () async {
    await KreuzbergBridge.listValidators();
  });
}

3
e2e/elixir/lib/e2e_elixir.ex generated Normal file
View File

@@ -0,0 +1,3 @@
# Intentionally empty module: it defines no functions, and `@moduledoc false`
# marks it as having no module documentation.
defmodule E2eElixir do
@moduledoc false
end

20
e2e/elixir/mix.exs generated Normal file
View File

@@ -0,0 +1,20 @@
defmodule E2eElixir.MixProject do
  # Mix project definition for the :e2e_elixir application.
  use Mix.Project

  # Project settings: app name, version, Elixir requirement and dependencies.
  def project do
    [app: :e2e_elixir, version: "0.1.0", elixir: "~> 1.14", deps: deps()]
  end

  # Dependencies: the local Kreuzberg package referenced by relative path,
  # plus rustler_precompiled and rustler (the latter with `runtime: false`).
  defp deps do
    kreuzberg = {:kreuzberg, path: "../../packages/elixir"}
    rustler_precompiled = {:rustler_precompiled, "~> 0.9"}
    rustler = {:rustler, "~> 0.37.0", runtime: false}

    [kreuzberg, rustler_precompiled, rustler]
  end
end

32
e2e/elixir/test/async_test.exs generated Normal file
View File

@@ -0,0 +1,32 @@
# This file is auto-generated by alef — DO NOT EDIT.
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
# To regenerate: alef generate
# To verify freshness: alef verify --exit-code
# Issues & docs: https://github.com/kreuzberg-dev/alef
# E2e tests for category: async
defmodule E2e.AsyncTest do
  # E2e tests for Kreuzberg.extract_bytes_async/2,3: one successful PDF
  # extraction and two calls that must return an error tuple.
  use ExUnit.Case, async: false

  describe "async_extract_bytes" do
    test "async_extract_bytes" do
      content = "../../test_documents/pdf/fake_memo.pdf" |> File.read!()
      {:ok, result} = Kreuzberg.extract_bytes_async(content, "application/pdf")
      assert String.trim(result.mime_type) == "application/pdf"

      # Accept binary content (>= 50 bytes) or list content (>= 50 elements);
      # any other shape falls through to String.length/1.
      text = result.content

      assert (is_binary(text) && byte_size(text) >= 50) ||
               (is_list(text) && length(text) >= 50) ||
               (is_binary(text) == false && is_list(text) == false &&
                  String.length(text) >= 50)
    end
  end

  describe "async_extract_bytes_empty_mime" do
    test "async_extract_bytes_empty_mime" do
      content = "../../test_documents/text/plain.txt" |> File.read!()
      outcome = Kreuzberg.extract_bytes_async(content, "", "{}")
      assert {:error, _} = outcome
    end
  end

  describe "async_extract_bytes_invalid_mime" do
    test "async_extract_bytes_invalid_mime" do
      content = "../../test_documents/text/plain.txt" |> File.read!()
      outcome = Kreuzberg.extract_bytes_async(content, "application/x-nonexistent", "{}")
      assert {:error, _} = outcome
    end
  end
end

89
e2e/elixir/test/batch_test.exs generated Normal file
View File

@@ -0,0 +1,89 @@
# This file is auto-generated by alef — DO NOT EDIT.
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
# To regenerate: alef generate
# To verify freshness: alef verify --exit-code
# Issues & docs: https://github.com/kreuzberg-dev/alef
# E2e tests for category: batch
defmodule E2e.BatchTest do
  # Placeholders for the batch fixtures. Every test is tagged :skip and its
  # body is just `:ok`:
  # batch functions excluded from Elixir binding: unsafe NIF tuple marshalling
  use ExUnit.Case, async: false

  # One describe/test pair is generated per fixture name, in the same order
  # and with the same names as the individual fixtures.
  for fixture <- [
        "batch_bytes_invalid_mime",
        "batch_extract_bytes_happy",
        "batch_extract_bytes_mixed_format",
        "batch_extract_bytes_sync_empty_list",
        "batch_extract_bytes_sync_invalid_mime",
        "batch_file_async_basic",
        "batch_file_async_not_found",
        "batch_file_not_found",
        "batch_file_partial",
        "batch_file_sync_basic"
      ] do
    describe fixture do
      @tag :skip
      test fixture do
        :ok
      end
    end
  end
end

19
e2e/elixir/test/code_test.exs generated Normal file
View File

@@ -0,0 +1,19 @@
# This file is auto-generated by alef — DO NOT EDIT.
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
# To regenerate: alef generate
# To verify freshness: alef verify --exit-code
# Issues & docs: https://github.com/kreuzberg-dev/alef
# E2e tests for category: code
defmodule E2e.CodeTest do
  # End-to-end test for source-code extraction of a shell script fixture.
  use ExUnit.Case, async: false

  # Size of extracted content: byte count for binaries, element count for
  # lists. Any other term raises, which fails the calling test.
  defp content_size(value) when is_binary(value), do: byte_size(value)
  defp content_size(value) when is_list(value), do: length(value)

  describe "code_shebang_detection" do
    test "code_shebang_detection" do
      {:ok, extraction} =
        Kreuzberg.extract_file_sync("../../test_documents/code/script.sh", mime_type: "text/x-source-code")

      text = to_string(extraction.content)

      assert String.trim(extraction.mime_type) == "text/x-source-code"
      assert content_size(extraction.content) >= 10
      assert String.contains?(text, "build")
      assert String.contains?(text, "clean")
    end
  end
end

183
e2e/elixir/test/contract_test.exs generated Normal file
View File

@@ -0,0 +1,183 @@
# This file is auto-generated by alef — DO NOT EDIT.
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
# To regenerate: alef generate
# To verify freshness: alef verify --exit-code
# Issues & docs: https://github.com/kreuzberg-dev/alef
# E2e tests for category: contract
defmodule E2e.ContractTest do
  # End-to-end contract tests. Each test runs one extraction call against a
  # fixture under ../../test_documents (relative to the directory the suite is
  # run from) and checks the reported MIME type and/or the extracted content.
  #
  # NOTE(review): several fixture names mention "batch" or "bytes" while the
  # test body calls extract_file_async — presumably a generator mapping;
  # confirm this is intended.
  use ExUnit.Case, async: false

  # Size of extracted content: byte count for binaries, element count for
  # lists. Any other term raises FunctionClauseError, which fails the test
  # (the previous inline check also raised for such terms).
  defp content_size(value) when is_binary(value), do: byte_size(value)
  defp content_size(value) when is_list(value), do: length(value)

  describe "api_batch_bytes_async" do
    test "api_batch_bytes_async" do
      {:ok, result} = Kreuzberg.extract_file_async("../../test_documents/pdf/fake_memo.pdf")
      assert String.trim(result.mime_type) == "application/pdf"
      assert content_size(result.content) >= 10
      # Passes when at least one of the expected snippets is present.
      assert String.contains?(to_string(result.content), ["May 5, 2023", "Mallori"])
    end
  end

  describe "api_batch_bytes_with_configs_async" do
    test "api_batch_bytes_with_configs_async" do
      {:ok, result} = Kreuzberg.extract_file_async("../../test_documents/pdf/fake_memo.pdf", config: "{\"output_format\":\"markdown\"}")
      assert String.trim(result.mime_type) == "application/pdf"
      assert content_size(result.content) >= 10
      # skipped: field 'metadata.output_format' not available on result type
    end
  end

  describe "api_batch_file_async" do
    test "api_batch_file_async" do
      {:ok, result} = Kreuzberg.extract_file_async("../../test_documents/pdf/fake_memo.pdf")
      assert String.trim(result.mime_type) == "application/pdf"
      assert content_size(result.content) >= 10
      assert String.contains?(to_string(result.content), ["May 5, 2023", "Mallori"])
    end
  end

  describe "api_batch_file_with_configs_async" do
    test "api_batch_file_with_configs_async" do
      {:ok, result} = Kreuzberg.extract_file_async("../../test_documents/pdf/fake_memo.pdf", config: "{\"output_format\":\"markdown\"}")
      assert String.trim(result.mime_type) == "application/pdf"
      assert content_size(result.content) >= 10
      # skipped: field 'metadata.output_format' not available on result type
    end
  end

  describe "api_extract_bytes_async" do
    test "api_extract_bytes_async" do
      {:ok, result} = Kreuzberg.extract_file_async("../../test_documents/pdf/fake_memo.pdf")
      assert String.trim(result.mime_type) == "application/pdf"
      assert content_size(result.content) >= 10
      assert String.contains?(to_string(result.content), ["May 5, 2023", "Mallori"])
    end
  end

  describe "api_extract_file_async" do
    test "api_extract_file_async" do
      {:ok, result} = Kreuzberg.extract_file_async("../../test_documents/pdf/fake_memo.pdf")
      assert String.trim(result.mime_type) == "application/pdf"
      assert content_size(result.content) >= 10
      assert String.contains?(to_string(result.content), ["May 5, 2023", "Mallori"])
    end
  end

  describe "config_chunking_prepend_heading_context" do
    test "config_chunking_prepend_heading_context" do
      {:ok, result} = Kreuzberg.extract_file_sync("../../test_documents/markdown/extraction_test.md", config: "{\"chunking\":{\"chunker_type\":\"markdown\",\"max_chars\":300,\"max_overlap\":50,\"prepend_heading_context\":true}}")
      assert content_size(result.content) >= 10
      # skipped: field 'chunks' not available on result type
      # NOTE(review): the assertions below still read result.chunks although
      # the generator reports the field as unavailable; the last one fails
      # when chunks is nil or empty — confirm which of the two is intended.
      assert Enum.all?(result.chunks || [], fn c -> c.content != nil and c.content != "" end)
      assert Enum.all?(result.chunks || [], fn c -> c.metadata != nil and c.metadata.heading_context != nil end)
      # The first chunk must start (after leading whitespace) with a heading.
      assert (case List.first(result.chunks || []) do
                c when is_map(c) -> String.trim_leading(c.content || "") |> String.starts_with?("#")
                _ -> false
              end)
    end
  end

  describe "config_document_structure_with_headings" do
    test "config_document_structure_with_headings" do
      {:ok, result} = Kreuzberg.extract_file_sync("../../test_documents/docx/fake.docx", config: "{\"include_document_structure\":true}")
      assert String.trim(result.mime_type) == "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
      # skipped: field 'document' not available on result type
      # skipped: field 'document.nodes' not available on result type
    end
  end

  describe "config_element_types" do
    test "config_element_types" do
      {:ok, result} = Kreuzberg.extract_file_sync("../../test_documents/docx/unit_test_headers.docx", config: "{\"result_format\":\"element_based\"}")
      assert String.contains?(to_string(result.mime_type), "application/vnd.openxmlformats-officedocument.wordprocessingml.document")
      # skipped: field 'elements' not available on result type
    end
  end

  describe "config_extraction_timeout" do
    test "config_extraction_timeout" do
      {:ok, result} = Kreuzberg.extract_file_sync("../../test_documents/pdf/fake_memo.pdf", config: "{\"extraction_timeout_secs\":300}")
      assert String.trim(result.mime_type) == "application/pdf"
      assert content_size(result.content) >= 10
    end
  end

  describe "config_keywords" do
    test "config_keywords" do
      {:ok, result} = Kreuzberg.extract_file_sync("../../test_documents/pdf/fake_memo.pdf", config: "{\"keywords\":{\"algorithm\":\"yake\",\"max_keywords\":10}}")
      assert String.trim(result.mime_type) == "application/pdf"
      assert content_size(result.content) >= 10
      # skipped: field 'keywords' not available on Elixir ExtractionResult
      # skipped: field 'keywords' not available on Elixir ExtractionResult
    end
  end

  describe "config_pages" do
    test "config_pages" do
      {:ok, result} = Kreuzberg.extract_file_sync("../../test_documents/pdf/fake_memo.pdf", config: "{\"pages\":{\"extract_pages\":true,\"insert_page_markers\":true}}")
      assert String.trim(result.mime_type) == "application/pdf"
      assert content_size(result.content) >= 10
      assert String.contains?(to_string(result.content), "PAGE")
    end
  end

  describe "config_quality_enabled" do
    test "config_quality_enabled" do
      {:ok, result} = Kreuzberg.extract_file_sync("../../test_documents/pdf/fake_memo.pdf", config: "{\"enable_quality_processing\":true}")
      assert String.trim(result.mime_type) == "application/pdf"
      assert content_size(result.content) >= 10
      # skipped: field 'quality_score' not available on result type
      # skipped: field 'quality_score' not available on result type
      # skipped: field 'quality_score' not available on result type
    end
  end

  describe "config_security_limits" do
    test "config_security_limits" do
      {:ok, result} = Kreuzberg.extract_file_sync("../../test_documents/archives/documents.zip", config: "{\"security_limits\":{\"max_archive_size\":104857600,\"max_compression_ratio\":50,\"max_files_in_archive\":100}}")
      assert String.contains?(to_string(result.mime_type), ["application/zip", "application/x-zip-compressed"])
      assert content_size(result.content) >= 10
    end
  end

  describe "config_tree_sitter" do
    test "config_tree_sitter" do
      {:ok, result} = Kreuzberg.extract_file_sync("../../test_documents/code/hello.py", config: "{\"tree_sitter\":{\"groups\":[\"web\"],\"languages\":[\"python\",\"rust\"],\"process\":{\"comments\":false,\"diagnostics\":false,\"docstrings\":false,\"exports\":true,\"imports\":true,\"structure\":true,\"symbols\":false}}}")
      assert String.trim(result.mime_type) == "text/x-source-code"
      assert content_size(result.content) >= 5
    end
  end

  describe "output_format_bytes_markdown" do
    test "output_format_bytes_markdown" do
      content = File.read!("../../test_documents/pdf/fake_memo.pdf")
      {:ok, result} = Kreuzberg.extract_bytes_sync(content, "application/pdf", "{\"output_format\":\"markdown\"}")
      assert String.trim(result.mime_type) == "application/pdf"
      assert content_size(result.content) >= 10
      # skipped: field 'metadata.output_format' not available on result type
    end
  end

  describe "output_format_markdown" do
    test "output_format_markdown" do
      {:ok, result} = Kreuzberg.extract_file_sync("../../test_documents/pdf/fake_memo.pdf", config: "{\"output_format\":\"markdown\"}")
      assert String.trim(result.mime_type) == "application/pdf"
      assert content_size(result.content) >= 10
      # skipped: field 'metadata.output_format' not available on result type
    end
  end
end

36
e2e/elixir/test/detection_test.exs generated Normal file
View File

@@ -0,0 +1,36 @@
# This file is auto-generated by alef — DO NOT EDIT.
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
# To regenerate: alef generate
# To verify freshness: alef verify --exit-code
# Issues & docs: https://github.com/kreuzberg-dev/alef
# E2e tests for category: detection
defmodule E2e.DetectionTest do
  # End-to-end tests for MIME detection from raw bytes and for the
  # MIME-to-extension lookup. The detection tests only check that the call
  # succeeds; the detected value itself is not asserted.
  use ExUnit.Case, async: false

  describe "detect_mime_bytes_html" do
    test "detect_mime_bytes_html" do
      content = File.read!("../../test_documents/html/html.html")
      # `assert` on the match reports the actual value on failure and avoids
      # an unused-variable warning for the detected type.
      assert {:ok, _mime} = Kreuzberg.detect_mime_type_from_bytes(content)
    end
  end

  describe "detect_mime_bytes_pdf" do
    test "detect_mime_bytes_pdf" do
      content = File.read!("../../test_documents/pdf/fake_memo.pdf")
      assert {:ok, _mime} = Kreuzberg.detect_mime_type_from_bytes(content)
    end
  end

  describe "detect_mime_bytes_png" do
    test "detect_mime_bytes_png" do
      content = File.read!("../../test_documents/images/test_hello_world.png")
      assert {:ok, _mime} = Kreuzberg.detect_mime_type_from_bytes(content)
    end
  end

  describe "get_extensions_unknown_mime" do
    test "get_extensions_unknown_mime" do
      # An unknown MIME type must produce an error tuple.
      assert {:error, _} = Kreuzberg.get_extensions_for_mime("application/x-totally-unknown")
    end
  end
end

View File

@@ -0,0 +1,21 @@
# This file is auto-generated by alef — DO NOT EDIT.
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
# To regenerate: alef generate
# To verify freshness: alef verify --exit-code
# Issues & docs: https://github.com/kreuzberg-dev/alef
# E2e tests for category: document_extractor_management
defmodule E2e.DocumentExtractorManagementTest do
  # Smoke tests for the document-extractor registry functions. Each test
  # passes as long as the call returns without raising; the return value is
  # deliberately ignored (underscore-prefixed to avoid compiler warnings).
  use ExUnit.Case, async: false

  describe "document_extractors_clear" do
    test "document_extractors_clear" do
      _result = Kreuzberg.clear_document_extractors()
    end
  end

  describe "extractors_list" do
    test "extractors_list" do
      _result = Kreuzberg.list_document_extractors()
    end
  end
end

View File

@@ -0,0 +1,29 @@
# This file is auto-generated by alef — DO NOT EDIT.
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
# To regenerate: alef generate
# To verify freshness: alef verify --exit-code
# Issues & docs: https://github.com/kreuzberg-dev/alef
# E2e tests for category: embed_async_pending
defmodule E2e.EmbedAsyncPendingTest do
  # End-to-end tests for Kreuzberg.embed_texts_async.
  use ExUnit.Case, async: false

  describe "embed_texts_async_empty_input" do
    test "embed_texts_async_empty_input" do
      {:ok, result} = Kreuzberg.embed_texts_async([])
      # No input texts must yield an empty result list.
      assert result == []
    end
  end

  describe "embed_texts_async_happy" do
    test "embed_texts_async_happy" do
      {:ok, result} = Kreuzberg.embed_texts_async(["First", "Second"])
      assert length(result) >= 2
    end
  end

  describe "embed_texts_async_preset_switch" do
    test "embed_texts_async_preset_switch" do
      # Only success is checked here; the embeddings are not inspected.
      assert {:ok, _embeddings} =
               Kreuzberg.embed_texts_async(["Text"], "{\"model\":{\"name\":\"balanced\",\"type\":\"preset\"}}")
    end
  end
end

15
e2e/elixir/test/embed_extra_test.exs generated Normal file
View File

@@ -0,0 +1,15 @@
# This file is auto-generated by alef — DO NOT EDIT.
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
# To regenerate: alef generate
# To verify freshness: alef verify --exit-code
# Issues & docs: https://github.com/kreuzberg-dev/alef
# E2e tests for category: embed_extra
defmodule E2e.EmbedExtraTest do
  # End-to-end test for Kreuzberg.embed_texts with an explicit preset config.
  use ExUnit.Case, async: false

  describe "embed_texts_batch" do
    test "embed_texts_batch" do
      # Only success is checked; the embeddings themselves are not inspected,
      # so the payload is matched with an underscore-prefixed name.
      assert {:ok, _embeddings} =
               Kreuzberg.embed_texts(["Hello", "World"], "{\"model\":{\"name\":\"balanced\",\"type\":\"preset\"}}")
    end
  end
end

View File

@@ -0,0 +1,21 @@
# This file is auto-generated by alef — DO NOT EDIT.
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
# To regenerate: alef generate
# To verify freshness: alef verify --exit-code
# Issues & docs: https://github.com/kreuzberg-dev/alef
# E2e tests for category: embedding_backend_management
defmodule E2e.EmbeddingBackendManagementTest do
  # Smoke tests for the embedding-backend registry functions. Each test
  # passes as long as the call returns without raising; the return value is
  # deliberately ignored (underscore-prefixed to avoid compiler warnings).
  use ExUnit.Case, async: false

  describe "embedding_backends_clear" do
    test "embedding_backends_clear" do
      _result = Kreuzberg.clear_embedding_backends()
    end
  end

  describe "embedding_backends_list" do
    test "embedding_backends_list" do
      _result = Kreuzberg.list_embedding_backends()
    end
  end
end

42
e2e/elixir/test/embeddings_test.exs generated Normal file
View File

@@ -0,0 +1,42 @@
# This file is auto-generated by alef — DO NOT EDIT.
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
# To regenerate: alef generate
# To verify freshness: alef verify --exit-code
# Issues & docs: https://github.com/kreuzberg-dev/alef
# E2e tests for category: embeddings
defmodule E2e.EmbeddingsTest do
  # End-to-end tests for embedding generation and embedding-preset lookup.
  use ExUnit.Case, async: false

  describe "embed_texts_different_preset" do
    test "embed_texts_different_preset" do
      {:ok, result} = Kreuzberg.embed_texts(["Hello world", "Test"], "{\"model\":{\"name\":\"multilingual\",\"type\":\"preset\"}}")
      assert length(result) >= 2
    end
  end

  describe "get_embedding_preset_known" do
    test "get_embedding_preset_known" do
      # Smoke test: passes when the lookup returns without raising.
      _preset = Kreuzberg.get_embedding_preset("balanced")
    end
  end

  describe "get_embedding_preset_nominal" do
    test "get_embedding_preset_nominal" do
      # Smoke test: passes when the lookup returns without raising.
      _preset = Kreuzberg.get_embedding_preset("balanced")
    end
  end

  describe "get_embedding_preset_unknown" do
    test "get_embedding_preset_unknown" do
      result = Kreuzberg.get_embedding_preset("nonexistent-xyz")
      # An unknown preset is expected to come back as nil or a blank string.
      assert is_nil(result) or String.trim(result) == ""
    end
  end

  describe "list_embedding_presets_sanity" do
    test "list_embedding_presets_sanity" do
      result = Kreuzberg.list_embedding_presets()
      assert result != ""
    end
  end
end

44
e2e/elixir/test/error_test.exs generated Normal file
View File

@@ -0,0 +1,44 @@
# This file is auto-generated by alef — DO NOT EDIT.
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
# To regenerate: alef generate
# To verify freshness: alef verify --exit-code
# Issues & docs: https://github.com/kreuzberg-dev/alef
# E2e tests for category: error
defmodule E2e.ErrorTest do
  # End-to-end tests for the error behaviour of Kreuzberg.extract_bytes_sync.
  use ExUnit.Case, async: false

  describe "error_empty_bytes" do
    test "error_empty_bytes" do
      content = File.read!("../../test_documents/text/empty.txt")
      # The empty text fixture is expected to extract successfully; the
      # result payload is not inspected.
      assert {:ok, _result} = Kreuzberg.extract_bytes_sync(content, "text/plain", "{}")
    end
  end

  describe "error_empty_mime" do
    test "error_empty_mime" do
      content = File.read!("../../test_documents/text/plain.txt")
      assert {:error, _} = Kreuzberg.extract_bytes_sync(content, "", "{}")
    end
  end

  describe "error_extract_bytes_conflicting_ocr" do
    test "error_extract_bytes_conflicting_ocr" do
      content = File.read!("../../test_documents/text/fake_text.txt")
      # disable_ocr and force_ocr together must be rejected.
      assert {:error, _} = Kreuzberg.extract_bytes_sync(content, "text/plain", "{\"disable_ocr\":true,\"force_ocr\":true}")
    end
  end

  describe "error_invalid_mime_format" do
    test "error_invalid_mime_format" do
      content = File.read!("../../test_documents/text/plain.txt")
      assert {:error, _} = Kreuzberg.extract_bytes_sync(content, "not-a-mime", "{}")
    end
  end

  describe "error_unsupported_mime" do
    test "error_unsupported_mime" do
      content = File.read!("../../test_documents/text/plain.txt")
      assert {:error, _} = Kreuzberg.extract_bytes_sync(content, "application/x-nonexistent", "{}")
    end
  end
end

47
e2e/elixir/test/format_specific_test.exs generated Normal file
View File

@@ -0,0 +1,47 @@
# This file is auto-generated by alef — DO NOT EDIT.
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
# To regenerate: alef generate
# To verify freshness: alef verify --exit-code
# Issues & docs: https://github.com/kreuzberg-dev/alef
# E2e tests for category: format_specific
defmodule E2e.FormatSpecificTest do
  # End-to-end tests that extract one fixture per document format.
  use ExUnit.Case, async: false

  # Size of extracted content: byte count for binaries, element count for
  # lists. Any other term raises FunctionClauseError, which fails the test
  # (the previous inline check also raised for such terms).
  defp content_size(value) when is_binary(value), do: byte_size(value)
  defp content_size(value) when is_list(value), do: length(value)

  describe "format_docx_standalone" do
    test "format_docx_standalone" do
      content = File.read!("../../test_documents/docx/fake.docx")
      {:ok, result} = Kreuzberg.extract_bytes_sync(content, "application/vnd.openxmlformats-officedocument.wordprocessingml.document")
      assert content_size(result.content) >= 20
    end
  end

  describe "format_hwpx_standalone" do
    test "format_hwpx_standalone" do
      content = File.read!("../../test_documents/hwpx/simple.hwpx")
      {:ok, result} = Kreuzberg.extract_bytes_sync(content, "application/haansofthwpx")
      assert content_size(result.content) >= 20
      assert String.contains?(to_string(result.content), "Hello from HWPX")
    end
  end

  describe "format_pdf_text" do
    test "format_pdf_text" do
      content = File.read!("../../test_documents/pdf/fake_memo.pdf")
      {:ok, result} = Kreuzberg.extract_bytes_sync(content, "application/pdf")
      assert content_size(result.content) >= 50
      # Passes when at least one of the expected snippets is present.
      assert String.contains?(to_string(result.content), ["Mallori", "May"])
    end
  end

  describe "format_pptx" do
    test "format_pptx" do
      # Only success is checked; the result payload is not inspected.
      assert {:ok, _result} =
               Kreuzberg.extract_file_sync("../../test_documents/pptx/simple.pptx", mime_type: "application/vnd.openxmlformats-officedocument.presentationml.presentation")
    end
  end

  describe "format_xlsx" do
    test "format_xlsx" do
      # Only success is checked; the result payload is not inspected.
      assert {:ok, _result} =
               Kreuzberg.extract_file_sync("../../test_documents/xlsx/stanley_cups.xlsx", mime_type: "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")
    end
  end
end

32
e2e/elixir/test/mime_utilities_test.exs generated Normal file
View File

@@ -0,0 +1,32 @@
# This file is auto-generated by alef — DO NOT EDIT.
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
# To regenerate: alef generate
# To verify freshness: alef verify --exit-code
# Issues & docs: https://github.com/kreuzberg-dev/alef
# E2e tests for category: mime_utilities
defmodule E2e.MimeUtilitiesTest do
  # End-to-end tests for the MIME helper functions. Each result is converted
  # with to_string/1 and checked for an expected substring.
  use ExUnit.Case, async: false

  describe "mime_detect_bytes" do
    test "mime_detect_bytes" do
      pdf = File.read!("../../test_documents/pdf/fake_memo.pdf")
      {:ok, detected} = Kreuzberg.detect_mime_type_from_bytes(pdf)

      assert to_string(detected) =~ "pdf"
    end
  end

  describe "mime_detect_image" do
    test "mime_detect_image" do
      png = File.read!("../../test_documents/images/test_hello_world.png")
      {:ok, detected} = Kreuzberg.detect_mime_type_from_bytes(png)

      assert to_string(detected) =~ "png"
    end
  end

  describe "mime_get_extensions" do
    test "mime_get_extensions" do
      {:ok, extensions} = Kreuzberg.get_extensions_for_mime("application/pdf")

      assert to_string(extensions) =~ "pdf"
    end
  end
end

View File

@@ -0,0 +1,27 @@
# This file is auto-generated by alef — DO NOT EDIT.
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
# To regenerate: alef generate
# To verify freshness: alef verify --exit-code
# Issues & docs: https://github.com/kreuzberg-dev/alef
# E2e tests for category: ocr_backend_management
defmodule E2e.OcrBackendManagementTest do
  # Smoke tests for the OCR-backend registry functions. Each test passes as
  # long as the call returns without raising; the return value is
  # deliberately ignored (underscore-prefixed to avoid compiler warnings).
  use ExUnit.Case, async: false

  describe "ocr_backends_clear" do
    test "ocr_backends_clear" do
      _result = Kreuzberg.clear_ocr_backends()
    end
  end

  describe "ocr_backends_list" do
    test "ocr_backends_list" do
      _result = Kreuzberg.list_ocr_backends()
    end
  end

  describe "ocr_backends_unregister" do
    test "ocr_backends_unregister" do
      # Unregistering a name that was never registered must not raise.
      _result = Kreuzberg.unregister_ocr_backend("nonexistent-backend-xyz")
    end
  end
end

24
e2e/elixir/test/pdf_test.exs generated Normal file
View File

@@ -0,0 +1,24 @@
# This file is auto-generated by alef — DO NOT EDIT.
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
# To regenerate: alef generate
# To verify freshness: alef verify --exit-code
# Issues & docs: https://github.com/kreuzberg-dev/alef
# E2e tests for category: pdf
defmodule E2e.PdfTest do
  # End-to-end tests for rendering a single PDF page to PNG.
  use ExUnit.Case, async: false

  # Size of the rendered output: byte count for binaries, element count for
  # lists. Any other term raises, which fails the calling test.
  defp payload_size(value) when is_binary(value), do: byte_size(value)
  defp payload_size(value) when is_list(value), do: length(value)

  describe "render_pdf_page_first" do
    test "render_pdf_page_first" do
      memo = File.read!("../../test_documents/pdf/fake_memo.pdf")
      {:ok, png} = Kreuzberg.render_pdf_page_to_png(memo, 0)

      assert payload_size(png) >= 100
    end
  end

  describe "render_pdf_page_out_of_range" do
    test "render_pdf_page_out_of_range" do
      memo = File.read!("../../test_documents/pdf/fake_memo.pdf")

      # Page index 999 is expected to be rejected for this fixture.
      assert {:error, _} = Kreuzberg.render_pdf_page_to_png(memo, 999)
    end
  end
end

327
e2e/elixir/test/plugin_api_test.exs generated Normal file
View File

@@ -0,0 +1,327 @@
# This file is auto-generated by alef — DO NOT EDIT.
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
# To regenerate: alef generate
# To verify freshness: alef verify --exit-code
# Issues & docs: https://github.com/kreuzberg-dev/alef
# E2e tests for category: plugin_api
unless Code.ensure_loaded?(E2e.TestStubs.TestStubRegisterDocumentExtractorTraitBridge) do
  defmodule E2e.TestStubs.TestStubRegisterDocumentExtractorTraitBridge do
    # Minimal document-extractor stub. Every callback returns a fixed value
    # and ignores its arguments, so all parameters are underscore-prefixed to
    # keep the module free of unused-variable warnings.
    def name, do: "test-extractor"
    def version, do: "test"
    def initialize, do: :ok
    def shutdown, do: :ok
    def extract_bytes(_content, _mime_type, _config), do: {:ok, %{}}
    def extract_file(_path, _mime_type, _config), do: {:ok, %{}}
    def supported_mime_types, do: []
    def priority, do: 0
    def can_handle(_path, _mime_type), do: false
  end
end
unless Code.ensure_loaded?(E2e.TestStubs.TestStubRegisterDocumentExtractorTraitBridgeGenServer) do
  defmodule E2e.TestStubs.TestStubRegisterDocumentExtractorTraitBridgeGenServer do
    # GenServer that receives `{:trait_call, method, args_json, reply_id}`
    # messages, calls the matching function on the document-extractor stub
    # and hands the JSON-encoded result to
    # Kreuzberg.Native.complete_trait_call/2.
    use GenServer

    def start_link(_opts) do
      GenServer.start_link(__MODULE__, nil)
    end

    @impl true
    def init(_), do: {:ok, nil}

    @impl true
    def handle_info({:trait_call, method_atom, args_json, reply_id}, state) do
      args = Jason.decode!(args_json)
      method_name = to_string(method_atom)
      # Turn the decoded argument map into the positional list the stub expects.
      ordered_args = __alef_ordered_args__(method_name, args)
      result = apply(E2e.TestStubs.TestStubRegisterDocumentExtractorTraitBridge, String.to_existing_atom(method_name), ordered_args)
      # NOTE(review): Jason has no default encoder for tuples, so stub results
      # such as {:ok, %{}} would raise here — confirm which methods are
      # actually dispatched during the e2e run.
      result_json = Jason.encode!(result)
      Kreuzberg.Native.complete_trait_call(reply_id, result_json)
      {:noreply, state}
    end

    # Argument order per method. Clauses that take no arguments ignore the
    # decoded map (underscore-prefixed to avoid unused-variable warnings).
    defp __alef_ordered_args__("extract_bytes", args), do: [args["content"], args["mime_type"], args["config"]]
    defp __alef_ordered_args__("extract_file", args), do: [args["path"], args["mime_type"], args["config"]]
    defp __alef_ordered_args__("supported_mime_types", _args), do: []
    defp __alef_ordered_args__("priority", _args), do: []
    # NOTE(review): the keys carry a leading underscore ("_path",
    # "_mime_type"); presumably they mirror the parameter names on the
    # native side — confirm.
    defp __alef_ordered_args__("can_handle", args), do: [args["_path"], args["_mime_type"]]
    defp __alef_ordered_args__("version", _args), do: []
    defp __alef_ordered_args__("initialize", _args), do: []
    defp __alef_ordered_args__("shutdown", _args), do: []
    # Any other method is only accepted when it was called without arguments.
    defp __alef_ordered_args__(_method, args) when map_size(args) == 0, do: []
  end
end
unless Code.ensure_loaded?(E2e.TestStubs.TestStubRegisterEmbeddingBackendTraitBridge) do
  defmodule E2e.TestStubs.TestStubRegisterEmbeddingBackendTraitBridge do
    # Minimal embedding-backend stub. Every callback returns a fixed value;
    # `embed/1` ignores its input (underscore-prefixed to avoid an
    # unused-variable warning) and always returns an empty embedding list.
    def name, do: "test-embedding-backend"
    def version, do: "test"
    def initialize, do: :ok
    def shutdown, do: :ok
    def dimensions, do: 1
    def embed(_texts), do: {:ok, []}
  end
end
unless Code.ensure_loaded?(E2e.TestStubs.TestStubRegisterEmbeddingBackendTraitBridgeGenServer) do
  defmodule E2e.TestStubs.TestStubRegisterEmbeddingBackendTraitBridgeGenServer do
    # GenServer that receives `{:trait_call, method, args_json, reply_id}`
    # messages, calls the matching function on the embedding-backend stub
    # and hands the JSON-encoded result to
    # Kreuzberg.Native.complete_trait_call/2.
    use GenServer

    def start_link(_opts) do
      GenServer.start_link(__MODULE__, nil)
    end

    @impl true
    def init(_), do: {:ok, nil}

    @impl true
    def handle_info({:trait_call, method_atom, args_json, reply_id}, state) do
      args = Jason.decode!(args_json)
      method_name = to_string(method_atom)
      # Turn the decoded argument map into the positional list the stub expects.
      ordered_args = __alef_ordered_args__(method_name, args)
      result = apply(E2e.TestStubs.TestStubRegisterEmbeddingBackendTraitBridge, String.to_existing_atom(method_name), ordered_args)
      # NOTE(review): Jason has no default encoder for tuples, so a stub
      # result such as {:ok, []} would raise here — confirm which methods are
      # actually dispatched during the e2e run.
      result_json = Jason.encode!(result)
      Kreuzberg.Native.complete_trait_call(reply_id, result_json)
      {:noreply, state}
    end

    # Argument order per method. Clauses that take no arguments ignore the
    # decoded map (underscore-prefixed to avoid unused-variable warnings).
    defp __alef_ordered_args__("dimensions", _args), do: []
    defp __alef_ordered_args__("embed", args), do: [args["texts"]]
    defp __alef_ordered_args__("version", _args), do: []
    defp __alef_ordered_args__("initialize", _args), do: []
    defp __alef_ordered_args__("shutdown", _args), do: []
    # Any other method is only accepted when it was called without arguments.
    defp __alef_ordered_args__(_method, args) when map_size(args) == 0, do: []
  end
end
unless Code.ensure_loaded?(E2e.TestStubs.TestStubRegisterOcrBackendTraitBridge) do
  defmodule E2e.TestStubs.TestStubRegisterOcrBackendTraitBridge do
    # Minimal OCR-backend stub. Every callback returns a fixed value and
    # ignores its arguments, so all parameters are underscore-prefixed to
    # keep the module free of unused-variable warnings.
    def name, do: "test-backend"
    def version, do: "test"
    def initialize, do: :ok
    def shutdown, do: :ok
    def process_image(_image_bytes, _config), do: {:ok, %{}}
    def process_image_file(_path, _config), do: {:ok, %{}}
    def supports_language(_lang), do: false
    def backend_type, do: %{}
    def supported_languages, do: []
    def supports_table_detection, do: false
    def supports_document_processing, do: false
    def process_document(_path, _config), do: {:ok, %{}}
  end
end
unless Code.ensure_loaded?(E2e.TestStubs.TestStubRegisterOcrBackendTraitBridgeGenServer) do
  # GenServer that answers `{:trait_call, method, args_json, reply_id}` messages
  # by invoking the matching function on the OCR-backend stub and handing the
  # JSON-encoded result to `Kreuzberg.Native.complete_trait_call/2`.
  defmodule E2e.TestStubs.TestStubRegisterOcrBackendTraitBridgeGenServer do
    use GenServer

    def start_link(_opts) do
      GenServer.start_link(__MODULE__, nil)
    end

    @impl true
    def init(_), do: {:ok, nil}

    @impl true
    def handle_info({:trait_call, method_atom, args_json, reply_id}, state) do
      args = Jason.decode!(args_json)
      method_name = to_string(method_atom)
      # The decoded arguments are keyed by name; turn them into a positional list.
      ordered_args = __alef_ordered_args__(method_name, args)
      result = apply(E2e.TestStubs.TestStubRegisterOcrBackendTraitBridge, String.to_existing_atom(method_name), ordered_args)
      result_json = Jason.encode!(result)
      Kreuzberg.Native.complete_trait_call(reply_id, result_json)
      {:noreply, state}
    end

    # Maps a method name to the positional argument list expected by the stub.
    # Unused bindings carry an underscore so the module compiles without warnings.
    defp __alef_ordered_args__("process_image", args), do: [args["image_bytes"], args["config"]]
    defp __alef_ordered_args__("process_image_file", args), do: [args["path"], args["config"]]
    defp __alef_ordered_args__("supports_language", args), do: [args["lang"]]
    defp __alef_ordered_args__("backend_type", _args), do: []
    defp __alef_ordered_args__("supported_languages", _args), do: []
    defp __alef_ordered_args__("supports_table_detection", _args), do: []
    defp __alef_ordered_args__("supports_document_processing", _args), do: []
    defp __alef_ordered_args__("process_document", args), do: [args["_path"], args["_config"]]
    defp __alef_ordered_args__("version", _args), do: []
    defp __alef_ordered_args__("initialize", _args), do: []
    defp __alef_ordered_args__("shutdown", _args), do: []
    # Any other method is accepted only when it was called with no arguments.
    defp __alef_ordered_args__(_method, args) when map_size(args) == 0, do: []
  end
end
unless Code.ensure_loaded?(E2e.TestStubs.TestStubRegisterPostProcessorTraitBridge) do
  # Stub post-processor whose callbacks all return fixed values and ignore
  # their inputs. `process/2` now underscore-prefixes its parameters like the
  # other callbacks, removing the unused-variable warnings.
  defmodule E2e.TestStubs.TestStubRegisterPostProcessorTraitBridge do
    def name, do: "test-processor"
    def version, do: "test"
    def initialize, do: :ok
    def shutdown, do: :ok
    def process(_result, _config), do: {:ok, nil}
    def processing_stage, do: %{}
    def should_process(_result, _config), do: false
    def estimated_duration_ms(_result), do: 0
    def priority, do: 0
  end
end
unless Code.ensure_loaded?(E2e.TestStubs.TestStubRegisterPostProcessorTraitBridgeGenServer) do
  # GenServer that answers `{:trait_call, method, args_json, reply_id}` messages
  # by invoking the matching function on the post-processor stub and handing
  # the JSON-encoded result to `Kreuzberg.Native.complete_trait_call/2`.
  defmodule E2e.TestStubs.TestStubRegisterPostProcessorTraitBridgeGenServer do
    use GenServer

    def start_link(_opts) do
      GenServer.start_link(__MODULE__, nil)
    end

    @impl true
    def init(_), do: {:ok, nil}

    @impl true
    def handle_info({:trait_call, method_atom, args_json, reply_id}, state) do
      args = Jason.decode!(args_json)
      method_name = to_string(method_atom)
      # The decoded arguments are keyed by name; turn them into a positional list.
      ordered_args = __alef_ordered_args__(method_name, args)
      result = apply(E2e.TestStubs.TestStubRegisterPostProcessorTraitBridge, String.to_existing_atom(method_name), ordered_args)
      result_json = Jason.encode!(result)
      Kreuzberg.Native.complete_trait_call(reply_id, result_json)
      {:noreply, state}
    end

    # Maps a method name to the positional argument list expected by the stub.
    # Unused bindings carry an underscore so the module compiles without warnings.
    defp __alef_ordered_args__("process", args), do: [args["result"], args["config"]]
    defp __alef_ordered_args__("processing_stage", _args), do: []
    defp __alef_ordered_args__("should_process", args), do: [args["_result"], args["_config"]]
    defp __alef_ordered_args__("estimated_duration_ms", args), do: [args["_result"]]
    defp __alef_ordered_args__("priority", _args), do: []
    defp __alef_ordered_args__("version", _args), do: []
    defp __alef_ordered_args__("initialize", _args), do: []
    defp __alef_ordered_args__("shutdown", _args), do: []
    # Any other method is accepted only when it was called with no arguments.
    defp __alef_ordered_args__(_method, args) when map_size(args) == 0, do: []
  end
end
unless Code.ensure_loaded?(E2e.TestStubs.TestStubRegisterRendererTraitBridge) do
  # Stub renderer whose callbacks all return fixed values.
  # The surrounding guard defines it only when it is not already loaded.
  defmodule E2e.TestStubs.TestStubRegisterRendererTraitBridge do
    def name, do: "test-renderer"
    def version, do: "test"
    def initialize, do: :ok
    def shutdown, do: :ok

    # The document is ignored; the underscore prefix silences the
    # unused-variable compiler warning.
    def render(_doc), do: {:ok, ""}
  end
end
unless Code.ensure_loaded?(E2e.TestStubs.TestStubRegisterRendererTraitBridgeGenServer) do
  # Process wrapper around the renderer stub. Each `:trait_call` message is
  # decoded, dispatched to the stub module, and the JSON-encoded return value
  # is passed to `Kreuzberg.Native.complete_trait_call/2` with the reply id.
  defmodule E2e.TestStubs.TestStubRegisterRendererTraitBridgeGenServer do
    use GenServer

    def start_link(_opts), do: GenServer.start_link(__MODULE__, nil)

    @impl true
    def init(_) do
      {:ok, nil}
    end

    @impl true
    def handle_info({:trait_call, method_atom, args_json, reply_id}, state) do
      decoded = Jason.decode!(args_json)
      method = to_string(method_atom)
      positional = __alef_ordered_args__(method, decoded)

      encoded =
        E2e.TestStubs.TestStubRegisterRendererTraitBridge
        |> apply(String.to_existing_atom(method), positional)
        |> Jason.encode!()

      Kreuzberg.Native.complete_trait_call(reply_id, encoded)
      {:noreply, state}
    end

    # Positional arguments for each supported method; methods not listed are
    # accepted only with an empty argument map.
    defp __alef_ordered_args__("render", args), do: [args["doc"]]
    defp __alef_ordered_args__("version", _args), do: []
    defp __alef_ordered_args__("initialize", _args), do: []
    defp __alef_ordered_args__("shutdown", _args), do: []
    defp __alef_ordered_args__(_method, args) when map_size(args) == 0, do: []
  end
end
unless Code.ensure_loaded?(E2e.TestStubs.TestStubRegisterValidatorTraitBridge) do
  # Stub validator whose callbacks all return fixed values and ignore their
  # inputs. `validate/2` now underscore-prefixes its parameters like
  # `should_validate/2`, removing the unused-variable warnings.
  defmodule E2e.TestStubs.TestStubRegisterValidatorTraitBridge do
    def name, do: "test-validator"
    def version, do: "test"
    def initialize, do: :ok
    def shutdown, do: :ok
    def validate(_result, _config), do: {:ok, nil}
    def should_validate(_result, _config), do: false
    def priority, do: 0
  end
end
unless Code.ensure_loaded?(E2e.TestStubs.TestStubRegisterValidatorTraitBridgeGenServer) do
  # GenServer that answers `{:trait_call, method, args_json, reply_id}` messages
  # by invoking the matching function on the validator stub and handing the
  # JSON-encoded result to `Kreuzberg.Native.complete_trait_call/2`.
  defmodule E2e.TestStubs.TestStubRegisterValidatorTraitBridgeGenServer do
    use GenServer

    def start_link(_opts) do
      GenServer.start_link(__MODULE__, nil)
    end

    @impl true
    def init(_), do: {:ok, nil}

    @impl true
    def handle_info({:trait_call, method_atom, args_json, reply_id}, state) do
      args = Jason.decode!(args_json)
      method_name = to_string(method_atom)
      # The decoded arguments are keyed by name; turn them into a positional list.
      ordered_args = __alef_ordered_args__(method_name, args)
      result = apply(E2e.TestStubs.TestStubRegisterValidatorTraitBridge, String.to_existing_atom(method_name), ordered_args)
      result_json = Jason.encode!(result)
      Kreuzberg.Native.complete_trait_call(reply_id, result_json)
      {:noreply, state}
    end

    # Maps a method name to the positional argument list expected by the stub.
    # Unused bindings carry an underscore so the module compiles without warnings.
    defp __alef_ordered_args__("validate", args), do: [args["result"], args["config"]]
    defp __alef_ordered_args__("should_validate", args), do: [args["_result"], args["_config"]]
    defp __alef_ordered_args__("priority", _args), do: []
    defp __alef_ordered_args__("version", _args), do: []
    defp __alef_ordered_args__("initialize", _args), do: []
    defp __alef_ordered_args__("shutdown", _args), do: []
    # Any other method is accepted only when it was called with no arguments.
    defp __alef_ordered_args__(_method, args) when map_size(args) == 0, do: []
  end
end
# Exercises the plugin register/unregister entry points. Each test only checks
# that the call completes without raising; return values are bound to
# `_result` so the module compiles without unused-variable warnings.
defmodule E2e.PluginApiTest do
  use ExUnit.Case, async: false

  describe "register_document_extractor_trait_bridge" do
    test "register_document_extractor_trait_bridge" do
      {:ok, registerdocumentextractortraitbridge_pid} = E2e.TestStubs.TestStubRegisterDocumentExtractorTraitBridgeGenServer.start_link(nil)
      _result = Kreuzberg.register_document_extractor(registerdocumentextractortraitbridge_pid, "test-extractor")
    end
  end

  describe "register_embedding_backend_trait_bridge" do
    test "register_embedding_backend_trait_bridge" do
      {:ok, registerembeddingbackendtraitbridge_pid} = E2e.TestStubs.TestStubRegisterEmbeddingBackendTraitBridgeGenServer.start_link(nil)
      _result = Kreuzberg.register_embedding_backend(registerembeddingbackendtraitbridge_pid, "test-embedding-backend")
    end
  end

  describe "register_ocr_backend_trait_bridge" do
    test "register_ocr_backend_trait_bridge" do
      {:ok, registerocrbackendtraitbridge_pid} = E2e.TestStubs.TestStubRegisterOcrBackendTraitBridgeGenServer.start_link(nil)
      _result = Kreuzberg.register_ocr_backend(registerocrbackendtraitbridge_pid, "test-backend")
    end
  end

  describe "register_post_processor_trait_bridge" do
    test "register_post_processor_trait_bridge" do
      {:ok, registerpostprocessortraitbridge_pid} = E2e.TestStubs.TestStubRegisterPostProcessorTraitBridgeGenServer.start_link(nil)
      _result = Kreuzberg.register_post_processor(registerpostprocessortraitbridge_pid, "test-processor")
    end
  end

  describe "register_renderer_trait_bridge" do
    test "register_renderer_trait_bridge" do
      {:ok, registerrenderertraitbridge_pid} = E2e.TestStubs.TestStubRegisterRendererTraitBridgeGenServer.start_link(nil)
      _result = Kreuzberg.register_renderer(registerrenderertraitbridge_pid, "test-renderer")
    end
  end

  describe "register_validator_trait_bridge" do
    test "register_validator_trait_bridge" do
      {:ok, registervalidatortraitbridge_pid} = E2e.TestStubs.TestStubRegisterValidatorTraitBridgeGenServer.start_link(nil)
      _result = Kreuzberg.register_validator(registervalidatortraitbridge_pid, "test-validator")
    end
  end

  # NOTE(review): the "after_register" tests below do not themselves register
  # anything, and ExUnit does not promise they run after the register_* tests
  # above — confirm nothing relies on that ordering.
  describe "unregister_document_extractor_after_register" do
    test "unregister_document_extractor_after_register" do
      _result = Kreuzberg.unregister_document_extractor("test-extractor")
    end
  end

  describe "unregister_embedding_backend_after_register" do
    test "unregister_embedding_backend_after_register" do
      _result = Kreuzberg.unregister_embedding_backend("test-embedding-backend")
    end
  end

  describe "unregister_post_processor_after_register" do
    test "unregister_post_processor_after_register" do
      _result = Kreuzberg.unregister_post_processor("test-processor")
    end
  end

  describe "unregister_renderer_after_register" do
    test "unregister_renderer_after_register" do
      _result = Kreuzberg.unregister_renderer("test-renderer")
    end
  end

  describe "unregister_validator_after_register" do
    test "unregister_validator_after_register" do
      _result = Kreuzberg.unregister_validator("test-validator")
    end
  end
end

View File

@@ -0,0 +1,21 @@
# This file is auto-generated by alef — DO NOT EDIT.
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
# To regenerate: alef generate
# To verify freshness: alef verify --exit-code
# Issues & docs: https://github.com/kreuzberg-dev/alef
# E2e tests for category: post_processor_management
# Checks that clearing and listing post-processors complete without raising.
# Return values are bound to `_result` to avoid unused-variable warnings.
defmodule E2e.PostProcessorManagementTest do
  use ExUnit.Case, async: false

  describe "post_processors_clear" do
    test "post_processors_clear" do
      _result = Kreuzberg.clear_post_processors()
    end
  end

  describe "post_processors_list" do
    test "post_processors_list" do
      _result = Kreuzberg.list_post_processors()
    end
  end
end

View File

@@ -0,0 +1,27 @@
# This file is auto-generated by alef — DO NOT EDIT.
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
# To regenerate: alef generate
# To verify freshness: alef verify --exit-code
# Issues & docs: https://github.com/kreuzberg-dev/alef
# E2e tests for category: registry_operations
# Checks that `Kreuzberg.get_extensions_for_mime/1` returns an `{:ok, _}` tuple
# for several MIME types. The match is wrapped in `assert` so a non-matching
# return is reported as an ExUnit assertion failure rather than a MatchError,
# and the unused binding is underscore-prefixed.
defmodule E2e.RegistryOperationsTest do
  use ExUnit.Case, async: false

  describe "extensions_docx" do
    test "extensions_docx" do
      assert {:ok, _extensions} = Kreuzberg.get_extensions_for_mime("application/vnd.openxmlformats-officedocument.wordprocessingml.document")
    end
  end

  describe "extensions_html" do
    test "extensions_html" do
      assert {:ok, _extensions} = Kreuzberg.get_extensions_for_mime("text/html")
    end
  end

  describe "extensions_pdf" do
    test "extensions_pdf" do
      assert {:ok, _extensions} = Kreuzberg.get_extensions_for_mime("application/pdf")
    end
  end
end

45
e2e/elixir/test/registry_test.exs generated Normal file
View File

@@ -0,0 +1,45 @@
# This file is auto-generated by alef — DO NOT EDIT.
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
# To regenerate: alef generate
# To verify freshness: alef verify --exit-code
# Issues & docs: https://github.com/kreuzberg-dev/alef
# E2e tests for category: registry
# Checks that each registry listing function completes without raising.
# Return values are bound to `_result` to avoid unused-variable warnings.
defmodule E2e.RegistryTest do
  use ExUnit.Case, async: false

  describe "list_document_extractors" do
    test "list_document_extractors" do
      _result = Kreuzberg.list_document_extractors()
    end
  end

  describe "list_embedding_backends" do
    test "list_embedding_backends" do
      _result = Kreuzberg.list_embedding_backends()
    end
  end

  describe "list_ocr_backends" do
    test "list_ocr_backends" do
      _result = Kreuzberg.list_ocr_backends()
    end
  end

  describe "list_post_processors" do
    test "list_post_processors" do
      _result = Kreuzberg.list_post_processors()
    end
  end

  describe "list_renderers" do
    test "list_renderers" do
      _result = Kreuzberg.list_renderers()
    end
  end

  describe "list_validators" do
    test "list_validators" do
      _result = Kreuzberg.list_validators()
    end
  end
end

View File

@@ -0,0 +1,21 @@
# This file is auto-generated by alef — DO NOT EDIT.
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
# To regenerate: alef generate
# To verify freshness: alef verify --exit-code
# Issues & docs: https://github.com/kreuzberg-dev/alef
# E2e tests for category: renderer_management
# Checks that clearing and listing renderers complete without raising.
# Return values are bound to `_result` to avoid unused-variable warnings.
defmodule E2e.RendererManagementTest do
  use ExUnit.Case, async: false

  describe "renderers_clear" do
    test "renderers_clear" do
      _result = Kreuzberg.clear_renderers()
    end
  end

  describe "renderers_list" do
    test "renderers_list" do
      _result = Kreuzberg.list_renderers()
    end
  end
end

118
e2e/elixir/test/smoke_test.exs generated Normal file
View File

@@ -0,0 +1,118 @@
# This file is auto-generated by alef — DO NOT EDIT.
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
# To regenerate: alef generate
# To verify freshness: alef verify --exit-code
# Issues & docs: https://github.com/kreuzberg-dev/alef
# E2e tests for category: smoke
# Smoke tests: extract one fixture per format and check the reported MIME type,
# a minimum content size, and (where given) expected substrings.
defmodule E2e.SmokeTest do
  use ExUnit.Case, async: false

  # Size of extracted content: byte count for binaries, element count for lists.
  # Any other shape matches no clause and raises, which fails the calling test
  # (the inlined expression this replaces also raised in that case).
  defp content_size(content) when is_binary(content), do: byte_size(content)
  defp content_size(content) when is_list(content), do: length(content)

  # True when the stringified content contains at least one of `candidates`.
  defp contains_any?(content, candidates) do
    text = to_string(content)
    Enum.any?(candidates, fn v -> String.contains?(text, v) end)
  end

  describe "ocr_image_png" do
    test "ocr_image_png" do
      content = File.read!("../../test_documents/images/test_hello_world.png")
      {:ok, result} = Kreuzberg.extract_bytes_async(content, "image/png", "{}")
      assert String.trim(result.mime_type) == "image/png"
      assert content_size(result.content) >= 1
      assert contains_any?(result.content, ["Hello", "World", "hello", "world"])
    end
  end

  describe "smoke_docx_basic" do
    test "smoke_docx_basic" do
      {:ok, result} = Kreuzberg.extract_file_async("../../test_documents/docx/fake.docx", mime_type: "application/vnd.openxmlformats-officedocument.wordprocessingml.document", config: "{}")
      assert String.trim(result.mime_type) == "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
      assert content_size(result.content) >= 20
      assert contains_any?(result.content, ["Lorem", "ipsum", "document", "text"])
    end
  end

  describe "smoke_html_basic" do
    test "smoke_html_basic" do
      {:ok, result} = Kreuzberg.extract_file_async("../../test_documents/html/simple_table.html", mime_type: "text/html", config: "{}")
      assert String.trim(result.mime_type) == "text/html"
      assert content_size(result.content) >= 10
      assert contains_any?(result.content, ["Sample Data Table", "Laptop", "Electronics", "Product"])
    end
  end

  describe "smoke_image_png" do
    test "smoke_image_png" do
      {:ok, result} = Kreuzberg.extract_file_async("../../test_documents/images/sample.png", config: "{\"disable_ocr\":true}")
      assert String.trim(result.mime_type) == "image/png"
    end
  end

  describe "smoke_json_basic" do
    test "smoke_json_basic" do
      {:ok, result} = Kreuzberg.extract_file_async("../../test_documents/json/simple.json", mime_type: "application/json", config: "{}")
      assert String.trim(result.mime_type) == "application/json"
      assert content_size(result.content) >= 5
    end
  end

  describe "smoke_pdf_basic" do
    test "smoke_pdf_basic" do
      {:ok, result} = Kreuzberg.extract_file_async("../../test_documents/pdf/fake_memo.pdf", mime_type: "application/pdf", config: "{}")
      assert String.trim(result.mime_type) == "application/pdf"
      assert content_size(result.content) >= 50
      assert contains_any?(result.content, ["May 5, 2023", "To Whom it May Concern"])
    end
  end

  describe "smoke_txt_basic" do
    test "smoke_txt_basic" do
      {:ok, result} = Kreuzberg.extract_file_async("../../test_documents/text/report.txt", mime_type: "text/plain", config: "{}")
      assert String.trim(result.mime_type) == "text/plain"
      assert content_size(result.content) >= 5
    end
  end

  describe "smoke_xlsx_basic" do
    test "smoke_xlsx_basic" do
      {:ok, result} = Kreuzberg.extract_file_async("../../test_documents/xlsx/stanley_cups.xlsx", mime_type: "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", config: "{}")
      assert String.trim(result.mime_type) == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
      assert content_size(result.content) >= 100

      text = to_string(result.content)

      # Every one of these must be present (checked in the original order).
      for expected <- ["Team", "Location", "Stanley Cups", "Blues", "Flyers", "Maple Leafs", "STL", "PHI", "TOR"] do
        assert String.contains?(text, expected)
      end

      # skipped: field 'tables' not available on result type
      # skipped: field 'metadata.format.excel.sheet_count' not available on result type
      # skipped: field 'metadata.format.excel.sheet_names' not available on result type
    end
  end
end

1
e2e/elixir/test/test_helper.exs generated Normal file
View File

@@ -0,0 +1 @@
# Start the ExUnit test runner.
ExUnit.start()

View File

@@ -0,0 +1,21 @@
# This file is auto-generated by alef — DO NOT EDIT.
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
# To regenerate: alef generate
# To verify freshness: alef verify --exit-code
# Issues & docs: https://github.com/kreuzberg-dev/alef
# E2e tests for category: validator_management
# Checks that clearing and listing validators complete without raising.
# Return values are bound to `_result` to avoid unused-variable warnings.
defmodule E2e.ValidatorManagementTest do
  use ExUnit.Case, async: false

  describe "validators_clear" do
    test "validators_clear" do
      _result = Kreuzberg.clear_validators()
    end
  end

  describe "validators_list" do
    test "validators_list" do
      _result = Kreuzberg.list_validators()
    end
  end
end

41
e2e/elixir/test_syntax_check.exs generated Normal file
View File

@@ -0,0 +1,41 @@
unless Code.ensure_loaded?(E2e.TestStubs.TestStubRegisterDocumentExtractorTraitBridge) do
  # Stub document extractor whose callbacks all return fixed values.
  # The surrounding guard defines it only when it is not already loaded.
  defmodule E2e.TestStubs.TestStubRegisterDocumentExtractorTraitBridge do
    def name, do: "test-extractor"
    def initialize, do: :ok

    # All inputs are ignored; underscore prefixes silence the unused-variable
    # compiler warnings.
    def extract_bytes(_content, _mime_type, _config), do: {:ok, %{}}
    def supported_mime_types, do: []
  end
end
unless Code.ensure_loaded?(E2e.TestStubs.TestStubRegisterDocumentExtractorTraitBridgeGenServer) do
  # GenServer that answers `{:trait_call, method, args_json, reply_id}` messages
  # by invoking the matching function on the document-extractor stub and handing
  # the JSON-encoded result to `Kreuzberg.Native.complete_trait_call/2`.
  defmodule E2e.TestStubs.TestStubRegisterDocumentExtractorTraitBridgeGenServer do
    use GenServer

    def start_link(_opts) do
      GenServer.start_link(__MODULE__, nil)
    end

    @impl true
    def init(_), do: {:ok, nil}

    @impl true
    def handle_info({:trait_call, method_atom, args_json, reply_id}, state) do
      args = Jason.decode!(args_json)
      method_name = to_string(method_atom)
      # `apply/3` needs a list of positional arguments, so the decoded value is
      # converted first instead of being passed through as-is.
      ordered_args = __alef_ordered_args__(method_name, args)
      result = apply(E2e.TestStubs.TestStubRegisterDocumentExtractorTraitBridge, String.to_existing_atom(method_name), ordered_args)
      result_json = Jason.encode!(result)
      Kreuzberg.Native.complete_trait_call(reply_id, result_json)
      {:noreply, state}
    end

    # A decoded JSON array is already positional and is used unchanged.
    defp __alef_ordered_args__(_method, args) when is_list(args), do: args
    # NOTE(review): the map keys are assumed to equal the stub's parameter
    # names — confirm against the native bridge.
    defp __alef_ordered_args__("extract_bytes", args), do: [args["content"], args["mime_type"], args["config"]]
    # Any other method is accepted only when it was called with no arguments.
    defp __alef_ordered_args__(_method, args) when map_size(args) == 0, do: []
  end
end
# Starts the document-extractor stub process and registers it under the name
# "test-extractor". The test only checks that both calls complete; the return
# value is bound to `_result` to avoid an unused-variable warning.
defmodule E2e.PluginApiTest do
  use ExUnit.Case

  describe "register_document_extractor_trait_bridge" do
    test "register_document_extractor_trait_bridge" do
      {:ok, registerdocumentextractortraitbridge_pid} = E2e.TestStubs.TestStubRegisterDocumentExtractorTraitBridgeGenServer.start_link(nil)
      _result = Kreuzberg.register_document_extractor(registerdocumentextractortraitbridge_pid, "test-extractor")
    end
  end
end

58
e2e/go/async_test.go generated Normal file
View File

@@ -0,0 +1,58 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// E2e tests for category: async
package e2e_test
import (
"os"
"strings"
"testing"
"github.com/stretchr/testify/assert"
kreuzberg "github.com/kreuzberg-dev/kreuzberg/v5"
)
// Test_AsyncExtractBytes passes the bytes of the pdf/fake_memo.pdf fixture to
// kreuzberg.ExtractBytes and checks the reported MIME type and a minimum
// content length.
func Test_AsyncExtractBytes(t *testing.T) {
	pdfBytes, readErr := os.ReadFile(`pdf/fake_memo.pdf`)
	if readErr != nil {
		t.Fatalf("read fixture pdf/fake_memo.pdf: %v", readErr)
	}

	result, err := kreuzberg.ExtractBytes(pdfBytes, `application/pdf`, kreuzberg.ExtractionConfig{})
	if err != nil {
		t.Fatalf("call failed: %v", err)
	}

	gotMime := strings.TrimSpace(string(result.MimeType))
	if gotMime != `application/pdf` {
		t.Errorf("equals mismatch: got %v", result.MimeType)
	}
	assert.GreaterOrEqual(t, len(result.Content), 50, "expected length >= 50")
}
// Test_AsyncExtractBytesEmptyMime expects kreuzberg.ExtractBytes to return an
// error when it is given an empty MIME type.
func Test_AsyncExtractBytesEmptyMime(t *testing.T) {
	payload, readErr := os.ReadFile(`text/plain.txt`)
	if readErr != nil {
		t.Fatalf("read fixture text/plain.txt: %v", readErr)
	}

	if _, err := kreuzberg.ExtractBytes(payload, ``, kreuzberg.ExtractionConfig{}); err == nil {
		t.Errorf("expected an error, but call succeeded")
	}
}
// Test_AsyncExtractBytesInvalidMime expects kreuzberg.ExtractBytes to return an
// error for the MIME type application/x-nonexistent.
func Test_AsyncExtractBytesInvalidMime(t *testing.T) {
	payload, readErr := os.ReadFile(`text/plain.txt`)
	if readErr != nil {
		t.Fatalf("read fixture text/plain.txt: %v", readErr)
	}

	if _, err := kreuzberg.ExtractBytes(payload, `application/x-nonexistent`, kreuzberg.ExtractionConfig{}); err == nil {
		t.Errorf("expected an error, but call succeeded")
	}
}

139
e2e/go/batch_test.go generated Normal file
View File

@@ -0,0 +1,139 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// E2e tests for category: batch
package e2e_test
import (
"encoding/json"
"testing"
"github.com/stretchr/testify/assert"
kreuzberg "github.com/kreuzberg-dev/kreuzberg/v5"
)
// Test_BatchBytesInvalidMime runs BatchExtractBytesSync on one item with the
// MIME type application/x-nonexistent and requires the call itself to return
// no error.
func Test_BatchBytesInvalidMime(t *testing.T) {
	fixture := []byte(`[{"content":"SGVsbG8=","mime_type":"application/x-nonexistent"}]`)

	var items []kreuzberg.BatchBytesItem
	if parseErr := json.Unmarshal(fixture, &items); parseErr != nil {
		t.Fatalf("config parse failed: %v", parseErr)
	}

	if _, err := kreuzberg.BatchExtractBytesSync(items, kreuzberg.ExtractionConfig{}); err != nil {
		t.Fatalf("call failed: %v", err)
	}
}
// Test_BatchExtractBytesHappy runs BatchExtractBytes on a text/plain item and
// a text/html item and requires at least one element in the result.
func Test_BatchExtractBytesHappy(t *testing.T) {
	fixture := []byte(`[{"content":"SGVsbG8sIHdvcmxkIQ==","mime_type":"text/plain"},{"content":"PGh0bWw+PGJvZHk+VGVzdDwvYm9keT48L2h0bWw+","mime_type":"text/html"}]`)

	var items []kreuzberg.BatchBytesItem
	if parseErr := json.Unmarshal(fixture, &items); parseErr != nil {
		t.Fatalf("config parse failed: %v", parseErr)
	}

	result, err := kreuzberg.BatchExtractBytes(items, kreuzberg.ExtractionConfig{})
	if err != nil {
		t.Fatalf("call failed: %v", err)
	}

	assert.GreaterOrEqual(t, len(result), 1, "expected at least 1 elements")
}
// Test_BatchExtractBytesMixedFormat runs BatchExtractBytes on one item with
// the MIME type application/x-unknown and requires the call itself to return
// no error.
func Test_BatchExtractBytesMixedFormat(t *testing.T) {
	fixture := []byte(`[{"content":"UERGIHBsYWNlaG9sZGVy","mime_type":"application/x-unknown"}]`)

	var items []kreuzberg.BatchBytesItem
	if parseErr := json.Unmarshal(fixture, &items); parseErr != nil {
		t.Fatalf("config parse failed: %v", parseErr)
	}

	if _, err := kreuzberg.BatchExtractBytes(items, kreuzberg.ExtractionConfig{}); err != nil {
		t.Fatalf("call failed: %v", err)
	}
}
// Test_BatchExtractBytesSyncEmptyList runs BatchExtractBytesSync on an empty
// batch and requires an empty result.
func Test_BatchExtractBytesSyncEmptyList(t *testing.T) {
	var items []kreuzberg.BatchBytesItem
	if err := json.Unmarshal([]byte(`[]`), &items); err != nil {
		t.Fatalf("config parse failed: %v", err)
	}
	result, err := kreuzberg.BatchExtractBytesSync(items, kreuzberg.ExtractionConfig{})
	if err != nil {
		t.Fatalf("call failed: %v", err)
	}
	// assert.Equal takes (expected, actual); with the arguments in that order
	// a failure report labels the two values correctly.
	assert.Equal(t, 0, len(result), "expected exactly 0 elements")
}
// Test_BatchExtractBytesSyncInvalidMime runs BatchExtractBytesSync on one item
// with the MIME type application/x-unknown and requires the call itself to
// return no error.
func Test_BatchExtractBytesSyncInvalidMime(t *testing.T) {
	fixture := []byte(`[{"content":"ZGF0YQ==","mime_type":"application/x-unknown"}]`)

	var items []kreuzberg.BatchBytesItem
	if parseErr := json.Unmarshal(fixture, &items); parseErr != nil {
		t.Fatalf("config parse failed: %v", parseErr)
	}

	if _, err := kreuzberg.BatchExtractBytesSync(items, kreuzberg.ExtractionConfig{}); err != nil {
		t.Fatalf("call failed: %v", err)
	}
}
// Test_BatchFileAsyncBasic runs BatchExtractFiles on a PDF fixture and a text
// fixture and requires the call to return no error.
func Test_BatchFileAsyncBasic(t *testing.T) {
	fixture := []byte(`[{"path":"pdf/fake_memo.pdf"},{"path":"text/fake_text.txt"}]`)

	var paths []kreuzberg.BatchFileItem
	if parseErr := json.Unmarshal(fixture, &paths); parseErr != nil {
		t.Fatalf("config parse failed: %v", parseErr)
	}

	if _, err := kreuzberg.BatchExtractFiles(paths, kreuzberg.ExtractionConfig{}); err != nil {
		t.Fatalf("call failed: %v", err)
	}
}
// Test_BatchFileAsyncNotFound runs BatchExtractFiles on a single nonexistent
// path and requires the call itself to return no error.
func Test_BatchFileAsyncNotFound(t *testing.T) {
	fixture := []byte(`[{"path":"/nonexistent/a.pdf"}]`)

	var paths []kreuzberg.BatchFileItem
	if parseErr := json.Unmarshal(fixture, &paths); parseErr != nil {
		t.Fatalf("config parse failed: %v", parseErr)
	}

	if _, err := kreuzberg.BatchExtractFiles(paths, kreuzberg.ExtractionConfig{}); err != nil {
		t.Fatalf("call failed: %v", err)
	}
}
// Test_BatchFileNotFound runs BatchExtractFilesSync on two nonexistent paths
// and requires the call itself to return no error.
func Test_BatchFileNotFound(t *testing.T) {
	fixture := []byte(`[{"path":"/nonexistent/a.pdf"},{"path":"/nonexistent/b.txt"}]`)

	var paths []kreuzberg.BatchFileItem
	if parseErr := json.Unmarshal(fixture, &paths); parseErr != nil {
		t.Fatalf("config parse failed: %v", parseErr)
	}

	if _, err := kreuzberg.BatchExtractFilesSync(paths, kreuzberg.ExtractionConfig{}); err != nil {
		t.Fatalf("call failed: %v", err)
	}
}
// Test_BatchFilePartial runs BatchExtractFilesSync on one fixture path plus
// one nonexistent path and requires the call itself to return no error.
func Test_BatchFilePartial(t *testing.T) {
	fixture := []byte(`[{"path":"text/plain.txt"},{"path":"/nonexistent/missing.pdf"}]`)

	var paths []kreuzberg.BatchFileItem
	if parseErr := json.Unmarshal(fixture, &paths); parseErr != nil {
		t.Fatalf("config parse failed: %v", parseErr)
	}

	if _, err := kreuzberg.BatchExtractFilesSync(paths, kreuzberg.ExtractionConfig{}); err != nil {
		t.Fatalf("call failed: %v", err)
	}
}
// Test_BatchFileSyncBasic runs BatchExtractFilesSync on a PDF fixture and a
// text fixture and requires the call to return no error.
func Test_BatchFileSyncBasic(t *testing.T) {
	fixture := []byte(`[{"path":"pdf/fake_memo.pdf"},{"path":"text/fake_text.txt"}]`)

	var paths []kreuzberg.BatchFileItem
	if parseErr := json.Unmarshal(fixture, &paths); parseErr != nil {
		t.Fatalf("config parse failed: %v", parseErr)
	}

	if _, err := kreuzberg.BatchExtractFilesSync(paths, kreuzberg.ExtractionConfig{}); err != nil {
		t.Fatalf("call failed: %v", err)
	}
}

36
e2e/go/code_test.go generated Normal file
View File

@@ -0,0 +1,36 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// E2e tests for category: code
package e2e_test
import (
"strings"
"testing"
"github.com/stretchr/testify/assert"
kreuzberg "github.com/kreuzberg-dev/kreuzberg/v5"
)
// Test_CodeShebangDetection extracts code/script.sh through
// kreuzberg.ExtractFileSync with an explicit text/x-source-code MIME type and
// checks the reported MIME type, a minimum content length, and that the
// content mentions both "build" and "clean".
func Test_CodeShebangDetection(t *testing.T) {
	mimeType := `text/x-source-code`

	result, err := kreuzberg.ExtractFileSync(`code/script.sh`, &mimeType, kreuzberg.ExtractionConfig{})
	if err != nil {
		t.Fatalf("call failed: %v", err)
	}

	if gotMime := strings.TrimSpace(string(result.MimeType)); gotMime != `text/x-source-code` {
		t.Errorf("equals mismatch: got %v", result.MimeType)
	}
	assert.GreaterOrEqual(t, len(result.Content), 10, "expected length >= 10")

	// Each keyword is checked independently so every missing one is reported.
	for _, keyword := range []string{`build`, `clean`} {
		if !strings.Contains(string(result.Content), keyword) {
			t.Errorf("expected to contain %s", keyword)
		}
	}
}

338
e2e/go/contract_test.go generated Normal file
View File

@@ -0,0 +1,338 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// E2e tests for category: contract
package e2e_test
import (
"encoding/json"
"os"
"strings"
"testing"
"github.com/stretchr/testify/assert"
kreuzberg "github.com/kreuzberg-dev/kreuzberg/v5"
)
// Test_ApiBatchBytesAsync calls kreuzberg.ExtractFile on pdf/fake_memo.pdf
// with no MIME hint and a default config, then checks the reported MIME type,
// a minimum content length, and that the content contains at least one of two
// expected phrases.
func Test_ApiBatchBytesAsync(t *testing.T) {
	result, err := kreuzberg.ExtractFile(`pdf/fake_memo.pdf`, nil, kreuzberg.ExtractionConfig{})
	if err != nil {
		t.Fatalf("call failed: %v", err)
	}

	if gotMime := strings.TrimSpace(string(result.MimeType)); gotMime != `application/pdf` {
		t.Errorf("equals mismatch: got %v", result.MimeType)
	}
	assert.GreaterOrEqual(t, len(result.Content), 10, "expected length >= 10")

	matched := false
	for _, phrase := range []string{`May 5, 2023`, `Mallori`} {
		if strings.Contains(string(result.Content), phrase) {
			matched = true
			break
		}
	}
	if !matched {
		t.Errorf("expected to contain at least one of the specified values")
	}
}
// Test_ApiBatchBytesWithConfigsAsync calls kreuzberg.ExtractFile on
// pdf/fake_memo.pdf with a config decoded from {"output_format":"markdown"},
// then checks the reported MIME type and a minimum content length.
func Test_ApiBatchBytesWithConfigsAsync(t *testing.T) {
	cfg := kreuzberg.ExtractionConfig{}
	if parseErr := json.Unmarshal([]byte(`{"output_format":"markdown"}`), &cfg); parseErr != nil {
		t.Fatalf("config parse failed: %v", parseErr)
	}

	result, err := kreuzberg.ExtractFile(`pdf/fake_memo.pdf`, nil, cfg)
	if err != nil {
		t.Fatalf("call failed: %v", err)
	}

	if gotMime := strings.TrimSpace(string(result.MimeType)); gotMime != `application/pdf` {
		t.Errorf("equals mismatch: got %v", result.MimeType)
	}
	assert.GreaterOrEqual(t, len(result.Content), 10, "expected length >= 10")
	// skipped: field 'metadata.output_format' not available on result type
}
// Test_ApiBatchFileAsync calls kreuzberg.ExtractFile on pdf/fake_memo.pdf with
// no MIME hint and a default config, then checks the reported MIME type, a
// minimum content length, and that the content contains at least one of two
// expected phrases.
func Test_ApiBatchFileAsync(t *testing.T) {
	result, err := kreuzberg.ExtractFile(`pdf/fake_memo.pdf`, nil, kreuzberg.ExtractionConfig{})
	if err != nil {
		t.Fatalf("call failed: %v", err)
	}

	if gotMime := strings.TrimSpace(string(result.MimeType)); gotMime != `application/pdf` {
		t.Errorf("equals mismatch: got %v", result.MimeType)
	}
	assert.GreaterOrEqual(t, len(result.Content), 10, "expected length >= 10")

	matched := false
	for _, phrase := range []string{`May 5, 2023`, `Mallori`} {
		if strings.Contains(string(result.Content), phrase) {
			matched = true
			break
		}
	}
	if !matched {
		t.Errorf("expected to contain at least one of the specified values")
	}
}
// Test_ApiBatchFileWithConfigsAsync covers the "async batch file extraction
// with per-file configs" fixture using a markdown output config.
//
// NOTE(review): the fixture describes batch_extract_files, yet the call made
// here is kreuzberg.ExtractFile on a single path — confirm this is intended.
func Test_ApiBatchFileWithConfigsAsync(t *testing.T) {
	var config kreuzberg.ExtractionConfig
	parseErr := json.Unmarshal([]byte(`{"output_format":"markdown"}`), &config)
	if parseErr != nil {
		t.Fatalf("config parse failed: %v", parseErr)
	}

	extracted, callErr := kreuzberg.ExtractFile(`pdf/fake_memo.pdf`, nil, config)
	if callErr != nil {
		t.Fatalf("call failed: %v", callErr)
	}

	if mime := strings.TrimSpace(string(extracted.MimeType)); mime != `application/pdf` {
		t.Errorf("equals mismatch: got %v", extracted.MimeType)
	}
	assert.GreaterOrEqual(t, len(extracted.Content), 10, "expected length >= 10")
	// metadata.output_format is not asserted: the generator reports that field
	// as unavailable on the result type.
}
// Test_ApiExtractBytesAsync covers the "async bytes extraction" fixture with a
// default config and checks MIME type, minimum length and known memo text.
//
// NOTE(review): the fixture describes extract_bytes, yet the call made here is
// kreuzberg.ExtractFile on a path — confirm this is intended.
func Test_ApiExtractBytesAsync(t *testing.T) {
	extracted, callErr := kreuzberg.ExtractFile(`pdf/fake_memo.pdf`, nil, kreuzberg.ExtractionConfig{})
	if callErr != nil {
		t.Fatalf("call failed: %v", callErr)
	}

	if mime := strings.TrimSpace(string(extracted.MimeType)); mime != `application/pdf` {
		t.Errorf("equals mismatch: got %v", extracted.MimeType)
	}
	assert.GreaterOrEqual(t, len(extracted.Content), 10, "expected length >= 10")

	// Either marker from the memo is enough.
	text := string(extracted.Content)
	if !strings.Contains(text, `May 5, 2023`) && !strings.Contains(text, `Mallori`) {
		t.Errorf("expected to contain at least one of the specified values")
	}
}
// Test_ApiExtractFileAsync covers the "async file extraction" fixture
// (extract_file): it extracts the fake memo PDF with a default config and
// checks MIME type, minimum length and known memo text.
func Test_ApiExtractFileAsync(t *testing.T) {
	extracted, callErr := kreuzberg.ExtractFile(`pdf/fake_memo.pdf`, nil, kreuzberg.ExtractionConfig{})
	if callErr != nil {
		t.Fatalf("call failed: %v", callErr)
	}

	if mime := strings.TrimSpace(string(extracted.MimeType)); mime != `application/pdf` {
		t.Errorf("equals mismatch: got %v", extracted.MimeType)
	}
	assert.GreaterOrEqual(t, len(extracted.Content), 10, "expected length >= 10")

	// Either marker from the memo is enough.
	text := string(extracted.Content)
	if !strings.Contains(text, `May 5, 2023`) && !strings.Contains(text, `Mallori`) {
		t.Errorf("expected to contain at least one of the specified values")
	}
}
// Test_ConfigChunkingPrependHeadingContext extracts a markdown fixture with the
// markdown chunker and prepend_heading_context enabled, then checks that the
// chunk list is present, that no chunk is empty, and that every chunk (the
// first one included) carries a heading context.
func Test_ConfigChunkingPrependHeadingContext(t *testing.T) {
	var config kreuzberg.ExtractionConfig
	parseErr := json.Unmarshal([]byte(`{"chunking":{"chunker_type":"markdown","max_chars":300,"max_overlap":50,"prepend_heading_context":true}}`), &config)
	if parseErr != nil {
		t.Fatalf("config parse failed: %v", parseErr)
	}

	extracted, callErr := kreuzberg.ExtractFileSync(`markdown/extraction_test.md`, nil, config)
	if callErr != nil {
		t.Fatalf("call failed: %v", callErr)
	}
	assert.GreaterOrEqual(t, len(extracted.Content), 10, "expected length >= 10")

	chunks := extracted.Chunks
	// A nil chunk list fails both "every chunk" checks; an empty non-nil list
	// passes them vacuously.
	everyHasContent := chunks != nil
	everyHasHeading := chunks != nil
	for _, c := range chunks {
		if c.Content == "" {
			everyHasContent = false
		}
		if c.Metadata.HeadingContext == nil {
			everyHasHeading = false
		}
	}
	assert.True(t, everyHasContent, "expected true")
	assert.True(t, everyHasHeading, "expected true")
	// The first chunk must exist and have a heading context.
	assert.True(t, len(chunks) > 0 && chunks[0].Metadata.HeadingContext != nil, "expected true")
}
// Test_ConfigDocumentStructureWithHeadings extracts a DOCX fixture with
// include_document_structure enabled and checks the reported MIME type.
func Test_ConfigDocumentStructureWithHeadings(t *testing.T) {
	var config kreuzberg.ExtractionConfig
	parseErr := json.Unmarshal([]byte(`{"include_document_structure":true}`), &config)
	if parseErr != nil {
		t.Fatalf("config parse failed: %v", parseErr)
	}

	extracted, callErr := kreuzberg.ExtractFileSync(`docx/fake.docx`, nil, config)
	if callErr != nil {
		t.Fatalf("call failed: %v", callErr)
	}

	const wantMime = `application/vnd.openxmlformats-officedocument.wordprocessingml.document`
	if strings.TrimSpace(string(extracted.MimeType)) != wantMime {
		t.Errorf("equals mismatch: got %v", extracted.MimeType)
	}
	// The 'document' and 'document.nodes' assertions are not generated: the
	// generator reports those fields as unavailable on the result type.
}
// Test_ConfigElementTypes extracts a DOCX fixture with the element_based result
// format and checks that the reported MIME type contains the DOCX media type.
func Test_ConfigElementTypes(t *testing.T) {
	var config kreuzberg.ExtractionConfig
	parseErr := json.Unmarshal([]byte(`{"result_format":"element_based"}`), &config)
	if parseErr != nil {
		t.Fatalf("config parse failed: %v", parseErr)
	}

	extracted, callErr := kreuzberg.ExtractFileSync(`docx/unit_test_headers.docx`, nil, config)
	if callErr != nil {
		t.Fatalf("call failed: %v", callErr)
	}

	// Only one acceptable value is listed, so a single containment check suffices.
	if !strings.Contains(string(extracted.MimeType), `application/vnd.openxmlformats-officedocument.wordprocessingml.document`) {
		t.Errorf("expected to contain at least one of the specified values")
	}
	// The 'elements' assertion is not generated: the generator reports that
	// field as unavailable on the result type.
}
// Test_ConfigExtractionTimeout checks that a config carrying
// extraction_timeout_secs is accepted and that the memo PDF still extracts
// with the expected MIME type and a minimum amount of content.
func Test_ConfigExtractionTimeout(t *testing.T) {
	var config kreuzberg.ExtractionConfig
	parseErr := json.Unmarshal([]byte(`{"extraction_timeout_secs":300}`), &config)
	if parseErr != nil {
		t.Fatalf("config parse failed: %v", parseErr)
	}

	extracted, callErr := kreuzberg.ExtractFileSync(`pdf/fake_memo.pdf`, nil, config)
	if callErr != nil {
		t.Fatalf("call failed: %v", callErr)
	}

	if mime := strings.TrimSpace(string(extracted.MimeType)); mime != `application/pdf` {
		t.Errorf("equals mismatch: got %v", extracted.MimeType)
	}
	assert.GreaterOrEqual(t, len(extracted.Content), 10, "expected length >= 10")
}
// Test_ConfigKeywords extracts the memo PDF with YAKE keyword extraction
// configured and checks MIME type and minimum content length.
func Test_ConfigKeywords(t *testing.T) {
	var config kreuzberg.ExtractionConfig
	parseErr := json.Unmarshal([]byte(`{"keywords":{"algorithm":"yake","max_keywords":10}}`), &config)
	if parseErr != nil {
		t.Fatalf("config parse failed: %v", parseErr)
	}

	extracted, callErr := kreuzberg.ExtractFileSync(`pdf/fake_memo.pdf`, nil, config)
	if callErr != nil {
		t.Fatalf("call failed: %v", callErr)
	}

	if mime := strings.TrimSpace(string(extracted.MimeType)); mime != `application/pdf` {
		t.Errorf("equals mismatch: got %v", extracted.MimeType)
	}
	assert.GreaterOrEqual(t, len(extracted.Content), 10, "expected length >= 10")
	// Both 'keywords' assertions are not generated: the generator reports that
	// field as unavailable on the Go ExtractionResult.
}
// Test_ConfigPages extracts the memo PDF with page extraction and page markers
// enabled, then checks MIME type, minimum length and that the content contains
// the text "PAGE".
func Test_ConfigPages(t *testing.T) {
	var config kreuzberg.ExtractionConfig
	parseErr := json.Unmarshal([]byte(`{"pages":{"extract_pages":true,"insert_page_markers":true}}`), &config)
	if parseErr != nil {
		t.Fatalf("config parse failed: %v", parseErr)
	}

	extracted, callErr := kreuzberg.ExtractFileSync(`pdf/fake_memo.pdf`, nil, config)
	if callErr != nil {
		t.Fatalf("call failed: %v", callErr)
	}

	if mime := strings.TrimSpace(string(extracted.MimeType)); mime != `application/pdf` {
		t.Errorf("equals mismatch: got %v", extracted.MimeType)
	}
	assert.GreaterOrEqual(t, len(extracted.Content), 10, "expected length >= 10")

	// Only one acceptable value is listed, so a single containment check suffices.
	if !strings.Contains(string(extracted.Content), `PAGE`) {
		t.Errorf("expected to contain at least one of the specified values")
	}
}
// Test_ConfigQualityEnabled extracts the memo PDF with quality processing
// enabled and checks MIME type and minimum content length.
func Test_ConfigQualityEnabled(t *testing.T) {
	var config kreuzberg.ExtractionConfig
	parseErr := json.Unmarshal([]byte(`{"enable_quality_processing":true}`), &config)
	if parseErr != nil {
		t.Fatalf("config parse failed: %v", parseErr)
	}

	extracted, callErr := kreuzberg.ExtractFileSync(`pdf/fake_memo.pdf`, nil, config)
	if callErr != nil {
		t.Fatalf("call failed: %v", callErr)
	}

	if mime := strings.TrimSpace(string(extracted.MimeType)); mime != `application/pdf` {
		t.Errorf("equals mismatch: got %v", extracted.MimeType)
	}
	assert.GreaterOrEqual(t, len(extracted.Content), 10, "expected length >= 10")
	// The three 'quality_score' assertions are not generated: the generator
	// reports that field as unavailable on the result type, so the score range
	// named in the fixture is not checked here.
}
// Test_ConfigSecurityLimits extracts a ZIP archive with custom security limits
// and checks that a ZIP media type is reported and that content was produced.
func Test_ConfigSecurityLimits(t *testing.T) {
	var config kreuzberg.ExtractionConfig
	parseErr := json.Unmarshal([]byte(`{"security_limits":{"max_archive_size":104857600,"max_compression_ratio":50,"max_files_in_archive":100}}`), &config)
	if parseErr != nil {
		t.Fatalf("config parse failed: %v", parseErr)
	}

	extracted, callErr := kreuzberg.ExtractFileSync(`archives/documents.zip`, nil, config)
	if callErr != nil {
		t.Fatalf("call failed: %v", callErr)
	}

	// Either spelling of the ZIP media type is acceptable.
	mime := string(extracted.MimeType)
	if !strings.Contains(mime, `application/zip`) && !strings.Contains(mime, `application/x-zip-compressed`) {
		t.Errorf("expected to contain at least one of the specified values")
	}
	assert.GreaterOrEqual(t, len(extracted.Content), 10, "expected length >= 10")
}
// Test_ConfigTreeSitter extracts a Python source file with a tree_sitter
// config and checks the reported MIME type and a minimum content length.
func Test_ConfigTreeSitter(t *testing.T) {
	var config kreuzberg.ExtractionConfig
	parseErr := json.Unmarshal([]byte(`{"tree_sitter":{"groups":["web"],"languages":["python","rust"],"process":{"comments":false,"diagnostics":false,"docstrings":false,"exports":true,"imports":true,"structure":true,"symbols":false}}}`), &config)
	if parseErr != nil {
		t.Fatalf("config parse failed: %v", parseErr)
	}

	extracted, callErr := kreuzberg.ExtractFileSync(`code/hello.py`, nil, config)
	if callErr != nil {
		t.Fatalf("call failed: %v", callErr)
	}

	if mime := strings.TrimSpace(string(extracted.MimeType)); mime != `text/x-source-code` {
		t.Errorf("equals mismatch: got %v", extracted.MimeType)
	}
	assert.GreaterOrEqual(t, len(extracted.Content), 5, "expected length >= 5")
}
// Test_OutputFormatBytesMarkdown reads the memo PDF into memory and extracts it
// through the bytes API with a markdown output config, then checks MIME type
// and minimum content length.
func Test_OutputFormatBytesMarkdown(t *testing.T) {
	pdfData, readErr := os.ReadFile(`pdf/fake_memo.pdf`)
	if readErr != nil {
		t.Fatalf("read fixture pdf/fake_memo.pdf: %v", readErr)
	}

	var config kreuzberg.ExtractionConfig
	parseErr := json.Unmarshal([]byte(`{"output_format":"markdown"}`), &config)
	if parseErr != nil {
		t.Fatalf("config parse failed: %v", parseErr)
	}

	extracted, callErr := kreuzberg.ExtractBytesSync(pdfData, `application/pdf`, config)
	if callErr != nil {
		t.Fatalf("call failed: %v", callErr)
	}

	if mime := strings.TrimSpace(string(extracted.MimeType)); mime != `application/pdf` {
		t.Errorf("equals mismatch: got %v", extracted.MimeType)
	}
	assert.GreaterOrEqual(t, len(extracted.Content), 10, "expected length >= 10")
	// metadata.output_format is not asserted: the generator reports that field
	// as unavailable on the result type.
}
// Test_OutputFormatMarkdown extracts the memo PDF from disk with a markdown
// output config and checks MIME type and minimum content length.
func Test_OutputFormatMarkdown(t *testing.T) {
	var config kreuzberg.ExtractionConfig
	parseErr := json.Unmarshal([]byte(`{"output_format":"markdown"}`), &config)
	if parseErr != nil {
		t.Fatalf("config parse failed: %v", parseErr)
	}

	extracted, callErr := kreuzberg.ExtractFileSync(`pdf/fake_memo.pdf`, nil, config)
	if callErr != nil {
		t.Fatalf("call failed: %v", callErr)
	}

	if mime := strings.TrimSpace(string(extracted.MimeType)); mime != `application/pdf` {
		t.Errorf("equals mismatch: got %v", extracted.MimeType)
	}
	assert.GreaterOrEqual(t, len(extracted.Content), 10, "expected length >= 10")
	// metadata.output_format is not asserted: the generator reports that field
	// as unavailable on the result type.
}

59
e2e/go/detection_test.go generated Normal file
View File

@@ -0,0 +1,59 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// E2e tests for category: detection
package e2e_test
import (
"os"
"testing"
kreuzberg "github.com/kreuzberg-dev/kreuzberg/v5"
)
// Test_DetectMimeBytesHtml feeds an HTML fixture to DetectMimeTypeFromBytes and
// requires only that the call succeeds; the detected value is not inspected.
func Test_DetectMimeBytesHtml(t *testing.T) {
	htmlData, readErr := os.ReadFile(`html/html.html`)
	if readErr != nil {
		t.Fatalf("read fixture html/html.html: %v", readErr)
	}

	if _, callErr := kreuzberg.DetectMimeTypeFromBytes(htmlData); callErr != nil {
		t.Fatalf("call failed: %v", callErr)
	}
}
// Test_DetectMimeBytesPdf feeds a PDF fixture to DetectMimeTypeFromBytes and
// requires only that the call succeeds; the detected value is not inspected.
func Test_DetectMimeBytesPdf(t *testing.T) {
	pdfData, readErr := os.ReadFile(`pdf/fake_memo.pdf`)
	if readErr != nil {
		t.Fatalf("read fixture pdf/fake_memo.pdf: %v", readErr)
	}

	if _, callErr := kreuzberg.DetectMimeTypeFromBytes(pdfData); callErr != nil {
		t.Fatalf("call failed: %v", callErr)
	}
}
// Test_DetectMimeBytesPng feeds a PNG fixture to DetectMimeTypeFromBytes and
// requires only that the call succeeds; the detected value is not inspected.
func Test_DetectMimeBytesPng(t *testing.T) {
	pngData, readErr := os.ReadFile(`images/test_hello_world.png`)
	if readErr != nil {
		t.Fatalf("read fixture images/test_hello_world.png: %v", readErr)
	}

	if _, callErr := kreuzberg.DetectMimeTypeFromBytes(pngData); callErr != nil {
		t.Fatalf("call failed: %v", callErr)
	}
}
// Test_GetExtensionsUnknownMime expects GetExtensionsForMime to return an error
// for a MIME type that is not known.
func Test_GetExtensionsUnknownMime(t *testing.T) {
	if _, callErr := kreuzberg.GetExtensionsForMime(`application/x-totally-unknown`); callErr == nil {
		t.Errorf("expected an error, but call succeeded")
	}
}

View File

@@ -0,0 +1,27 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// E2e tests for category: document_extractor_management
package e2e_test
import (
"testing"
kreuzberg "github.com/kreuzberg-dev/kreuzberg/v5"
)
// Test_DocumentExtractorsClear invokes ClearDocumentExtractors and discards
// whatever it returns.
//
// NOTE(review): the fixture also says "verify list is empty", but no such
// check is performed here.
func Test_DocumentExtractorsClear(t *testing.T) {
	_ = kreuzberg.ClearDocumentExtractors()
}
// Test_ExtractorsList requires that listing the registered document extractors
// succeeds; the returned list itself is not inspected.
func Test_ExtractorsList(t *testing.T) {
	if _, callErr := kreuzberg.ListDocumentExtractors(); callErr != nil {
		t.Fatalf("call failed: %v", callErr)
	}
}

61
e2e/go/embed_async_pending_test.go generated Normal file
View File

@@ -0,0 +1,61 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// E2e tests for category: embed_async_pending
package e2e_test
import (
"encoding/json"
"testing"
"github.com/stretchr/testify/assert"
kreuzberg "github.com/kreuzberg-dev/kreuzberg/v5"
)
// Test_EmbedTextsAsyncEmptyInput calls EmbedTextsAsync with an empty text list
// and a default embedding config, and expects the call to succeed with zero
// returned elements.
func Test_EmbedTextsAsyncEmptyInput(t *testing.T) {
	// embed_texts_async: empty text list
	var texts []string
	if err := json.Unmarshal([]byte(`[]`), &texts); err != nil {
		t.Fatalf("config parse failed: %v", err)
	}
	result, err := kreuzberg.EmbedTextsAsync(texts, kreuzberg.EmbeddingConfig{})
	if err != nil {
		t.Fatalf("call failed: %v", err)
	}
	// testify's Equal takes (expected, actual); the expected count goes first so
	// that a failure reports the two values the right way round.
	assert.Equal(t, 0, len(result), "expected exactly 0 elements")
}
// Test_EmbedTextsAsyncHappy embeds two short texts with a default embedding
// config and expects at least two elements in the result.
func Test_EmbedTextsAsyncHappy(t *testing.T) {
	var texts []string
	parseErr := json.Unmarshal([]byte(`["first","second"]`), &texts)
	if parseErr != nil {
		t.Fatalf("config parse failed: %v", parseErr)
	}

	embeddings, callErr := kreuzberg.EmbedTextsAsync(texts, kreuzberg.EmbeddingConfig{})
	if callErr != nil {
		t.Fatalf("call failed: %v", callErr)
	}
	assert.GreaterOrEqual(t, len(embeddings), 2, "expected at least 2 elements")
}
// Test_EmbedTextsAsyncPresetSwitch embeds one text with the "balanced" preset
// selected through the config and requires only that the call succeeds.
func Test_EmbedTextsAsyncPresetSwitch(t *testing.T) {
	var texts []string
	textsErr := json.Unmarshal([]byte(`["text"]`), &texts)
	if textsErr != nil {
		t.Fatalf("config parse failed: %v", textsErr)
	}

	var config kreuzberg.EmbeddingConfig
	configErr := json.Unmarshal([]byte(`{"model":{"name":"balanced","type":"preset"}}`), &config)
	if configErr != nil {
		t.Fatalf("config parse failed: %v", configErr)
	}

	if _, callErr := kreuzberg.EmbedTextsAsync(texts, config); callErr != nil {
		t.Fatalf("call failed: %v", callErr)
	}
}

31
e2e/go/embed_extra_test.go generated Normal file
View File

@@ -0,0 +1,31 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// E2e tests for category: embed_extra
package e2e_test
import (
"encoding/json"
"testing"
kreuzberg "github.com/kreuzberg-dev/kreuzberg/v5"
)
// Test_EmbedTextsBatch embeds two texts with the "balanced" preset and requires
// only that the call succeeds; the embeddings are not inspected.
func Test_EmbedTextsBatch(t *testing.T) {
	var texts []string
	textsErr := json.Unmarshal([]byte(`["hello","world"]`), &texts)
	if textsErr != nil {
		t.Fatalf("config parse failed: %v", textsErr)
	}

	var config kreuzberg.EmbeddingConfig
	configErr := json.Unmarshal([]byte(`{"model":{"name":"balanced","type":"preset"}}`), &config)
	if configErr != nil {
		t.Fatalf("config parse failed: %v", configErr)
	}

	if _, callErr := kreuzberg.EmbedTexts(texts, config); callErr != nil {
		t.Fatalf("call failed: %v", callErr)
	}
}

View File

@@ -0,0 +1,27 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// E2e tests for category: embedding_backend_management
package e2e_test
import (
"testing"
kreuzberg "github.com/kreuzberg-dev/kreuzberg/v5"
)
// Test_EmbeddingBackendsClear invokes ClearEmbeddingBackends and discards
// whatever it returns.
//
// NOTE(review): the fixture also says "verify list is empty", but no such
// check is performed here.
func Test_EmbeddingBackendsClear(t *testing.T) {
	_ = kreuzberg.ClearEmbeddingBackends()
}
// Test_EmbeddingBackendsList requires that listing the registered embedding
// backends succeeds; the returned list itself is not inspected.
func Test_EmbeddingBackendsList(t *testing.T) {
	if _, callErr := kreuzberg.ListEmbeddingBackends(); callErr != nil {
		t.Fatalf("call failed: %v", callErr)
	}
}

62
e2e/go/embeddings_test.go generated Normal file
View File

@@ -0,0 +1,62 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// E2e tests for category: embeddings
package e2e_test
import (
"encoding/json"
"testing"
"github.com/stretchr/testify/assert"
kreuzberg "github.com/kreuzberg-dev/kreuzberg/v5"
)
// Test_EmbedTextsDifferentPreset embeds two texts with the "multilingual"
// preset and expects at least two elements in the result.
func Test_EmbedTextsDifferentPreset(t *testing.T) {
	var texts []string
	textsErr := json.Unmarshal([]byte(`["Hello world","test"]`), &texts)
	if textsErr != nil {
		t.Fatalf("config parse failed: %v", textsErr)
	}

	var config kreuzberg.EmbeddingConfig
	configErr := json.Unmarshal([]byte(`{"model":{"name":"multilingual","type":"preset"}}`), &config)
	if configErr != nil {
		t.Fatalf("config parse failed: %v", configErr)
	}

	embeddings, callErr := kreuzberg.EmbedTexts(texts, config)
	if callErr != nil {
		t.Fatalf("call failed: %v", callErr)
	}
	assert.GreaterOrEqual(t, len(embeddings), 2, "expected at least 2 elements")
}
// Test_GetEmbeddingPresetKnown looks up the "balanced" preset and discards the
// result, so it only verifies that the lookup completes.
func Test_GetEmbeddingPresetKnown(t *testing.T) {
	_ = kreuzberg.GetEmbeddingPreset(`balanced`)
}
// Test_GetEmbeddingPresetNominal is the nominal-case lookup of the "balanced"
// preset; the result is discarded, so only completion of the call is verified.
func Test_GetEmbeddingPresetNominal(t *testing.T) {
	_ = kreuzberg.GetEmbeddingPreset(`balanced`)
}
// Test_GetEmbeddingPresetUnknown expects a nil result when an unknown preset
// name is looked up.
func Test_GetEmbeddingPresetUnknown(t *testing.T) {
	if preset := kreuzberg.GetEmbeddingPreset(`nonexistent-xyz`); preset != nil {
		t.Errorf("expected empty value, got %v", preset)
	}
}
// Test_ListEmbeddingPresetsSanity expects ListEmbeddingPresets to return at
// least one entry.
func Test_ListEmbeddingPresetsSanity(t *testing.T) {
	if presets := kreuzberg.ListEmbeddingPresets(); len(presets) == 0 {
		t.Errorf("expected non-empty value")
	}
}

80
e2e/go/error_test.go generated Normal file
View File

@@ -0,0 +1,80 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// E2e tests for category: error
package e2e_test
import (
"encoding/json"
"os"
"testing"
kreuzberg "github.com/kreuzberg-dev/kreuzberg/v5"
)
// Test_ErrorEmptyBytes extracts the contents of the empty-text fixture as
// text/plain and expects the call to succeed rather than return an error.
func Test_ErrorEmptyBytes(t *testing.T) {
	payload, readErr := os.ReadFile(`text/empty.txt`)
	if readErr != nil {
		t.Fatalf("read fixture text/empty.txt: %v", readErr)
	}

	if _, callErr := kreuzberg.ExtractBytesSync(payload, `text/plain`, kreuzberg.ExtractionConfig{}); callErr != nil {
		t.Fatalf("call failed: %v", callErr)
	}
}
// Test_ErrorEmptyMime expects bytes extraction to fail when the MIME type
// argument is the empty string.
func Test_ErrorEmptyMime(t *testing.T) {
	payload, readErr := os.ReadFile(`text/plain.txt`)
	if readErr != nil {
		t.Fatalf("read fixture text/plain.txt: %v", readErr)
	}

	if _, callErr := kreuzberg.ExtractBytesSync(payload, ``, kreuzberg.ExtractionConfig{}); callErr == nil {
		t.Errorf("expected an error, but call succeeded")
	}
}
// Test_ErrorExtractBytesConflictingOcr expects bytes extraction to fail when
// the config sets both disable_ocr and force_ocr.
func Test_ErrorExtractBytesConflictingOcr(t *testing.T) {
	payload, readErr := os.ReadFile(`text/fake_text.txt`)
	if readErr != nil {
		t.Fatalf("read fixture text/fake_text.txt: %v", readErr)
	}

	var config kreuzberg.ExtractionConfig
	parseErr := json.Unmarshal([]byte(`{"disable_ocr":true,"force_ocr":true}`), &config)
	if parseErr != nil {
		t.Fatalf("config parse failed: %v", parseErr)
	}

	if _, callErr := kreuzberg.ExtractBytesSync(payload, `text/plain`, config); callErr == nil {
		t.Errorf("expected an error, but call succeeded")
	}
}
// Test_ErrorInvalidMimeFormat expects bytes extraction to fail when the MIME
// type argument is not in type/subtype form.
func Test_ErrorInvalidMimeFormat(t *testing.T) {
	payload, readErr := os.ReadFile(`text/plain.txt`)
	if readErr != nil {
		t.Fatalf("read fixture text/plain.txt: %v", readErr)
	}

	if _, callErr := kreuzberg.ExtractBytesSync(payload, `not-a-mime`, kreuzberg.ExtractionConfig{}); callErr == nil {
		t.Errorf("expected an error, but call succeeded")
	}
}
// Test_ErrorUnsupportedMime expects bytes extraction to fail for a well-formed
// but unsupported MIME type.
func Test_ErrorUnsupportedMime(t *testing.T) {
	payload, readErr := os.ReadFile(`text/plain.txt`)
	if readErr != nil {
		t.Fatalf("read fixture text/plain.txt: %v", readErr)
	}

	if _, callErr := kreuzberg.ExtractBytesSync(payload, `application/x-nonexistent`, kreuzberg.ExtractionConfig{}); callErr == nil {
		t.Errorf("expected an error, but call succeeded")
	}
}

86
e2e/go/format_specific_test.go generated Normal file
View File

@@ -0,0 +1,86 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// E2e tests for category: format_specific
package e2e_test
import (
"os"
"strings"
"testing"
"github.com/stretchr/testify/assert"
kreuzberg "github.com/kreuzberg-dev/kreuzberg/v5"
)
// Test_FormatDocxStandalone extracts a DOCX fixture through the bytes API with
// a default config and expects at least 20 bytes of content.
func Test_FormatDocxStandalone(t *testing.T) {
	docxData, readErr := os.ReadFile(`docx/fake.docx`)
	if readErr != nil {
		t.Fatalf("read fixture docx/fake.docx: %v", readErr)
	}

	extracted, callErr := kreuzberg.ExtractBytesSync(docxData, `application/vnd.openxmlformats-officedocument.wordprocessingml.document`, kreuzberg.ExtractionConfig{})
	if callErr != nil {
		t.Fatalf("call failed: %v", callErr)
	}
	assert.GreaterOrEqual(t, len(extracted.Content), 20, "expected length >= 20")
}
// Test_FormatHwpxStandalone extracts an HWPX fixture through the bytes API with
// a default config and expects at least 20 bytes of content that include the
// greeting text.
func Test_FormatHwpxStandalone(t *testing.T) {
	hwpxData, readErr := os.ReadFile(`hwpx/simple.hwpx`)
	if readErr != nil {
		t.Fatalf("read fixture hwpx/simple.hwpx: %v", readErr)
	}

	extracted, callErr := kreuzberg.ExtractBytesSync(hwpxData, `application/haansofthwpx`, kreuzberg.ExtractionConfig{})
	if callErr != nil {
		t.Fatalf("call failed: %v", callErr)
	}
	assert.GreaterOrEqual(t, len(extracted.Content), 20, "expected length >= 20")

	const wantText = `Hello from HWPX`
	if !strings.Contains(string(extracted.Content), wantText) {
		t.Errorf("expected to contain %s, got %v", wantText, extracted.Content)
	}
}
// Test_FormatPdfText extracts the memo PDF through the bytes API with a default
// config and expects at least 50 bytes of content containing known memo text.
func Test_FormatPdfText(t *testing.T) {
	pdfData, readErr := os.ReadFile(`pdf/fake_memo.pdf`)
	if readErr != nil {
		t.Fatalf("read fixture pdf/fake_memo.pdf: %v", readErr)
	}

	extracted, callErr := kreuzberg.ExtractBytesSync(pdfData, `application/pdf`, kreuzberg.ExtractionConfig{})
	if callErr != nil {
		t.Fatalf("call failed: %v", callErr)
	}
	assert.GreaterOrEqual(t, len(extracted.Content), 50, "expected length >= 50")

	// Either marker from the memo is enough.
	text := string(extracted.Content)
	if !strings.Contains(text, `Mallori`) && !strings.Contains(text, `May`) {
		t.Errorf("expected to contain at least one of the specified values")
	}
}
// Test_FormatPptx extracts a PPTX fixture from disk with an explicit MIME type
// and requires only that the call succeeds.
func Test_FormatPptx(t *testing.T) {
	mimeType := `application/vnd.openxmlformats-officedocument.presentationml.presentation`
	if _, callErr := kreuzberg.ExtractFileSync(`pptx/simple.pptx`, &mimeType, kreuzberg.ExtractionConfig{}); callErr != nil {
		t.Fatalf("call failed: %v", callErr)
	}
}
// Test_FormatXlsx extracts an XLSX fixture from disk with an explicit MIME type
// and requires only that the call succeeds.
func Test_FormatXlsx(t *testing.T) {
	mimeType := `application/vnd.openxmlformats-officedocument.spreadsheetml.sheet`
	if _, callErr := kreuzberg.ExtractFileSync(`xlsx/stanley_cups.xlsx`, &mimeType, kreuzberg.ExtractionConfig{}); callErr != nil {
		t.Fatalf("call failed: %v", callErr)
	}
}

10
e2e/go/go.mod generated Normal file
View File

@@ -0,0 +1,10 @@
module e2e_go
go 1.26
require (
github.com/kreuzberg-dev/kreuzberg/v5 v5.0.0-rc.3
github.com/stretchr/testify v1.11.1
)
replace github.com/kreuzberg-dev/kreuzberg/v5 => ../../packages/go/v5

13
e2e/go/helpers_test.go generated Normal file
View File

@@ -0,0 +1,13 @@
package e2e_test
import "encoding/json"
// jsonString converts a value to its JSON string representation.
// Array fields use jsonString instead of fmt.Sprint to preserve structure.
func jsonString(value any) string {
	// Marshal failures (e.g. unsupported types) collapse to the empty string so
	// callers can use the result directly in containment checks.
	if raw, marshalErr := json.Marshal(value); marshalErr == nil {
		return string(raw)
	}
	return ""
}

87
e2e/go/main_test.go generated Normal file
View File

@@ -0,0 +1,87 @@
package e2e_test
import (
"bufio"
"encoding/json"
"io"
"os"
"os/exec"
"path/filepath"
"runtime"
"strings"
"testing"
)
// TestMain prepares the process-wide environment for the e2e suite and then
// runs it. It switches into the shared test_documents directory when that
// directory exists and, unless MOCK_SERVER_URL is already exported, launches
// the mock-server binary (when it has been built) and publishes the URLs the
// server prints on stdout as environment variables. Every non-panicking path
// ends in os.Exit with the code returned by m.Run.
func TestMain(m *testing.M) {
// Locate this source file on disk; every other path is resolved relative to it.
// NOTE(review): the ok result of runtime.Caller is discarded.
_, filename, _, _ := runtime.Caller(0)
dir := filepath.Dir(filename)
// Change to the configured test-documents directory (if it exists) so that fixture
// file paths like "pdf/fake_memo.pdf" resolve correctly when running go test
// from e2e/go/. Repos without document fixtures (web crawler, network clients) do
// not ship this directory — skip chdir and run from e2e/go/.
testDocumentsDir := filepath.Join(dir, "..", "..", "test_documents")
if info, err := os.Stat(testDocumentsDir); err == nil && info.IsDir() {
if err := os.Chdir(testDocumentsDir); err != nil {
panic(err)
}
}
// If MOCK_SERVER_URL is already set, a parent process (e.g. `alef test-apps run`)
// started a shared mock-server and exported its URL (plus any MOCK_SERVERS /
// MOCK_SERVER_<FIXTURE_ID> vars). Use it as-is and do NOT spawn our own server.
if os.Getenv("MOCK_SERVER_URL") != "" {
os.Exit(m.Run())
}
// Start the mock HTTP server if it exists.
mockServerBin := filepath.Join(dir, "..", "rust", "target", "release", "mock-server")
if _, err := os.Stat(mockServerBin); err == nil {
// The server receives the fixtures directory as its only argument.
fixturesDir := filepath.Join(dir, "..", "..", "fixtures")
cmd := exec.Command(mockServerBin, fixturesDir)
cmd.Stderr = os.Stderr
stdout, err := cmd.StdoutPipe()
if err != nil {
panic(err)
}
// Keep a writable pipe to the mock-server's stdin so the
// server does not see EOF and exit immediately. The mock-server
// blocks reading stdin until the parent closes the pipe.
stdin, err := cmd.StdinPipe()
if err != nil {
panic(err)
}
if err := cmd.Start(); err != nil {
panic(err)
}
// Startup handshake: read the server's stdout line by line until its URLs
// are known. If stdout closes first the loop simply ends and the suite runs
// with whatever variables were set so far.
scanner := bufio.NewScanner(stdout)
for scanner.Scan() {
line := scanner.Text()
if strings.HasPrefix(line, "MOCK_SERVER_URL=") {
// The single-URL line does not end the loop: keep reading in case a
// MOCK_SERVERS line follows.
_ = os.Setenv("MOCK_SERVER_URL", strings.TrimPrefix(line, "MOCK_SERVER_URL="))
} else if strings.HasPrefix(line, "MOCK_SERVERS=") {
_jsonVal := strings.TrimPrefix(line, "MOCK_SERVERS=")
_ = os.Setenv("MOCK_SERVERS", _jsonVal)
// Parse the JSON map and set per-fixture env vars (MOCK_SERVER_<FIXTURE_ID>).
// A payload that fails to parse is ignored; MOCK_SERVERS itself stays set.
var _perFixture map[string]string
if err := json.Unmarshal([]byte(_jsonVal), &_perFixture); err == nil {
for _fid, _furl := range _perFixture {
_ = os.Setenv("MOCK_SERVER_"+strings.ToUpper(_fid), _furl)
}
}
break
} else if os.Getenv("MOCK_SERVER_URL") != "" {
// Any other line seen once the URL is known ends the handshake.
break
}
}
// Keep consuming the server's remaining stdout in the background —
// presumably so the child never blocks writing to a full pipe.
go func() { _, _ = io.Copy(io.Discard, stdout) }()
code := m.Run()
// Teardown: close stdin, send an interrupt, then wait for the process.
// Errors from each step are deliberately ignored.
_ = stdin.Close()
_ = cmd.Process.Signal(os.Interrupt)
_ = cmd.Wait()
os.Exit(code)
} else {
// No mock-server binary: run the suite without one.
code := m.Run()
os.Exit(code)
}
}

58
e2e/go/mime_utilities_test.go generated Normal file
View File

@@ -0,0 +1,58 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// E2e tests for category: mime_utilities
package e2e_test
import (
"os"
"strings"
"testing"
kreuzberg "github.com/kreuzberg-dev/kreuzberg/v5"
)
// Test_MimeDetectBytes detects the MIME type of the memo PDF from its bytes and
// expects the detected value to contain "pdf".
func Test_MimeDetectBytes(t *testing.T) {
	pdfData, readErr := os.ReadFile(`pdf/fake_memo.pdf`)
	if readErr != nil {
		t.Fatalf("read fixture pdf/fake_memo.pdf: %v", readErr)
	}

	detected, callErr := kreuzberg.DetectMimeTypeFromBytes(pdfData)
	if callErr != nil {
		t.Fatalf("call failed: %v", callErr)
	}

	const wantPart = `pdf`
	if !strings.Contains(string(detected), wantPart) {
		t.Errorf("expected to contain %s, got %v", wantPart, detected)
	}
}
// Test_MimeDetectImage detects the MIME type of a PNG fixture from its bytes
// and expects the detected value to contain "png".
func Test_MimeDetectImage(t *testing.T) {
	pngData, readErr := os.ReadFile(`images/test_hello_world.png`)
	if readErr != nil {
		t.Fatalf("read fixture images/test_hello_world.png: %v", readErr)
	}

	detected, callErr := kreuzberg.DetectMimeTypeFromBytes(pngData)
	if callErr != nil {
		t.Fatalf("call failed: %v", callErr)
	}

	const wantPart = `png`
	if !strings.Contains(string(detected), wantPart) {
		t.Errorf("expected to contain %s, got %v", wantPart, detected)
	}
}
// Test_MimeGetExtensions asks for the extensions registered for
// application/pdf and expects their JSON rendering to contain "pdf".
func Test_MimeGetExtensions(t *testing.T) {
	extensions, callErr := kreuzberg.GetExtensionsForMime(`application/pdf`)
	if callErr != nil {
		t.Fatalf("call failed: %v", callErr)
	}

	// The result is rendered with jsonString before the containment check.
	const wantPart = `pdf`
	if !strings.Contains(jsonString(extensions), wantPart) {
		t.Errorf("expected to contain %s, got %v", wantPart, extensions)
	}
}

32
e2e/go/ocr_backend_management_test.go generated Normal file
View File

@@ -0,0 +1,32 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// E2e tests for category: ocr_backend_management
package e2e_test
import (
"testing"
kreuzberg "github.com/kreuzberg-dev/kreuzberg/v5"
)
// Test_OcrBackendsClear invokes ClearOcrBackends and discards whatever it
// returns.
//
// NOTE(review): the fixture also says "verify list is empty", but no such
// check is performed here.
func Test_OcrBackendsClear(t *testing.T) {
	_ = kreuzberg.ClearOcrBackends()
}
// Test_OcrBackendsList requires that listing the registered OCR backends
// succeeds; the returned list itself is not inspected.
func Test_OcrBackendsList(t *testing.T) {
	if _, callErr := kreuzberg.ListOcrBackends(); callErr != nil {
		t.Fatalf("call failed: %v", callErr)
	}
}
// Test_OcrBackendsUnregister asks to unregister a backend name that does not
// exist and discards the result, so it only verifies that the call completes.
func Test_OcrBackendsUnregister(t *testing.T) {
	_ = kreuzberg.UnregisterOcrBackend(`nonexistent-backend-xyz`)
}

43
e2e/go/pdf_test.go generated Normal file
View File

@@ -0,0 +1,43 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// E2e tests for category: pdf
package e2e_test
import (
"os"
"testing"
"github.com/stretchr/testify/assert"
kreuzberg "github.com/kreuzberg-dev/kreuzberg/v5"
)
// Test_RenderPdfPageFirst renders page index 0 of the memo PDF to PNG and
// expects at least 100 bytes of output.
func Test_RenderPdfPageFirst(t *testing.T) {
	pdfData, readErr := os.ReadFile(`pdf/fake_memo.pdf`)
	if readErr != nil {
		t.Fatalf("read fixture pdf/fake_memo.pdf: %v", readErr)
	}

	png, callErr := kreuzberg.RenderPdfPageToPng(pdfData, 0, nil, nil)
	if callErr != nil {
		t.Fatalf("call failed: %v", callErr)
	}
	assert.GreaterOrEqual(t, len(png), 100, "expected length >= 100")
}
// Test_RenderPdfPageOutOfRange expects rendering to fail when page index 999 is
// requested from the memo PDF.
func Test_RenderPdfPageOutOfRange(t *testing.T) {
	pdfData, readErr := os.ReadFile(`pdf/fake_memo.pdf`)
	if readErr != nil {
		t.Fatalf("read fixture pdf/fake_memo.pdf: %v", readErr)
	}

	if _, callErr := kreuzberg.RenderPdfPageToPng(pdfData, 999, nil, nil); callErr == nil {
		t.Errorf("expected an error, but call succeeded")
	}
}

148
e2e/go/plugin_api_test.go generated Normal file
View File

@@ -0,0 +1,148 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// E2e tests for category: plugin_api
package e2e_test
import (
"encoding/json"
"testing"
kreuzberg "github.com/kreuzberg-dev/kreuzberg/v5"
)
// testStub_register_document_extractor_trait_bridge is a no-op value handed to
// kreuzberg.RegisterDocumentExtractor by the corresponding test. Every method
// ignores its arguments and returns the zero value of its result type.
type testStub_register_document_extractor_trait_bridge struct{}

// ExtractBytes reports no payload and no error.
func (testStub_register_document_extractor_trait_bridge) ExtractBytes(_ []byte, _ string, _ kreuzberg.ExtractionConfig) (json.RawMessage, error) {
	return nil, nil
}

// ExtractFile reports no payload and no error.
func (testStub_register_document_extractor_trait_bridge) ExtractFile(_ string, _ string, _ kreuzberg.ExtractionConfig) (json.RawMessage, error) {
	return nil, nil
}

// SupportedMimeTypes reports a nil list.
func (testStub_register_document_extractor_trait_bridge) SupportedMimeTypes() []string {
	return nil
}

// Priority reports 0.
func (testStub_register_document_extractor_trait_bridge) Priority() int32 {
	return 0
}

// CanHandle always reports false.
func (testStub_register_document_extractor_trait_bridge) CanHandle(_ string, _ string) bool {
	return false
}

// Name reports an empty string.
func (testStub_register_document_extractor_trait_bridge) Name() string {
	return ""
}

// Version reports an empty string.
func (testStub_register_document_extractor_trait_bridge) Version() string {
	return ""
}

// Initialize does nothing and reports no error.
func (testStub_register_document_extractor_trait_bridge) Initialize() error {
	return nil
}

// Shutdown does nothing and reports no error.
func (testStub_register_document_extractor_trait_bridge) Shutdown() error {
	return nil
}

// Description reports an empty string.
func (testStub_register_document_extractor_trait_bridge) Description() string {
	return ""
}

// Author reports an empty string.
func (testStub_register_document_extractor_trait_bridge) Author() string {
	return ""
}
func Test_RegisterDocumentExtractorTraitBridge(t *testing.T) {
// register_document_extractor: trait bridge
_ = kreuzberg.RegisterDocumentExtractor(testStub_register_document_extractor_trait_bridge{})
}
// testStub_register_embedding_backend_trait_bridge is a no-op value handed to
// kreuzberg.RegisterEmbeddingBackend by the corresponding test. Every method
// ignores its arguments and returns the zero value of its result type.
type testStub_register_embedding_backend_trait_bridge struct{}

// Dimensions reports 0.
func (testStub_register_embedding_backend_trait_bridge) Dimensions() uint {
	return 0
}

// Embed reports no vectors and no error.
func (testStub_register_embedding_backend_trait_bridge) Embed(_ []string) ([][]float32, error) {
	return nil, nil
}

// Name reports an empty string.
func (testStub_register_embedding_backend_trait_bridge) Name() string {
	return ""
}

// Version reports an empty string.
func (testStub_register_embedding_backend_trait_bridge) Version() string {
	return ""
}

// Initialize does nothing and reports no error.
func (testStub_register_embedding_backend_trait_bridge) Initialize() error {
	return nil
}

// Shutdown does nothing and reports no error.
func (testStub_register_embedding_backend_trait_bridge) Shutdown() error {
	return nil
}

// Description reports an empty string.
func (testStub_register_embedding_backend_trait_bridge) Description() string {
	return ""
}

// Author reports an empty string.
func (testStub_register_embedding_backend_trait_bridge) Author() string {
	return ""
}
func Test_RegisterEmbeddingBackendTraitBridge(t *testing.T) {
// register_embedding_backend: trait bridge
_ = kreuzberg.RegisterEmbeddingBackend(testStub_register_embedding_backend_trait_bridge{})
}
// testStub_register_ocr_backend_trait_bridge is a no-op value handed to
// kreuzberg.RegisterOcrBackend by the corresponding test. Every method
// ignores its arguments and returns the zero value of its result type.
type testStub_register_ocr_backend_trait_bridge struct{}

// ProcessImage reports an empty ExtractionResult and no error.
func (testStub_register_ocr_backend_trait_bridge) ProcessImage(_ []byte, _ kreuzberg.OcrConfig) (kreuzberg.ExtractionResult, error) {
	return kreuzberg.ExtractionResult{}, nil
}

// ProcessImageFile reports an empty ExtractionResult and no error.
func (testStub_register_ocr_backend_trait_bridge) ProcessImageFile(_ string, _ kreuzberg.OcrConfig) (kreuzberg.ExtractionResult, error) {
	return kreuzberg.ExtractionResult{}, nil
}

// SupportsLanguage always reports false.
func (testStub_register_ocr_backend_trait_bridge) SupportsLanguage(_ string) bool {
	return false
}

// BackendType reports the empty OcrBackendType value.
func (testStub_register_ocr_backend_trait_bridge) BackendType() kreuzberg.OcrBackendType {
	return ""
}

// SupportedLanguages reports a nil list.
func (testStub_register_ocr_backend_trait_bridge) SupportedLanguages() []string {
	return nil
}

// SupportsTableDetection always reports false.
func (testStub_register_ocr_backend_trait_bridge) SupportsTableDetection() bool {
	return false
}

// SupportsDocumentProcessing always reports false.
func (testStub_register_ocr_backend_trait_bridge) SupportsDocumentProcessing() bool {
	return false
}

// ProcessDocument reports an empty ExtractionResult and no error.
func (testStub_register_ocr_backend_trait_bridge) ProcessDocument(_ string, _ kreuzberg.OcrConfig) (kreuzberg.ExtractionResult, error) {
	return kreuzberg.ExtractionResult{}, nil
}

// Name reports an empty string.
func (testStub_register_ocr_backend_trait_bridge) Name() string {
	return ""
}

// Version reports an empty string.
func (testStub_register_ocr_backend_trait_bridge) Version() string {
	return ""
}

// Initialize does nothing and reports no error.
func (testStub_register_ocr_backend_trait_bridge) Initialize() error {
	return nil
}

// Shutdown does nothing and reports no error.
func (testStub_register_ocr_backend_trait_bridge) Shutdown() error {
	return nil
}

// Description reports an empty string.
func (testStub_register_ocr_backend_trait_bridge) Description() string {
	return ""
}

// Author reports an empty string.
func (testStub_register_ocr_backend_trait_bridge) Author() string {
	return ""
}
func Test_RegisterOcrBackendTraitBridge(t *testing.T) {
// register_ocr_backend: trait bridge
_ = kreuzberg.RegisterOcrBackend(testStub_register_ocr_backend_trait_bridge{})
}
// testStub_register_post_processor_trait_bridge is a no-op value handed to
// kreuzberg.RegisterPostProcessor by the corresponding test. Every method
// ignores its arguments and returns the zero value of its result type.
type testStub_register_post_processor_trait_bridge struct{}

// Process does nothing and reports no error.
func (testStub_register_post_processor_trait_bridge) Process(_ kreuzberg.ExtractionResult, _ kreuzberg.ExtractionConfig) error {
	return nil
}

// ProcessingStage reports the empty ProcessingStage value.
func (testStub_register_post_processor_trait_bridge) ProcessingStage() kreuzberg.ProcessingStage {
	return ""
}

// ShouldProcess always reports false.
func (testStub_register_post_processor_trait_bridge) ShouldProcess(_ kreuzberg.ExtractionResult, _ kreuzberg.ExtractionConfig) bool {
	return false
}

// EstimatedDurationMs reports 0.
func (testStub_register_post_processor_trait_bridge) EstimatedDurationMs(_ kreuzberg.ExtractionResult) uint64 {
	return 0
}

// Priority reports 0.
func (testStub_register_post_processor_trait_bridge) Priority() int32 {
	return 0
}

// Name reports an empty string.
func (testStub_register_post_processor_trait_bridge) Name() string {
	return ""
}

// Version reports an empty string.
func (testStub_register_post_processor_trait_bridge) Version() string {
	return ""
}

// Initialize does nothing and reports no error.
func (testStub_register_post_processor_trait_bridge) Initialize() error {
	return nil
}

// Shutdown does nothing and reports no error.
func (testStub_register_post_processor_trait_bridge) Shutdown() error {
	return nil
}

// Description reports an empty string.
func (testStub_register_post_processor_trait_bridge) Description() string {
	return ""
}

// Author reports an empty string.
func (testStub_register_post_processor_trait_bridge) Author() string {
	return ""
}
func Test_RegisterPostProcessorTraitBridge(t *testing.T) {
// register_post_processor: trait bridge
_ = kreuzberg.RegisterPostProcessor(testStub_register_post_processor_trait_bridge{})
}
// testStub_register_renderer_trait_bridge is a no-op value handed to
// kreuzberg.RegisterRenderer by the corresponding test. Every method ignores
// its arguments and returns the zero value of its result type.
type testStub_register_renderer_trait_bridge struct{}

// Render reports an empty string and no error.
func (testStub_register_renderer_trait_bridge) Render(_ json.RawMessage) (string, error) {
	return "", nil
}

// Name reports an empty string.
func (testStub_register_renderer_trait_bridge) Name() string {
	return ""
}

// Version reports an empty string.
func (testStub_register_renderer_trait_bridge) Version() string {
	return ""
}

// Initialize does nothing and reports no error.
func (testStub_register_renderer_trait_bridge) Initialize() error {
	return nil
}

// Shutdown does nothing and reports no error.
func (testStub_register_renderer_trait_bridge) Shutdown() error {
	return nil
}

// Description reports an empty string.
func (testStub_register_renderer_trait_bridge) Description() string {
	return ""
}

// Author reports an empty string.
func (testStub_register_renderer_trait_bridge) Author() string {
	return ""
}
func Test_RegisterRendererTraitBridge(t *testing.T) {
// register_renderer: trait bridge
_ = kreuzberg.RegisterRenderer(testStub_register_renderer_trait_bridge{})
}
// testStub_register_validator_trait_bridge is a no-op value handed to
// kreuzberg.RegisterValidator by the corresponding test. Every method ignores
// its arguments and returns the zero value of its result type.
type testStub_register_validator_trait_bridge struct{}

// Validate does nothing and reports no error.
func (testStub_register_validator_trait_bridge) Validate(_ kreuzberg.ExtractionResult, _ kreuzberg.ExtractionConfig) error {
	return nil
}

// ShouldValidate always reports false.
func (testStub_register_validator_trait_bridge) ShouldValidate(_ kreuzberg.ExtractionResult, _ kreuzberg.ExtractionConfig) bool {
	return false
}

// Priority reports 0.
func (testStub_register_validator_trait_bridge) Priority() int32 {
	return 0
}

// Name reports an empty string.
func (testStub_register_validator_trait_bridge) Name() string {
	return ""
}

// Version reports an empty string.
func (testStub_register_validator_trait_bridge) Version() string {
	return ""
}

// Initialize does nothing and reports no error.
func (testStub_register_validator_trait_bridge) Initialize() error {
	return nil
}

// Shutdown does nothing and reports no error.
func (testStub_register_validator_trait_bridge) Shutdown() error {
	return nil
}

// Description reports an empty string.
func (testStub_register_validator_trait_bridge) Description() string {
	return ""
}

// Author reports an empty string.
func (testStub_register_validator_trait_bridge) Author() string {
	return ""
}
func Test_RegisterValidatorTraitBridge(t *testing.T) {
// register_validator: trait bridge
_ = kreuzberg.RegisterValidator(testStub_register_validator_trait_bridge{})
}
// Test_UnregisterDocumentExtractorAfterRegister calls
// kreuzberg.UnregisterDocumentExtractor for the name `test-extractor`.
//
// NOTE(review): despite the name, this test itself registers nothing under
// that name beforehand — confirm whether the fixture relies on earlier state.
func Test_UnregisterDocumentExtractorAfterRegister(t *testing.T) {
	// Best-effort call: a non-nil result is logged instead of being silently
	// discarded, but it does not fail the test.
	if err := kreuzberg.UnregisterDocumentExtractor(`test-extractor`); err != nil {
		t.Logf("UnregisterDocumentExtractor returned: %v", err)
	}
}
// Test_UnregisterEmbeddingBackendAfterRegister calls
// kreuzberg.UnregisterEmbeddingBackend for the name `test-embedding-backend`.
//
// NOTE(review): despite the name, this test itself registers nothing under
// that name beforehand — confirm whether the fixture relies on earlier state.
func Test_UnregisterEmbeddingBackendAfterRegister(t *testing.T) {
	// Best-effort call: a non-nil result is logged instead of being silently
	// discarded, but it does not fail the test.
	if err := kreuzberg.UnregisterEmbeddingBackend(`test-embedding-backend`); err != nil {
		t.Logf("UnregisterEmbeddingBackend returned: %v", err)
	}
}
// Test_UnregisterPostProcessorAfterRegister calls
// kreuzberg.UnregisterPostProcessor for the name `test-processor`.
//
// NOTE(review): despite the name, this test itself registers nothing under
// that name beforehand — confirm whether the fixture relies on earlier state.
func Test_UnregisterPostProcessorAfterRegister(t *testing.T) {
	// Best-effort call: a non-nil result is logged instead of being silently
	// discarded, but it does not fail the test.
	if err := kreuzberg.UnregisterPostProcessor(`test-processor`); err != nil {
		t.Logf("UnregisterPostProcessor returned: %v", err)
	}
}
// Test_UnregisterRendererAfterRegister calls kreuzberg.UnregisterRenderer for
// the name `test-renderer`.
//
// NOTE(review): despite the name, this test itself registers nothing under
// that name beforehand — confirm whether the fixture relies on earlier state.
func Test_UnregisterRendererAfterRegister(t *testing.T) {
	// Best-effort call: a non-nil result is logged instead of being silently
	// discarded, but it does not fail the test.
	if err := kreuzberg.UnregisterRenderer(`test-renderer`); err != nil {
		t.Logf("UnregisterRenderer returned: %v", err)
	}
}
// Test_UnregisterValidatorAfterRegister calls kreuzberg.UnregisterValidator
// for the name `test-validator`.
//
// NOTE(review): despite the name, this test itself registers nothing under
// that name beforehand — confirm whether the fixture relies on earlier state.
func Test_UnregisterValidatorAfterRegister(t *testing.T) {
	// Best-effort call: a non-nil result is logged instead of being silently
	// discarded, but it does not fail the test.
	if err := kreuzberg.UnregisterValidator(`test-validator`); err != nil {
		t.Logf("UnregisterValidator returned: %v", err)
	}
}

27
e2e/go/post_processor_management_test.go generated Normal file
View File

@@ -0,0 +1,27 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// E2e tests for category: post_processor_management
package e2e_test
import (
"testing"
kreuzberg "github.com/kreuzberg-dev/kreuzberg/v5"
)
// Test_PostProcessorsClear exercises kreuzberg.ClearPostProcessors.
//
// This is a smoke check only: the registry is not listed afterwards, so the
// test does not verify that it ends up empty.
func Test_PostProcessorsClear(t *testing.T) {
	// Best-effort call: a non-nil result is logged instead of being silently
	// discarded, but it does not fail the test.
	if err := kreuzberg.ClearPostProcessors(); err != nil {
		t.Logf("ClearPostProcessors returned: %v", err)
	}
}
// Test_PostProcessorsList checks that listing the registered post-processors
// succeeds. The returned list itself is not inspected.
func Test_PostProcessorsList(t *testing.T) {
	if _, listErr := kreuzberg.ListPostProcessors(); listErr != nil {
		t.Fatalf("call failed: %v", listErr)
	}
}

38
e2e/go/registry_operations_test.go generated Normal file
View File

@@ -0,0 +1,38 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// E2e tests for category: registry_operations
package e2e_test
import (
"testing"
kreuzberg "github.com/kreuzberg-dev/kreuzberg/v5"
)
// Test_ExtensionsDocx checks that kreuzberg.GetExtensionsForMime succeeds for
// the DOCX MIME type. The returned extensions are not inspected.
func Test_ExtensionsDocx(t *testing.T) {
	const docxMime = `application/vnd.openxmlformats-officedocument.wordprocessingml.document`

	if _, lookupErr := kreuzberg.GetExtensionsForMime(docxMime); lookupErr != nil {
		t.Fatalf("call failed: %v", lookupErr)
	}
}
// Test_ExtensionsHtml checks that kreuzberg.GetExtensionsForMime succeeds for
// the HTML MIME type. The returned extensions are not inspected.
func Test_ExtensionsHtml(t *testing.T) {
	const htmlMime = `text/html`

	if _, lookupErr := kreuzberg.GetExtensionsForMime(htmlMime); lookupErr != nil {
		t.Fatalf("call failed: %v", lookupErr)
	}
}
// Test_ExtensionsPdf checks that kreuzberg.GetExtensionsForMime succeeds for
// the PDF MIME type. The returned extensions are not inspected.
func Test_ExtensionsPdf(t *testing.T) {
	const pdfMime = `application/pdf`

	if _, lookupErr := kreuzberg.GetExtensionsForMime(pdfMime); lookupErr != nil {
		t.Fatalf("call failed: %v", lookupErr)
	}
}

62
e2e/go/registry_test.go generated Normal file
View File

@@ -0,0 +1,62 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// E2e tests for category: registry
package e2e_test
import (
"testing"
kreuzberg "github.com/kreuzberg-dev/kreuzberg/v5"
)
// Test_ListDocumentExtractors checks that kreuzberg.ListDocumentExtractors
// returns without error. The returned list is not inspected.
func Test_ListDocumentExtractors(t *testing.T) {
	if _, listErr := kreuzberg.ListDocumentExtractors(); listErr != nil {
		t.Fatalf("call failed: %v", listErr)
	}
}
// Test_ListEmbeddingBackends checks that kreuzberg.ListEmbeddingBackends
// returns without error. The returned list is not inspected.
func Test_ListEmbeddingBackends(t *testing.T) {
	if _, listErr := kreuzberg.ListEmbeddingBackends(); listErr != nil {
		t.Fatalf("call failed: %v", listErr)
	}
}
// Test_ListOcrBackends checks that kreuzberg.ListOcrBackends returns without
// error. The returned list is not inspected.
func Test_ListOcrBackends(t *testing.T) {
	if _, listErr := kreuzberg.ListOcrBackends(); listErr != nil {
		t.Fatalf("call failed: %v", listErr)
	}
}
// Test_ListPostProcessors checks that kreuzberg.ListPostProcessors returns
// without error. The returned list is not inspected.
func Test_ListPostProcessors(t *testing.T) {
	if _, listErr := kreuzberg.ListPostProcessors(); listErr != nil {
		t.Fatalf("call failed: %v", listErr)
	}
}
// Test_ListRenderers checks that kreuzberg.ListRenderers returns without
// error. The returned list is not inspected.
func Test_ListRenderers(t *testing.T) {
	if _, listErr := kreuzberg.ListRenderers(); listErr != nil {
		t.Fatalf("call failed: %v", listErr)
	}
}
// Test_ListValidators checks that kreuzberg.ListValidators returns without
// error. The returned list is not inspected.
func Test_ListValidators(t *testing.T) {
	if _, listErr := kreuzberg.ListValidators(); listErr != nil {
		t.Fatalf("call failed: %v", listErr)
	}
}

27
e2e/go/renderer_management_test.go generated Normal file
View File

@@ -0,0 +1,27 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// E2e tests for category: renderer_management
package e2e_test
import (
"testing"
kreuzberg "github.com/kreuzberg-dev/kreuzberg/v5"
)
// Test_RenderersClear exercises kreuzberg.ClearRenderers.
//
// This is a smoke check only: the registry is not listed afterwards, so the
// test does not verify that it ends up empty.
func Test_RenderersClear(t *testing.T) {
	// Best-effort call: a non-nil result is logged instead of being silently
	// discarded, but it does not fail the test.
	if err := kreuzberg.ClearRenderers(); err != nil {
		t.Logf("ClearRenderers returned: %v", err)
	}
}
// Test_RenderersList checks that listing the registered renderers succeeds.
// The returned list itself is not inspected.
func Test_RenderersList(t *testing.T) {
	if _, listErr := kreuzberg.ListRenderers(); listErr != nil {
		t.Fatalf("call failed: %v", listErr)
	}
}

196
e2e/go/smoke_test.go generated Normal file
View File

@@ -0,0 +1,196 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// E2e tests for category: smoke
package e2e_test
import (
"encoding/json"
"os"
"strings"
"testing"
"github.com/stretchr/testify/assert"
kreuzberg "github.com/kreuzberg-dev/kreuzberg/v5"
)
// Test_OcrImagePng extracts a PNG fixture from memory with a default
// ExtractionConfig and checks the reported MIME type, that the content is
// non-empty, and that it contains at least one of the expected words.
//
// Fixture note: in WASM this exercises the Uint8Array bridge parameter and
// Promise await in the generated OcrBackend bridge.
func Test_OcrImagePng(t *testing.T) {
	const (
		fixture  = `images/test_hello_world.png`
		wantMime = `image/png`
	)

	imageData, readErr := os.ReadFile(fixture)
	if readErr != nil {
		t.Fatalf("read fixture %s: %v", fixture, readErr)
	}

	result, err := kreuzberg.ExtractBytes(imageData, wantMime, kreuzberg.ExtractionConfig{})
	if err != nil {
		t.Fatalf("call failed: %v", err)
	}

	// Compare the whitespace-trimmed MIME type and report both sides on failure.
	if gotMime := strings.TrimSpace(string(result.MimeType)); gotMime != wantMime {
		t.Errorf("MimeType mismatch: got %q, want %q", gotMime, wantMime)
	}
	assert.GreaterOrEqual(t, len(result.Content), 1, "expected length >= 1")

	// Any single candidate is enough for the check to pass.
	content := string(result.Content)
	candidates := []string{`Hello`, `World`, `hello`, `world`}
	found := false
	for _, candidate := range candidates {
		if strings.Contains(content, candidate) {
			found = true
			break
		}
	}
	if !found {
		t.Errorf("expected content to contain at least one of %q", candidates)
	}
}
// Test_SmokeDocxBasic extracts the docx/fake.docx fixture with an explicit
// MIME hint and checks the reported MIME type, a minimum content length of
// 20, and that the content contains at least one of the expected words.
func Test_SmokeDocxBasic(t *testing.T) {
	const wantMime = `application/vnd.openxmlformats-officedocument.wordprocessingml.document`

	// ExtractFile receives the MIME hint by pointer, so it needs an
	// addressable copy of the constant.
	mimeHint := wantMime
	result, err := kreuzberg.ExtractFile(`docx/fake.docx`, &mimeHint, kreuzberg.ExtractionConfig{})
	if err != nil {
		t.Fatalf("call failed: %v", err)
	}

	// Compare the whitespace-trimmed MIME type and report both sides on failure.
	if gotMime := strings.TrimSpace(string(result.MimeType)); gotMime != wantMime {
		t.Errorf("MimeType mismatch: got %q, want %q", gotMime, wantMime)
	}
	assert.GreaterOrEqual(t, len(result.Content), 20, "expected length >= 20")

	// Any single candidate is enough for the check to pass.
	content := string(result.Content)
	candidates := []string{`Lorem`, `ipsum`, `document`, `text`}
	found := false
	for _, candidate := range candidates {
		if strings.Contains(content, candidate) {
			found = true
			break
		}
	}
	if !found {
		t.Errorf("expected content to contain at least one of %q", candidates)
	}
}
// Test_SmokeHtmlBasic extracts the html/simple_table.html fixture with an
// explicit MIME hint and checks the reported MIME type, a minimum content
// length of 10, and that the content contains at least one expected phrase.
func Test_SmokeHtmlBasic(t *testing.T) {
	const wantMime = `text/html`

	// ExtractFile receives the MIME hint by pointer, so it needs an
	// addressable copy of the constant.
	mimeHint := wantMime
	result, err := kreuzberg.ExtractFile(`html/simple_table.html`, &mimeHint, kreuzberg.ExtractionConfig{})
	if err != nil {
		t.Fatalf("call failed: %v", err)
	}

	// Compare the whitespace-trimmed MIME type and report both sides on failure.
	if gotMime := strings.TrimSpace(string(result.MimeType)); gotMime != wantMime {
		t.Errorf("MimeType mismatch: got %q, want %q", gotMime, wantMime)
	}
	assert.GreaterOrEqual(t, len(result.Content), 10, "expected length >= 10")

	// Any single candidate is enough for the check to pass.
	content := string(result.Content)
	candidates := []string{`Sample Data Table`, `Laptop`, `Electronics`, `Product`}
	found := false
	for _, candidate := range candidates {
		if strings.Contains(content, candidate) {
			found = true
			break
		}
	}
	if !found {
		t.Errorf("expected content to contain at least one of %q", candidates)
	}
}
// Test_SmokeImagePng extracts the images/sample.png fixture with OCR disabled
// through the config and no MIME hint, then checks the reported MIME type.
func Test_SmokeImagePng(t *testing.T) {
	const wantMime = `image/png`

	// The config is built from its JSON form, as the fixture specifies it.
	var config kreuzberg.ExtractionConfig
	if err := json.Unmarshal([]byte(`{"disable_ocr":true}`), &config); err != nil {
		t.Fatalf("config parse failed: %v", err)
	}

	result, err := kreuzberg.ExtractFile(`images/sample.png`, nil, config)
	if err != nil {
		t.Fatalf("call failed: %v", err)
	}

	// Compare the whitespace-trimmed MIME type and report both sides on failure.
	if gotMime := strings.TrimSpace(string(result.MimeType)); gotMime != wantMime {
		t.Errorf("MimeType mismatch: got %q, want %q", gotMime, wantMime)
	}
}
// Test_SmokeJsonBasic extracts the json/simple.json fixture with an explicit
// MIME hint and checks the reported MIME type and a minimum content length
// of 5.
func Test_SmokeJsonBasic(t *testing.T) {
	const wantMime = `application/json`

	// ExtractFile receives the MIME hint by pointer, so it needs an
	// addressable copy of the constant.
	mimeHint := wantMime
	result, err := kreuzberg.ExtractFile(`json/simple.json`, &mimeHint, kreuzberg.ExtractionConfig{})
	if err != nil {
		t.Fatalf("call failed: %v", err)
	}

	// Compare the whitespace-trimmed MIME type and report both sides on failure.
	if gotMime := strings.TrimSpace(string(result.MimeType)); gotMime != wantMime {
		t.Errorf("MimeType mismatch: got %q, want %q", gotMime, wantMime)
	}
	assert.GreaterOrEqual(t, len(result.Content), 5, "expected length >= 5")
}
// Test_SmokePdfBasic extracts the pdf/fake_memo.pdf fixture with an explicit
// MIME hint and checks the reported MIME type, a minimum content length of
// 50, and that the content contains at least one expected phrase.
func Test_SmokePdfBasic(t *testing.T) {
	const wantMime = `application/pdf`

	// ExtractFile receives the MIME hint by pointer, so it needs an
	// addressable copy of the constant.
	mimeHint := wantMime
	result, err := kreuzberg.ExtractFile(`pdf/fake_memo.pdf`, &mimeHint, kreuzberg.ExtractionConfig{})
	if err != nil {
		t.Fatalf("call failed: %v", err)
	}

	// Compare the whitespace-trimmed MIME type and report both sides on failure.
	if gotMime := strings.TrimSpace(string(result.MimeType)); gotMime != wantMime {
		t.Errorf("MimeType mismatch: got %q, want %q", gotMime, wantMime)
	}
	assert.GreaterOrEqual(t, len(result.Content), 50, "expected length >= 50")

	// Any single candidate is enough for the check to pass.
	content := string(result.Content)
	candidates := []string{`May 5, 2023`, `To Whom it May Concern`}
	found := false
	for _, candidate := range candidates {
		if strings.Contains(content, candidate) {
			found = true
			break
		}
	}
	if !found {
		t.Errorf("expected content to contain at least one of %q", candidates)
	}
}
// Test_SmokeTxtBasic extracts the text/report.txt fixture with an explicit
// MIME hint and checks the reported MIME type and a minimum content length
// of 5.
func Test_SmokeTxtBasic(t *testing.T) {
	const wantMime = `text/plain`

	// ExtractFile receives the MIME hint by pointer, so it needs an
	// addressable copy of the constant.
	mimeHint := wantMime
	result, err := kreuzberg.ExtractFile(`text/report.txt`, &mimeHint, kreuzberg.ExtractionConfig{})
	if err != nil {
		t.Fatalf("call failed: %v", err)
	}

	// Compare the whitespace-trimmed MIME type and report both sides on failure.
	if gotMime := strings.TrimSpace(string(result.MimeType)); gotMime != wantMime {
		t.Errorf("MimeType mismatch: got %q, want %q", gotMime, wantMime)
	}
	assert.GreaterOrEqual(t, len(result.Content), 5, "expected length >= 5")
}
// Test_SmokeXlsxBasic extracts the xlsx/stanley_cups.xlsx fixture with an
// explicit MIME hint and checks the reported MIME type, a minimum content
// length of 100, and that every expected header, team and city code appears
// in the content.
func Test_SmokeXlsxBasic(t *testing.T) {
	const wantMime = `application/vnd.openxmlformats-officedocument.spreadsheetml.sheet`

	// ExtractFile receives the MIME hint by pointer, so it needs an
	// addressable copy of the constant.
	mimeHint := wantMime
	result, err := kreuzberg.ExtractFile(`xlsx/stanley_cups.xlsx`, &mimeHint, kreuzberg.ExtractionConfig{})
	if err != nil {
		t.Fatalf("call failed: %v", err)
	}

	// Compare the whitespace-trimmed MIME type and report both sides on failure.
	if gotMime := strings.TrimSpace(string(result.MimeType)); gotMime != wantMime {
		t.Errorf("MimeType mismatch: got %q, want %q", gotMime, wantMime)
	}
	assert.GreaterOrEqual(t, len(result.Content), 100, "expected length >= 100")

	// Every entry is required; each missing one is reported separately so a
	// single run shows the full set of gaps.
	content := string(result.Content)
	required := []string{
		`Team`,
		`Location`,
		`Stanley Cups`,
		`Blues`,
		`Flyers`,
		`Maple Leafs`,
		`STL`,
		`PHI`,
		`TOR`,
	}
	for _, want := range required {
		if !strings.Contains(content, want) {
			t.Errorf("expected content to contain %q", want)
		}
	}

	// skipped: field 'tables' not available on result type
	// skipped: field 'metadata.format.excel.sheet_count' not available on result type
	// skipped: field 'metadata.format.excel.sheet_names' not available on result type
}

27
e2e/go/validator_management_test.go generated Normal file
View File

@@ -0,0 +1,27 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// E2e tests for category: validator_management
package e2e_test
import (
"testing"
kreuzberg "github.com/kreuzberg-dev/kreuzberg/v5"
)
// Test_ValidatorsClear exercises kreuzberg.ClearValidators.
//
// This is a smoke check only: the registry is not listed afterwards, so the
// test does not verify that it ends up empty.
func Test_ValidatorsClear(t *testing.T) {
	// Best-effort call: a non-nil result is logged instead of being silently
	// discarded, but it does not fail the test.
	if err := kreuzberg.ClearValidators(); err != nil {
		t.Logf("ClearValidators returned: %v", err)
	}
}
// Test_ValidatorsList checks that listing the registered validators succeeds.
// The returned list itself is not inspected.
func Test_ValidatorsList(t *testing.T) {
	if _, listErr := kreuzberg.ListValidators(); listErr != nil {
		t.Fatalf("call failed: %v", listErr)
	}
}

Some files were not shown because too many files have changed in this diff Show More