This commit is contained in:
20
e2e/csharp/Kreuzberg.E2eTests.csproj
generated
Normal file
20
e2e/csharp/Kreuzberg.E2eTests.csproj
generated
Normal file
@@ -0,0 +1,20 @@
|
||||
<!-- xunit-based end-to-end test project; it references the Kreuzberg C# project by relative path. -->
<Project Sdk="Microsoft.NET.Sdk">
|
||||
<PropertyGroup>
|
||||
<TargetFramework>net10.0</TargetFramework>
|
||||
<!-- Nullable reference type analysis is on for every file in this project. -->
<Nullable>enable</Nullable>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<!-- Test-only assembly: excluded from packing and flagged as a test project for the SDK tooling. -->
<IsPackable>false</IsPackable>
|
||||
<IsTestProject>true</IsTestProject>
|
||||
<!-- The SDK does not auto-generate assembly-level attributes for this project. -->
<GenerateAssemblyInfo>false</GenerateAssemblyInfo>
|
||||
</PropertyGroup>
|
||||
|
||||
<!-- Test host, xunit framework, and the runner adapter. -->
<ItemGroup>
|
||||
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="18.5.1" />
|
||||
<PackageReference Include="xunit" Version="2.9.3" />
|
||||
<PackageReference Include="xunit.runner.visualstudio" Version="3.1.5" />
|
||||
</ItemGroup>
|
||||
|
||||
<!-- Project under test, referenced from source rather than as a package. -->
<ItemGroup>
|
||||
<ProjectReference Include="../../packages/csharp/Kreuzberg/Kreuzberg.csproj" />
|
||||
</ItemGroup>
|
||||
</Project>
|
||||
42
e2e/csharp/TestSetup.cs
generated
Normal file
42
e2e/csharp/TestSetup.cs
generated
Normal file
@@ -0,0 +1,42 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
using System;
|
||||
using System.IO;
|
||||
using System.Runtime.CompilerServices;
|
||||
|
||||
namespace Kreuzberg;
|
||||
|
||||
/// <summary>
/// Test bootstrap that positions the process working directory so the relative
/// fixture paths used by the e2e tests (for example <c>docx/fake.docx</c>) resolve.
/// </summary>
internal static class TestSetup
{
    /// <summary>
    /// Module initializer: walks up from <see cref="AppContext.BaseDirectory"/> one
    /// directory at a time until the repository root is recognised.
    /// </summary>
    /// <remarks>
    /// <list type="bullet">
    /// <item>If a directory has a <c>test_documents</c> child, the working directory
    /// is switched to that child and the search stops.</item>
    /// <item>Otherwise, if the directory contains an <c>alef.toml</c> file or a
    /// <c>fixtures</c> directory, it is treated as the repo root and the search stops
    /// with the working directory left untouched.</item>
    /// <item>If the filesystem root is reached without a match, nothing is changed.</item>
    /// </list>
    /// </remarks>
    [ModuleInitializer]
    internal static void Init()
    {
        for (DirectoryInfo? current = new DirectoryInfo(AppContext.BaseDirectory);
             current is not null;
             current = current.Parent)
        {
            var documentsDir = Path.Combine(current.FullName, "test_documents");
            if (Directory.Exists(documentsDir))
            {
                // chdir so fixture-relative paths resolve against test_documents/.
                Directory.SetCurrentDirectory(documentsDir);
                return;
            }

            // Repos without document fixtures: a root marker ends the climb, and
            // there is nothing to chdir into.
            var hasRootMarker =
                File.Exists(Path.Combine(current.FullName, "alef.toml"))
                || Directory.Exists(Path.Combine(current.FullName, "fixtures"));
            if (hasRootMarker)
            {
                return;
            }
        }
    }
}
|
||||
58
e2e/csharp/tests/AsyncTests.cs
generated
Normal file
58
e2e/csharp/tests/AsyncTests.cs
generated
Normal file
@@ -0,0 +1,58 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Net.Http;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
using System.Threading.Tasks;
|
||||
using Xunit;
|
||||
using Kreuzberg;
|
||||
using static Kreuzberg.KreuzbergLib;
|
||||
|
||||
namespace Kreuzberg
{
    /// <summary>
    /// E2e tests for category: async. Fixture paths are relative and therefore
    /// resolve against the process working directory.
    /// </summary>
    public class AsyncTests
    {
        /// <summary>Async extract_bytes call on a PDF document.</summary>
        [Fact]
        public async Task Test_AsyncExtractBytes()
        {
            // Read the fixture asynchronously so the test never blocks a thread on file I/O.
            var pdfBytes = await System.IO.File.ReadAllBytesAsync("pdf/fake_memo.pdf");

            var result = await KreuzbergLib.ExtractBytesAsync(pdfBytes, "application/pdf", null);

            Assert.Equal("application/pdf", result.MimeType!.Trim());
            Assert.True(result.Content.Length >= 50, "expected length >= 50");
        }

        /// <summary>extract_bytes with an empty MIME type must fail with a <c>KreuzbergException</c>.</summary>
        [Fact]
        public async Task Test_AsyncExtractBytesEmptyMime()
        {
            await Assert.ThrowsAnyAsync<KreuzbergException>(async () =>
            {
                // The read stays inside the lambda: a missing fixture surfaces as a
                // different exception type and so still fails the test.
                var bytes = await System.IO.File.ReadAllBytesAsync("text/plain.txt");
                await KreuzbergLib.ExtractBytesAsync(bytes, "", ExtractionConfig.FromJson("{}"));
            });
        }

        /// <summary>extract_bytes with an unsupported MIME type must fail with a <c>KreuzbergException</c>.</summary>
        [Fact]
        public async Task Test_AsyncExtractBytesInvalidMime()
        {
            await Assert.ThrowsAnyAsync<KreuzbergException>(async () =>
            {
                var bytes = await System.IO.File.ReadAllBytesAsync("text/plain.txt");
                await KreuzbergLib.ExtractBytesAsync(bytes, "application/x-nonexistent", ExtractionConfig.FromJson("{}"));
            });
        }
    }
}
|
||||
110
e2e/csharp/tests/BatchTests.cs
generated
Normal file
110
e2e/csharp/tests/BatchTests.cs
generated
Normal file
@@ -0,0 +1,110 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Net.Http;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
using System.Threading.Tasks;
|
||||
using Xunit;
|
||||
using Kreuzberg;
|
||||
using static Kreuzberg.KreuzbergLib;
|
||||
|
||||
namespace Kreuzberg
{
    /// <summary>
    /// E2e tests for category: batch. Most cases are smoke tests without assertions:
    /// they pass as long as the batch call returns without throwing.
    /// </summary>
    public class BatchTests
    {
        private static readonly JsonSerializerOptions ConfigOptions = new()
        {
            Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
            DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
        };

        /// <summary>batch_extract_bytes_sync with an invalid MIME type.</summary>
        [Fact]
        public void Test_BatchBytesInvalidMime()
        {
            var items = new List<BatchBytesItem>
            {
                new BatchBytesItem { Content = Encoding.UTF8.GetBytes("Hello"), MimeType = "application/x-nonexistent" },
            };

            _ = KreuzbergLib.BatchExtractBytesSync(items, null);
        }

        /// <summary>batch_extract_bytes: happy path with mixed inputs (plain text and HTML).</summary>
        [Fact]
        public async Task Test_BatchExtractBytesHappy()
        {
            var items = new List<BatchBytesItem>
            {
                new BatchBytesItem { Content = Encoding.UTF8.GetBytes("Hello, world!"), MimeType = "text/plain" },
                new BatchBytesItem { Content = Encoding.UTF8.GetBytes("<html><body>Test</body></html>"), MimeType = "text/html" },
            };

            var result = await KreuzbergLib.BatchExtractBytesAsync(items, null);

            Assert.True(result.Count >= 1, "expected at least 1 elements");
        }

        /// <summary>batch_extract_bytes: an unsupported MIME type must not make the call throw.</summary>
        [Fact]
        public async Task Test_BatchExtractBytesMixedFormat()
        {
            var items = new List<BatchBytesItem>
            {
                new BatchBytesItem { Content = Encoding.UTF8.GetBytes("PDF placeholder"), MimeType = "application/x-unknown" },
            };

            _ = await KreuzbergLib.BatchExtractBytesAsync(items, null);
        }

        /// <summary>batch_extract_bytes_sync: an empty batch yields an empty result.</summary>
        [Fact]
        public void Test_BatchExtractBytesSyncEmptyList()
        {
            var noItems = new List<BatchBytesItem>();

            var result = KreuzbergLib.BatchExtractBytesSync(noItems, null);

            Assert.Equal(0, result.Count);
        }

        /// <summary>batch_extract_bytes_sync: unsupported MIME type.</summary>
        [Fact]
        public void Test_BatchExtractBytesSyncInvalidMime()
        {
            var items = new List<BatchBytesItem>
            {
                new BatchBytesItem { Content = Encoding.UTF8.GetBytes("data"), MimeType = "application/x-unknown" },
            };

            _ = KreuzbergLib.BatchExtractBytesSync(items, null);
        }

        /// <summary>Extract text from multiple files asynchronously.</summary>
        [Fact]
        public async Task Test_BatchFileAsyncBasic()
        {
            var files = new List<BatchFileItem>
            {
                new BatchFileItem { Path = "pdf/fake_memo.pdf" },
                new BatchFileItem { Path = "text/fake_text.txt" },
            };

            _ = await KreuzbergLib.BatchExtractFilesAsync(files, null);
        }

        /// <summary>Async batch file extraction where the only path does not exist.</summary>
        [Fact]
        public async Task Test_BatchFileAsyncNotFound()
        {
            var files = new List<BatchFileItem>
            {
                new BatchFileItem { Path = "/nonexistent/a.pdf" },
            };

            _ = await KreuzbergLib.BatchExtractFilesAsync(files, null);
        }

        /// <summary>Sync batch file extraction where no path exists.</summary>
        [Fact]
        public void Test_BatchFileNotFound()
        {
            var files = new List<BatchFileItem>
            {
                new BatchFileItem { Path = "/nonexistent/a.pdf" },
                new BatchFileItem { Path = "/nonexistent/b.txt" },
            };

            _ = KreuzbergLib.BatchExtractFilesSync(files, null);
        }

        /// <summary>Sync batch file extraction mixing an existing and a missing path.</summary>
        [Fact]
        public void Test_BatchFilePartial()
        {
            var files = new List<BatchFileItem>
            {
                new BatchFileItem { Path = "text/plain.txt" },
                new BatchFileItem { Path = "/nonexistent/missing.pdf" },
            };

            _ = KreuzbergLib.BatchExtractFilesSync(files, null);
        }

        /// <summary>Extract text from multiple files synchronously.</summary>
        [Fact]
        public void Test_BatchFileSyncBasic()
        {
            var files = new List<BatchFileItem>
            {
                new BatchFileItem { Path = "pdf/fake_memo.pdf" },
                new BatchFileItem { Path = "text/fake_text.txt" },
            };

            _ = KreuzbergLib.BatchExtractFilesSync(files, null);
        }
    }
}
|
||||
40
e2e/csharp/tests/CodeTests.cs
generated
Normal file
40
e2e/csharp/tests/CodeTests.cs
generated
Normal file
@@ -0,0 +1,40 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Net.Http;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
using System.Threading.Tasks;
|
||||
using Xunit;
|
||||
using Kreuzberg;
|
||||
using static Kreuzberg.KreuzbergLib;
|
||||
|
||||
namespace Kreuzberg
{
    /// <summary>E2e tests for category: code.</summary>
    public class CodeTests
    {
        /// <summary>
        /// Extracts the shell script fixture from a file path with an explicit
        /// source-code MIME type and checks that the expected words survive extraction.
        /// </summary>
        [Fact]
        public void Test_CodeShebangDetection()
        {
            // Test language detection from shebang line
            var result = KreuzbergLib.ExtractFileSync("code/script.sh", "text/x-source-code", null);

            Assert.Equal("text/x-source-code", result.MimeType!.Trim());
            Assert.True(result.Content.Length >= 10, "expected length >= 10");

            // Case-insensitive match without lowercasing through the current culture:
            // under tr-TR, "I".ToLower() is a dotless 'ı', which would make "build" unmatchable.
            var content = result.Content.ToString();
            Assert.Contains("build", content, StringComparison.OrdinalIgnoreCase);
            Assert.Contains("clean", content, StringComparison.OrdinalIgnoreCase);
        }
    }
}
|
||||
203
e2e/csharp/tests/ContractTests.cs
generated
Normal file
203
e2e/csharp/tests/ContractTests.cs
generated
Normal file
@@ -0,0 +1,203 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Net.Http;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
using System.Threading.Tasks;
|
||||
using Xunit;
|
||||
using Kreuzberg;
|
||||
using static Kreuzberg.KreuzbergLib;
|
||||
|
||||
namespace Kreuzberg
{
    /// <summary>
    /// E2e tests for category: contract. Each test runs one extraction against a
    /// fixture file and checks the reported MIME type and/or the extracted content.
    /// </summary>
    public class ContractTests
    {
        private const string MemoPdf = "pdf/fake_memo.pdf";
        private const string PdfMime = "application/pdf";
        private const string DocxMime = "application/vnd.openxmlformats-officedocument.wordprocessingml.document";
        private const string AnyOfMessage = "expected to contain at least one of the specified values";

        /// <summary>Tests async batch bytes extraction API (batch_extract_bytes).</summary>
        [Fact]
        public async Task Test_ApiBatchBytesAsync()
        {
            // NOTE(review): the fixture describes batch_extract_bytes, but the call below
            // is the single-file ExtractFileAsync — confirm the generator mapping.
            var result = await KreuzbergLib.ExtractFileAsync(MemoPdf, null, null);

            Assert.Equal(PdfMime, result.MimeType!.Trim());
            Assert.True(result.Content.Length >= 10, "expected length >= 10");
            var content = result.Content.ToString();
            Assert.True(content.Contains("May 5, 2023") || content.Contains("Mallori"), AnyOfMessage);
        }

        /// <summary>Tests async batch bytes extraction with per-file configs (batch_extract_bytes with file_configs parameter).</summary>
        [Fact]
        public async Task Test_ApiBatchBytesWithConfigsAsync()
        {
            // NOTE(review): fixture describes a batch call; ExtractFileAsync is what runs — confirm.
            var result = await KreuzbergLib.ExtractFileAsync(MemoPdf, null, new ExtractionConfig { OutputFormat = OutputFormat.Markdown });

            Assert.Equal(PdfMime, result.MimeType!.Trim());
            Assert.True(result.Content.Length >= 10, "expected length >= 10");
            // skipped: field 'metadata.output_format' not available on result type
        }

        /// <summary>Tests async batch file extraction API (batch_extract_file).</summary>
        [Fact]
        public async Task Test_ApiBatchFileAsync()
        {
            // NOTE(review): fixture describes a batch call; ExtractFileAsync is what runs — confirm.
            var result = await KreuzbergLib.ExtractFileAsync(MemoPdf, null, null);

            Assert.Equal(PdfMime, result.MimeType!.Trim());
            Assert.True(result.Content.Length >= 10, "expected length >= 10");
            var content = result.Content.ToString();
            Assert.True(content.Contains("May 5, 2023") || content.Contains("Mallori"), AnyOfMessage);
        }

        /// <summary>Tests async batch file extraction with per-file configs (batch_extract_files with file_configs parameter).</summary>
        [Fact]
        public async Task Test_ApiBatchFileWithConfigsAsync()
        {
            // NOTE(review): fixture describes a batch call; ExtractFileAsync is what runs — confirm.
            var result = await KreuzbergLib.ExtractFileAsync(MemoPdf, null, new ExtractionConfig { OutputFormat = OutputFormat.Markdown });

            Assert.Equal(PdfMime, result.MimeType!.Trim());
            Assert.True(result.Content.Length >= 10, "expected length >= 10");
            // skipped: field 'metadata.output_format' not available on result type
        }

        /// <summary>Tests async bytes extraction API (extract_bytes).</summary>
        [Fact]
        public async Task Test_ApiExtractBytesAsync()
        {
            // NOTE(review): fixture describes extract_bytes; the file-path API is what runs — confirm.
            var result = await KreuzbergLib.ExtractFileAsync(MemoPdf, null, null);

            Assert.Equal(PdfMime, result.MimeType!.Trim());
            Assert.True(result.Content.Length >= 10, "expected length >= 10");
            var content = result.Content.ToString();
            Assert.True(content.Contains("May 5, 2023") || content.Contains("Mallori"), AnyOfMessage);
        }

        /// <summary>Tests async file extraction API (extract_file).</summary>
        [Fact]
        public async Task Test_ApiExtractFileAsync()
        {
            var result = await KreuzbergLib.ExtractFileAsync(MemoPdf, null, null);

            Assert.Equal(PdfMime, result.MimeType!.Trim());
            Assert.True(result.Content.Length >= 10, "expected length >= 10");
            var content = result.Content.ToString();
            Assert.True(content.Contains("May 5, 2023") || content.Contains("Mallori"), AnyOfMessage);
        }

        /// <summary>Tests markdown chunker prepends heading hierarchy to chunk content.</summary>
        [Fact]
        public void Test_ConfigChunkingPrependHeadingContext()
        {
            var result = KreuzbergLib.ExtractFileSync("markdown/extraction_test.md", null, ExtractionConfig.FromJson("{\"chunking\":{\"chunker_type\":\"markdown\",\"max_chars\":300,\"max_overlap\":50,\"prepend_heading_context\":true}}"));

            Assert.True(result.Content.Length >= 10, "expected length >= 10");

            // skipped: field 'chunks' not available on result type
            // The generator emitted the skip note above and the first chunk assertion on a
            // single line, which commented the assertion out; it is restored here.
            var chunks = result.Chunks ?? new();
            Assert.True(chunks.All(c => !string.IsNullOrEmpty(c.Content)));
            Assert.True(chunks.All(c => c.Metadata?.HeadingContext != null));
            // Also fails when there are no chunks at all, unlike the vacuous All(...) checks.
            Assert.True(chunks.FirstOrDefault()?.Metadata?.HeadingContext != null);
        }

        /// <summary>Tests document structure with DOCX heading-driven nesting.</summary>
        [Fact]
        public void Test_ConfigDocumentStructureWithHeadings()
        {
            var result = KreuzbergLib.ExtractFileSync("docx/fake.docx", null, ExtractionConfig.FromJson("{\"include_document_structure\":true}"));

            Assert.Equal(DocxMime, result.MimeType!.Trim());
            // skipped: field 'document' not available on result type
            // skipped: field 'document.nodes' not available on result type
        }

        /// <summary>Tests element-based result format with element type assertions on DOCX.</summary>
        [Fact]
        public void Test_ConfigElementTypes()
        {
            var result = KreuzbergLib.ExtractFileSync("docx/unit_test_headers.docx", null, ExtractionConfig.FromJson("{\"result_format\":\"element_based\"}"));

            // Assert.Contains reports a null MIME type as a test failure instead of
            // dereferencing it.
            Assert.Contains(DocxMime, result.MimeType);
            // skipped: field 'elements' not available on result type
        }

        /// <summary>Tests that extraction_timeout_secs config field is accepted and does not affect fast extractions.</summary>
        [Fact]
        public void Test_ConfigExtractionTimeout()
        {
            var result = KreuzbergLib.ExtractFileSync(MemoPdf, null, ExtractionConfig.FromJson("{\"extraction_timeout_secs\":300}"));

            Assert.Equal(PdfMime, result.MimeType!.Trim());
            Assert.True(result.Content.Length >= 10, "expected length >= 10");
        }

        /// <summary>Tests keyword extraction via YAKE algorithm.</summary>
        [Fact]
        public void Test_ConfigKeywords()
        {
            var result = KreuzbergLib.ExtractFileSync(MemoPdf, null, ExtractionConfig.FromJson("{\"keywords\":{\"algorithm\":\"yake\",\"max_keywords\":10}}"));

            Assert.Equal(PdfMime, result.MimeType!.Trim());
            Assert.True(result.Content.Length >= 10, "expected length >= 10");
            // skipped: field 'keywords' not available on C# ExtractionResult
        }

        /// <summary>Tests page extraction and page marker configuration.</summary>
        [Fact]
        public void Test_ConfigPages()
        {
            var result = KreuzbergLib.ExtractFileSync(MemoPdf, null, ExtractionConfig.FromJson("{\"pages\":{\"extract_pages\":true,\"insert_page_markers\":true}}"));

            Assert.Equal(PdfMime, result.MimeType!.Trim());
            Assert.True(result.Content.Length >= 10, "expected length >= 10");
            Assert.True(result.Content.ToString().Contains("PAGE"), AnyOfMessage);
        }

        /// <summary>Tests quality scoring produces a score value in [0.0, 1.0].</summary>
        [Fact]
        public void Test_ConfigQualityEnabled()
        {
            var result = KreuzbergLib.ExtractFileSync(MemoPdf, null, ExtractionConfig.FromJson("{\"enable_quality_processing\":true}"));

            Assert.Equal(PdfMime, result.MimeType!.Trim());
            Assert.True(result.Content.Length >= 10, "expected length >= 10");
            // skipped: field 'quality_score' not available on result type
        }

        /// <summary>Tests archive extraction with custom security limits.</summary>
        [Fact]
        public void Test_ConfigSecurityLimits()
        {
            var result = KreuzbergLib.ExtractFileSync("archives/documents.zip", null, ExtractionConfig.FromJson("{\"security_limits\":{\"max_archive_size\":104857600,\"max_compression_ratio\":50,\"max_files_in_archive\":100}}"));

            // A null MIME type becomes an assertion failure rather than a NullReferenceException.
            var mimeType = result.MimeType ?? string.Empty;
            Assert.True(mimeType.Contains("application/zip") || mimeType.Contains("application/x-zip-compressed"), AnyOfMessage);
            Assert.True(result.Content.Length >= 10, "expected length >= 10");
        }

        /// <summary>Tests tree-sitter configuration round-trip.</summary>
        [Fact]
        public void Test_ConfigTreeSitter()
        {
            var result = KreuzbergLib.ExtractFileSync("code/hello.py", null, ExtractionConfig.FromJson("{\"tree_sitter\":{\"groups\":[\"web\"],\"languages\":[\"python\",\"rust\"],\"process\":{\"comments\":false,\"diagnostics\":false,\"docstrings\":false,\"exports\":true,\"imports\":true,\"structure\":true,\"symbols\":false}}}"));

            Assert.Equal("text/x-source-code", result.MimeType!.Trim());
            Assert.True(result.Content.Length >= 5, "expected length >= 5");
        }

        /// <summary>Tests markdown output format via bytes extraction API.</summary>
        [Fact]
        public void Test_OutputFormatBytesMarkdown()
        {
            var pdfBytes = System.IO.File.ReadAllBytes(MemoPdf);

            var result = KreuzbergLib.ExtractBytesSync(pdfBytes, PdfMime, new ExtractionConfig { OutputFormat = OutputFormat.Markdown });

            Assert.Equal(PdfMime, result.MimeType!.Trim());
            Assert.True(result.Content.Length >= 10, "expected length >= 10");
            // skipped: field 'metadata.output_format' not available on result type
        }

        /// <summary>Tests Markdown output format.</summary>
        [Fact]
        public void Test_OutputFormatMarkdown()
        {
            var result = KreuzbergLib.ExtractFileSync(MemoPdf, null, ExtractionConfig.FromJson("{\"output_format\":\"markdown\"}"));

            Assert.Equal(PdfMime, result.MimeType!.Trim());
            Assert.True(result.Content.Length >= 10, "expected length >= 10");
            // skipped: field 'metadata.output_format' not available on result type
        }
    }
}
|
||||
62
e2e/csharp/tests/DetectionTests.cs
generated
Normal file
62
e2e/csharp/tests/DetectionTests.cs
generated
Normal file
@@ -0,0 +1,62 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Net.Http;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
using System.Threading.Tasks;
|
||||
using Xunit;
|
||||
using Kreuzberg;
|
||||
using static Kreuzberg.KreuzbergLib;
|
||||
|
||||
namespace Kreuzberg
{
    /// <summary>
    /// E2e tests for category: detection. The byte-detection cases carry no assertions;
    /// they pass when detection returns without throwing.
    /// </summary>
    public class DetectionTests
    {
        private static readonly JsonSerializerOptions ConfigOptions = new()
        {
            Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
            DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
        };

        /// <summary>Detect HTML MIME from bytes.</summary>
        [Fact]
        public void Test_DetectMimeBytesHtml()
        {
            var htmlBytes = System.IO.File.ReadAllBytes("html/html.html");

            _ = KreuzbergLib.DetectMimeTypeFromBytes(htmlBytes);
        }

        /// <summary>Detect PDF MIME type from bytes.</summary>
        [Fact]
        public void Test_DetectMimeBytesPdf()
        {
            var pdfBytes = System.IO.File.ReadAllBytes("pdf/fake_memo.pdf");

            _ = KreuzbergLib.DetectMimeTypeFromBytes(pdfBytes);
        }

        /// <summary>Detect PNG MIME type from bytes.</summary>
        [Fact]
        public void Test_DetectMimeBytesPng()
        {
            var pngBytes = System.IO.File.ReadAllBytes("images/test_hello_world.png");

            _ = KreuzbergLib.DetectMimeTypeFromBytes(pngBytes);
        }

        /// <summary>Looking up extensions for an unknown MIME type must throw a <c>KreuzbergException</c>.</summary>
        [Fact]
        public void Test_GetExtensionsUnknownMime()
        {
            const string unknownMime = "application/x-totally-unknown";

            Assert.ThrowsAny<KreuzbergException>(() =>
            {
                KreuzbergLib.GetExtensionsForMime(unknownMime);
            });
        }
    }
}
|
||||
43
e2e/csharp/tests/DocumentExtractorManagementTests.cs
generated
Normal file
43
e2e/csharp/tests/DocumentExtractorManagementTests.cs
generated
Normal file
@@ -0,0 +1,43 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Net.Http;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
using System.Threading.Tasks;
|
||||
using Xunit;
|
||||
using Kreuzberg;
|
||||
using static Kreuzberg.KreuzbergLib;
|
||||
|
||||
namespace Kreuzberg
{
    /// <summary>
    /// E2e tests for category: document_extractor_management. Both cases are smoke
    /// tests: they pass when the call completes without throwing.
    /// </summary>
    public class DocumentExtractorManagementTests
    {
        private static readonly JsonSerializerOptions ConfigOptions = new()
        {
            Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
            DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
        };

        /// <summary>Clear all document extractors.</summary>
        /// <remarks>
        /// NOTE(review): the fixture description also says "verify list is empty", but no
        /// such check is generated. Presumably the clear affects state shared with other
        /// test classes — confirm it cannot race with tests running in parallel.
        /// </remarks>
        [Fact]
        public void Test_DocumentExtractorsClear() => KreuzbergLib.ClearDocumentExtractors();

        /// <summary>List all registered document extractors.</summary>
        [Fact]
        public void Test_ExtractorsList() => _ = KreuzbergLib.ListDocumentExtractors();
    }
}
|
||||
54
e2e/csharp/tests/EmbedAsyncPendingTests.cs
generated
Normal file
54
e2e/csharp/tests/EmbedAsyncPendingTests.cs
generated
Normal file
@@ -0,0 +1,54 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Net.Http;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
using System.Threading.Tasks;
|
||||
using Xunit;
|
||||
using Kreuzberg;
|
||||
using static Kreuzberg.KreuzbergLib;
|
||||
|
||||
namespace Kreuzberg
{
    /// <summary>E2e tests for category: embed_async_pending.</summary>
    public class EmbedAsyncPendingTests
    {
        /// <summary>
        /// Serializer options used to build config objects from fixture JSON: enums are
        /// read as snake_case strings.
        /// </summary>
        private static readonly JsonSerializerOptions ConfigOptions = new() { Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) }, DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault };

        /// <summary>embed_texts_async: an empty text list yields an empty result.</summary>
        [Fact]
        public async Task Test_EmbedTextsAsyncEmptyInput()
        {
            var result = await KreuzbergLib.EmbedTextsAsync(new List<string>(), null);

            Assert.True(result.Count == 0, "expected no embeddings for an empty input list");
        }

        /// <summary>embed_texts_async: basic async embedding of two texts.</summary>
        [Fact]
        public async Task Test_EmbedTextsAsyncHappy()
        {
            // Plain literals: parsing a JSON string such as "\"First\"" produces exactly
            // this value, so the serializer round-trip was pure overhead.
            var texts = new List<string> { "First", "Second" };

            var result = await KreuzbergLib.EmbedTextsAsync(texts, null);

            Assert.True(result.Count >= 2, "expected at least 2 embeddings");
        }

        /// <summary>embed_texts_async: preset override. Passes when the call does not throw.</summary>
        [Fact]
        public async Task Test_EmbedTextsAsyncPresetSwitch()
        {
            var texts = new List<string> { "Text" };
            // The model selector is still built from fixture JSON.
            var config = new EmbeddingConfig
            {
                Model = JsonSerializer.Deserialize<EmbeddingModelType>("{\"name\":\"balanced\",\"type\":\"preset\"}", ConfigOptions)!,
            };

            _ = await KreuzbergLib.EmbedTextsAsync(texts, config);
        }
    }
}
|
||||
36
e2e/csharp/tests/EmbedExtraTests.cs
generated
Normal file
36
e2e/csharp/tests/EmbedExtraTests.cs
generated
Normal file
@@ -0,0 +1,36 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Net.Http;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
using System.Threading.Tasks;
|
||||
using Xunit;
|
||||
using Kreuzberg;
|
||||
using static Kreuzberg.KreuzbergLib;
|
||||
|
||||
namespace Kreuzberg
{
    /// <summary>E2e tests for category: embed_extra.</summary>
    public class EmbedExtraTests
    {
        /// <summary>
        /// Serializer options used to build config objects from fixture JSON: enums are
        /// read as snake_case strings.
        /// </summary>
        private static readonly JsonSerializerOptions ConfigOptions = new() { Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) }, DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault };

        /// <summary>Batch embed texts with the "balanced" preset. Passes when the call does not throw.</summary>
        [Fact]
        public void Test_EmbedTextsBatch()
        {
            // Plain literals replace a JSON round-trip that produced the same strings.
            var texts = new List<string> { "Hello", "World" };
            // The model selector is still built from fixture JSON.
            var config = new EmbeddingConfig
            {
                Model = JsonSerializer.Deserialize<EmbeddingModelType>("{\"name\":\"balanced\",\"type\":\"preset\"}", ConfigOptions)!,
            };

            _ = KreuzbergLib.EmbedTexts(texts, config);
        }
    }
}
|
||||
43
e2e/csharp/tests/EmbeddingBackendManagementTests.cs
generated
Normal file
43
e2e/csharp/tests/EmbeddingBackendManagementTests.cs
generated
Normal file
@@ -0,0 +1,43 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Net.Http;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
using System.Threading.Tasks;
|
||||
using Xunit;
|
||||
using Kreuzberg;
|
||||
using static Kreuzberg.KreuzbergLib;
|
||||
|
||||
namespace Kreuzberg
|
||||
{
|
||||
/// <summary>End-to-end tests for the <c>embedding_backend_management</c> fixture category.</summary>
public class EmbeddingBackendManagementTests
{
    // Shared serializer options (snake_case enum strings, defaults omitted on write).
    // Not referenced by the tests in this class.
    private static readonly JsonSerializerOptions ConfigOptions = new()
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>Clears all embedding backends; passes when the call does not throw.</summary>
    [Fact]
    public void Test_EmbeddingBackendsClear()
    {
        // NOTE(review): the fixture description mentions verifying that the list is
        // empty afterwards, but no such check is generated here — confirm intent.
        KreuzbergLib.ClearEmbeddingBackends();
    }

    /// <summary>Lists the registered embedding backends; the returned value is not inspected.</summary>
    [Fact]
    public void Test_EmbeddingBackendsList()
    {
        _ = KreuzbergLib.ListEmbeddingBackends();
    }
}
|
||||
}
|
||||
71
e2e/csharp/tests/EmbeddingsTests.cs
generated
Normal file
71
e2e/csharp/tests/EmbeddingsTests.cs
generated
Normal file
@@ -0,0 +1,71 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Net.Http;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
using System.Threading.Tasks;
|
||||
using Xunit;
|
||||
using Kreuzberg;
|
||||
using static Kreuzberg.KreuzbergLib;
|
||||
|
||||
namespace Kreuzberg
|
||||
{
|
||||
/// <summary>End-to-end tests for the <c>embeddings</c> fixture category.</summary>
public class EmbeddingsTests
{
    // Options used to materialise fixture values from JSON: enums are mapped as
    // snake_case strings, and default-valued members are skipped when writing.
    private static readonly JsonSerializerOptions ConfigOptions = new()
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>Embeds two texts with the "multilingual" preset and expects at least two results.</summary>
    [Fact]
    public void Test_EmbedTextsDifferentPreset()
    {
        // Fixture inputs are decoded from their JSON form, texts first and model last.
        string firstText = JsonSerializer.Deserialize<string>("\"Hello world\"", ConfigOptions)!;
        string secondText = JsonSerializer.Deserialize<string>("\"Test\"", ConfigOptions)!;
        var texts = new List<string> { firstText, secondText };

        var model = JsonSerializer.Deserialize<EmbeddingModelType>("{\"name\":\"multilingual\",\"type\":\"preset\"}", ConfigOptions)!;
        var embeddingConfig = new EmbeddingConfig { Model = model };

        var result = KreuzbergLib.EmbedTexts(texts, embeddingConfig);

        var embeddingCount = result.Count;
        Assert.True(embeddingCount >= 2);
    }

    /// <summary>Looks up the "balanced" preset; passes when the lookup does not throw.</summary>
    [Fact]
    public void Test_GetEmbeddingPresetKnown()
    {
        _ = KreuzbergLib.GetEmbeddingPreset("balanced");
    }

    /// <summary>Nominal lookup of the "balanced" preset; passes when the lookup does not throw.</summary>
    [Fact]
    public void Test_GetEmbeddingPresetNominal()
    {
        _ = KreuzbergLib.GetEmbeddingPreset("balanced");
    }

    /// <summary>
    /// Looks up a preset name that is not expected to exist and requires the result
    /// to be null or to have an empty string representation.
    /// </summary>
    [Fact]
    public void Test_GetEmbeddingPresetUnknown()
    {
        var result = KreuzbergLib.GetEmbeddingPreset("nonexistent-xyz");

        string? rendered = result?.ToString();
        Assert.True(string.IsNullOrEmpty(rendered));
    }

    /// <summary>Expects the preset listing to contain at least one entry.</summary>
    [Fact]
    public void Test_ListEmbeddingPresetsSanity()
    {
        var presets = KreuzbergLib.ListEmbeddingPresets();

        Assert.NotEmpty(presets);
    }
}
|
||||
}
|
||||
76
e2e/csharp/tests/ErrorTests.cs
generated
Normal file
76
e2e/csharp/tests/ErrorTests.cs
generated
Normal file
@@ -0,0 +1,76 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Net.Http;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
using System.Threading.Tasks;
|
||||
using Xunit;
|
||||
using Kreuzberg;
|
||||
using static Kreuzberg.KreuzbergLib;
|
||||
|
||||
namespace Kreuzberg
|
||||
{
|
||||
/// <summary>End-to-end tests for the <c>error</c> fixture category.</summary>
public class ErrorTests
{
    // Shared serializer options (snake_case enum strings, defaults omitted on write).
    // Not referenced by the tests in this class.
    private static readonly JsonSerializerOptions ConfigOptions = new()
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>Extracting the empty text fixture must not throw; the result is not inspected.</summary>
    [Fact]
    public void Test_ErrorEmptyBytes()
    {
        byte[] payload = System.IO.File.ReadAllBytes("text/empty.txt");

        _ = KreuzbergLib.ExtractBytesSync(payload, "text/plain", new ExtractionConfig());
    }

    /// <summary>An empty MIME type must surface as a <c>KreuzbergException</c> (or a subtype).</summary>
    [Fact]
    public void Test_ErrorEmptyMime()
    {
        // The fixture is read inside the delegate, so an I/O failure is also observed by the assertion.
        Action extract = () =>
        {
            KreuzbergLib.ExtractBytesSync(System.IO.File.ReadAllBytes("text/plain.txt"), "", new ExtractionConfig());
        };

        Assert.ThrowsAny<KreuzbergException>(extract);
    }

    /// <summary>Setting both <c>DisableOcr</c> and <c>ForceOcr</c> must surface as a <c>KreuzbergException</c>.</summary>
    [Fact]
    public void Test_ErrorExtractBytesConflictingOcr()
    {
        Action extract = () =>
        {
            KreuzbergLib.ExtractBytesSync(System.IO.File.ReadAllBytes("text/fake_text.txt"), "text/plain", new ExtractionConfig { DisableOcr = true, ForceOcr = true });
        };

        Assert.ThrowsAny<KreuzbergException>(extract);
    }

    /// <summary>A malformed MIME type string must surface as a <c>KreuzbergException</c>.</summary>
    [Fact]
    public void Test_ErrorInvalidMimeFormat()
    {
        Action extract = () =>
        {
            KreuzbergLib.ExtractBytesSync(System.IO.File.ReadAllBytes("text/plain.txt"), "not-a-mime", new ExtractionConfig());
        };

        Assert.ThrowsAny<KreuzbergException>(extract);
    }

    /// <summary>An unsupported MIME type must surface as a <c>KreuzbergException</c>.</summary>
    [Fact]
    public void Test_ErrorUnsupportedMime()
    {
        Action extract = () =>
        {
            KreuzbergLib.ExtractBytesSync(System.IO.File.ReadAllBytes("text/plain.txt"), "application/x-nonexistent", new ExtractionConfig());
        };

        Assert.ThrowsAny<KreuzbergException>(extract);
    }
}
|
||||
}
|
||||
73
e2e/csharp/tests/FormatSpecificTests.cs
generated
Normal file
73
e2e/csharp/tests/FormatSpecificTests.cs
generated
Normal file
@@ -0,0 +1,73 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Net.Http;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
using System.Threading.Tasks;
|
||||
using Xunit;
|
||||
using Kreuzberg;
|
||||
using static Kreuzberg.KreuzbergLib;
|
||||
|
||||
namespace Kreuzberg
|
||||
{
|
||||
/// <summary>End-to-end tests for the <c>format_specific</c> fixture category.</summary>
public class FormatSpecificTests
{
    // Shared serializer options (snake_case enum strings, defaults omitted on write).
    // Not referenced by the tests in this class.
    private static readonly JsonSerializerOptions ConfigOptions = new() { Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) }, DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault };

    /// <summary>Extracts a DOCX fixture from bytes and expects at least 20 characters of content.</summary>
    [Fact]
    public void Test_FormatDocxStandalone()
    {
        var result = KreuzbergLib.ExtractBytesSync(System.IO.File.ReadAllBytes("docx/fake.docx"), "application/vnd.openxmlformats-officedocument.wordprocessingml.document", null);

        Assert.True(result.Content.Length >= 20, "expected length >= 20");
    }

    /// <summary>
    /// Extracts an HWPX fixture from bytes and expects at least 20 characters of content
    /// containing "hello from hwpx", compared case-insensitively.
    /// </summary>
    [Fact]
    public void Test_FormatHwpxStandalone()
    {
        var result = KreuzbergLib.ExtractBytesSync(System.IO.File.ReadAllBytes("hwpx/simple.hwpx"), "application/haansofthwpx", null);

        Assert.True(result.Content.Length >= 20, "expected length >= 20");

        // Ordinal, case-insensitive comparison: lower-casing with ToLower() depends on the
        // current culture and could change the outcome on machines with a different locale.
        Assert.Contains("hello from hwpx", result.Content.ToString(), StringComparison.OrdinalIgnoreCase);
    }

    /// <summary>
    /// Extracts a PDF fixture from bytes and expects at least 50 characters of content
    /// containing "Mallori" or "May".
    /// </summary>
    [Fact]
    public void Test_FormatPdfText()
    {
        var result = KreuzbergLib.ExtractBytesSync(System.IO.File.ReadAllBytes("pdf/fake_memo.pdf"), "application/pdf", null);

        Assert.True(result.Content.Length >= 50, "expected length >= 50");
        Assert.True(result.Content.ToString().Contains("Mallori") || result.Content.ToString().Contains("May"), "expected to contain at least one of the specified values");
    }

    /// <summary>Extracts a PPTX fixture from its path; passes when the call does not throw.</summary>
    [Fact]
    public void Test_FormatPptx()
    {
        _ = KreuzbergLib.ExtractFileSync("pptx/simple.pptx", "application/vnd.openxmlformats-officedocument.presentationml.presentation", null);
    }

    /// <summary>Extracts an XLSX fixture from its path; passes when the call does not throw.</summary>
    [Fact]
    public void Test_FormatXlsx()
    {
        _ = KreuzbergLib.ExtractFileSync("xlsx/stanley_cups.xlsx", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", null);
    }
}
|
||||
}
|
||||
55
e2e/csharp/tests/MimeUtilitiesTests.cs
generated
Normal file
55
e2e/csharp/tests/MimeUtilitiesTests.cs
generated
Normal file
@@ -0,0 +1,55 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Net.Http;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
using System.Threading.Tasks;
|
||||
using Xunit;
|
||||
using Kreuzberg;
|
||||
using static Kreuzberg.KreuzbergLib;
|
||||
|
||||
namespace Kreuzberg
|
||||
{
|
||||
/// <summary>End-to-end tests for the <c>mime_utilities</c> fixture category.</summary>
public class MimeUtilitiesTests
{
    // Shared serializer options (snake_case enum strings, defaults omitted on write).
    // Not referenced by the tests in this class.
    private static readonly JsonSerializerOptions ConfigOptions = new() { Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) }, DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault };

    // All substring checks below use an ordinal, case-insensitive comparison instead of
    // ToLower(), whose result depends on the current culture of the test machine.

    /// <summary>Detects the MIME type of a PDF fixture from its bytes; the result must mention "pdf".</summary>
    [Fact]
    public void Test_MimeDetectBytes()
    {
        var result = KreuzbergLib.DetectMimeTypeFromBytes(System.IO.File.ReadAllBytes("pdf/fake_memo.pdf"));

        Assert.Contains("pdf", result.ToString(), StringComparison.OrdinalIgnoreCase);
    }

    /// <summary>Detects the MIME type of a PNG fixture from its bytes; the result must mention "png".</summary>
    [Fact]
    public void Test_MimeDetectImage()
    {
        var result = KreuzbergLib.DetectMimeTypeFromBytes(System.IO.File.ReadAllBytes("images/test_hello_world.png"));

        Assert.Contains("png", result.ToString(), StringComparison.OrdinalIgnoreCase);
    }

    /// <summary>
    /// Requests the file extensions for <c>application/pdf</c>; the JSON-serialized
    /// result must mention "pdf".
    /// </summary>
    [Fact]
    public void Test_MimeGetExtensions()
    {
        var result = KreuzbergLib.GetExtensionsForMime("application/pdf");

        Assert.Contains("pdf", JsonSerializer.Serialize(result), StringComparison.OrdinalIgnoreCase);
    }
}
|
||||
}
|
||||
50
e2e/csharp/tests/OcrBackendManagementTests.cs
generated
Normal file
50
e2e/csharp/tests/OcrBackendManagementTests.cs
generated
Normal file
@@ -0,0 +1,50 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Net.Http;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
using System.Threading.Tasks;
|
||||
using Xunit;
|
||||
using Kreuzberg;
|
||||
using static Kreuzberg.KreuzbergLib;
|
||||
|
||||
namespace Kreuzberg
|
||||
{
|
||||
/// <summary>End-to-end tests for the <c>ocr_backend_management</c> fixture category.</summary>
public class OcrBackendManagementTests
{
    // Shared serializer options (snake_case enum strings, defaults omitted on write).
    // Not referenced by the tests in this class.
    private static readonly JsonSerializerOptions ConfigOptions = new()
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>Clears all OCR backends; passes when the call does not throw.</summary>
    [Fact]
    public void Test_OcrBackendsClear()
    {
        // NOTE(review): the fixture description mentions verifying that the list is
        // empty afterwards, but no such check is generated here — confirm intent.
        KreuzbergLib.ClearOcrBackends();
    }

    /// <summary>Lists the registered OCR backends; the returned value is not inspected.</summary>
    [Fact]
    public void Test_OcrBackendsList()
    {
        _ = KreuzbergLib.ListOcrBackends();
    }

    /// <summary>Unregistering a backend name that was never registered must not throw.</summary>
    [Fact]
    public void Test_OcrBackendsUnregister()
    {
        const string missingBackend = "nonexistent-backend-xyz";

        KreuzbergLib.UnregisterOcrBackend(missingBackend);
    }
}
|
||||
}
|
||||
48
e2e/csharp/tests/PdfTests.cs
generated
Normal file
48
e2e/csharp/tests/PdfTests.cs
generated
Normal file
@@ -0,0 +1,48 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Net.Http;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
using System.Threading.Tasks;
|
||||
using Xunit;
|
||||
using Kreuzberg;
|
||||
using static Kreuzberg.KreuzbergLib;
|
||||
|
||||
namespace Kreuzberg
|
||||
{
|
||||
/// <summary>End-to-end tests for the <c>pdf</c> fixture category.</summary>
public class PdfTests
{
    // Shared serializer options (snake_case enum strings, defaults omitted on write).
    // Not referenced by the tests in this class.
    private static readonly JsonSerializerOptions ConfigOptions = new() { Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) }, DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault };

    /// <summary>Renders page 0 of the PDF fixture to PNG and expects a non-null, non-empty payload.</summary>
    [Fact]
    public void Test_RenderPdfPageFirst()
    {
        var result = KreuzbergLib.RenderPdfPageToPng(System.IO.File.ReadAllBytes("pdf/fake_memo.pdf"), 0, null, null);

        Assert.NotNull(result);

        // The fixture's 'min_length' assertion is not generated for byte[] results.
        // Requiring a non-empty payload is a conservative stand-in so that an empty
        // render does not pass silently.
        // NOTE(review): the exact minimum length from the fixture is not visible here — confirm.
        Assert.NotEmpty(result);
    }

    /// <summary>Requesting page index 999 must surface as a <c>KreuzbergException</c> (or a subtype).</summary>
    [Fact]
    public void Test_RenderPdfPageOutOfRange()
    {
        Assert.ThrowsAny<KreuzbergException>(() =>
        {
            KreuzbergLib.RenderPdfPageToPng(System.IO.File.ReadAllBytes("pdf/fake_memo.pdf"), 999, null, null);
        });
    }
}
|
||||
}
|
||||
234
e2e/csharp/tests/PluginApiTests.cs
generated
Normal file
234
e2e/csharp/tests/PluginApiTests.cs
generated
Normal file
@@ -0,0 +1,234 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Net.Http;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
using System.Threading.Tasks;
|
||||
using Xunit;
|
||||
using Kreuzberg;
|
||||
using static Kreuzberg.KreuzbergLib;
|
||||
|
||||
namespace Kreuzberg
|
||||
{
|
||||
/// <summary>End-to-end tests for the <c>plugin_api</c> fixture category.</summary>
/// <remarks>
/// Each registration test wraps a do-nothing stub (declared at the bottom of this class)
/// in its bridge and hands the bridge result to the matching <c>KreuzbergLib.Register*</c> call.
/// None of the tests assert on a result; they pass when no exception is thrown.
/// </remarks>
public class PluginApiTests
{
    // Shared serializer options (snake_case enum strings, defaults omitted on write).
    // Not referenced by the tests in this class.
    private static readonly JsonSerializerOptions ConfigOptions = new()
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>Registers a stub document extractor through its bridge.</summary>
    [Fact]
    public void Test_RegisterDocumentExtractorTraitBridge()
    {
        var stub = new TestStub_RegisterDocumentExtractorTraitBridge();
        var bridged = DocumentExtractorBridge.Register(stub);

        KreuzbergLib.RegisterDocumentExtractor(bridged);
    }

    /// <summary>Registers a stub embedding backend through its bridge.</summary>
    [Fact]
    public void Test_RegisterEmbeddingBackendTraitBridge()
    {
        var stub = new TestStub_RegisterEmbeddingBackendTraitBridge();
        var bridged = EmbeddingBackendBridge.Register(stub);

        KreuzbergLib.RegisterEmbeddingBackend(bridged);
    }

    /// <summary>Registers a stub OCR backend through its bridge.</summary>
    [Fact]
    public void Test_RegisterOcrBackendTraitBridge()
    {
        var stub = new TestStub_RegisterOcrBackendTraitBridge();
        var bridged = OcrBackendBridge.Register(stub);

        KreuzbergLib.RegisterOcrBackend(bridged);
    }

    /// <summary>Registers a stub post-processor through its bridge.</summary>
    [Fact]
    public void Test_RegisterPostProcessorTraitBridge()
    {
        var stub = new TestStub_RegisterPostProcessorTraitBridge();
        var bridged = PostProcessorBridge.Register(stub);

        KreuzbergLib.RegisterPostProcessor(bridged);
    }

    /// <summary>Registers a stub renderer through its bridge.</summary>
    [Fact]
    public void Test_RegisterRendererTraitBridge()
    {
        var stub = new TestStub_RegisterRendererTraitBridge();
        var bridged = RendererBridge.Register(stub);

        KreuzbergLib.RegisterRenderer(bridged);
    }

    /// <summary>Registers a stub validator through its bridge.</summary>
    [Fact]
    public void Test_RegisterValidatorTraitBridge()
    {
        var stub = new TestStub_RegisterValidatorTraitBridge();
        var bridged = ValidatorBridge.Register(stub);

        KreuzbergLib.RegisterValidator(bridged);
    }

    // The unregister tests below only issue the unregister call by name;
    // this class performs no registration under those names beforehand.

    /// <summary>Unregisters the document extractor named "test-extractor".</summary>
    [Fact]
    public void Test_UnregisterDocumentExtractorAfterRegister()
    {
        const string pluginName = "test-extractor";

        KreuzbergLib.UnregisterDocumentExtractor(pluginName);
    }

    /// <summary>Unregisters the embedding backend named "test-embedding-backend".</summary>
    [Fact]
    public void Test_UnregisterEmbeddingBackendAfterRegister()
    {
        const string pluginName = "test-embedding-backend";

        KreuzbergLib.UnregisterEmbeddingBackend(pluginName);
    }

    /// <summary>Unregisters the post-processor named "test-processor".</summary>
    [Fact]
    public void Test_UnregisterPostProcessorAfterRegister()
    {
        const string pluginName = "test-processor";

        KreuzbergLib.UnregisterPostProcessor(pluginName);
    }

    /// <summary>Unregisters the renderer named "test-renderer".</summary>
    [Fact]
    public void Test_UnregisterRendererAfterRegister()
    {
        const string pluginName = "test-renderer";

        KreuzbergLib.UnregisterRenderer(pluginName);
    }

    /// <summary>Unregisters the validator named "test-validator".</summary>
    [Fact]
    public void Test_UnregisterValidatorAfterRegister()
    {
        const string pluginName = "test-validator";

        KreuzbergLib.UnregisterValidator(pluginName);
    }

    /// <summary>Document extractor stub: every member returns an empty or default value.</summary>
    private class TestStub_RegisterDocumentExtractorTraitBridge : IDocumentExtractor
    {
        public string Name
        {
            get { return "register_document_extractor_trait_bridge"; }
        }

        public string Version
        {
            get { return "1.0.0"; }
        }

        public string ExtractBytes(byte[] content, string mimeType, ExtractionConfig config)
        {
            return "";
        }

        public string ExtractFile(string path, string mimeType, ExtractionConfig config)
        {
            return "";
        }

        public List<string> SupportedMimeTypes()
        {
            return new List<string>();
        }

        public int Priority()
        {
            return 0;
        }

        public bool CanHandle(string path, string mimeType)
        {
            return false;
        }

        public void Initialize()
        {
        }

        public void Shutdown()
        {
        }

        public string Description()
        {
            return "";
        }

        public string Author()
        {
            return "";
        }
    }

    /// <summary>Embedding backend stub: zero dimensions and an empty embedding list.</summary>
    private class TestStub_RegisterEmbeddingBackendTraitBridge : IEmbeddingBackend
    {
        public string Name
        {
            get { return "register_embedding_backend_trait_bridge"; }
        }

        public string Version
        {
            get { return "1.0.0"; }
        }

        public ulong Dimensions()
        {
            return 0;
        }

        public List<List<float>> Embed(List<string> texts)
        {
            return new List<List<float>>();
        }

        public void Initialize()
        {
        }

        public void Shutdown()
        {
        }

        public string Description()
        {
            return "";
        }

        public string Author()
        {
            return "";
        }
    }

    /// <summary>OCR backend stub: returns fresh empty results and reports no capabilities.</summary>
    private class TestStub_RegisterOcrBackendTraitBridge : IOcrBackend
    {
        public string Name
        {
            get { return "register_ocr_backend_trait_bridge"; }
        }

        public string Version
        {
            get { return "1.0.0"; }
        }

        public ExtractionResult ProcessImage(byte[] imageBytes, OcrConfig config)
        {
            return new ExtractionResult();
        }

        public ExtractionResult ProcessImageFile(string path, OcrConfig config)
        {
            return new ExtractionResult();
        }

        public bool SupportsLanguage(string lang)
        {
            return false;
        }

        public OcrBackendType BackendType()
        {
            return OcrBackendType.Tesseract;
        }

        public List<string> SupportedLanguages()
        {
            return new List<string>();
        }

        public bool SupportsTableDetection()
        {
            return false;
        }

        public bool SupportsDocumentProcessing()
        {
            return false;
        }

        public ExtractionResult ProcessDocument(string path, OcrConfig config)
        {
            return new ExtractionResult();
        }

        public void Initialize()
        {
        }

        public void Shutdown()
        {
        }

        public string Description()
        {
            return "";
        }

        public string Author()
        {
            return "";
        }
    }

    /// <summary>Post-processor stub: processing is a no-op and <c>ShouldProcess</c> is always false.</summary>
    private class TestStub_RegisterPostProcessorTraitBridge : IPostProcessor
    {
        public string Name
        {
            get { return "register_post_processor_trait_bridge"; }
        }

        public string Version
        {
            get { return "1.0.0"; }
        }

        public void Process(ExtractionResult result, ExtractionConfig config)
        {
        }

        // NOTE(review): this method shares its name with the ProcessingStage type, so the
        // member access below relies on the compiler resolving the left-hand side to the
        // type rather than to this method — confirm this builds as intended.
        public ProcessingStage ProcessingStage()
        {
            return ProcessingStage.Early;
        }

        public bool ShouldProcess(ExtractionResult result, ExtractionConfig config)
        {
            return false;
        }

        public ulong EstimatedDurationMs(ExtractionResult result)
        {
            return 0;
        }

        public int Priority()
        {
            return 0;
        }

        public void Initialize()
        {
        }

        public void Shutdown()
        {
        }

        public string Description()
        {
            return "";
        }

        public string Author()
        {
            return "";
        }
    }

    /// <summary>Renderer stub: renders every document to an empty string.</summary>
    private class TestStub_RegisterRendererTraitBridge : IRenderer
    {
        public string Name
        {
            get { return "register_renderer_trait_bridge"; }
        }

        public string Version
        {
            get { return "1.0.0"; }
        }

        public string Render(string doc)
        {
            return "";
        }

        public void Initialize()
        {
        }

        public void Shutdown()
        {
        }

        public string Description()
        {
            return "";
        }

        public string Author()
        {
            return "";
        }
    }

    /// <summary>Validator stub: validation is a no-op and <c>ShouldValidate</c> is always false.</summary>
    private class TestStub_RegisterValidatorTraitBridge : IValidator
    {
        public string Name
        {
            get { return "register_validator_trait_bridge"; }
        }

        public string Version
        {
            get { return "1.0.0"; }
        }

        public void Validate(ExtractionResult result, ExtractionConfig config)
        {
        }

        public bool ShouldValidate(ExtractionResult result, ExtractionConfig config)
        {
            return false;
        }

        public int Priority()
        {
            return 0;
        }

        public void Initialize()
        {
        }

        public void Shutdown()
        {
        }

        public string Description()
        {
            return "";
        }

        public string Author()
        {
            return "";
        }
    }
}
|
||||
}
|
||||
43
e2e/csharp/tests/PostProcessorManagementTests.cs
generated
Normal file
43
e2e/csharp/tests/PostProcessorManagementTests.cs
generated
Normal file
@@ -0,0 +1,43 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Net.Http;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
using System.Threading.Tasks;
|
||||
using Xunit;
|
||||
using Kreuzberg;
|
||||
using static Kreuzberg.KreuzbergLib;
|
||||
|
||||
namespace Kreuzberg
|
||||
{
|
||||
/// <summary>End-to-end tests for the <c>post_processor_management</c> fixture category.</summary>
public class PostProcessorManagementTests
{
    // Shared serializer options (snake_case enum strings, defaults omitted on write).
    // Not referenced by the tests in this class.
    private static readonly JsonSerializerOptions ConfigOptions = new()
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>Clears all post-processors; passes when the call does not throw.</summary>
    [Fact]
    public void Test_PostProcessorsClear()
    {
        // NOTE(review): the fixture description mentions verifying that the list is
        // empty afterwards, but no such check is generated here — confirm intent.
        KreuzbergLib.ClearPostProcessors();
    }

    /// <summary>Lists the registered post-processors; the returned value is not inspected.</summary>
    [Fact]
    public void Test_PostProcessorsList()
    {
        _ = KreuzbergLib.ListPostProcessors();
    }
}
|
||||
}
|
||||
52
e2e/csharp/tests/RegistryOperationsTests.cs
generated
Normal file
52
e2e/csharp/tests/RegistryOperationsTests.cs
generated
Normal file
@@ -0,0 +1,52 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Net.Http;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
using System.Threading.Tasks;
|
||||
using Xunit;
|
||||
using Kreuzberg;
|
||||
using static Kreuzberg.KreuzbergLib;
|
||||
|
||||
namespace Kreuzberg
|
||||
{
|
||||
/// <summary>End-to-end tests for the <c>registry_operations</c> fixture category.</summary>
/// <remarks>
/// Each test requests the file extensions for one MIME type and passes when the
/// lookup does not throw; the returned extensions are not inspected.
/// </remarks>
public class RegistryOperationsTests
{
    // Shared serializer options (snake_case enum strings, defaults omitted on write).
    // Not referenced by the tests in this class.
    private static readonly JsonSerializerOptions ConfigOptions = new()
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>Extension lookup for the DOCX MIME type.</summary>
    [Fact]
    public void Test_ExtensionsDocx()
    {
        const string mimeType = "application/vnd.openxmlformats-officedocument.wordprocessingml.document";

        _ = KreuzbergLib.GetExtensionsForMime(mimeType);
    }

    /// <summary>Extension lookup for the HTML MIME type.</summary>
    [Fact]
    public void Test_ExtensionsHtml()
    {
        const string mimeType = "text/html";

        _ = KreuzbergLib.GetExtensionsForMime(mimeType);
    }

    /// <summary>Extension lookup for the PDF MIME type.</summary>
    [Fact]
    public void Test_ExtensionsPdf()
    {
        const string mimeType = "application/pdf";

        _ = KreuzbergLib.GetExtensionsForMime(mimeType);
    }
}
|
||||
}
|
||||
76
e2e/csharp/tests/RegistryTests.cs
generated
Normal file
76
e2e/csharp/tests/RegistryTests.cs
generated
Normal file
@@ -0,0 +1,76 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Net.Http;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
using System.Threading.Tasks;
|
||||
using Xunit;
|
||||
using Kreuzberg;
|
||||
using static Kreuzberg.KreuzbergLib;
|
||||
|
||||
namespace Kreuzberg
|
||||
{
|
||||
/// <summary>End-to-end tests for the <c>registry</c> fixture category.</summary>
/// <remarks>
/// Each test invokes one listing function and passes when the call does not throw;
/// the returned listing is not inspected.
/// </remarks>
public class RegistryTests
{
    // Shared serializer options (snake_case enum strings, defaults omitted on write).
    // Not referenced by the tests in this class.
    private static readonly JsonSerializerOptions ConfigOptions = new()
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>Lists document extractors.</summary>
    [Fact]
    public void Test_ListDocumentExtractors()
    {
        _ = KreuzbergLib.ListDocumentExtractors();
    }

    /// <summary>Lists embedding backends.</summary>
    [Fact]
    public void Test_ListEmbeddingBackends()
    {
        _ = KreuzbergLib.ListEmbeddingBackends();
    }

    /// <summary>Lists OCR backends.</summary>
    [Fact]
    public void Test_ListOcrBackends()
    {
        _ = KreuzbergLib.ListOcrBackends();
    }

    /// <summary>Lists post-processors.</summary>
    [Fact]
    public void Test_ListPostProcessors()
    {
        _ = KreuzbergLib.ListPostProcessors();
    }

    /// <summary>Lists renderers.</summary>
    [Fact]
    public void Test_ListRenderers()
    {
        _ = KreuzbergLib.ListRenderers();
    }

    /// <summary>Lists validators.</summary>
    [Fact]
    public void Test_ListValidators()
    {
        _ = KreuzbergLib.ListValidators();
    }
}
|
||||
}
|
||||
43
e2e/csharp/tests/RendererManagementTests.cs
generated
Normal file
43
e2e/csharp/tests/RendererManagementTests.cs
generated
Normal file
@@ -0,0 +1,43 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Net.Http;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
using System.Threading.Tasks;
|
||||
using Xunit;
|
||||
using Kreuzberg;
|
||||
using static Kreuzberg.KreuzbergLib;
|
||||
|
||||
namespace Kreuzberg
|
||||
{
|
||||
/// <summary>End-to-end tests for the <c>renderer_management</c> fixture category.</summary>
public class RendererManagementTests
{
    // Shared serializer options (snake_case enum strings, defaults omitted on write).
    // Not referenced by the tests in this class.
    private static readonly JsonSerializerOptions ConfigOptions = new()
    {
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
        Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
    };

    /// <summary>Clears all renderers; passes when the call does not throw.</summary>
    [Fact]
    public void Test_RenderersClear()
    {
        // NOTE(review): the fixture description mentions verifying that the list is
        // empty afterwards, but no such check is generated here — confirm intent.
        KreuzbergLib.ClearRenderers();
    }

    /// <summary>Lists the registered renderers; the returned value is not inspected.</summary>
    [Fact]
    public void Test_RenderersList()
    {
        _ = KreuzbergLib.ListRenderers();
    }
}
|
||||
}
|
||||
120
e2e/csharp/tests/SmokeTests.cs
generated
Normal file
120
e2e/csharp/tests/SmokeTests.cs
generated
Normal file
@@ -0,0 +1,120 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Net.Http;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
using System.Threading.Tasks;
|
||||
using Xunit;
|
||||
using Kreuzberg;
|
||||
using static Kreuzberg.KreuzbergLib;
|
||||
|
||||
namespace Kreuzberg
|
||||
{
|
||||
/// <summary>E2e tests for category: smoke.</summary>
|
||||
public class SmokeTests
|
||||
{
|
||||
private static readonly JsonSerializerOptions ConfigOptions = new() { Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) }, DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault };
|
||||
|
||||
/// <summary>
/// Extracts a PNG fixture from bytes using a config parsed from "{}" and checks the
/// reported MIME type, a minimum content length of 1, and that the content contains
/// at least one of "Hello", "World", "hello" or "world".
/// </summary>
[Fact]
public async Task Test_OcrImagePng()
{
    byte[] imageBytes = System.IO.File.ReadAllBytes("images/test_hello_world.png");
    var extractionConfig = ExtractionConfig.FromJson("{}");

    var result = await KreuzbergLib.ExtractBytesAsync(imageBytes, "image/png", extractionConfig);

    Assert.Equal("image/png", result.MimeType!.Trim());
    Assert.True(result.Content.Length >= 1, "expected length >= 1");

    // Any single keyword is enough; candidates are probed in their original order.
    var content = result.Content.ToString();
    var keywords = new[] { "Hello", "World", "hello", "world" };
    var containsAny = keywords.Any(keyword => content.Contains(keyword));
    Assert.True(containsAny, "expected to contain at least one of the specified values");
}
|
||||
|
||||
[Fact]
|
||||
public async Task Test_SmokeDocxBasic()
|
||||
{
|
||||
// Smoke test: DOCX with formatted text
|
||||
var result = await KreuzbergLib.ExtractFileAsync("docx/fake.docx", "application/vnd.openxmlformats-officedocument.wordprocessingml.document", new ExtractionConfig());
|
||||
Assert.Equal("application/vnd.openxmlformats-officedocument.wordprocessingml.document", result.MimeType!.Trim());
|
||||
Assert.True(result.Content.Length >= 20, "expected length >= 20");
|
||||
Assert.True(result.Content.ToString().Contains("Lorem") || result.Content.ToString().Contains("ipsum") || result.Content.ToString().Contains("document") || result.Content.ToString().Contains("text"), "expected to contain at least one of the specified values");
|
||||
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task Test_SmokeHtmlBasic()
|
||||
{
|
||||
// Smoke test: HTML table extraction
|
||||
var result = await KreuzbergLib.ExtractFileAsync("html/simple_table.html", "text/html", new ExtractionConfig());
|
||||
Assert.Equal("text/html", result.MimeType!.Trim());
|
||||
Assert.True(result.Content.Length >= 10, "expected length >= 10");
|
||||
Assert.True(result.Content.ToString().Contains("Sample Data Table") || result.Content.ToString().Contains("Laptop") || result.Content.ToString().Contains("Electronics") || result.Content.ToString().Contains("Product"), "expected to contain at least one of the specified values");
|
||||
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task Test_SmokeImagePng()
|
||||
{
|
||||
// Smoke test: PNG image (without OCR, metadata only)
|
||||
var result = await KreuzbergLib.ExtractFileAsync("images/sample.png", null, new ExtractionConfig { DisableOcr = true });
|
||||
Assert.Equal("image/png", result.MimeType!.Trim());
|
||||
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task Test_SmokeJsonBasic()
|
||||
{
|
||||
// Smoke test: JSON file extraction
|
||||
var result = await KreuzbergLib.ExtractFileAsync("json/simple.json", "application/json", new ExtractionConfig());
|
||||
Assert.Equal("application/json", result.MimeType!.Trim());
|
||||
Assert.True(result.Content.Length >= 5, "expected length >= 5");
|
||||
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task Test_SmokePdfBasic()
|
||||
{
|
||||
// Smoke test: PDF with simple text extraction
|
||||
var result = await KreuzbergLib.ExtractFileAsync("pdf/fake_memo.pdf", "application/pdf", new ExtractionConfig());
|
||||
Assert.Equal("application/pdf", result.MimeType!.Trim());
|
||||
Assert.True(result.Content.Length >= 50, "expected length >= 50");
|
||||
Assert.True(result.Content.ToString().Contains("May 5, 2023") || result.Content.ToString().Contains("To Whom it May Concern"), "expected to contain at least one of the specified values");
|
||||
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task Test_SmokeTxtBasic()
|
||||
{
|
||||
// Smoke test: Plain text file
|
||||
var result = await KreuzbergLib.ExtractFileAsync("text/report.txt", "text/plain", new ExtractionConfig());
|
||||
Assert.Equal("text/plain", result.MimeType!.Trim());
|
||||
Assert.True(result.Content.Length >= 5, "expected length >= 5");
|
||||
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task Test_SmokeXlsxBasic()
|
||||
{
|
||||
// Smoke test: XLSX with basic spreadsheet data including tables
|
||||
var result = await KreuzbergLib.ExtractFileAsync("xlsx/stanley_cups.xlsx", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", new ExtractionConfig());
|
||||
Assert.Equal("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", result.MimeType!.Trim());
|
||||
Assert.True(result.Content.Length >= 100, "expected length >= 100");
|
||||
Assert.Contains("team", result.Content.ToString().ToLower());
|
||||
Assert.Contains("location", result.Content.ToString().ToLower());
|
||||
Assert.Contains("stanley cups", result.Content.ToString().ToLower());
|
||||
Assert.Contains("blues", result.Content.ToString().ToLower());
|
||||
Assert.Contains("flyers", result.Content.ToString().ToLower());
|
||||
Assert.Contains("maple leafs", result.Content.ToString().ToLower());
|
||||
Assert.Contains("stl", result.Content.ToString().ToLower());
|
||||
Assert.Contains("phi", result.Content.ToString().ToLower());
|
||||
Assert.Contains("tor", result.Content.ToString().ToLower());
|
||||
// skipped: field 'tables' not available on result type // skipped: field 'metadata.format.excel.sheet_count' not available on result type // skipped: field 'metadata.format.excel.sheet_names' not available on result type
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
43
e2e/csharp/tests/ValidatorManagementTests.cs
generated
Normal file
43
e2e/csharp/tests/ValidatorManagementTests.cs
generated
Normal file
@@ -0,0 +1,43 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Net.Http;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
using System.Threading.Tasks;
|
||||
using Xunit;
|
||||
using Kreuzberg;
|
||||
using static Kreuzberg.KreuzbergLib;
|
||||
|
||||
namespace Kreuzberg
{
    /// <summary>
    /// End-to-end checks for the <c>validator_management</c> fixture category.
    /// </summary>
    public class ValidatorManagementTests
    {
        // Serializer settings for config payloads (snake_case enum strings, defaults omitted
        // on write). None of the tests in this class reference it.
        private static readonly JsonSerializerOptions ConfigOptions = new()
        {
            Converters = { new JsonStringEnumConverter(JsonNamingPolicy.SnakeCaseLower) },
            DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingDefault,
        };

        /// <summary>Clearing the validator registry must complete without throwing.</summary>
        /// <remarks>
        /// The fixture description also mentions verifying that the list is empty, but this
        /// test stops after the clear call and asserts nothing about the list.
        /// </remarks>
        [Fact]
        public void Test_ValidatorsClear()
        {
            KreuzbergLib.ClearValidators();
        }

        /// <summary>Listing the registered validators must complete without throwing.</summary>
        [Fact]
        public void Test_ValidatorsList()
        {
            // Only the call itself is exercised; the returned value is discarded.
            _ = KreuzbergLib.ListValidators();
        }
    }
}
|
||||
4
e2e/dart/dart_test.yaml
generated
Normal file
4
e2e/dart/dart_test.yaml
generated
Normal file
@@ -0,0 +1,4 @@
|
||||
# Generated by alef — DO NOT EDIT.
|
||||
# Run test files sequentially to avoid overwhelming the SUT with
|
||||
# concurrent keep-alive connections.
|
||||
concurrency: 1
|
||||
14
e2e/dart/pubspec.yaml
generated
Normal file
14
e2e/dart/pubspec.yaml
generated
Normal file
@@ -0,0 +1,14 @@
|
||||
name: e2e_dart
|
||||
version: 0.1.0
|
||||
publish_to: none
|
||||
|
||||
environment:
|
||||
sdk: ">=3.11.0 <4.0.0"
|
||||
|
||||
dependencies:
|
||||
kreuzberg:
|
||||
path: ../../packages/dart
|
||||
|
||||
dev_dependencies:
|
||||
test: ^1.25.0
|
||||
http: ^1.2.0
|
||||
68
e2e/dart/test/async_test.dart
generated
Normal file
68
e2e/dart/test/async_test.dart
generated
Normal file
@@ -0,0 +1,68 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
// ignore_for_file: unused_local_variable
|
||||
|
||||
import 'package:test/test.dart';
|
||||
import 'dart:io';
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
import 'package:kreuzberg/src/kreuzberg_bridge_generated/frb_generated.dart' show RustLib;
|
||||
|
||||
// E2e tests for category: async
|
||||
|
||||
/// Renders [value] as text for e2e comparisons.
///
/// `null` becomes the empty string. A value whose `toString()` has exactly two
/// dot-separated segments is treated as an enum ('EnumName.variantName'): only
/// the variant segment is kept, converted from camelCase to snake_case for
/// serde compatibility (e.g. 'toolCalls' -> 'tool_calls'). Anything else is
/// returned as its plain `toString()`.
String _alefE2eText(Object? value) {
  if (value == null) return '';
  final text = value.toString();
  // Exactly one '.' in the text <=> splitting on '.' yields two segments.
  final segments = text.split('.');
  return segments.length == 2 ? _camelToSnake(segments[1]) : text;
}
|
||||
|
||||
/// Converts a camelCase identifier to snake_case.
///
/// Each ASCII uppercase letter is lowercased and, unless it is the very first
/// character, prefixed with an underscore: 'toolCalls' -> 'tool_calls'.
/// All other characters are copied through unchanged.
String _camelToSnake(String camel) {
  return camel.replaceAllMapped(RegExp(r'[A-Z]'), (match) {
    final lowered = match.group(0)!.toLowerCase();
    return match.start > 0 ? '_$lowered' : lowered;
  });
}
|
||||
|
||||
/// Registers the e2e tests for the `async` category.
void main() {
  // One-time setup: start the Rust bridge, then switch the working directory
  // to the fixtures root (FIXTURES_DIR, or ../../test_documents when unset) if
  // that directory exists, so the relative fixture paths below resolve.
  setUpAll(() async {
    await RustLib.init();
    final fixturesDir = Directory(
      Platform.environment['FIXTURES_DIR'] ?? '../../test_documents',
    );
    if (fixturesDir.existsSync()) {
      Directory.current = fixturesDir;
    }
  });

  test('Async extract_bytes call on PDF document', () async {
    final pdfBytes = File('pdf/fake_memo.pdf').readAsBytesSync();
    final extraction = await KreuzbergBridge.extractBytes(pdfBytes, 'application/pdf');

    expect(extraction.mimeType.toString().trim(), equals('application/pdf'));
    expect(extraction.content.length, greaterThanOrEqualTo(50));
  });

  test('extract_bytes empty MIME async', () async {
    // An empty MIME string must make the extraction fail.
    final textBytes = File('text/plain.txt').readAsBytesSync();
    await expectLater(
      KreuzbergBridge.extractBytes(textBytes, ''),
      throwsA(anything),
    );
  });

  test('extract_bytes unsupported MIME async', () async {
    // A MIME type the library does not know must make the extraction fail.
    final textBytes = File('text/plain.txt').readAsBytesSync();
    await expectLater(
      KreuzbergBridge.extractBytes(textBytes, 'application/x-nonexistent'),
      throwsA(anything),
    );
  });
}
|
||||
71
e2e/dart/test/batch_test.dart
generated
Normal file
71
e2e/dart/test/batch_test.dart
generated
Normal file
@@ -0,0 +1,71 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
// ignore_for_file: unused_local_variable
|
||||
|
||||
import 'dart:convert';
import 'dart:io';

import 'package:kreuzberg/kreuzberg.dart';
import 'package:kreuzberg/src/kreuzberg_bridge_generated/frb_generated.dart' show RustLib;
import 'package:test/test.dart';
|
||||
|
||||
// E2e tests for category: batch
|
||||
|
||||
/// Turns [value] into the text form used by e2e assertions.
///
/// Returns '' for `null`. When the value's `toString()` contains exactly one
/// '.', it is assumed to be an enum rendered as 'EnumName.variantName'; the
/// part after the dot is returned in snake_case (serde style, e.g.
/// 'toolCalls' -> 'tool_calls'). Otherwise the `toString()` result is
/// returned untouched.
String _alefE2eText(Object? value) {
  if (value == null) return '';
  final rendered = value.toString();
  final dot = rendered.indexOf('.');
  // No dot at all, or more than one dot: not the 'EnumName.variantName' shape.
  if (dot < 0 || dot != rendered.lastIndexOf('.')) return rendered;
  return _camelToSnake(rendered.substring(dot + 1));
}
|
||||
|
||||
/// Converts a camelCase identifier to snake_case.
///
/// ASCII uppercase letters are replaced by their lowercase form; an underscore
/// is inserted in front of each one except when it is the first character
/// (e.g. 'toolCalls' -> 'tool_calls', 'stop' -> 'stop').
String _camelToSnake(String camel) {
  const upperA = 0x41; // 'A'
  const upperZ = 0x5A; // 'Z'
  const caseOffset = 0x20; // distance from 'A' to 'a'

  final snake = StringBuffer();
  final units = camel.codeUnits;
  for (var pos = 0; pos < units.length; pos++) {
    final unit = units[pos];
    final isUpper = unit >= upperA && unit <= upperZ;
    if (isUpper && pos > 0) snake.write('_');
    snake.writeCharCode(isUpper ? unit + caseOffset : unit);
  }
  return snake.toString();
}
|
||||
|
||||
/// Registers the e2e tests for the `batch` category.
///
/// None of the tests assert on the returned value; each one only requires the
/// batch call to complete without throwing.
void main() {
  setUpAll(() async {
    await RustLib.init();
    // Several tests below pass fixture-relative paths such as
    // 'pdf/fake_memo.pdf'. Switch into the fixtures root (FIXTURES_DIR, or
    // ../../test_documents when unset) so those paths resolve even when this
    // file is run on its own; skipped silently if the directory is absent.
    final _testDocs = Platform.environment['FIXTURES_DIR'] ?? '../../test_documents';
    final _dir = Directory(_testDocs);
    if (_dir.existsSync()) Directory.current = _dir;
  });

  test('Extract text from multiple files asynchronously', () async {
    final result = await KreuzbergBridge.batchExtractFiles([
      BatchFileItem(path: 'pdf/fake_memo.pdf'),
      BatchFileItem(path: 'text/fake_text.txt'),
    ]);
  });

  test('batch_extract_file async nonexistent', () async {
    final result = await KreuzbergBridge.batchExtractFiles([
      BatchFileItem(path: '/nonexistent/a.pdf'),
    ]);
  });

  test('batch_extract_file_sync nonexistent', () async {
    final result = await KreuzbergBridge.batchExtractFilesSync([
      BatchFileItem(path: '/nonexistent/a.pdf'),
      BatchFileItem(path: '/nonexistent/b.txt'),
    ]);
  });

  test('batch_extract_file_sync mixed', () async {
    final result = await KreuzbergBridge.batchExtractFilesSync([
      BatchFileItem(path: 'text/plain.txt'),
      BatchFileItem(path: '/nonexistent/missing.pdf'),
    ]);
  });

  test('Extract text from multiple files synchronously', () async {
    final result = await KreuzbergBridge.batchExtractFilesSync([
      BatchFileItem(path: 'pdf/fake_memo.pdf'),
      BatchFileItem(path: 'text/fake_text.txt'),
    ]);
  });
}
|
||||
62
e2e/dart/test/code_test.dart
generated
Normal file
62
e2e/dart/test/code_test.dart
generated
Normal file
@@ -0,0 +1,62 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
// ignore_for_file: unused_local_variable
|
||||
|
||||
import 'package:test/test.dart';
|
||||
import 'dart:io';
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
import 'package:kreuzberg/src/kreuzberg_bridge_generated/frb_generated.dart' show RustLib;
|
||||
|
||||
// E2e tests for category: code
|
||||
|
||||
/// Produces the comparison text for [value] in e2e assertions.
///
/// * `null` -> ''.
/// * A `toString()` made of exactly two dot-separated parts is taken to be an
///   enum ('EnumName.variantName'); the variant part is returned in
///   snake_case for serde compatibility ('toolCalls' -> 'tool_calls').
/// * Everything else -> the unmodified `toString()`.
String _alefE2eText(Object? value) {
  if (value == null) return '';
  final rendered = value.toString();
  return switch (rendered.split('.')) {
    [_, final variant] => _camelToSnake(variant),
    _ => rendered,
  };
}
|
||||
|
||||
/// Converts a camelCase identifier to snake_case.
///
/// Walks the string one character at a time: ASCII uppercase letters are
/// emitted lowercased, preceded by '_' unless they sit at index 0; every other
/// character is emitted as-is ('toolCalls' -> 'tool_calls').
String _camelToSnake(String camel) {
  final uppercase = RegExp(r'[A-Z]');
  final pieces = <String>[];
  for (var i = 0; i < camel.length; i++) {
    final ch = camel[i];
    if (!uppercase.hasMatch(ch)) {
      pieces.add(ch);
      continue;
    }
    final lowered = ch.toLowerCase();
    pieces.add(i == 0 ? lowered : '_$lowered');
  }
  return pieces.join();
}
|
||||
|
||||
/// Registers the e2e tests for the `code` category.
void main() {
  // Start the Rust bridge once, then make the fixtures root the working
  // directory (FIXTURES_DIR, falling back to ../../test_documents) when it
  // exists, so 'code/script.sh' below resolves.
  setUpAll(() async {
    await RustLib.init();
    final fixturesDir = Directory(
      Platform.environment['FIXTURES_DIR'] ?? '../../test_documents',
    );
    if (fixturesDir.existsSync()) {
      Directory.current = fixturesDir;
    }
  });

  test('Test language detection from shebang line via bytes input', () async {
    const sourceMime = 'text/x-source-code';
    final extraction = await KreuzbergBridge.extractFileSync('code/script.sh', sourceMime);

    expect(extraction.mimeType.toString().trim(), equals(sourceMime));
    expect(extraction.content.length, greaterThanOrEqualTo(10));
    // The extracted script text must mention both of these words.
    expect(extraction.content, contains('build'));
    expect(extraction.content, contains('clean'));
  });
}
|
||||
184
e2e/dart/test/contract_test.dart
generated
Normal file
184
e2e/dart/test/contract_test.dart
generated
Normal file
@@ -0,0 +1,184 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
// ignore_for_file: unused_local_variable
|
||||
|
||||
import 'package:test/test.dart';
|
||||
import 'dart:io';
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
import 'package:kreuzberg/src/kreuzberg_bridge_generated/frb_generated.dart' show RustLib;
|
||||
|
||||
// E2e tests for category: contract
|
||||
|
||||
/// Renders [value] as text for e2e comparisons.
///
/// `null` becomes the empty string. A value whose `toString()` has exactly two
/// dot-separated segments is treated as an enum ('EnumName.variantName'): only
/// the variant segment is kept, converted from camelCase to snake_case for
/// serde compatibility (e.g. 'toolCalls' -> 'tool_calls'). Anything else is
/// returned as its plain `toString()`.
String _alefE2eText(Object? value) {
  if (value == null) return '';
  final text = value.toString();
  // Exactly one '.' in the text <=> splitting on '.' yields two segments.
  final segments = text.split('.');
  return segments.length == 2 ? _camelToSnake(segments[1]) : text;
}
|
||||
|
||||
/// Converts a camelCase identifier to snake_case.
///
/// Each ASCII uppercase letter is lowercased and, unless it is the very first
/// character, prefixed with an underscore: 'toolCalls' -> 'tool_calls'.
/// All other characters are copied through unchanged.
String _camelToSnake(String camel) {
  return camel.replaceAllMapped(RegExp(r'[A-Z]'), (match) {
    final lowered = match.group(0)!.toLowerCase();
    return match.start > 0 ? '_$lowered' : lowered;
  });
}
|
||||
|
||||
/// Registers the e2e tests for the `contract` category.
///
/// Every test feeds a fixture through the bridge and checks the reported MIME
/// type and/or a minimum content length. Assertions on fields that the Dart
/// result type does not expose are kept as `skipped` notes.
void main() {
  const pdfMime = 'application/pdf';
  const docxMime = 'application/vnd.openxmlformats-officedocument.wordprocessingml.document';
  const memoPdf = 'pdf/fake_memo.pdf';
  const markdownOutputJson = '{"output_format":"markdown"}';

  // Explicit unified / plain-text configuration shared by several tests; only
  // the document-structure flag differs between them.
  ExtractionConfig explicitConfig({required bool includeDocumentStructure}) => ExtractionConfig(
        useCache: true,
        enableQualityProcessing: true,
        forceOcr: false,
        disableOcr: false,
        resultFormat: ResultFormat.unified,
        outputFormat: OutputFormat.plain(),
        includeDocumentStructure: includeDocumentStructure,
        useLayoutForMarkdown: false,
        maxArchiveDepth: 3,
      );

  // Start the Rust bridge once, then make the fixtures root the working
  // directory (FIXTURES_DIR, falling back to ../../test_documents) when it
  // exists, so the relative fixture paths below resolve.
  setUpAll(() async {
    await RustLib.init();
    final fixturesDir = Directory(
      Platform.environment['FIXTURES_DIR'] ?? '../../test_documents',
    );
    if (fixturesDir.existsSync()) {
      Directory.current = fixturesDir;
    }
  });

  test('Tests async batch bytes extraction API (batch_extract_bytes)', () async {
    final bytes = File(memoPdf).readAsBytesSync();
    final extraction = await KreuzbergBridge.extractBytes(bytes, pdfMime);

    expect(extraction.mimeType.toString().trim(), equals(pdfMime));
    expect(extraction.content.length, greaterThanOrEqualTo(10));
    expect(
      extraction.content.contains('May 5, 2023') || extraction.content.contains('Mallori'),
      isTrue,
    );
  });

  test('Tests async batch bytes extraction with per-file configs (batch_extract_bytes with file_configs parameter)', () async {
    final config = await createExtractionConfigFromJson(json: markdownOutputJson);
    final bytes = File(memoPdf).readAsBytesSync();
    final extraction = await KreuzbergBridge.extractBytes(bytes, pdfMime, config);

    expect(extraction.mimeType.toString().trim(), equals(pdfMime));
    expect(extraction.content.length, greaterThanOrEqualTo(10));
    // skipped: field 'metadata.output_format' not available on dart result type
  });

  test('Tests async batch file extraction API (batch_extract_file)', () async {
    final bytes = File(memoPdf).readAsBytesSync();
    final extraction = await KreuzbergBridge.extractBytes(bytes, pdfMime);

    expect(extraction.mimeType.toString().trim(), equals(pdfMime));
    expect(extraction.content.length, greaterThanOrEqualTo(10));
    expect(
      extraction.content.contains('May 5, 2023') || extraction.content.contains('Mallori'),
      isTrue,
    );
  });

  test('Tests async batch file extraction with per-file configs (batch_extract_files with file_configs parameter)', () async {
    final config = await createExtractionConfigFromJson(json: markdownOutputJson);
    final bytes = File(memoPdf).readAsBytesSync();
    final extraction = await KreuzbergBridge.extractBytes(bytes, pdfMime, config);

    expect(extraction.mimeType.toString().trim(), equals(pdfMime));
    expect(extraction.content.length, greaterThanOrEqualTo(10));
    // skipped: field 'metadata.output_format' not available on dart result type
  });

  test('Tests async bytes extraction API (extract_bytes)', () async {
    final bytes = File(memoPdf).readAsBytesSync();
    final extraction = await KreuzbergBridge.extractBytes(bytes, pdfMime);

    expect(extraction.mimeType.toString().trim(), equals(pdfMime));
    expect(extraction.content.length, greaterThanOrEqualTo(10));
    expect(
      extraction.content.contains('May 5, 2023') || extraction.content.contains('Mallori'),
      isTrue,
    );
  });

  test('Tests async file extraction API (extract_file)', () async {
    final bytes = File(memoPdf).readAsBytesSync();
    final extraction = await KreuzbergBridge.extractBytes(bytes, pdfMime);

    expect(extraction.mimeType.toString().trim(), equals(pdfMime));
    expect(extraction.content.length, greaterThanOrEqualTo(10));
    expect(
      extraction.content.contains('May 5, 2023') || extraction.content.contains('Mallori'),
      isTrue,
    );
  });

  test('Tests markdown chunker prepends heading hierarchy to chunk content', () async {
    final config = await createExtractionConfigFromJson(
      json: '{"chunking":{"chunker_type":"markdown","max_chars":300,"max_overlap":50,"prepend_heading_context":true}}',
    );
    final bytes = File('markdown/extraction_test.md').readAsBytesSync();
    final extraction = await KreuzbergBridge.extractBytesSync(bytes, 'text/markdown', config);

    expect(extraction.content.length, greaterThanOrEqualTo(10));
    // skipped: field 'chunks' not available on dart result type
    // skipped: field 'chunks_have_content' not available on dart result type
    // skipped: field 'chunks_have_heading_context' not available on dart result type
    // skipped: field 'first_chunk_starts_with_heading' not available on dart result type
  });

  test('Tests document structure with DOCX heading-driven nesting', () async {
    final bytes = File('docx/fake.docx').readAsBytesSync();
    final extraction = await KreuzbergBridge.extractBytesSync(
      bytes,
      docxMime,
      explicitConfig(includeDocumentStructure: true),
    );

    expect(extraction.mimeType.toString().trim(), equals(docxMime));
    // skipped: field 'document' not available on dart result type
    // skipped: field 'document.nodes' not available on dart result type
  });

  test('Tests element-based result format with element type assertions on DOCX', () async {
    final config = await createExtractionConfigFromJson(json: '{"result_format":"element_based"}');
    final bytes = File('docx/unit_test_headers.docx').readAsBytesSync();
    final extraction = await KreuzbergBridge.extractBytesSync(bytes, docxMime, config);

    expect(extraction.mimeType.contains(docxMime), isTrue);
    // skipped: field 'elements' not available on dart result type
  });

  test('Tests that extraction_timeout_secs config field is accepted and does not affect fast extractions', () async {
    final bytes = File(memoPdf).readAsBytesSync();
    final extraction = await KreuzbergBridge.extractBytesSync(
      bytes,
      pdfMime,
      explicitConfig(includeDocumentStructure: false),
    );

    expect(extraction.mimeType.toString().trim(), equals(pdfMime));
    expect(extraction.content.length, greaterThanOrEqualTo(10));
  });

  test('Tests keyword extraction via YAKE algorithm', () async {
    final config = await createExtractionConfigFromJson(
      json: '{"keywords":{"algorithm":"yake","max_keywords":10}}',
    );
    final bytes = File(memoPdf).readAsBytesSync();
    final extraction = await KreuzbergBridge.extractBytesSync(bytes, pdfMime, config);

    expect(extraction.mimeType.toString().trim(), equals(pdfMime));
    expect(extraction.content.length, greaterThanOrEqualTo(10));
    // skipped: field 'keywords' not available on dart result type
    // skipped: field 'keywords' not available on dart result type
  });

  test('Tests page extraction and page marker configuration', () async {
    final config = await createExtractionConfigFromJson(
      json: '{"pages":{"extract_pages":true,"insert_page_markers":true}}',
    );
    final bytes = File(memoPdf).readAsBytesSync();
    final extraction = await KreuzbergBridge.extractBytesSync(bytes, pdfMime, config);

    expect(extraction.mimeType.toString().trim(), equals(pdfMime));
    expect(extraction.content.length, greaterThanOrEqualTo(10));
    // With page markers enabled the content is expected to contain 'PAGE'.
    expect(extraction.content.contains('PAGE'), isTrue);
  });

  test('Tests quality scoring produces a score value in [0.0, 1.0]', () async {
    final bytes = File(memoPdf).readAsBytesSync();
    final extraction = await KreuzbergBridge.extractBytesSync(
      bytes,
      pdfMime,
      explicitConfig(includeDocumentStructure: false),
    );

    expect(extraction.mimeType.toString().trim(), equals(pdfMime));
    expect(extraction.content.length, greaterThanOrEqualTo(10));
    // skipped: field 'quality_score' not available on dart result type
    // skipped: field 'quality_score' not available on dart result type
    // skipped: field 'quality_score' not available on dart result type
  });

  test('Tests archive extraction with custom security limits', () async {
    final config = await createExtractionConfigFromJson(
      json: '{"security_limits":{"max_archive_size":104857600,"max_compression_ratio":50,"max_files_in_archive":100}}',
    );
    final bytes = File('archives/documents.zip').readAsBytesSync();
    final extraction = await KreuzbergBridge.extractBytesSync(bytes, 'application/zip', config);

    // Either spelling of the ZIP MIME type is accepted.
    expect(
      extraction.mimeType.contains('application/zip') ||
          extraction.mimeType.contains('application/x-zip-compressed'),
      isTrue,
    );
    expect(extraction.content.length, greaterThanOrEqualTo(10));
  });

  test('Tests tree-sitter configuration round-trip', () async {
    final config = await createExtractionConfigFromJson(
      json: '{"tree_sitter":{"groups":["web"],"languages":["python","rust"],"process":{"comments":false,"diagnostics":false,"docstrings":false,"exports":true,"imports":true,"structure":true,"symbols":false}}}',
    );
    final extraction = await KreuzbergBridge.extractFileSync('code/hello.py', 'text/x-source-code', config);

    expect(extraction.mimeType.toString().trim(), equals('text/x-source-code'));
    expect(extraction.content.length, greaterThanOrEqualTo(5));
  });

  test('Tests markdown output format via bytes extraction API', () async {
    final config = await createExtractionConfigFromJson(json: markdownOutputJson);
    final bytes = File(memoPdf).readAsBytesSync();
    final extraction = await KreuzbergBridge.extractBytesSync(bytes, pdfMime, config);

    expect(extraction.mimeType.toString().trim(), equals(pdfMime));
    expect(extraction.content.length, greaterThanOrEqualTo(10));
    // skipped: field 'metadata.output_format' not available on dart result type
  });

  test('Tests Markdown output format', () async {
    final config = await createExtractionConfigFromJson(json: markdownOutputJson);
    final bytes = File(memoPdf).readAsBytesSync();
    final extraction = await KreuzbergBridge.extractBytesSync(bytes, pdfMime, config);

    expect(extraction.mimeType.toString().trim(), equals(pdfMime));
    expect(extraction.content.length, greaterThanOrEqualTo(10));
    // skipped: field 'metadata.output_format' not available on dart result type
  });
}
|
||||
70
e2e/dart/test/detection_test.dart
generated
Normal file
70
e2e/dart/test/detection_test.dart
generated
Normal file
@@ -0,0 +1,70 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
// ignore_for_file: unused_local_variable
|
||||
|
||||
import 'package:test/test.dart';
|
||||
import 'dart:io';
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
import 'package:kreuzberg/src/kreuzberg_bridge_generated/frb_generated.dart' show RustLib;
|
||||
|
||||
// E2e tests for category: detection
|
||||
|
||||
/// Turns [value] into the text form used by e2e assertions.
///
/// Returns '' for `null`. When the value's `toString()` contains exactly one
/// '.', it is assumed to be an enum rendered as 'EnumName.variantName'; the
/// part after the dot is returned in snake_case (serde style, e.g.
/// 'toolCalls' -> 'tool_calls'). Otherwise the `toString()` result is
/// returned untouched.
String _alefE2eText(Object? value) {
  if (value == null) return '';
  final rendered = value.toString();
  final dot = rendered.indexOf('.');
  // No dot at all, or more than one dot: not the 'EnumName.variantName' shape.
  if (dot < 0 || dot != rendered.lastIndexOf('.')) return rendered;
  return _camelToSnake(rendered.substring(dot + 1));
}
|
||||
|
||||
/// Converts a camelCase identifier to snake_case.
///
/// ASCII uppercase letters are replaced by their lowercase form; an underscore
/// is inserted in front of each one except when it is the first character
/// (e.g. 'toolCalls' -> 'tool_calls', 'stop' -> 'stop').
String _camelToSnake(String camel) {
  const upperA = 0x41; // 'A'
  const upperZ = 0x5A; // 'Z'
  const caseOffset = 0x20; // distance from 'A' to 'a'

  final snake = StringBuffer();
  final units = camel.codeUnits;
  for (var pos = 0; pos < units.length; pos++) {
    final unit = units[pos];
    final isUpper = unit >= upperA && unit <= upperZ;
    if (isUpper && pos > 0) snake.write('_');
    snake.writeCharCode(isUpper ? unit + caseOffset : unit);
  }
  return snake.toString();
}
|
||||
|
||||
/// Registers the e2e tests for the `detection` category.
///
/// The three byte-detection tests only require the call to complete; the
/// detected MIME type itself is not asserted.
void main() {
  // Start the Rust bridge once, then make the fixtures root the working
  // directory (FIXTURES_DIR, falling back to ../../test_documents) when it
  // exists, so the relative fixture paths below resolve.
  setUpAll(() async {
    await RustLib.init();
    final fixturesDir = Directory(
      Platform.environment['FIXTURES_DIR'] ?? '../../test_documents',
    );
    if (fixturesDir.existsSync()) {
      Directory.current = fixturesDir;
    }
  });

  test('Detect HTML MIME from bytes', () async {
    final htmlBytes = File('html/html.html').readAsBytesSync();
    await KreuzbergBridge.detectMimeTypeFromBytes(htmlBytes);
  });

  test('Detect PDF MIME type from bytes', () async {
    final pdfBytes = File('pdf/fake_memo.pdf').readAsBytesSync();
    await KreuzbergBridge.detectMimeTypeFromBytes(pdfBytes);
  });

  test('Detect PNG MIME type from bytes', () async {
    final pngBytes = File('images/test_hello_world.png').readAsBytesSync();
    await KreuzbergBridge.detectMimeTypeFromBytes(pngBytes);
  });

  test('get_extensions unknown MIME', () async {
    // Looking up extensions for an unknown MIME type must fail.
    await expectLater(
      KreuzbergBridge.getExtensionsForMime('application/x-totally-unknown'),
      throwsA(anything),
    );
  });
}
|
||||
58
e2e/dart/test/document_extractor_management_test.dart
generated
Normal file
58
e2e/dart/test/document_extractor_management_test.dart
generated
Normal file
@@ -0,0 +1,58 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
// ignore_for_file: unused_local_variable
|
||||
|
||||
import 'package:test/test.dart';
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
import 'package:kreuzberg/src/kreuzberg_bridge_generated/frb_generated.dart' show RustLib;
|
||||
|
||||
// E2e tests for category: document_extractor_management
|
||||
|
||||
/// Renders [value] as the text that e2e assertions compare against.
///
/// * `null` becomes the empty string.
/// * Strings and numbers are returned unchanged, so values such as
///   `'file.txt'` or `1.5` are never mistaken for an enum name.
/// * Enums are rendered as their variant name converted to snake_case for
///   serde compatibility (`toolCalls` -> `tool_calls`).
/// * Any other object whose `toString()` has the shape `Type.member` keeps the
///   legacy behaviour: the part after the dot, converted to snake_case.
/// * Everything else is returned as `toString()`.
String _alefE2eText(Object? value) {
  if (value == null) return '';
  // Plain text and numbers may legitimately contain a dot; return them as-is.
  if (value is String) return value;
  if (value is num) return value.toString();
  // Real enums: use the variant name directly instead of parsing toString().
  if (value is Enum) return _camelToSnake(value.name);

  final str = value.toString();
  final parts = str.split('.');
  if (parts.length == 2) {
    return _camelToSnake(parts[1]);
  }
  return str;
}
|
||||
|
||||
/// Converts a camelCase identifier to snake_case.
///
/// Every ASCII uppercase letter is lower-cased and, unless it is the very
/// first character, prefixed with an underscore
/// (`toolCalls` -> `tool_calls`, `stop` -> `stop`).
String _camelToSnake(String camel) {
  return camel.replaceAllMapped(RegExp(r'[A-Z]'), (match) {
    final lowered = match[0]!.toLowerCase();
    return match.start > 0 ? '_$lowered' : lowered;
  });
}
|
||||
|
||||
/// Registers the `document_extractor_management` e2e tests.
void main() {
  setUpAll(() => RustLib.init());

  // Neither test carries an expectation: finishing without an exception is
  // the pass criterion.
  test('Clear all document extractors and verify list is empty', () async {
    final cleared = await KreuzbergBridge.clearDocumentExtractors();
  });

  test('List all registered document extractors', () async {
    final extractors = await KreuzbergBridge.listDocumentExtractors();
  });
}
|
||||
68
e2e/dart/test/embed_async_pending_test.dart
generated
Normal file
68
e2e/dart/test/embed_async_pending_test.dart
generated
Normal file
@@ -0,0 +1,68 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
// ignore_for_file: unused_local_variable
|
||||
|
||||
import 'package:test/test.dart';
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
import 'package:kreuzberg/src/kreuzberg_bridge_generated/frb_generated.dart' show RustLib;
|
||||
import 'dart:convert';
|
||||
|
||||
// E2e tests for category: embed_async_pending
|
||||
|
||||
/// Renders [value] as the text that e2e assertions compare against.
///
/// * `null` becomes the empty string.
/// * Strings and numbers are returned unchanged, so values such as
///   `'file.txt'` or `1.5` are never mistaken for an enum name.
/// * Enums are rendered as their variant name converted to snake_case for
///   serde compatibility (`toolCalls` -> `tool_calls`).
/// * Any other object whose `toString()` has the shape `Type.member` keeps the
///   legacy behaviour: the part after the dot, converted to snake_case.
/// * Everything else is returned as `toString()`.
String _alefE2eText(Object? value) {
  if (value == null) return '';
  // Plain text and numbers may legitimately contain a dot; return them as-is.
  if (value is String) return value;
  if (value is num) return value.toString();
  // Real enums: use the variant name directly instead of parsing toString().
  if (value is Enum) return _camelToSnake(value.name);

  final str = value.toString();
  final parts = str.split('.');
  if (parts.length == 2) {
    return _camelToSnake(parts[1]);
  }
  return str;
}
|
||||
|
||||
/// Converts a camelCase identifier to snake_case.
///
/// Every ASCII uppercase letter is lower-cased and, unless it is the very
/// first character, prefixed with an underscore
/// (`toolCalls` -> `tool_calls`, `stop` -> `stop`).
String _camelToSnake(String camel) {
  return camel.replaceAllMapped(RegExp(r'[A-Z]'), (match) {
    final lowered = match[0]!.toLowerCase();
    return match.start > 0 ? '_$lowered' : lowered;
  });
}
|
||||
|
||||
/// Registers the `embed_async_pending` e2e tests.
void main() {
  setUpAll(() => RustLib.init());

  test('embed_texts_async: empty text list', () async {
    final embeddingConfig = await createEmbeddingConfigFromJson(json: '{}');
    final embeddings = await KreuzbergBridge.embedTextsAsync(<String>[], embeddingConfig);
    expect(embeddings.length, equals(0));
  });

  test('embed_texts_async: basic async embedding', () async {
    final embeddingConfig = await createEmbeddingConfigFromJson(json: '{}');
    final inputs = <String>['First', 'Second'];
    final embeddings = await KreuzbergBridge.embedTextsAsync(inputs, embeddingConfig);
    expect(embeddings.length, greaterThanOrEqualTo(2));
  });

  // No expectation here: finishing without an exception is the pass criterion.
  test('embed_texts_async: preset override', () async {
    final embeddingConfig = await createEmbeddingConfigFromJson(json: '{"model":{"name":"balanced","type":"preset"}}');
    final embeddings = await KreuzbergBridge.embedTextsAsync(<String>['Text'], embeddingConfig);
  });
}
|
||||
56
e2e/dart/test/embed_extra_test.dart
generated
Normal file
56
e2e/dart/test/embed_extra_test.dart
generated
Normal file
@@ -0,0 +1,56 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
// ignore_for_file: unused_local_variable
|
||||
|
||||
import 'package:test/test.dart';
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
import 'package:kreuzberg/src/kreuzberg_bridge_generated/frb_generated.dart' show RustLib;
|
||||
import 'dart:convert';
|
||||
|
||||
// E2e tests for category: embed_extra
|
||||
|
||||
/// Renders [value] as the text that e2e assertions compare against.
///
/// * `null` becomes the empty string.
/// * Strings and numbers are returned unchanged, so values such as
///   `'file.txt'` or `1.5` are never mistaken for an enum name.
/// * Enums are rendered as their variant name converted to snake_case for
///   serde compatibility (`toolCalls` -> `tool_calls`).
/// * Any other object whose `toString()` has the shape `Type.member` keeps the
///   legacy behaviour: the part after the dot, converted to snake_case.
/// * Everything else is returned as `toString()`.
String _alefE2eText(Object? value) {
  if (value == null) return '';
  // Plain text and numbers may legitimately contain a dot; return them as-is.
  if (value is String) return value;
  if (value is num) return value.toString();
  // Real enums: use the variant name directly instead of parsing toString().
  if (value is Enum) return _camelToSnake(value.name);

  final str = value.toString();
  final parts = str.split('.');
  if (parts.length == 2) {
    return _camelToSnake(parts[1]);
  }
  return str;
}
|
||||
|
||||
/// Converts a camelCase identifier to snake_case.
///
/// Every ASCII uppercase letter is lower-cased and, unless it is the very
/// first character, prefixed with an underscore
/// (`toolCalls` -> `tool_calls`, `stop` -> `stop`).
String _camelToSnake(String camel) {
  return camel.replaceAllMapped(RegExp(r'[A-Z]'), (match) {
    final lowered = match[0]!.toLowerCase();
    return match.start > 0 ? '_$lowered' : lowered;
  });
}
|
||||
|
||||
/// Registers the `embed_extra` e2e tests.
void main() {
  setUpAll(() => RustLib.init());

  // No expectation here: finishing without an exception is the pass criterion.
  test('Batch embed texts', () async {
    final embeddingConfig = await createEmbeddingConfigFromJson(json: '{"model":{"name":"balanced","type":"preset"}}');
    final inputs = <String>['Hello', 'World'];
    final embeddings = await KreuzbergBridge.embedTexts(inputs, embeddingConfig);
  });
}
|
||||
58
e2e/dart/test/embedding_backend_management_test.dart
generated
Normal file
58
e2e/dart/test/embedding_backend_management_test.dart
generated
Normal file
@@ -0,0 +1,58 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
// ignore_for_file: unused_local_variable
|
||||
|
||||
import 'package:test/test.dart';
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
import 'package:kreuzberg/src/kreuzberg_bridge_generated/frb_generated.dart' show RustLib;
|
||||
|
||||
// E2e tests for category: embedding_backend_management
|
||||
|
||||
/// Renders [value] as the text that e2e assertions compare against.
///
/// * `null` becomes the empty string.
/// * Strings and numbers are returned unchanged, so values such as
///   `'file.txt'` or `1.5` are never mistaken for an enum name.
/// * Enums are rendered as their variant name converted to snake_case for
///   serde compatibility (`toolCalls` -> `tool_calls`).
/// * Any other object whose `toString()` has the shape `Type.member` keeps the
///   legacy behaviour: the part after the dot, converted to snake_case.
/// * Everything else is returned as `toString()`.
String _alefE2eText(Object? value) {
  if (value == null) return '';
  // Plain text and numbers may legitimately contain a dot; return them as-is.
  if (value is String) return value;
  if (value is num) return value.toString();
  // Real enums: use the variant name directly instead of parsing toString().
  if (value is Enum) return _camelToSnake(value.name);

  final str = value.toString();
  final parts = str.split('.');
  if (parts.length == 2) {
    return _camelToSnake(parts[1]);
  }
  return str;
}
|
||||
|
||||
/// Converts a camelCase identifier to snake_case.
///
/// Every ASCII uppercase letter is lower-cased and, unless it is the very
/// first character, prefixed with an underscore
/// (`toolCalls` -> `tool_calls`, `stop` -> `stop`).
String _camelToSnake(String camel) {
  return camel.replaceAllMapped(RegExp(r'[A-Z]'), (match) {
    final lowered = match[0]!.toLowerCase();
    return match.start > 0 ? '_$lowered' : lowered;
  });
}
|
||||
|
||||
/// Registers the `embedding_backend_management` e2e tests.
void main() {
  setUpAll(() => RustLib.init());

  // Neither test carries an expectation: finishing without an exception is
  // the pass criterion.
  test('Clear all embedding backends and verify list is empty', () async {
    final cleared = await KreuzbergBridge.clearEmbeddingBackends();
  });

  test('List all registered embedding backends', () async {
    final backends = await KreuzbergBridge.listEmbeddingBackends();
  });
}
|
||||
75
e2e/dart/test/embeddings_test.dart
generated
Normal file
75
e2e/dart/test/embeddings_test.dart
generated
Normal file
@@ -0,0 +1,75 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
// ignore_for_file: unused_local_variable
|
||||
|
||||
import 'package:test/test.dart';
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
import 'package:kreuzberg/src/kreuzberg_bridge_generated/frb_generated.dart' show RustLib;
|
||||
import 'dart:convert';
|
||||
|
||||
// E2e tests for category: embeddings
|
||||
|
||||
/// Renders [value] as the text that e2e assertions compare against.
///
/// * `null` becomes the empty string.
/// * Strings and numbers are returned unchanged, so values such as
///   `'file.txt'` or `1.5` are never mistaken for an enum name.
/// * Enums are rendered as their variant name converted to snake_case for
///   serde compatibility (`toolCalls` -> `tool_calls`).
/// * Any other object whose `toString()` has the shape `Type.member` keeps the
///   legacy behaviour: the part after the dot, converted to snake_case.
/// * Everything else is returned as `toString()`.
String _alefE2eText(Object? value) {
  if (value == null) return '';
  // Plain text and numbers may legitimately contain a dot; return them as-is.
  if (value is String) return value;
  if (value is num) return value.toString();
  // Real enums: use the variant name directly instead of parsing toString().
  if (value is Enum) return _camelToSnake(value.name);

  final str = value.toString();
  final parts = str.split('.');
  if (parts.length == 2) {
    return _camelToSnake(parts[1]);
  }
  return str;
}
|
||||
|
||||
/// Converts a camelCase identifier to snake_case.
///
/// Every ASCII uppercase letter is lower-cased and, unless it is the very
/// first character, prefixed with an underscore
/// (`toolCalls` -> `tool_calls`, `stop` -> `stop`).
String _camelToSnake(String camel) {
  return camel.replaceAllMapped(RegExp(r'[A-Z]'), (match) {
    final lowered = match[0]!.toLowerCase();
    return match.start > 0 ? '_$lowered' : lowered;
  });
}
|
||||
|
||||
/// Registers the `embeddings` e2e tests.
void main() {
  setUpAll(() => RustLib.init());

  test('embed_texts: multilingual preset', () async {
    final embeddingConfig = await createEmbeddingConfigFromJson(json: '{"model":{"name":"multilingual","type":"preset"}}');
    final inputs = <String>['Hello world', 'Test'];
    final embeddings = await KreuzbergBridge.embedTexts(inputs, embeddingConfig);
    expect(embeddings.length, greaterThanOrEqualTo(2));
  });

  // The two lookups of the known preset carry no expectation: finishing
  // without an exception is the pass criterion.
  test('get_embedding_preset: known preset', () async {
    final preset = await KreuzbergBridge.getEmbeddingPreset('balanced');
  });

  test('get_embedding_preset: nominal case', () async {
    final preset = await KreuzbergBridge.getEmbeddingPreset('balanced');
  });

  test('get_embedding_preset: unknown preset fails', () async {
    final preset = await KreuzbergBridge.getEmbeddingPreset('nonexistent-xyz');
    expect(preset, anyOf(isNull, isEmpty));
  });

  test('list_embedding_presets: returns at least one', () async {
    final presets = await KreuzbergBridge.listEmbeddingPresets();
    expect(presets, isNotNull);
  });
}
|
||||
74
e2e/dart/test/error_test.dart
generated
Normal file
74
e2e/dart/test/error_test.dart
generated
Normal file
@@ -0,0 +1,74 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
// ignore_for_file: unused_local_variable
|
||||
|
||||
import 'package:test/test.dart';
|
||||
import 'dart:io';
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
import 'package:kreuzberg/src/kreuzberg_bridge_generated/frb_generated.dart' show RustLib;
|
||||
|
||||
// E2e tests for category: error
|
||||
|
||||
/// Renders [value] as the text that e2e assertions compare against.
///
/// * `null` becomes the empty string.
/// * Strings and numbers are returned unchanged, so values such as
///   `'file.txt'` or `1.5` are never mistaken for an enum name.
/// * Enums are rendered as their variant name converted to snake_case for
///   serde compatibility (`toolCalls` -> `tool_calls`).
/// * Any other object whose `toString()` has the shape `Type.member` keeps the
///   legacy behaviour: the part after the dot, converted to snake_case.
/// * Everything else is returned as `toString()`.
String _alefE2eText(Object? value) {
  if (value == null) return '';
  // Plain text and numbers may legitimately contain a dot; return them as-is.
  if (value is String) return value;
  if (value is num) return value.toString();
  // Real enums: use the variant name directly instead of parsing toString().
  if (value is Enum) return _camelToSnake(value.name);

  final str = value.toString();
  final parts = str.split('.');
  if (parts.length == 2) {
    return _camelToSnake(parts[1]);
  }
  return str;
}
|
||||
|
||||
/// Converts a camelCase identifier to snake_case.
///
/// Every ASCII uppercase letter is lower-cased and, unless it is the very
/// first character, prefixed with an underscore
/// (`toolCalls` -> `tool_calls`, `stop` -> `stop`).
String _camelToSnake(String camel) {
  return camel.replaceAllMapped(RegExp(r'[A-Z]'), (match) {
    final lowered = match[0]!.toLowerCase();
    return match.start > 0 ? '_$lowered' : lowered;
  });
}
|
||||
|
||||
/// Registers the `error` e2e tests.
void main() {
  setUpAll(() async {
    await RustLib.init();
    // The tests open fixtures through relative paths, so switch into the
    // fixtures directory whenever it exists.
    final fixturesPath = Platform.environment['FIXTURES_DIR'] ?? '../../test_documents';
    final fixturesDir = Directory(fixturesPath);
    if (fixturesDir.existsSync()) {
      Directory.current = fixturesDir;
    }
  });

  // No expectation here: finishing without an exception is the pass criterion.
  test('Graceful handling of empty bytes (should not error)', () async {
    final emptyBytes = File('text/empty.txt').readAsBytesSync();
    final result = await KreuzbergBridge.extractBytesSync(emptyBytes, 'text/plain');
  });

  test('Error when extracting with empty MIME type', () async {
    final plainBytes = File('text/plain.txt').readAsBytesSync();
    await expectLater(KreuzbergBridge.extractBytesSync(plainBytes, ''), throwsA(anything));
  });

  test('extract_bytes force+disable OCR', () async {
    final textBytes = File('text/fake_text.txt').readAsBytesSync();
    // forceOcr and disableOcr are both set; the call is expected to throw.
    final conflictingConfig = ExtractionConfig(
      useCache: true,
      enableQualityProcessing: true,
      forceOcr: true,
      disableOcr: true,
      resultFormat: ResultFormat.unified,
      outputFormat: OutputFormat.plain(),
      includeDocumentStructure: false,
      useLayoutForMarkdown: false,
      maxArchiveDepth: 3,
    );
    await expectLater(KreuzbergBridge.extractBytesSync(textBytes, 'text/plain', conflictingConfig), throwsA(anything));
  });

  test('Error when extracting with invalid MIME type format', () async {
    final plainBytes = File('text/plain.txt').readAsBytesSync();
    await expectLater(KreuzbergBridge.extractBytesSync(plainBytes, 'not-a-mime'), throwsA(anything));
  });

  test('Error when extracting with unsupported MIME type', () async {
    final plainBytes = File('text/plain.txt').readAsBytesSync();
    await expectLater(KreuzbergBridge.extractBytesSync(plainBytes, 'application/x-nonexistent'), throwsA(anything));
  });
}
|
||||
79
e2e/dart/test/format_specific_test.dart
generated
Normal file
79
e2e/dart/test/format_specific_test.dart
generated
Normal file
@@ -0,0 +1,79 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
// ignore_for_file: unused_local_variable
|
||||
|
||||
import 'package:test/test.dart';
|
||||
import 'dart:io';
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
import 'package:kreuzberg/src/kreuzberg_bridge_generated/frb_generated.dart' show RustLib;
|
||||
|
||||
// E2e tests for category: format_specific
|
||||
|
||||
/// Renders [value] as the text that e2e assertions compare against.
///
/// * `null` becomes the empty string.
/// * Strings and numbers are returned unchanged, so values such as
///   `'file.txt'` or `1.5` are never mistaken for an enum name.
/// * Enums are rendered as their variant name converted to snake_case for
///   serde compatibility (`toolCalls` -> `tool_calls`).
/// * Any other object whose `toString()` has the shape `Type.member` keeps the
///   legacy behaviour: the part after the dot, converted to snake_case.
/// * Everything else is returned as `toString()`.
String _alefE2eText(Object? value) {
  if (value == null) return '';
  // Plain text and numbers may legitimately contain a dot; return them as-is.
  if (value is String) return value;
  if (value is num) return value.toString();
  // Real enums: use the variant name directly instead of parsing toString().
  if (value is Enum) return _camelToSnake(value.name);

  final str = value.toString();
  final parts = str.split('.');
  if (parts.length == 2) {
    return _camelToSnake(parts[1]);
  }
  return str;
}
|
||||
|
||||
/// Converts a camelCase identifier to snake_case.
///
/// Every ASCII uppercase letter is lower-cased and, unless it is the very
/// first character, prefixed with an underscore
/// (`toolCalls` -> `tool_calls`, `stop` -> `stop`).
String _camelToSnake(String camel) {
  return camel.replaceAllMapped(RegExp(r'[A-Z]'), (match) {
    final lowered = match[0]!.toLowerCase();
    return match.start > 0 ? '_$lowered' : lowered;
  });
}
|
||||
|
||||
/// Registers the `format_specific` e2e tests.
void main() {
  setUpAll(() async {
    await RustLib.init();
    // The tests open fixtures through relative paths, so switch into the
    // fixtures directory whenever it exists.
    final fixturesPath = Platform.environment['FIXTURES_DIR'] ?? '../../test_documents';
    final fixturesDir = Directory(fixturesPath);
    if (fixturesDir.existsSync()) {
      Directory.current = fixturesDir;
    }
  });

  test('Standalone DOCX extraction using extract_bytes_sync', () async {
    final docxBytes = File('docx/fake.docx').readAsBytesSync();
    final result = await KreuzbergBridge.extractBytesSync(docxBytes, 'application/vnd.openxmlformats-officedocument.wordprocessingml.document');
    expect(result.content.length, greaterThanOrEqualTo(20));
  });

  test('Standalone HWPX extraction using extract_bytes_sync', () async {
    final hwpxBytes = File('hwpx/simple.hwpx').readAsBytesSync();
    final result = await KreuzbergBridge.extractBytesSync(hwpxBytes, 'application/haansofthwpx');
    expect(result.content.length, greaterThanOrEqualTo(20));
    expect(result.content, contains('Hello from HWPX'));
  });

  test('Standalone PDF text extraction using extract_bytes_sync', () async {
    final pdfBytes = File('pdf/fake_memo.pdf').readAsBytesSync();
    final result = await KreuzbergBridge.extractBytesSync(pdfBytes, 'application/pdf');
    expect(result.content.length, greaterThanOrEqualTo(50));
    // Either marker is enough for the extraction to count as successful.
    final mentionsMarker = result.content.contains('Mallori') || result.content.contains('May');
    expect(mentionsMarker, isTrue);
  });

  // The PPTX and XLSX tests carry no expectation: finishing without an
  // exception is the pass criterion.
  test('PPTX presentation extraction using extract_file_sync', () async {
    final pptxBytes = File('pptx/simple.pptx').readAsBytesSync();
    final result = await KreuzbergBridge.extractBytesSync(pptxBytes, 'application/vnd.openxmlformats-officedocument.presentationml.presentation');
  });

  test('XLSX spreadsheet extraction using extract_file_sync', () async {
    final xlsxBytes = File('xlsx/stanley_cups.xlsx').readAsBytesSync();
    final result = await KreuzbergBridge.extractBytesSync(xlsxBytes, 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet');
  });
}
|
||||
69
e2e/dart/test/mime_utilities_test.dart
generated
Normal file
69
e2e/dart/test/mime_utilities_test.dart
generated
Normal file
@@ -0,0 +1,69 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
// ignore_for_file: unused_local_variable
|
||||
|
||||
import 'package:test/test.dart';
|
||||
import 'dart:io';
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
import 'package:kreuzberg/src/kreuzberg_bridge_generated/frb_generated.dart' show RustLib;
|
||||
|
||||
// E2e tests for category: mime_utilities
|
||||
|
||||
/// Renders [value] as the text that e2e assertions compare against.
///
/// * `null` becomes the empty string.
/// * Strings and numbers are returned unchanged, so values such as
///   `'file.txt'` or `1.5` are never mistaken for an enum name.
/// * Enums are rendered as their variant name converted to snake_case for
///   serde compatibility (`toolCalls` -> `tool_calls`).
/// * Any other object whose `toString()` has the shape `Type.member` keeps the
///   legacy behaviour: the part after the dot, converted to snake_case.
/// * Everything else is returned as `toString()`.
String _alefE2eText(Object? value) {
  if (value == null) return '';
  // Plain text and numbers may legitimately contain a dot; return them as-is.
  if (value is String) return value;
  if (value is num) return value.toString();
  // Real enums: use the variant name directly instead of parsing toString().
  if (value is Enum) return _camelToSnake(value.name);

  final str = value.toString();
  final parts = str.split('.');
  if (parts.length == 2) {
    return _camelToSnake(parts[1]);
  }
  return str;
}
|
||||
|
||||
/// Converts a camelCase identifier to snake_case.
///
/// Every ASCII uppercase letter is lower-cased and, unless it is the very
/// first character, prefixed with an underscore
/// (`toolCalls` -> `tool_calls`, `stop` -> `stop`).
String _camelToSnake(String camel) {
  return camel.replaceAllMapped(RegExp(r'[A-Z]'), (match) {
    final lowered = match[0]!.toLowerCase();
    return match.start > 0 ? '_$lowered' : lowered;
  });
}
|
||||
|
||||
/// Registers the `mime_utilities` e2e tests.
void main() {
  setUpAll(() async {
    await RustLib.init();
    // The tests open fixtures through relative paths, so switch into the
    // fixtures directory whenever it exists.
    final fixturesPath = Platform.environment['FIXTURES_DIR'] ?? '../../test_documents';
    final fixturesDir = Directory(fixturesPath);
    if (fixturesDir.existsSync()) {
      Directory.current = fixturesDir;
    }
  });

  test('Detect MIME type from file bytes', () async {
    final pdfBytes = File('pdf/fake_memo.pdf').readAsBytesSync();
    final detected = await KreuzbergBridge.detectMimeTypeFromBytes(pdfBytes);
    expect(detected, contains('pdf'));
  });

  test('Detect MIME type from PNG image bytes', () async {
    final pngBytes = File('images/test_hello_world.png').readAsBytesSync();
    final detected = await KreuzbergBridge.detectMimeTypeFromBytes(pngBytes);
    expect(detected, contains('png'));
  });

  test('Get file extensions for a MIME type', () async {
    final extensions = await KreuzbergBridge.getExtensionsForMime('application/pdf');
    expect(extensions, contains('pdf'));
  });
}
|
||||
16
e2e/dart/test/minimal_test.dart
generated
Normal file
16
e2e/dart/test/minimal_test.dart
generated
Normal file
@@ -0,0 +1,16 @@
|
||||
import 'package:test/test.dart';
|
||||
import 'dart:typed_data';
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
import 'package:kreuzberg/src/kreuzberg_bridge_generated/frb_generated.dart' show RustLib;
|
||||
|
||||
/// Registers a minimal smoke test for plain-text extraction.
void main() {
  setUpAll(() async {
    await RustLib.init();
  });

  test('text extraction works', () async {
    final content = Uint8List.fromList('Hello world'.codeUnits);
    final result = await KreuzbergBridge.extractBytesSync(content, 'text/plain');
    // Fail with a readable expectation instead of a bare RangeError from
    // substring() when the extracted content is shorter than the preview.
    expect(result.content.length, greaterThanOrEqualTo(5));
    print('Text: ${result.content.substring(0, 5)}');
  });
}
|
||||
62
e2e/dart/test/ocr_backend_management_test.dart
generated
Normal file
62
e2e/dart/test/ocr_backend_management_test.dart
generated
Normal file
@@ -0,0 +1,62 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
// ignore_for_file: unused_local_variable
|
||||
|
||||
import 'package:test/test.dart';
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
import 'package:kreuzberg/src/kreuzberg_bridge_generated/frb_generated.dart' show RustLib;
|
||||
|
||||
// E2e tests for category: ocr_backend_management
|
||||
|
||||
/// Renders [value] as the text that e2e assertions compare against.
///
/// * `null` becomes the empty string.
/// * Strings and numbers are returned unchanged, so values such as
///   `'file.txt'` or `1.5` are never mistaken for an enum name.
/// * Enums are rendered as their variant name converted to snake_case for
///   serde compatibility (`toolCalls` -> `tool_calls`).
/// * Any other object whose `toString()` has the shape `Type.member` keeps the
///   legacy behaviour: the part after the dot, converted to snake_case.
/// * Everything else is returned as `toString()`.
String _alefE2eText(Object? value) {
  if (value == null) return '';
  // Plain text and numbers may legitimately contain a dot; return them as-is.
  if (value is String) return value;
  if (value is num) return value.toString();
  // Real enums: use the variant name directly instead of parsing toString().
  if (value is Enum) return _camelToSnake(value.name);

  final str = value.toString();
  final parts = str.split('.');
  if (parts.length == 2) {
    return _camelToSnake(parts[1]);
  }
  return str;
}
|
||||
|
||||
/// Converts a camelCase identifier to snake_case.
///
/// Every ASCII uppercase letter is lower-cased and, unless it is the very
/// first character, prefixed with an underscore
/// (`toolCalls` -> `tool_calls`, `stop` -> `stop`).
String _camelToSnake(String camel) {
  return camel.replaceAllMapped(RegExp(r'[A-Z]'), (match) {
    final lowered = match[0]!.toLowerCase();
    return match.start > 0 ? '_$lowered' : lowered;
  });
}
|
||||
|
||||
/// Registers the `ocr_backend_management` e2e tests.
void main() {
  setUpAll(() => RustLib.init());

  // None of these tests carries an expectation: finishing without an
  // exception is the pass criterion.
  test('Clear all OCR backends and verify list is empty', () async {
    final cleared = await KreuzbergBridge.clearOcrBackends();
  });

  test('List all registered OCR backends', () async {
    final backends = await KreuzbergBridge.listOcrBackends();
  });

  test('Unregister nonexistent OCR backend gracefully', () async {
    final unregistered = await KreuzbergBridge.unregisterOcrBackend('nonexistent-backend-xyz');
  });
}
|
||||
63
e2e/dart/test/pdf_test.dart
generated
Normal file
63
e2e/dart/test/pdf_test.dart
generated
Normal file
@@ -0,0 +1,63 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
// ignore_for_file: unused_local_variable
|
||||
|
||||
import 'package:test/test.dart';
|
||||
import 'dart:io';
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
import 'package:kreuzberg/src/kreuzberg_bridge_generated/frb_generated.dart' show RustLib;
|
||||
|
||||
// E2e tests for category: pdf
|
||||
|
||||
/// Renders [value] as the text that e2e assertions compare against.
///
/// * `null` becomes the empty string.
/// * Strings and numbers are returned unchanged, so values such as
///   `'file.txt'` or `1.5` are never mistaken for an enum name.
/// * Enums are rendered as their variant name converted to snake_case for
///   serde compatibility (`toolCalls` -> `tool_calls`).
/// * Any other object whose `toString()` has the shape `Type.member` keeps the
///   legacy behaviour: the part after the dot, converted to snake_case.
/// * Everything else is returned as `toString()`.
String _alefE2eText(Object? value) {
  if (value == null) return '';
  // Plain text and numbers may legitimately contain a dot; return them as-is.
  if (value is String) return value;
  if (value is num) return value.toString();
  // Real enums: use the variant name directly instead of parsing toString().
  if (value is Enum) return _camelToSnake(value.name);

  final str = value.toString();
  final parts = str.split('.');
  if (parts.length == 2) {
    return _camelToSnake(parts[1]);
  }
  return str;
}
|
||||
|
||||
/// Converts a camelCase identifier to snake_case.
///
/// Every ASCII uppercase letter is lower-cased and, unless it is the very
/// first character, prefixed with an underscore
/// (`toolCalls` -> `tool_calls`, `stop` -> `stop`).
String _camelToSnake(String camel) {
  return camel.replaceAllMapped(RegExp(r'[A-Z]'), (match) {
    final lowered = match[0]!.toLowerCase();
    return match.start > 0 ? '_$lowered' : lowered;
  });
}
|
||||
|
||||
/// Registers the `pdf` e2e tests.
void main() {
  setUpAll(() async {
    await RustLib.init();
    // The tests open fixtures through relative paths, so switch into the
    // fixtures directory whenever it exists.
    final fixturesPath = Platform.environment['FIXTURES_DIR'] ?? '../../test_documents';
    final fixturesDir = Directory(fixturesPath);
    if (fixturesDir.existsSync()) {
      Directory.current = fixturesDir;
    }
  });

  test('render_pdf_page_to_png: first page', () async {
    final pdfBytes = File('pdf/fake_memo.pdf').readAsBytesSync();
    final png = await KreuzbergBridge.renderPdfPageToPng(pdfBytes, 0);
    expect(png.length, greaterThanOrEqualTo(100));
  });

  test('render_pdf_page_to_png: page out of range', () async {
    final pdfBytes = File('pdf/fake_memo.pdf').readAsBytesSync();
    await expectLater(KreuzbergBridge.renderPdfPageToPng(pdfBytes, 999), throwsA(anything));
  });
}
|
||||
209
e2e/dart/test/plugin_api_test.dart
generated
Normal file
209
e2e/dart/test/plugin_api_test.dart
generated
Normal file
@@ -0,0 +1,209 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
// ignore_for_file: unused_local_variable
|
||||
|
||||
import 'package:test/test.dart';
|
||||
import 'dart:typed_data';
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
import 'package:kreuzberg/kreuzberg.dart' show DocumentExtractor;
|
||||
import 'package:kreuzberg/kreuzberg.dart' show OcrBackend;
|
||||
import 'package:kreuzberg/kreuzberg.dart' show PostProcessor;
|
||||
import 'package:kreuzberg/kreuzberg.dart' show Renderer;
|
||||
import 'package:kreuzberg/kreuzberg.dart' show Validator;
|
||||
import 'package:kreuzberg/kreuzberg.dart' show EmbeddingBackend;
|
||||
import 'package:kreuzberg/src/kreuzberg_bridge_generated/frb_generated.dart' show RustLib;
|
||||
|
||||
// E2e tests for category: plugin_api
|
||||
|
||||
/// Renders [value] as the text that e2e assertions compare against.
///
/// * `null` becomes the empty string.
/// * Strings and numbers are returned unchanged, so values such as
///   `'file.txt'` or `1.5` are never mistaken for an enum name.
/// * Enums are rendered as their variant name converted to snake_case for
///   serde compatibility (`toolCalls` -> `tool_calls`).
/// * Any other object whose `toString()` has the shape `Type.member` keeps the
///   legacy behaviour: the part after the dot, converted to snake_case.
/// * Everything else is returned as `toString()`.
String _alefE2eText(Object? value) {
  if (value == null) return '';
  // Plain text and numbers may legitimately contain a dot; return them as-is.
  if (value is String) return value;
  if (value is num) return value.toString();
  // Real enums: use the variant name directly instead of parsing toString().
  if (value is Enum) return _camelToSnake(value.name);

  final str = value.toString();
  final parts = str.split('.');
  if (parts.length == 2) {
    return _camelToSnake(parts[1]);
  }
  return str;
}
|
||||
|
||||
/// Converts a camelCase identifier to snake_case.
///
/// Every ASCII uppercase letter is lower-cased and, unless it is the very
/// first character, prefixed with an underscore
/// (`toolCalls` -> `tool_calls`, `stop` -> `stop`).
String _camelToSnake(String camel) {
  return camel.replaceAllMapped(RegExp(r'[A-Z]'), (match) {
    final lowered = match[0]!.toLowerCase();
    return match.start > 0 ? '_$lowered' : lowered;
  });
}
|
||||
|
||||
/// Stub [DocumentExtractor] named `register_document_extractor_trait_bridge`.
///
/// Both extraction methods throw [UnimplementedError]; the remaining members
/// return fixed placeholder values.
class TestStubRegisterDocumentExtractorTraitBridge extends DocumentExtractor {
  String get name {
    return 'register_document_extractor_trait_bridge';
  }

  Future<InternalDocumentBridge> extractBytes(Uint8List content, String mimeType, ExtractionConfig config) async {
    throw UnimplementedError();
  }

  Future<InternalDocumentBridge> extractFile(String path, String mimeType, ExtractionConfig config) async {
    throw UnimplementedError();
  }

  Future<List<String>> supportedMimeTypes() async {
    return <String>[];
  }

  Future<int> priority() async {
    return 1;
  }

  Future<bool> canHandle(String path, String mimeType) async {
    return false;
  }
}
|
||||
// Shared stub instance that the wrapper callbacks below forward to.
final _TestStubRegisterDocumentExtractorTraitBridge_instance = TestStubRegisterDocumentExtractorTraitBridge();

/// Builds a [DocumentExtractorDartImpl] whose callbacks forward to the shared
/// stub instance.
Future<DocumentExtractorDartImpl> _createTestStubRegisterDocumentExtractorTraitBridgeWrapper() async {
  return await createDocumentExtractorDartImpl(
    pluginName: 'register_document_extractor_trait_bridge',
    pluginVersion: '0.0.1',
    extractBytes: (Uint8List bytes, String mime, ExtractionConfig cfg) {
      return _TestStubRegisterDocumentExtractorTraitBridge_instance.extractBytes(bytes, mime, cfg);
    },
    extractFile: (String filePath, String mime, ExtractionConfig cfg) {
      return _TestStubRegisterDocumentExtractorTraitBridge_instance.extractFile(filePath, mime, cfg);
    },
    supportedMimeTypes: () {
      return _TestStubRegisterDocumentExtractorTraitBridge_instance.supportedMimeTypes();
    },
    priority: () {
      return _TestStubRegisterDocumentExtractorTraitBridge_instance.priority();
    },
    canHandle: (String filePath, String mime) {
      return _TestStubRegisterDocumentExtractorTraitBridge_instance.canHandle(filePath, mime);
    },
  );
}
|
||||
|
||||
|
||||
/// Stub [EmbeddingBackend] for the `register_embedding_backend` trait-bridge
/// test: reports a dimension of 1 and returns no embeddings.
class TestStubRegisterEmbeddingBackendTraitBridge extends EmbeddingBackend {
  String get name => 'register_embedding_backend_trait_bridge';

  Future<int> dimensions() async {
    return 1;
  }

  Future<List<Float64List>> embed(List<String> texts) async {
    return [];
  }
}

/// Single stub instance that every bridge callback below forwards to.
final _TestStubRegisterEmbeddingBackendTraitBridge_instance = TestStubRegisterEmbeddingBackendTraitBridge();

/// Builds the bridge wrapper whose callbacks delegate to the stub instance.
Future<EmbeddingBackendDartImpl> _createTestStubRegisterEmbeddingBackendTraitBridgeWrapper() async {
  return await createEmbeddingBackendDartImpl(
    pluginName: 'register_embedding_backend_trait_bridge',
    pluginVersion: '0.0.1',
    dimensions: () {
      return _TestStubRegisterEmbeddingBackendTraitBridge_instance.dimensions();
    },
    embed: (List<String> texts) {
      return _TestStubRegisterEmbeddingBackendTraitBridge_instance.embed(texts);
    },
  );
}
|
||||
|
||||
|
||||
/// Stub [OcrBackend] for the `register_ocr_backend` trait-bridge test.
/// All processing entry points throw [UnimplementedError]; capability queries
/// answer `false` / empty, and the backend type is reported as tesseract.
class TestStubRegisterOcrBackendTraitBridge extends OcrBackend {
  String get name => 'register_ocr_backend_trait_bridge';

  Future<ExtractionResult> processImage(Uint8List imageBytes, OcrConfig config) async {
    throw UnimplementedError();
  }

  Future<ExtractionResult> processImageFile(String path, OcrConfig config) async {
    throw UnimplementedError();
  }

  Future<bool> supportsLanguage(String lang) async {
    return false;
  }

  Future<OcrBackendType> backendType() async {
    return OcrBackendType.tesseract;
  }

  Future<List<String>> supportedLanguages() async {
    return [];
  }

  Future<bool> supportsTableDetection() async {
    return false;
  }

  Future<bool> supportsDocumentProcessing() async {
    return false;
  }

  Future<ExtractionResult> processDocument(String path, OcrConfig config) async {
    throw UnimplementedError();
  }
}

/// Single stub instance that every bridge callback below forwards to.
final _TestStubRegisterOcrBackendTraitBridge_instance = TestStubRegisterOcrBackendTraitBridge();

/// Builds the bridge wrapper whose callbacks delegate to the stub instance.
Future<OcrBackendDartImpl> _createTestStubRegisterOcrBackendTraitBridgeWrapper() async {
  return await createOcrBackendDartImpl(
    pluginName: 'register_ocr_backend_trait_bridge',
    pluginVersion: '0.0.1',
    processImage: (Uint8List imageBytes, OcrConfig config) {
      return _TestStubRegisterOcrBackendTraitBridge_instance.processImage(imageBytes, config);
    },
    processImageFile: (String path, OcrConfig config) {
      return _TestStubRegisterOcrBackendTraitBridge_instance.processImageFile(path, config);
    },
    supportsLanguage: (String lang) {
      return _TestStubRegisterOcrBackendTraitBridge_instance.supportsLanguage(lang);
    },
    backendType: () {
      return _TestStubRegisterOcrBackendTraitBridge_instance.backendType();
    },
    supportedLanguages: () {
      return _TestStubRegisterOcrBackendTraitBridge_instance.supportedLanguages();
    },
    supportsTableDetection: () {
      return _TestStubRegisterOcrBackendTraitBridge_instance.supportsTableDetection();
    },
    supportsDocumentProcessing: () {
      return _TestStubRegisterOcrBackendTraitBridge_instance.supportsDocumentProcessing();
    },
    processDocument: (String path, OcrConfig config) {
      return _TestStubRegisterOcrBackendTraitBridge_instance.processDocument(path, config);
    },
  );
}
|
||||
|
||||
|
||||
/// Stub [PostProcessor] for the `register_post_processor` trait-bridge test.
/// `process` does nothing, the stage is `early`, `shouldProcess` is always
/// `false`, and both the estimated duration and the priority are 1.
class TestStubRegisterPostProcessorTraitBridge extends PostProcessor {
  String get name => 'register_post_processor_trait_bridge';

  Future<void> process(ExtractionResult result, ExtractionConfig config) async {
    return null;
  }

  Future<ProcessingStage> processingStage() async {
    return ProcessingStage.early;
  }

  Future<bool> shouldProcess(ExtractionResult result, ExtractionConfig config) async {
    return false;
  }

  Future<int> estimatedDurationMs(ExtractionResult result) async {
    return 1;
  }

  Future<int> priority() async {
    return 1;
  }
}

/// Single stub instance that every bridge callback below forwards to.
final _TestStubRegisterPostProcessorTraitBridge_instance = TestStubRegisterPostProcessorTraitBridge();

/// Builds the bridge wrapper whose callbacks delegate to the stub instance.
Future<PostProcessorDartImpl> _createTestStubRegisterPostProcessorTraitBridgeWrapper() async {
  return await createPostProcessorDartImpl(
    pluginName: 'register_post_processor_trait_bridge',
    pluginVersion: '0.0.1',
    process: (ExtractionResult result, ExtractionConfig config) {
      return _TestStubRegisterPostProcessorTraitBridge_instance.process(result, config);
    },
    processingStage: () {
      return _TestStubRegisterPostProcessorTraitBridge_instance.processingStage();
    },
    shouldProcess: (ExtractionResult result, ExtractionConfig config) {
      return _TestStubRegisterPostProcessorTraitBridge_instance.shouldProcess(result, config);
    },
    estimatedDurationMs: (ExtractionResult result) {
      return _TestStubRegisterPostProcessorTraitBridge_instance.estimatedDurationMs(result);
    },
    priority: () {
      return _TestStubRegisterPostProcessorTraitBridge_instance.priority();
    },
  );
}
|
||||
|
||||
|
||||
/// Stub [Renderer] for the `register_renderer` trait-bridge test: renders
/// every document to the empty string.
class TestStubRegisterRendererTraitBridge extends Renderer {
  String get name => 'register_renderer_trait_bridge';

  Future<String> render(InternalDocumentBridge doc) async {
    return '';
  }
}

/// Single stub instance that the bridge callback below forwards to.
final _TestStubRegisterRendererTraitBridge_instance = TestStubRegisterRendererTraitBridge();

/// Builds the bridge wrapper whose callback delegates to the stub instance.
Future<RendererDartImpl> _createTestStubRegisterRendererTraitBridgeWrapper() async {
  return await createRendererDartImpl(
    pluginName: 'register_renderer_trait_bridge',
    pluginVersion: '0.0.1',
    render: (InternalDocumentBridge doc) {
      return _TestStubRegisterRendererTraitBridge_instance.render(doc);
    },
  );
}
|
||||
|
||||
|
||||
/// Stub [Validator] for the `register_validator` trait-bridge test:
/// `validate` does nothing, `shouldValidate` is always `false`, priority is 1.
class TestStubRegisterValidatorTraitBridge extends Validator {
  String get name => 'register_validator_trait_bridge';

  Future<void> validate(ExtractionResult result, ExtractionConfig config) async {
    return null;
  }

  Future<bool> shouldValidate(ExtractionResult result, ExtractionConfig config) async {
    return false;
  }

  Future<int> priority() async {
    return 1;
  }
}

/// Single stub instance that every bridge callback below forwards to.
final _TestStubRegisterValidatorTraitBridge_instance = TestStubRegisterValidatorTraitBridge();

/// Builds the bridge wrapper whose callbacks delegate to the stub instance.
Future<ValidatorDartImpl> _createTestStubRegisterValidatorTraitBridgeWrapper() async {
  return await createValidatorDartImpl(
    pluginName: 'register_validator_trait_bridge',
    pluginVersion: '0.0.1',
    validate: (ExtractionResult result, ExtractionConfig config) {
      return _TestStubRegisterValidatorTraitBridge_instance.validate(result, config);
    },
    shouldValidate: (ExtractionResult result, ExtractionConfig config) {
      return _TestStubRegisterValidatorTraitBridge_instance.shouldValidate(result, config);
    },
    priority: () {
      return _TestStubRegisterValidatorTraitBridge_instance.priority();
    },
  );
}
|
||||
|
||||
|
||||
/// Plugin registration e2e tests. Each case only checks that the bridge call
/// completes without throwing; no return value is inspected.
void main() {
  setUpAll(() async {
    await RustLib.init();
  });

  // --- registration through the generated trait bridges ---

  test('register_document_extractor: trait bridge', () async {
    final plugin = await _createTestStubRegisterDocumentExtractorTraitBridgeWrapper();
    await KreuzbergBridge.registerDocumentExtractor(plugin);
  });

  test('register_embedding_backend: trait bridge', () async {
    final plugin = await _createTestStubRegisterEmbeddingBackendTraitBridgeWrapper();
    await KreuzbergBridge.registerEmbeddingBackend(plugin);
  });

  test('register_ocr_backend: trait bridge', () async {
    final plugin = await _createTestStubRegisterOcrBackendTraitBridgeWrapper();
    await KreuzbergBridge.registerOcrBackend(plugin);
  });

  test('register_post_processor: trait bridge', () async {
    final plugin = await _createTestStubRegisterPostProcessorTraitBridgeWrapper();
    await KreuzbergBridge.registerPostProcessor(plugin);
  });

  test('register_renderer: trait bridge', () async {
    final plugin = await _createTestStubRegisterRendererTraitBridgeWrapper();
    await KreuzbergBridge.registerRenderer(plugin);
  });

  test('register_validator: trait bridge', () async {
    final plugin = await _createTestStubRegisterValidatorTraitBridgeWrapper();
    await KreuzbergBridge.registerValidator(plugin);
  });

  // --- unregistration by plugin name ---

  test('unregister_document_extractor', () async {
    await KreuzbergBridge.unregisterDocumentExtractor('test-extractor');
  });

  test('unregister_embedding_backend', () async {
    await KreuzbergBridge.unregisterEmbeddingBackend('test-embedding-backend');
  });

  test('unregister_post_processor', () async {
    await KreuzbergBridge.unregisterPostProcessor('test-processor');
  });

  test('unregister_renderer', () async {
    await KreuzbergBridge.unregisterRenderer('test-renderer');
  });

  test('unregister_validator', () async {
    await KreuzbergBridge.unregisterValidator('test-validator');
  });
}
|
||||
58
e2e/dart/test/post_processor_management_test.dart
generated
Normal file
58
e2e/dart/test/post_processor_management_test.dart
generated
Normal file
@@ -0,0 +1,58 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
// ignore_for_file: unused_local_variable
|
||||
|
||||
import 'package:test/test.dart';
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
import 'package:kreuzberg/src/kreuzberg_bridge_generated/frb_generated.dart' show RustLib;
|
||||
|
||||
// E2e tests for category: post_processor_management
|
||||
|
||||
/// Renders [value] as text for e2e comparisons.
///
/// * `null` becomes the empty string.
/// * Strings, numbers and booleans are returned as their plain text, so dotted
///   values such as `'report.txt'` or `1.5` are never mistaken for an enum.
/// * Any other object whose `toString()` has the shape `Type.variant` (exactly
///   one `.`) is treated as an enum and reduced to its variant name in
///   snake_case for serde compatibility, e.g. `toolCalls` -> `tool_calls`.
/// * Everything else is returned as `toString()` unchanged.
String _alefE2eText(Object? value) {
  if (value == null) return '';
  if (value is String) return value;

  final str = value.toString();
  // Primitive values can legitimately contain a single '.', e.g. a double.
  if (value is num || value is bool) return str;

  // Enum.toString() returns 'EnumName.variantName'.
  final parts = str.split('.');
  if (parts.length != 2) return str;
  return _camelToSnake(parts[1]);
}

/// Converts a camelCase identifier to snake_case.
///
/// Every ASCII uppercase letter is lower-cased and, unless it is the first
/// character, prefixed with an underscore. All other characters are copied
/// through unchanged.
String _camelToSnake(String camel) {
  return camel.replaceAllMapped(RegExp(r'[A-Z]'), (match) {
    final separator = match.start > 0 ? '_' : '';
    return '$separator${match.group(0)!.toLowerCase()}';
  });
}
|
||||
|
||||
/// E2e tests for post-processor registry management.
void main() {
  setUpAll(() async {
    await RustLib.init();
  });

  test('Clear all post-processors and verify list is empty', () async {
    await KreuzbergBridge.clearPostProcessors();
    // The test title promises verification, so actually check the registry.
    expect(await KreuzbergBridge.listPostProcessors(), isEmpty);
  });

  test('List all registered post-processors', () async {
    // Only checks that listing completes without throwing.
    await KreuzbergBridge.listPostProcessors();
  });
}
|
||||
62
e2e/dart/test/registry_operations_test.dart
generated
Normal file
62
e2e/dart/test/registry_operations_test.dart
generated
Normal file
@@ -0,0 +1,62 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
// ignore_for_file: unused_local_variable
|
||||
|
||||
import 'package:test/test.dart';
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
import 'package:kreuzberg/src/kreuzberg_bridge_generated/frb_generated.dart' show RustLib;
|
||||
|
||||
// E2e tests for category: registry_operations
|
||||
|
||||
/// Renders [value] as text for e2e comparisons.
///
/// * `null` becomes the empty string.
/// * Strings, numbers and booleans are returned as their plain text, so dotted
///   values such as `'report.txt'` or `1.5` are never mistaken for an enum.
/// * Any other object whose `toString()` has the shape `Type.variant` (exactly
///   one `.`) is treated as an enum and reduced to its variant name in
///   snake_case for serde compatibility, e.g. `toolCalls` -> `tool_calls`.
/// * Everything else is returned as `toString()` unchanged.
String _alefE2eText(Object? value) {
  if (value == null) return '';
  if (value is String) return value;

  final str = value.toString();
  // Primitive values can legitimately contain a single '.', e.g. a double.
  if (value is num || value is bool) return str;

  // Enum.toString() returns 'EnumName.variantName'.
  final parts = str.split('.');
  if (parts.length != 2) return str;
  return _camelToSnake(parts[1]);
}

/// Converts a camelCase identifier to snake_case.
///
/// Every ASCII uppercase letter is lower-cased and, unless it is the first
/// character, prefixed with an underscore. All other characters are copied
/// through unchanged.
String _camelToSnake(String camel) {
  return camel.replaceAllMapped(RegExp(r'[A-Z]'), (match) {
    final separator = match.start > 0 ? '_' : '';
    return '$separator${match.group(0)!.toLowerCase()}';
  });
}
|
||||
|
||||
/// E2e tests for MIME-type registry lookups. Each case only checks that the
/// lookup completes without throwing; the returned extensions are not inspected.
void main() {
  setUpAll(() async {
    await RustLib.init();
  });

  test('Get file extensions for DOCX MIME type', () async {
    await KreuzbergBridge.getExtensionsForMime('application/vnd.openxmlformats-officedocument.wordprocessingml.document');
  });

  test('Get file extensions for HTML MIME type', () async {
    await KreuzbergBridge.getExtensionsForMime('text/html');
  });

  test('Get file extensions for PDF MIME type', () async {
    await KreuzbergBridge.getExtensionsForMime('application/pdf');
  });
}
|
||||
74
e2e/dart/test/registry_test.dart
generated
Normal file
74
e2e/dart/test/registry_test.dart
generated
Normal file
@@ -0,0 +1,74 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
// ignore_for_file: unused_local_variable
|
||||
|
||||
import 'package:test/test.dart';
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
import 'package:kreuzberg/src/kreuzberg_bridge_generated/frb_generated.dart' show RustLib;
|
||||
|
||||
// E2e tests for category: registry
|
||||
|
||||
/// Renders [value] as text for e2e comparisons.
///
/// * `null` becomes the empty string.
/// * Strings, numbers and booleans are returned as their plain text, so dotted
///   values such as `'report.txt'` or `1.5` are never mistaken for an enum.
/// * Any other object whose `toString()` has the shape `Type.variant` (exactly
///   one `.`) is treated as an enum and reduced to its variant name in
///   snake_case for serde compatibility, e.g. `toolCalls` -> `tool_calls`.
/// * Everything else is returned as `toString()` unchanged.
String _alefE2eText(Object? value) {
  if (value == null) return '';
  if (value is String) return value;

  final str = value.toString();
  // Primitive values can legitimately contain a single '.', e.g. a double.
  if (value is num || value is bool) return str;

  // Enum.toString() returns 'EnumName.variantName'.
  final parts = str.split('.');
  if (parts.length != 2) return str;
  return _camelToSnake(parts[1]);
}

/// Converts a camelCase identifier to snake_case.
///
/// Every ASCII uppercase letter is lower-cased and, unless it is the first
/// character, prefixed with an underscore. All other characters are copied
/// through unchanged.
String _camelToSnake(String camel) {
  return camel.replaceAllMapped(RegExp(r'[A-Z]'), (match) {
    final separator = match.start > 0 ? '_' : '';
    return '$separator${match.group(0)!.toLowerCase()}';
  });
}
|
||||
|
||||
/// E2e tests for the plugin registries. Each case only checks that listing
/// completes without throwing; the returned names are not inspected.
void main() {
  setUpAll(() async {
    await RustLib.init();
  });

  test('List document extractors', () async {
    await KreuzbergBridge.listDocumentExtractors();
  });

  test('List embedding backends', () async {
    await KreuzbergBridge.listEmbeddingBackends();
  });

  test('List OCR backends', () async {
    await KreuzbergBridge.listOcrBackends();
  });

  test('List post-processors', () async {
    await KreuzbergBridge.listPostProcessors();
  });

  test('List renderers', () async {
    await KreuzbergBridge.listRenderers();
  });

  test('List validators', () async {
    await KreuzbergBridge.listValidators();
  });
}
|
||||
58
e2e/dart/test/renderer_management_test.dart
generated
Normal file
58
e2e/dart/test/renderer_management_test.dart
generated
Normal file
@@ -0,0 +1,58 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
// ignore_for_file: unused_local_variable
|
||||
|
||||
import 'package:test/test.dart';
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
import 'package:kreuzberg/src/kreuzberg_bridge_generated/frb_generated.dart' show RustLib;
|
||||
|
||||
// E2e tests for category: renderer_management
|
||||
|
||||
/// Renders [value] as text for e2e comparisons.
///
/// * `null` becomes the empty string.
/// * Strings, numbers and booleans are returned as their plain text, so dotted
///   values such as `'report.txt'` or `1.5` are never mistaken for an enum.
/// * Any other object whose `toString()` has the shape `Type.variant` (exactly
///   one `.`) is treated as an enum and reduced to its variant name in
///   snake_case for serde compatibility, e.g. `toolCalls` -> `tool_calls`.
/// * Everything else is returned as `toString()` unchanged.
String _alefE2eText(Object? value) {
  if (value == null) return '';
  if (value is String) return value;

  final str = value.toString();
  // Primitive values can legitimately contain a single '.', e.g. a double.
  if (value is num || value is bool) return str;

  // Enum.toString() returns 'EnumName.variantName'.
  final parts = str.split('.');
  if (parts.length != 2) return str;
  return _camelToSnake(parts[1]);
}

/// Converts a camelCase identifier to snake_case.
///
/// Every ASCII uppercase letter is lower-cased and, unless it is the first
/// character, prefixed with an underscore. All other characters are copied
/// through unchanged.
String _camelToSnake(String camel) {
  return camel.replaceAllMapped(RegExp(r'[A-Z]'), (match) {
    final separator = match.start > 0 ? '_' : '';
    return '$separator${match.group(0)!.toLowerCase()}';
  });
}
|
||||
|
||||
/// E2e tests for renderer registry management.
void main() {
  setUpAll(() async {
    await RustLib.init();
  });

  test('Clear all renderers and verify list is empty', () async {
    await KreuzbergBridge.clearRenderers();
    // The test title promises verification, so actually check the registry.
    expect(await KreuzbergBridge.listRenderers(), isEmpty);
  });

  test('List all registered renderers', () async {
    // Only checks that listing completes without throwing.
    await KreuzbergBridge.listRenderers();
  });
}
|
||||
117
e2e/dart/test/smoke_test.dart
generated
Normal file
117
e2e/dart/test/smoke_test.dart
generated
Normal file
@@ -0,0 +1,117 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
// ignore_for_file: unused_local_variable
|
||||
|
||||
import 'package:test/test.dart';
|
||||
import 'dart:io';
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
import 'package:kreuzberg/src/kreuzberg_bridge_generated/frb_generated.dart' show RustLib;
|
||||
|
||||
// E2e tests for category: smoke
|
||||
|
||||
/// Renders [value] as text for e2e comparisons.
///
/// * `null` becomes the empty string.
/// * Strings, numbers and booleans are returned as their plain text, so dotted
///   values such as `'report.txt'` or `1.5` are never mistaken for an enum.
/// * Any other object whose `toString()` has the shape `Type.variant` (exactly
///   one `.`) is treated as an enum and reduced to its variant name in
///   snake_case for serde compatibility, e.g. `toolCalls` -> `tool_calls`.
/// * Everything else is returned as `toString()` unchanged.
String _alefE2eText(Object? value) {
  if (value == null) return '';
  if (value is String) return value;

  final str = value.toString();
  // Primitive values can legitimately contain a single '.', e.g. a double.
  if (value is num || value is bool) return str;

  // Enum.toString() returns 'EnumName.variantName'.
  final parts = str.split('.');
  if (parts.length != 2) return str;
  return _camelToSnake(parts[1]);
}

/// Converts a camelCase identifier to snake_case.
///
/// Every ASCII uppercase letter is lower-cased and, unless it is the first
/// character, prefixed with an underscore. All other characters are copied
/// through unchanged.
String _camelToSnake(String camel) {
  return camel.replaceAllMapped(RegExp(r'[A-Z]'), (match) {
    final separator = match.start > 0 ? '_' : '';
    return '$separator${match.group(0)!.toLowerCase()}';
  });
}
|
||||
|
||||
/// Smoke tests: one extraction per supported document family, checking the
/// reported MIME type and, where applicable, a minimum content length and a
/// few expected substrings.
void main() {
  setUpAll(() async {
    await RustLib.init();
    // Fixture paths in the tests are relative, so switch into the fixtures
    // directory when it exists.
    final fixturesPath = Platform.environment['FIXTURES_DIR'] ?? '../../test_documents';
    final fixturesDir = Directory(fixturesPath);
    if (fixturesDir.existsSync()) {
      Directory.current = fixturesDir;
    }
  });

  test('OCR: PNG image extraction with OCR enabled. In WASM this exercises the Uint8Array bridge parameter and Promise await in the generated OcrBackend bridge.', () async {
    final bytes = File('images/test_hello_world.png').readAsBytesSync();
    final result = await KreuzbergBridge.extractBytes(bytes, 'image/png');
    expect(result.mimeType.toString().trim(), equals('image/png'));
    expect(result.content.length, greaterThanOrEqualTo(1));
    final found = ['Hello', 'World', 'hello', 'world'].any((needle) => result.content.contains(needle));
    expect(found, isTrue);
  });

  test('Smoke test: DOCX with formatted text', () async {
    final bytes = File('docx/fake.docx').readAsBytesSync();
    final result = await KreuzbergBridge.extractBytes(bytes, 'application/vnd.openxmlformats-officedocument.wordprocessingml.document');
    expect(result.mimeType.toString().trim(), equals('application/vnd.openxmlformats-officedocument.wordprocessingml.document'));
    expect(result.content.length, greaterThanOrEqualTo(20));
    final found = ['Lorem', 'ipsum', 'document', 'text'].any((needle) => result.content.contains(needle));
    expect(found, isTrue);
  });

  test('Smoke test: HTML table extraction', () async {
    final bytes = File('html/simple_table.html').readAsBytesSync();
    final result = await KreuzbergBridge.extractBytes(bytes, 'text/html');
    expect(result.mimeType.toString().trim(), equals('text/html'));
    expect(result.content.length, greaterThanOrEqualTo(10));
    final found = ['Sample Data Table', 'Laptop', 'Electronics', 'Product'].any((needle) => result.content.contains(needle));
    expect(found, isTrue);
  });

  test('Smoke test: PNG image (without OCR, metadata only)', () async {
    final bytes = File('images/sample.png').readAsBytesSync();
    final config = ExtractionConfig(useCache: true, enableQualityProcessing: true, forceOcr: false, disableOcr: true, resultFormat: ResultFormat.unified, outputFormat: OutputFormat.plain(), includeDocumentStructure: false, useLayoutForMarkdown: false, maxArchiveDepth: 3);
    final result = await KreuzbergBridge.extractBytes(bytes, 'image/png', config);
    expect(result.mimeType.toString().trim(), equals('image/png'));
  });

  test('Smoke test: JSON file extraction', () async {
    final bytes = File('json/simple.json').readAsBytesSync();
    final result = await KreuzbergBridge.extractBytes(bytes, 'application/json');
    expect(result.mimeType.toString().trim(), equals('application/json'));
    expect(result.content.length, greaterThanOrEqualTo(5));
  });

  test('Smoke test: PDF with simple text extraction', () async {
    final bytes = File('pdf/fake_memo.pdf').readAsBytesSync();
    final result = await KreuzbergBridge.extractBytes(bytes, 'application/pdf');
    expect(result.mimeType.toString().trim(), equals('application/pdf'));
    expect(result.content.length, greaterThanOrEqualTo(50));
    final found = ['May 5, 2023', 'To Whom it May Concern'].any((needle) => result.content.contains(needle));
    expect(found, isTrue);
  });

  test('Smoke test: Plain text file', () async {
    final bytes = File('text/report.txt').readAsBytesSync();
    final result = await KreuzbergBridge.extractBytes(bytes, 'text/plain');
    expect(result.mimeType.toString().trim(), equals('text/plain'));
    expect(result.content.length, greaterThanOrEqualTo(5));
  });

  test('Smoke test: XLSX with basic spreadsheet data including tables', () async {
    final bytes = File('xlsx/stanley_cups.xlsx').readAsBytesSync();
    final result = await KreuzbergBridge.extractBytes(bytes, 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet');
    expect(result.mimeType.toString().trim(), equals('application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'));
    expect(result.content.length, greaterThanOrEqualTo(100));
    // Every one of these must appear, checked in this order.
    const required = ['Team', 'Location', 'Stanley Cups', 'Blues', 'Flyers', 'Maple Leafs', 'STL', 'PHI', 'TOR'];
    for (final needle in required) {
      expect(result.content, contains(needle));
    }
    // skipped: field 'tables' not available on dart result type
    // skipped: field 'metadata.format.excel.sheet_count' not available on dart result type
    // skipped: field 'metadata.format.excel.sheet_names' not available on dart result type
  });
}
|
||||
58
e2e/dart/test/validator_management_test.dart
generated
Normal file
58
e2e/dart/test/validator_management_test.dart
generated
Normal file
@@ -0,0 +1,58 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
// ignore_for_file: unused_local_variable
|
||||
|
||||
import 'package:test/test.dart';
|
||||
import 'package:kreuzberg/kreuzberg.dart';
|
||||
import 'package:kreuzberg/src/kreuzberg_bridge_generated/frb_generated.dart' show RustLib;
|
||||
|
||||
// E2e tests for category: validator_management
|
||||
|
||||
/// Renders [value] as text for e2e comparisons.
///
/// * `null` becomes the empty string.
/// * Strings, numbers and booleans are returned as their plain text, so dotted
///   values such as `'report.txt'` or `1.5` are never mistaken for an enum.
/// * Any other object whose `toString()` has the shape `Type.variant` (exactly
///   one `.`) is treated as an enum and reduced to its variant name in
///   snake_case for serde compatibility, e.g. `toolCalls` -> `tool_calls`.
/// * Everything else is returned as `toString()` unchanged.
String _alefE2eText(Object? value) {
  if (value == null) return '';
  if (value is String) return value;

  final str = value.toString();
  // Primitive values can legitimately contain a single '.', e.g. a double.
  if (value is num || value is bool) return str;

  // Enum.toString() returns 'EnumName.variantName'.
  final parts = str.split('.');
  if (parts.length != 2) return str;
  return _camelToSnake(parts[1]);
}

/// Converts a camelCase identifier to snake_case.
///
/// Every ASCII uppercase letter is lower-cased and, unless it is the first
/// character, prefixed with an underscore. All other characters are copied
/// through unchanged.
String _camelToSnake(String camel) {
  return camel.replaceAllMapped(RegExp(r'[A-Z]'), (match) {
    final separator = match.start > 0 ? '_' : '';
    return '$separator${match.group(0)!.toLowerCase()}';
  });
}
|
||||
|
||||
/// E2e tests for validator registry management.
void main() {
  setUpAll(() async {
    await RustLib.init();
  });

  test('Clear all validators and verify list is empty', () async {
    await KreuzbergBridge.clearValidators();
    // The test title promises verification, so actually check the registry.
    expect(await KreuzbergBridge.listValidators(), isEmpty);
  });

  test('List all registered validators', () async {
    // Only checks that listing completes without throwing.
    await KreuzbergBridge.listValidators();
  });
}
|
||||
3
e2e/elixir/lib/e2e_elixir.ex
generated
Normal file
3
e2e/elixir/lib/e2e_elixir.ex
generated
Normal file
@@ -0,0 +1,3 @@
|
||||
# Placeholder application module for the e2e project: it defines no functions
# and opts out of generated documentation via `@moduledoc false`.
defmodule E2eElixir do
|
||||
@moduledoc false
|
||||
end
|
||||
20
e2e/elixir/mix.exs
generated
Normal file
20
e2e/elixir/mix.exs
generated
Normal file
@@ -0,0 +1,20 @@
|
||||
# Mix project definition for the Elixir e2e suite.
defmodule E2eElixir.MixProject do
  use Mix.Project

  @version "0.1.0"

  # Project metadata consumed by Mix.
  def project do
    [app: :e2e_elixir, version: @version, elixir: "~> 1.14", deps: deps()]
  end

  # The binding under test is taken from the local path; the rustler packages
  # are listed alongside it.
  defp deps do
    [
      {:kreuzberg, path: "../../packages/elixir"},
      {:rustler_precompiled, "~> 0.9"},
      {:rustler, "~> 0.37.0", runtime: false}
    ]
  end
end
|
||||
32
e2e/elixir/test/async_test.exs
generated
Normal file
32
e2e/elixir/test/async_test.exs
generated
Normal file
@@ -0,0 +1,32 @@
|
||||
# This file is auto-generated by alef — DO NOT EDIT.
|
||||
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
# To regenerate: alef generate
|
||||
# To verify freshness: alef verify --exit-code
|
||||
# Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
# E2e tests for category: async
|
||||
defmodule E2e.AsyncTest do
  use ExUnit.Case, async: false

  describe "async_extract_bytes" do
    test "async_extract_bytes" do
      {:ok, result} =
        "../../test_documents/pdf/fake_memo.pdf"
        |> File.read!()
        |> Kreuzberg.extract_bytes_async("application/pdf")

      assert String.trim(result.mime_type) == "application/pdf"
      assert content_size(result.content) >= 50
    end
  end

  describe "async_extract_bytes_empty_mime" do
    test "async_extract_bytes_empty_mime" do
      outcome =
        "../../test_documents/text/plain.txt"
        |> File.read!()
        |> Kreuzberg.extract_bytes_async("", "{}")

      assert {:error, _} = outcome
    end
  end

  describe "async_extract_bytes_invalid_mime" do
    test "async_extract_bytes_invalid_mime" do
      outcome =
        "../../test_documents/text/plain.txt"
        |> File.read!()
        |> Kreuzberg.extract_bytes_async("application/x-nonexistent", "{}")

      assert {:error, _} = outcome
    end
  end

  # Size of extracted content: bytes for binaries, element count for lists,
  # and String.length/1 for anything else.
  defp content_size(content) when is_binary(content), do: byte_size(content)
  defp content_size(content) when is_list(content), do: length(content)
  defp content_size(content), do: String.length(content)
end
|
||||
89
e2e/elixir/test/batch_test.exs
generated
Normal file
89
e2e/elixir/test/batch_test.exs
generated
Normal file
@@ -0,0 +1,89 @@
|
||||
# This file is auto-generated by alef — DO NOT EDIT.
|
||||
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
# To regenerate: alef generate
|
||||
# To verify freshness: alef verify --exit-code
|
||||
# Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
# E2e tests for category: batch
|
||||
defmodule E2e.BatchTest do
  use ExUnit.Case, async: false

  # Every batch fixture is registered as a skipped placeholder:
  # batch functions excluded from Elixir binding: unsafe NIF tuple marshalling
  for fixture <- ~w(
        batch_bytes_invalid_mime
        batch_extract_bytes_happy
        batch_extract_bytes_mixed_format
        batch_extract_bytes_sync_empty_list
        batch_extract_bytes_sync_invalid_mime
        batch_file_async_basic
        batch_file_async_not_found
        batch_file_not_found
        batch_file_partial
        batch_file_sync_basic
      ) do
    describe "#{fixture}" do
      @tag :skip
      test "#{fixture}" do
        :ok
      end
    end
  end
end
|
||||
19
e2e/elixir/test/code_test.exs
generated
Normal file
19
e2e/elixir/test/code_test.exs
generated
Normal file
@@ -0,0 +1,19 @@
|
||||
# This file is auto-generated by alef — DO NOT EDIT.
|
||||
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
# To regenerate: alef generate
|
||||
# To verify freshness: alef verify --exit-code
|
||||
# Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
# E2e tests for category: code
|
||||
defmodule E2e.CodeTest do
  use ExUnit.Case, async: false

  describe "code_shebang_detection" do
    # Extracts script.sh with an explicit source-code MIME type, then checks
    # the reported MIME type, a minimum content size and two expected tokens.
    test "code_shebang_detection" do
      {:ok, result} =
        Kreuzberg.extract_file_sync("../../test_documents/code/script.sh",
          mime_type: "text/x-source-code"
        )

      assert String.trim(result.mime_type) == "text/x-source-code"

      # Content is accepted as a binary (byte count) or a list (element count).
      # The former third alternative called String.length/1 on a value already
      # known not to be a binary, which can only raise, so it is dropped: any
      # other term now fails the assertion cleanly.
      assert (is_binary(result.content) and byte_size(result.content) >= 10) or
               (is_list(result.content) and length(result.content) >= 10)

      assert String.contains?(to_string(result.content), "build")
      assert String.contains?(to_string(result.content), "clean")
    end
  end
end
|
||||
183
e2e/elixir/test/contract_test.exs
generated
Normal file
183
e2e/elixir/test/contract_test.exs
generated
Normal file
@@ -0,0 +1,183 @@
|
||||
# This file is auto-generated by alef — DO NOT EDIT.
|
||||
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
# To regenerate: alef generate
|
||||
# To verify freshness: alef verify --exit-code
|
||||
# Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
# E2e tests for category: contract
|
||||
defmodule E2e.ContractTest do
  use ExUnit.Case, async: false

  # Fixture path and config shared by most cases in this module.
  @memo_pdf "../../test_documents/pdf/fake_memo.pdf"
  @markdown_config "{\"output_format\":\"markdown\"}"

  # NOTE: the previously generated private helper alef_e2e_format_to_string/1
  # had no caller in this module (compiler "unused function" warning) and has
  # been removed.

  # True when `value` is a binary of at least `min` bytes or a list of at
  # least `min` elements. Any other term counts as too short, so the calling
  # assertion fails instead of raising from String.length/1 on a non-binary.
  defp min_size?(value, min) when is_binary(value), do: byte_size(value) >= min
  defp min_size?(value, min) when is_list(value), do: length(value) >= min
  defp min_size?(_value, _min), do: false

  # True when the string form of `value` contains at least one of `needles`.
  defp mentions_any?(value, needles) do
    text = to_string(value)
    Enum.any?(needles, fn needle -> String.contains?(text, needle) end)
  end

  # NOTE(review): the api_batch_* and api_extract_bytes_async cases below all
  # call Kreuzberg.extract_file_async/1,2 despite their names — confirm this
  # substitution is what the generator intends.

  describe "api_batch_bytes_async" do
    test "api_batch_bytes_async" do
      {:ok, result} = Kreuzberg.extract_file_async(@memo_pdf)

      assert String.trim(result.mime_type) == "application/pdf"
      assert min_size?(result.content, 10)
      assert mentions_any?(result.content, ["May 5, 2023", "Mallori"])
    end
  end

  describe "api_batch_bytes_with_configs_async" do
    test "api_batch_bytes_with_configs_async" do
      {:ok, result} = Kreuzberg.extract_file_async(@memo_pdf, config: @markdown_config)

      assert String.trim(result.mime_type) == "application/pdf"
      assert min_size?(result.content, 10)
      # skipped: field 'metadata.output_format' not available on result type
    end
  end

  describe "api_batch_file_async" do
    test "api_batch_file_async" do
      {:ok, result} = Kreuzberg.extract_file_async(@memo_pdf)

      assert String.trim(result.mime_type) == "application/pdf"
      assert min_size?(result.content, 10)
      assert mentions_any?(result.content, ["May 5, 2023", "Mallori"])
    end
  end

  describe "api_batch_file_with_configs_async" do
    test "api_batch_file_with_configs_async" do
      {:ok, result} = Kreuzberg.extract_file_async(@memo_pdf, config: @markdown_config)

      assert String.trim(result.mime_type) == "application/pdf"
      assert min_size?(result.content, 10)
      # skipped: field 'metadata.output_format' not available on result type
    end
  end

  describe "api_extract_bytes_async" do
    test "api_extract_bytes_async" do
      {:ok, result} = Kreuzberg.extract_file_async(@memo_pdf)

      assert String.trim(result.mime_type) == "application/pdf"
      assert min_size?(result.content, 10)
      assert mentions_any?(result.content, ["May 5, 2023", "Mallori"])
    end
  end

  describe "api_extract_file_async" do
    test "api_extract_file_async" do
      {:ok, result} = Kreuzberg.extract_file_async(@memo_pdf)

      assert String.trim(result.mime_type) == "application/pdf"
      assert min_size?(result.content, 10)
      assert mentions_any?(result.content, ["May 5, 2023", "Mallori"])
    end
  end

  describe "config_chunking_prepend_heading_context" do
    test "config_chunking_prepend_heading_context" do
      {:ok, result} =
        Kreuzberg.extract_file_sync("../../test_documents/markdown/extraction_test.md",
          config:
            "{\"chunking\":{\"chunker_type\":\"markdown\",\"max_chars\":300,\"max_overlap\":50,\"prepend_heading_context\":true}}"
        )

      assert min_size?(result.content, 10)

      # NOTE(review): the generator marked one assertion here as
      # "skipped: field 'chunks' not available on result type", yet the
      # assertions below still read result.chunks — confirm the field exists
      # on the Elixir result.
      chunks = result.chunks || []

      assert Enum.all?(chunks, fn c -> c.content != nil and c.content != "" end)

      assert Enum.all?(chunks, fn c ->
               c.metadata != nil and c.metadata.heading_context != nil
             end)

      # The first chunk must exist and start (after leading whitespace) with
      # a "#" character; an empty chunk list fails this check.
      first_chunk_starts_with_hash? =
        case List.first(chunks) do
          chunk when is_map(chunk) ->
            (chunk.content || "") |> String.trim_leading() |> String.starts_with?("#")

          _ ->
            false
        end

      assert first_chunk_starts_with_hash?
    end
  end

  describe "config_document_structure_with_headings" do
    test "config_document_structure_with_headings" do
      {:ok, result} =
        Kreuzberg.extract_file_sync("../../test_documents/docx/fake.docx",
          config: "{\"include_document_structure\":true}"
        )

      assert String.trim(result.mime_type) ==
               "application/vnd.openxmlformats-officedocument.wordprocessingml.document"

      # skipped: field 'document' not available on result type
      # skipped: field 'document.nodes' not available on result type
    end
  end

  describe "config_element_types" do
    test "config_element_types" do
      {:ok, result} =
        Kreuzberg.extract_file_sync("../../test_documents/docx/unit_test_headers.docx",
          config: "{\"result_format\":\"element_based\"}"
        )

      assert mentions_any?(result.mime_type, [
               "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
             ])

      # skipped: field 'elements' not available on result type
    end
  end

  describe "config_extraction_timeout" do
    test "config_extraction_timeout" do
      {:ok, result} =
        Kreuzberg.extract_file_sync(@memo_pdf, config: "{\"extraction_timeout_secs\":300}")

      assert String.trim(result.mime_type) == "application/pdf"
      assert min_size?(result.content, 10)
    end
  end

  describe "config_keywords" do
    test "config_keywords" do
      {:ok, result} =
        Kreuzberg.extract_file_sync(@memo_pdf,
          config: "{\"keywords\":{\"algorithm\":\"yake\",\"max_keywords\":10}}"
        )

      assert String.trim(result.mime_type) == "application/pdf"
      assert min_size?(result.content, 10)
      # skipped: field 'keywords' not available on Elixir ExtractionResult
      # skipped: field 'keywords' not available on Elixir ExtractionResult
    end
  end

  describe "config_pages" do
    test "config_pages" do
      {:ok, result} =
        Kreuzberg.extract_file_sync(@memo_pdf,
          config: "{\"pages\":{\"extract_pages\":true,\"insert_page_markers\":true}}"
        )

      assert String.trim(result.mime_type) == "application/pdf"
      assert min_size?(result.content, 10)
      assert mentions_any?(result.content, ["PAGE"])
    end
  end

  describe "config_quality_enabled" do
    test "config_quality_enabled" do
      {:ok, result} =
        Kreuzberg.extract_file_sync(@memo_pdf, config: "{\"enable_quality_processing\":true}")

      assert String.trim(result.mime_type) == "application/pdf"
      assert min_size?(result.content, 10)
      # skipped: field 'quality_score' not available on result type
      # skipped: field 'quality_score' not available on result type
      # skipped: field 'quality_score' not available on result type
    end
  end

  describe "config_security_limits" do
    test "config_security_limits" do
      {:ok, result} =
        Kreuzberg.extract_file_sync("../../test_documents/archives/documents.zip",
          config:
            "{\"security_limits\":{\"max_archive_size\":104857600,\"max_compression_ratio\":50,\"max_files_in_archive\":100}}"
        )

      assert mentions_any?(result.mime_type, ["application/zip", "application/x-zip-compressed"])
      assert min_size?(result.content, 10)
    end
  end

  describe "config_tree_sitter" do
    test "config_tree_sitter" do
      {:ok, result} =
        Kreuzberg.extract_file_sync("../../test_documents/code/hello.py",
          config:
            "{\"tree_sitter\":{\"groups\":[\"web\"],\"languages\":[\"python\",\"rust\"],\"process\":{\"comments\":false,\"diagnostics\":false,\"docstrings\":false,\"exports\":true,\"imports\":true,\"structure\":true,\"symbols\":false}}}"
        )

      assert String.trim(result.mime_type) == "text/x-source-code"
      assert min_size?(result.content, 5)
    end
  end

  describe "output_format_bytes_markdown" do
    test "output_format_bytes_markdown" do
      content = File.read!(@memo_pdf)

      {:ok, result} = Kreuzberg.extract_bytes_sync(content, "application/pdf", @markdown_config)

      assert String.trim(result.mime_type) == "application/pdf"
      assert min_size?(result.content, 10)
      # skipped: field 'metadata.output_format' not available on result type
    end
  end

  describe "output_format_markdown" do
    test "output_format_markdown" do
      {:ok, result} = Kreuzberg.extract_file_sync(@memo_pdf, config: @markdown_config)

      assert String.trim(result.mime_type) == "application/pdf"
      assert min_size?(result.content, 10)
      # skipped: field 'metadata.output_format' not available on result type
    end
  end
end
|
||||
36
e2e/elixir/test/detection_test.exs
generated
Normal file
36
e2e/elixir/test/detection_test.exs
generated
Normal file
@@ -0,0 +1,36 @@
|
||||
# This file is auto-generated by alef — DO NOT EDIT.
|
||||
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
# To regenerate: alef generate
|
||||
# To verify freshness: alef verify --exit-code
|
||||
# Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
# E2e tests for category: detection
|
||||
defmodule E2e.DetectionTest do
  use ExUnit.Case, async: false

  # The detect_mime_bytes_* cases only require an {:ok, _} reply; the detected
  # value itself is not inspected. Matching inside `assert` avoids the unused
  # `result` binding the generated code left behind (compiler warning) and
  # yields an ExUnit failure report instead of a bare MatchError.

  describe "detect_mime_bytes_html" do
    test "detect_mime_bytes_html" do
      content = File.read!("../../test_documents/html/html.html")

      assert {:ok, _mime} = Kreuzberg.detect_mime_type_from_bytes(content)
    end
  end

  describe "detect_mime_bytes_pdf" do
    test "detect_mime_bytes_pdf" do
      content = File.read!("../../test_documents/pdf/fake_memo.pdf")

      assert {:ok, _mime} = Kreuzberg.detect_mime_type_from_bytes(content)
    end
  end

  describe "detect_mime_bytes_png" do
    test "detect_mime_bytes_png" do
      content = File.read!("../../test_documents/images/test_hello_world.png")

      assert {:ok, _mime} = Kreuzberg.detect_mime_type_from_bytes(content)
    end
  end

  describe "get_extensions_unknown_mime" do
    # An unrecognised MIME type must be reported as an error tuple.
    test "get_extensions_unknown_mime" do
      assert {:error, _} = Kreuzberg.get_extensions_for_mime("application/x-totally-unknown")
    end
  end
end
|
||||
21
e2e/elixir/test/document_extractor_management_test.exs
generated
Normal file
21
e2e/elixir/test/document_extractor_management_test.exs
generated
Normal file
@@ -0,0 +1,21 @@
|
||||
# This file is auto-generated by alef — DO NOT EDIT.
|
||||
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
# To regenerate: alef generate
|
||||
# To verify freshness: alef verify --exit-code
|
||||
# Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
# E2e tests for category: document_extractor_management
|
||||
defmodule E2e.DocumentExtractorManagementTest do
  use ExUnit.Case, async: false

  # Both cases are smoke tests: they make no assertion on the return value and
  # pass as long as the call does not raise. The return value is bound to an
  # underscore-prefixed name so the compiler does not warn about an unused
  # variable.

  describe "document_extractors_clear" do
    test "document_extractors_clear" do
      _result = Kreuzberg.clear_document_extractors()
    end
  end

  describe "extractors_list" do
    test "extractors_list" do
      _result = Kreuzberg.list_document_extractors()
    end
  end
end
|
||||
29
e2e/elixir/test/embed_async_pending_test.exs
generated
Normal file
29
e2e/elixir/test/embed_async_pending_test.exs
generated
Normal file
@@ -0,0 +1,29 @@
|
||||
# This file is auto-generated by alef — DO NOT EDIT.
|
||||
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
# To regenerate: alef generate
|
||||
# To verify freshness: alef verify --exit-code
|
||||
# Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
# E2e tests for category: embed_async_pending
|
||||
defmodule E2e.EmbedAsyncPendingTest do
  use ExUnit.Case, async: false

  describe "embed_texts_async_empty_input" do
    # Embedding an empty list must succeed and yield an empty list.
    test "embed_texts_async_empty_input" do
      {:ok, result} = Kreuzberg.embed_texts_async([])

      # Compare against [] directly rather than walking the list with length/1.
      assert result == []
    end
  end

  describe "embed_texts_async_happy" do
    test "embed_texts_async_happy" do
      {:ok, result} = Kreuzberg.embed_texts_async(["First", "Second"])

      assert length(result) >= 2
    end
  end

  describe "embed_texts_async_preset_switch" do
    # Only the {:ok, _} shape is checked; the embeddings are not inspected, so
    # the payload is matched with an underscore-prefixed name (no unused
    # variable warning).
    test "embed_texts_async_preset_switch" do
      assert {:ok, _result} =
               Kreuzberg.embed_texts_async(
                 ["Text"],
                 "{\"model\":{\"name\":\"balanced\",\"type\":\"preset\"}}"
               )
    end
  end
end
|
||||
15
e2e/elixir/test/embed_extra_test.exs
generated
Normal file
15
e2e/elixir/test/embed_extra_test.exs
generated
Normal file
@@ -0,0 +1,15 @@
|
||||
# This file is auto-generated by alef — DO NOT EDIT.
|
||||
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
# To regenerate: alef generate
|
||||
# To verify freshness: alef verify --exit-code
|
||||
# Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
# E2e tests for category: embed_extra
|
||||
defmodule E2e.EmbedExtraTest do
  use ExUnit.Case, async: false

  describe "embed_texts_batch" do
    # Only the {:ok, _} shape is checked. The payload is matched with an
    # underscore-prefixed name because nothing reads it (the generated
    # `result` binding triggered an unused-variable warning).
    test "embed_texts_batch" do
      assert {:ok, _result} =
               Kreuzberg.embed_texts(
                 ["Hello", "World"],
                 "{\"model\":{\"name\":\"balanced\",\"type\":\"preset\"}}"
               )
    end
  end
end
|
||||
21
e2e/elixir/test/embedding_backend_management_test.exs
generated
Normal file
21
e2e/elixir/test/embedding_backend_management_test.exs
generated
Normal file
@@ -0,0 +1,21 @@
|
||||
# This file is auto-generated by alef — DO NOT EDIT.
|
||||
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
# To regenerate: alef generate
|
||||
# To verify freshness: alef verify --exit-code
|
||||
# Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
# E2e tests for category: embedding_backend_management
|
||||
defmodule E2e.EmbeddingBackendManagementTest do
  use ExUnit.Case, async: false

  # Smoke tests: no assertion is made on the return value, so each case passes
  # as long as the call does not raise. Underscore-prefixed bindings keep the
  # compiler from warning about unused variables.

  describe "embedding_backends_clear" do
    test "embedding_backends_clear" do
      _result = Kreuzberg.clear_embedding_backends()
    end
  end

  describe "embedding_backends_list" do
    test "embedding_backends_list" do
      _result = Kreuzberg.list_embedding_backends()
    end
  end
end
|
||||
42
e2e/elixir/test/embeddings_test.exs
generated
Normal file
42
e2e/elixir/test/embeddings_test.exs
generated
Normal file
@@ -0,0 +1,42 @@
|
||||
# This file is auto-generated by alef — DO NOT EDIT.
|
||||
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
# To regenerate: alef generate
|
||||
# To verify freshness: alef verify --exit-code
|
||||
# Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
# E2e tests for category: embeddings
|
||||
defmodule E2e.EmbeddingsTest do
  use ExUnit.Case, async: false

  describe "embed_texts_different_preset" do
    test "embed_texts_different_preset" do
      {:ok, result} =
        Kreuzberg.embed_texts(
          ["Hello world", "Test"],
          "{\"model\":{\"name\":\"multilingual\",\"type\":\"preset\"}}"
        )

      assert length(result) >= 2
    end
  end

  describe "get_embedding_preset_known" do
    # Smoke test: passes as long as the lookup does not raise.
    test "get_embedding_preset_known" do
      _preset = Kreuzberg.get_embedding_preset("balanced")
    end
  end

  describe "get_embedding_preset_nominal" do
    # Smoke test: passes as long as the lookup does not raise.
    test "get_embedding_preset_nominal" do
      _preset = Kreuzberg.get_embedding_preset("balanced")
    end
  end

  describe "get_embedding_preset_unknown" do
    # An unknown preset must come back as nil or a blank string.
    test "get_embedding_preset_unknown" do
      result = Kreuzberg.get_embedding_preset("nonexistent-xyz")

      # Guard String.trim/1 with is_binary/1 so an unexpected non-nil,
      # non-binary reply fails the assertion instead of raising
      # FunctionClauseError.
      assert is_nil(result) or (is_binary(result) and String.trim(result) == "")
    end
  end

  describe "list_embedding_presets_sanity" do
    test "list_embedding_presets_sanity" do
      result = Kreuzberg.list_embedding_presets()

      assert result != ""
    end
  end
end
|
||||
44
e2e/elixir/test/error_test.exs
generated
Normal file
44
e2e/elixir/test/error_test.exs
generated
Normal file
@@ -0,0 +1,44 @@
|
||||
# This file is auto-generated by alef — DO NOT EDIT.
|
||||
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
# To regenerate: alef generate
|
||||
# To verify freshness: alef verify --exit-code
|
||||
# Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
# E2e tests for category: error
|
||||
defmodule E2e.ErrorTest do
  use ExUnit.Case, async: false

  describe "error_empty_bytes" do
    # NOTE(review): despite the "error_" prefix this case expects {:ok, _} for
    # an empty text file — confirm that is the intended contract.
    # The payload is not inspected, so it is matched with an underscore name
    # (the generated `result` binding was unused).
    test "error_empty_bytes" do
      content = File.read!("../../test_documents/text/empty.txt")

      assert {:ok, _result} = Kreuzberg.extract_bytes_sync(content, "text/plain", "{}")
    end
  end

  describe "error_empty_mime" do
    test "error_empty_mime" do
      content = File.read!("../../test_documents/text/plain.txt")

      assert {:error, _} = Kreuzberg.extract_bytes_sync(content, "", "{}")
    end
  end

  describe "error_extract_bytes_conflicting_ocr" do
    # disable_ocr and force_ocr are both set; the call must return an error.
    test "error_extract_bytes_conflicting_ocr" do
      content = File.read!("../../test_documents/text/fake_text.txt")

      assert {:error, _} =
               Kreuzberg.extract_bytes_sync(
                 content,
                 "text/plain",
                 "{\"disable_ocr\":true,\"force_ocr\":true}"
               )
    end
  end

  describe "error_invalid_mime_format" do
    test "error_invalid_mime_format" do
      content = File.read!("../../test_documents/text/plain.txt")

      assert {:error, _} = Kreuzberg.extract_bytes_sync(content, "not-a-mime", "{}")
    end
  end

  describe "error_unsupported_mime" do
    test "error_unsupported_mime" do
      content = File.read!("../../test_documents/text/plain.txt")

      assert {:error, _} =
               Kreuzberg.extract_bytes_sync(content, "application/x-nonexistent", "{}")
    end
  end
end
|
||||
47
e2e/elixir/test/format_specific_test.exs
generated
Normal file
47
e2e/elixir/test/format_specific_test.exs
generated
Normal file
@@ -0,0 +1,47 @@
|
||||
# This file is auto-generated by alef — DO NOT EDIT.
|
||||
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
# To regenerate: alef generate
|
||||
# To verify freshness: alef verify --exit-code
|
||||
# Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
# E2e tests for category: format_specific
|
||||
defmodule E2e.FormatSpecificTest do
  use ExUnit.Case, async: false

  # True when `value` is a binary of at least `min` bytes or a list of at
  # least `min` elements. Any other term counts as too short; the generated
  # assertion instead fell through to String.length/1 on a non-binary, which
  # can only raise.
  defp min_size?(value, min) when is_binary(value), do: byte_size(value) >= min
  defp min_size?(value, min) when is_list(value), do: length(value) >= min
  defp min_size?(_value, _min), do: false

  describe "format_docx_standalone" do
    test "format_docx_standalone" do
      content = File.read!("../../test_documents/docx/fake.docx")

      {:ok, result} =
        Kreuzberg.extract_bytes_sync(
          content,
          "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
        )

      assert min_size?(result.content, 20)
    end
  end

  describe "format_hwpx_standalone" do
    test "format_hwpx_standalone" do
      content = File.read!("../../test_documents/hwpx/simple.hwpx")

      {:ok, result} = Kreuzberg.extract_bytes_sync(content, "application/haansofthwpx")

      assert min_size?(result.content, 20)
      assert String.contains?(to_string(result.content), "Hello from HWPX")
    end
  end

  describe "format_pdf_text" do
    test "format_pdf_text" do
      content = File.read!("../../test_documents/pdf/fake_memo.pdf")

      {:ok, result} = Kreuzberg.extract_bytes_sync(content, "application/pdf")

      assert min_size?(result.content, 50)

      assert Enum.any?(["Mallori", "May"], fn v ->
               String.contains?(to_string(result.content), v)
             end)
    end
  end

  describe "format_pptx" do
    # Only the {:ok, _} shape is checked; the payload is not inspected, so it
    # is matched with an underscore name (no unused-variable warning).
    test "format_pptx" do
      assert {:ok, _result} =
               Kreuzberg.extract_file_sync("../../test_documents/pptx/simple.pptx",
                 mime_type:
                   "application/vnd.openxmlformats-officedocument.presentationml.presentation"
               )
    end
  end

  describe "format_xlsx" do
    # Only the {:ok, _} shape is checked; see format_pptx.
    test "format_xlsx" do
      assert {:ok, _result} =
               Kreuzberg.extract_file_sync("../../test_documents/xlsx/stanley_cups.xlsx",
                 mime_type: "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
               )
    end
  end
end
|
||||
32
e2e/elixir/test/mime_utilities_test.exs
generated
Normal file
32
e2e/elixir/test/mime_utilities_test.exs
generated
Normal file
@@ -0,0 +1,32 @@
|
||||
# This file is auto-generated by alef — DO NOT EDIT.
|
||||
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
# To regenerate: alef generate
|
||||
# To verify freshness: alef verify --exit-code
|
||||
# Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
# E2e tests for category: mime_utilities
|
||||
defmodule E2e.MimeUtilitiesTest do
  use ExUnit.Case, async: false

  describe "mime_detect_bytes" do
    # The MIME type detected for the PDF fixture must mention "pdf".
    test "mime_detect_bytes" do
      {:ok, detected} =
        "../../test_documents/pdf/fake_memo.pdf"
        |> File.read!()
        |> Kreuzberg.detect_mime_type_from_bytes()

      assert to_string(detected) =~ "pdf"
    end
  end

  describe "mime_detect_image" do
    # The MIME type detected for the PNG fixture must mention "png".
    test "mime_detect_image" do
      {:ok, detected} =
        "../../test_documents/images/test_hello_world.png"
        |> File.read!()
        |> Kreuzberg.detect_mime_type_from_bytes()

      assert to_string(detected) =~ "png"
    end
  end

  describe "mime_get_extensions" do
    # The extensions reported for application/pdf must mention "pdf".
    test "mime_get_extensions" do
      {:ok, extensions} = Kreuzberg.get_extensions_for_mime("application/pdf")

      assert to_string(extensions) =~ "pdf"
    end
  end
end
|
||||
27
e2e/elixir/test/ocr_backend_management_test.exs
generated
Normal file
27
e2e/elixir/test/ocr_backend_management_test.exs
generated
Normal file
@@ -0,0 +1,27 @@
|
||||
# This file is auto-generated by alef — DO NOT EDIT.
|
||||
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
# To regenerate: alef generate
|
||||
# To verify freshness: alef verify --exit-code
|
||||
# Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
# E2e tests for category: ocr_backend_management
|
||||
defmodule E2e.OcrBackendManagementTest do
  use ExUnit.Case, async: false

  # Smoke tests: no assertion is made on the return value, so each case passes
  # as long as the call does not raise. Underscore-prefixed bindings keep the
  # compiler from warning about unused variables.

  describe "ocr_backends_clear" do
    test "ocr_backends_clear" do
      _result = Kreuzberg.clear_ocr_backends()
    end
  end

  describe "ocr_backends_list" do
    test "ocr_backends_list" do
      _result = Kreuzberg.list_ocr_backends()
    end
  end

  describe "ocr_backends_unregister" do
    test "ocr_backends_unregister" do
      _result = Kreuzberg.unregister_ocr_backend("nonexistent-backend-xyz")
    end
  end
end
|
||||
24
e2e/elixir/test/pdf_test.exs
generated
Normal file
24
e2e/elixir/test/pdf_test.exs
generated
Normal file
@@ -0,0 +1,24 @@
|
||||
# This file is auto-generated by alef — DO NOT EDIT.
|
||||
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
# To regenerate: alef generate
|
||||
# To verify freshness: alef verify --exit-code
|
||||
# Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
# E2e tests for category: pdf
|
||||
defmodule E2e.PdfTest do
  use ExUnit.Case, async: false

  describe "render_pdf_page_first" do
    # Rendering page index 0 of the memo fixture must succeed with a payload
    # of at least 100 bytes (binary) or 100 elements (list).
    test "render_pdf_page_first" do
      pdf_bytes = File.read!("../../test_documents/pdf/fake_memo.pdf")

      {:ok, result} = Kreuzberg.render_pdf_page_to_png(pdf_bytes, 0)

      # The generated assertion had a third alternative that called
      # String.length/1 on a value already known not to be a binary; that can
      # only raise, so it is dropped and such values now fail the assertion.
      assert (is_binary(result) and byte_size(result) >= 100) or
               (is_list(result) and length(result) >= 100)
    end
  end

  describe "render_pdf_page_out_of_range" do
    # Page index 999 must be reported as an error tuple.
    test "render_pdf_page_out_of_range" do
      pdf_bytes = File.read!("../../test_documents/pdf/fake_memo.pdf")

      assert {:error, _} = Kreuzberg.render_pdf_page_to_png(pdf_bytes, 999)
    end
  end
end
|
||||
327
e2e/elixir/test/plugin_api_test.exs
generated
Normal file
327
e2e/elixir/test/plugin_api_test.exs
generated
Normal file
@@ -0,0 +1,327 @@
|
||||
# This file is auto-generated by alef — DO NOT EDIT.
|
||||
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
# To regenerate: alef generate
|
||||
# To verify freshness: alef verify --exit-code
|
||||
# Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
# E2e tests for category: plugin_api
|
||||
# Defined only once: skipped when the stub module is already loaded.
unless Code.ensure_loaded?(E2e.TestStubs.TestStubRegisterDocumentExtractorTraitBridge) do
  defmodule E2e.TestStubs.TestStubRegisterDocumentExtractorTraitBridge do
    # Stub document extractor: every callback returns a fixed value and
    # ignores its arguments, which are underscore-prefixed so the compiler
    # does not warn about unused variables.

    def name, do: "test-extractor"

    def version, do: "test"

    def initialize, do: :ok

    def shutdown, do: :ok

    def extract_bytes(_content, _mime_type, _config), do: {:ok, %{}}

    def extract_file(_path, _mime_type, _config), do: {:ok, %{}}

    def supported_mime_types, do: []

    def priority, do: 0

    def can_handle(_path, _mime_type), do: false
  end
end
|
||||
# Defined only once: skipped when the GenServer module is already loaded.
unless Code.ensure_loaded?(E2e.TestStubs.TestStubRegisterDocumentExtractorTraitBridgeGenServer) do
  defmodule E2e.TestStubs.TestStubRegisterDocumentExtractorTraitBridgeGenServer do
    # GenServer that receives {:trait_call, ...} messages, dispatches them to
    # the stub extractor module and reports the JSON-encoded result through
    # Kreuzberg.Native.complete_trait_call/2.
    use GenServer

    def start_link(_opts) do
      GenServer.start_link(__MODULE__, nil)
    end

    @impl true
    def init(_), do: {:ok, nil}

    @impl true
    def handle_info({:trait_call, method_atom, args_json, reply_id}, state) do
      args = Jason.decode!(args_json)
      method_name = to_string(method_atom)

      # The decoded JSON object is unordered; turn it into the positional
      # argument list the stub function expects.
      ordered_args = __alef_ordered_args__(method_name, args)

      # String.to_existing_atom/1 avoids creating new atoms from message data.
      result =
        apply(
          E2e.TestStubs.TestStubRegisterDocumentExtractorTraitBridge,
          String.to_existing_atom(method_name),
          ordered_args
        )

      result_json = Jason.encode!(result)
      Kreuzberg.Native.complete_trait_call(reply_id, result_json)

      {:noreply, state}
    end

    # Maps a method name plus decoded argument map to a positional argument
    # list. Clauses that ignore the map take `_args` so the compiler does not
    # warn about an unused variable.
    defp __alef_ordered_args__("extract_bytes", args),
      do: [args["content"], args["mime_type"], args["config"]]

    defp __alef_ordered_args__("extract_file", args),
      do: [args["path"], args["mime_type"], args["config"]]

    defp __alef_ordered_args__("supported_mime_types", _args), do: []

    defp __alef_ordered_args__("priority", _args), do: []

    # NOTE(review): the keys carry a leading underscore ("_path",
    # "_mime_type") — confirm they match the keys the caller actually sends.
    defp __alef_ordered_args__("can_handle", args), do: [args["_path"], args["_mime_type"]]

    defp __alef_ordered_args__("version", _args), do: []

    defp __alef_ordered_args__("initialize", _args), do: []

    defp __alef_ordered_args__("shutdown", _args), do: []

    # Any other method is accepted only when it carries no arguments.
    defp __alef_ordered_args__(_method, args) when map_size(args) == 0, do: []
  end
end
|
||||
# Embedding-backend stub: every callback returns a fixed placeholder value.
# The `unless` guard skips the definition when the module is already loaded.
unless Code.ensure_loaded?(E2e.TestStubs.TestStubRegisterEmbeddingBackendTraitBridge) do
  defmodule E2e.TestStubs.TestStubRegisterEmbeddingBackendTraitBridge do
    # Identity / lifecycle callbacks.
    def name, do: "test-embedding-backend"
    def version, do: "test"
    def initialize, do: :ok
    def shutdown, do: :ok

    # Reports a single embedding dimension.
    def dimensions, do: 1

    # Ignores the input texts (underscore prefix avoids the unused-variable
    # warning) and returns an empty list of embeddings.
    def embed(_texts), do: {:ok, []}
  end
end
|
||||
# GenServer front-end for the embedding-backend stub. It receives
# `{:trait_call, method, args_json, reply_id}` messages and answers each one
# through `Kreuzberg.Native.complete_trait_call/2`.
unless Code.ensure_loaded?(E2e.TestStubs.TestStubRegisterEmbeddingBackendTraitBridgeGenServer) do
  defmodule E2e.TestStubs.TestStubRegisterEmbeddingBackendTraitBridgeGenServer do
    use GenServer

    # Starts an unnamed server; the options are ignored and the state is nil.
    def start_link(_opts) do
      GenServer.start_link(__MODULE__, nil)
    end

    @impl true
    def init(_), do: {:ok, nil}

    # Decodes the JSON argument map, orders the arguments positionally, calls
    # the same-named stub function and returns its JSON-encoded result under
    # `reply_id`.
    @impl true
    def handle_info({:trait_call, method_atom, args_json, reply_id}, state) do
      args = Jason.decode!(args_json)
      method_name = to_string(method_atom)
      ordered_args = __alef_ordered_args__(method_name, args)

      result =
        apply(
          E2e.TestStubs.TestStubRegisterEmbeddingBackendTraitBridge,
          String.to_existing_atom(method_name),
          ordered_args
        )

      result_json = Jason.encode!(result)
      Kreuzberg.Native.complete_trait_call(reply_id, result_json)
      {:noreply, state}
    end

    # Method name + decoded argument map -> positional argument list.
    defp __alef_ordered_args__("dimensions", _args), do: []
    defp __alef_ordered_args__("embed", args), do: [args["texts"]]
    defp __alef_ordered_args__("version", _args), do: []
    defp __alef_ordered_args__("initialize", _args), do: []
    defp __alef_ordered_args__("shutdown", _args), do: []
    # Any other method (e.g. "name") is accepted only with an empty argument
    # map; a non-empty map for an unknown method raises FunctionClauseError.
    defp __alef_ordered_args__(_method, args) when map_size(args) == 0, do: []
  end
end
|
||||
# OCR-backend stub: every callback returns a fixed placeholder value.
# The `unless` guard skips the definition when the module is already loaded.
unless Code.ensure_loaded?(E2e.TestStubs.TestStubRegisterOcrBackendTraitBridge) do
  defmodule E2e.TestStubs.TestStubRegisterOcrBackendTraitBridge do
    # Identity / lifecycle callbacks.
    def name, do: "test-backend"
    def version, do: "test"
    def initialize, do: :ok
    def shutdown, do: :ok

    # Processing callbacks ignore their inputs (underscore-prefixed to avoid
    # unused-variable warnings) and always succeed with an empty map.
    def process_image(_image_bytes, _config), do: {:ok, %{}}
    def process_image_file(_path, _config), do: {:ok, %{}}
    def process_document(_path, _config), do: {:ok, %{}}

    # Capability callbacks: no language, table or document support advertised.
    def supports_language(_lang), do: false
    def backend_type, do: %{}
    def supported_languages, do: []
    def supports_table_detection, do: false
    def supports_document_processing, do: false
  end
end
|
||||
# GenServer front-end for the OCR-backend stub. It receives
# `{:trait_call, method, args_json, reply_id}` messages and answers each one
# through `Kreuzberg.Native.complete_trait_call/2`.
unless Code.ensure_loaded?(E2e.TestStubs.TestStubRegisterOcrBackendTraitBridgeGenServer) do
  defmodule E2e.TestStubs.TestStubRegisterOcrBackendTraitBridgeGenServer do
    use GenServer

    # Starts an unnamed server; the options are ignored and the state is nil.
    def start_link(_opts) do
      GenServer.start_link(__MODULE__, nil)
    end

    @impl true
    def init(_), do: {:ok, nil}

    # Decodes the JSON argument map, orders the arguments positionally, calls
    # the same-named stub function and returns its JSON-encoded result under
    # `reply_id`.
    @impl true
    def handle_info({:trait_call, method_atom, args_json, reply_id}, state) do
      args = Jason.decode!(args_json)
      method_name = to_string(method_atom)
      ordered_args = __alef_ordered_args__(method_name, args)

      result =
        apply(
          E2e.TestStubs.TestStubRegisterOcrBackendTraitBridge,
          String.to_existing_atom(method_name),
          ordered_args
        )

      result_json = Jason.encode!(result)
      Kreuzberg.Native.complete_trait_call(reply_id, result_json)
      {:noreply, state}
    end

    # Method name + decoded argument map -> positional argument list.
    defp __alef_ordered_args__("process_image", args), do: [args["image_bytes"], args["config"]]
    defp __alef_ordered_args__("process_image_file", args), do: [args["path"], args["config"]]
    defp __alef_ordered_args__("supports_language", args), do: [args["lang"]]
    defp __alef_ordered_args__("backend_type", _args), do: []
    defp __alef_ordered_args__("supported_languages", _args), do: []
    defp __alef_ordered_args__("supports_table_detection", _args), do: []
    defp __alef_ordered_args__("supports_document_processing", _args), do: []
    # NOTE(review): these keys keep the leading underscore of the stub's
    # parameter names — confirm the native side sends them spelled this way.
    defp __alef_ordered_args__("process_document", args), do: [args["_path"], args["_config"]]
    defp __alef_ordered_args__("version", _args), do: []
    defp __alef_ordered_args__("initialize", _args), do: []
    defp __alef_ordered_args__("shutdown", _args), do: []
    # Any other method (e.g. "name") is accepted only with an empty argument
    # map; a non-empty map for an unknown method raises FunctionClauseError.
    defp __alef_ordered_args__(_method, args) when map_size(args) == 0, do: []
  end
end
|
||||
# Post-processor stub: every callback returns a fixed placeholder value.
# The `unless` guard skips the definition when the module is already loaded.
unless Code.ensure_loaded?(E2e.TestStubs.TestStubRegisterPostProcessorTraitBridge) do
  defmodule E2e.TestStubs.TestStubRegisterPostProcessorTraitBridge do
    # Identity / lifecycle callbacks.
    def name, do: "test-processor"
    def version, do: "test"
    def initialize, do: :ok
    def shutdown, do: :ok

    # Ignores both inputs (underscore-prefixed to avoid unused-variable
    # warnings) and reports success with no payload.
    def process(_result, _config), do: {:ok, nil}

    # Scheduling callbacks: empty stage descriptor, never opts in to
    # processing, zero estimated duration, lowest priority.
    def processing_stage, do: %{}
    def should_process(_result, _config), do: false
    def estimated_duration_ms(_result), do: 0
    def priority, do: 0
  end
end
|
||||
# GenServer front-end for the post-processor stub. It receives
# `{:trait_call, method, args_json, reply_id}` messages and answers each one
# through `Kreuzberg.Native.complete_trait_call/2`.
unless Code.ensure_loaded?(E2e.TestStubs.TestStubRegisterPostProcessorTraitBridgeGenServer) do
  defmodule E2e.TestStubs.TestStubRegisterPostProcessorTraitBridgeGenServer do
    use GenServer

    # Starts an unnamed server; the options are ignored and the state is nil.
    def start_link(_opts) do
      GenServer.start_link(__MODULE__, nil)
    end

    @impl true
    def init(_), do: {:ok, nil}

    # Decodes the JSON argument map, orders the arguments positionally, calls
    # the same-named stub function and returns its JSON-encoded result under
    # `reply_id`.
    @impl true
    def handle_info({:trait_call, method_atom, args_json, reply_id}, state) do
      args = Jason.decode!(args_json)
      method_name = to_string(method_atom)
      ordered_args = __alef_ordered_args__(method_name, args)

      result =
        apply(
          E2e.TestStubs.TestStubRegisterPostProcessorTraitBridge,
          String.to_existing_atom(method_name),
          ordered_args
        )

      result_json = Jason.encode!(result)
      Kreuzberg.Native.complete_trait_call(reply_id, result_json)
      {:noreply, state}
    end

    # Method name + decoded argument map -> positional argument list.
    defp __alef_ordered_args__("process", args), do: [args["result"], args["config"]]
    defp __alef_ordered_args__("processing_stage", _args), do: []
    # NOTE(review): the next two clauses read keys that keep the leading
    # underscore of the stub's parameter names — confirm against the sender.
    defp __alef_ordered_args__("should_process", args), do: [args["_result"], args["_config"]]
    defp __alef_ordered_args__("estimated_duration_ms", args), do: [args["_result"]]
    defp __alef_ordered_args__("priority", _args), do: []
    defp __alef_ordered_args__("version", _args), do: []
    defp __alef_ordered_args__("initialize", _args), do: []
    defp __alef_ordered_args__("shutdown", _args), do: []
    # Any other method (e.g. "name") is accepted only with an empty argument
    # map; a non-empty map for an unknown method raises FunctionClauseError.
    defp __alef_ordered_args__(_method, args) when map_size(args) == 0, do: []
  end
end
|
||||
# Renderer stub: every callback returns a fixed placeholder value.
# The `unless` guard skips the definition when the module is already loaded.
unless Code.ensure_loaded?(E2e.TestStubs.TestStubRegisterRendererTraitBridge) do
  defmodule E2e.TestStubs.TestStubRegisterRendererTraitBridge do
    # Identity / lifecycle callbacks.
    def name, do: "test-renderer"
    def version, do: "test"
    def initialize, do: :ok
    def shutdown, do: :ok

    # Ignores the document (underscore prefix avoids the unused-variable
    # warning) and renders it as an empty string.
    def render(_doc), do: {:ok, ""}
  end
end
|
||||
# GenServer front-end for the renderer stub. It receives
# `{:trait_call, method, args_json, reply_id}` messages and answers each one
# through `Kreuzberg.Native.complete_trait_call/2`.
unless Code.ensure_loaded?(E2e.TestStubs.TestStubRegisterRendererTraitBridgeGenServer) do
  defmodule E2e.TestStubs.TestStubRegisterRendererTraitBridgeGenServer do
    use GenServer

    # Starts an unnamed server; the options are ignored and the state is nil.
    def start_link(_opts) do
      GenServer.start_link(__MODULE__, nil)
    end

    @impl true
    def init(_), do: {:ok, nil}

    # Decodes the JSON argument map, orders the arguments positionally, calls
    # the same-named stub function and returns its JSON-encoded result under
    # `reply_id`.
    @impl true
    def handle_info({:trait_call, method_atom, args_json, reply_id}, state) do
      args = Jason.decode!(args_json)
      method_name = to_string(method_atom)
      ordered_args = __alef_ordered_args__(method_name, args)

      result =
        apply(
          E2e.TestStubs.TestStubRegisterRendererTraitBridge,
          String.to_existing_atom(method_name),
          ordered_args
        )

      result_json = Jason.encode!(result)
      Kreuzberg.Native.complete_trait_call(reply_id, result_json)
      {:noreply, state}
    end

    # Method name + decoded argument map -> positional argument list.
    defp __alef_ordered_args__("render", args), do: [args["doc"]]
    defp __alef_ordered_args__("version", _args), do: []
    defp __alef_ordered_args__("initialize", _args), do: []
    defp __alef_ordered_args__("shutdown", _args), do: []
    # Any other method (e.g. "name") is accepted only with an empty argument
    # map; a non-empty map for an unknown method raises FunctionClauseError.
    defp __alef_ordered_args__(_method, args) when map_size(args) == 0, do: []
  end
end
|
||||
# Validator stub: every callback returns a fixed placeholder value.
# The `unless` guard skips the definition when the module is already loaded.
unless Code.ensure_loaded?(E2e.TestStubs.TestStubRegisterValidatorTraitBridge) do
  defmodule E2e.TestStubs.TestStubRegisterValidatorTraitBridge do
    # Identity / lifecycle callbacks.
    def name, do: "test-validator"
    def version, do: "test"
    def initialize, do: :ok
    def shutdown, do: :ok

    # Ignores both inputs (underscore-prefixed to avoid unused-variable
    # warnings) and reports success with no payload.
    def validate(_result, _config), do: {:ok, nil}

    # Never opts in to validation; lowest priority.
    def should_validate(_result, _config), do: false
    def priority, do: 0
  end
end
|
||||
# GenServer front-end for the validator stub. It receives
# `{:trait_call, method, args_json, reply_id}` messages and answers each one
# through `Kreuzberg.Native.complete_trait_call/2`.
unless Code.ensure_loaded?(E2e.TestStubs.TestStubRegisterValidatorTraitBridgeGenServer) do
  defmodule E2e.TestStubs.TestStubRegisterValidatorTraitBridgeGenServer do
    use GenServer

    # Starts an unnamed server; the options are ignored and the state is nil.
    def start_link(_opts) do
      GenServer.start_link(__MODULE__, nil)
    end

    @impl true
    def init(_), do: {:ok, nil}

    # Decodes the JSON argument map, orders the arguments positionally, calls
    # the same-named stub function and returns its JSON-encoded result under
    # `reply_id`.
    @impl true
    def handle_info({:trait_call, method_atom, args_json, reply_id}, state) do
      args = Jason.decode!(args_json)
      method_name = to_string(method_atom)
      ordered_args = __alef_ordered_args__(method_name, args)

      result =
        apply(
          E2e.TestStubs.TestStubRegisterValidatorTraitBridge,
          String.to_existing_atom(method_name),
          ordered_args
        )

      result_json = Jason.encode!(result)
      Kreuzberg.Native.complete_trait_call(reply_id, result_json)
      {:noreply, state}
    end

    # Method name + decoded argument map -> positional argument list.
    defp __alef_ordered_args__("validate", args), do: [args["result"], args["config"]]
    # NOTE(review): these keys keep the leading underscore of the stub's
    # parameter names — confirm the native side sends them spelled this way.
    defp __alef_ordered_args__("should_validate", args), do: [args["_result"], args["_config"]]
    defp __alef_ordered_args__("priority", _args), do: []
    defp __alef_ordered_args__("version", _args), do: []
    defp __alef_ordered_args__("initialize", _args), do: []
    defp __alef_ordered_args__("shutdown", _args), do: []
    # Any other method (e.g. "name") is accepted only with an empty argument
    # map; a non-empty map for an unknown method raises FunctionClauseError.
    defp __alef_ordered_args__(_method, args) when map_size(args) == 0, do: []
  end
end
|
||||
defmodule E2e.PluginApiTest do
  # Smoke tests for the plugin registration API. Each register test starts the
  # matching stub GenServer and passes its pid to Kreuzberg; each unregister
  # test removes a plugin by name. Return values are not inspected, so a test
  # fails only if the call (or the `{:ok, pid}` match) raises. The results are
  # bound to `_result` so the compiler emits no unused-variable warnings.
  use ExUnit.Case, async: false

  describe "register_document_extractor_trait_bridge" do
    test "register_document_extractor_trait_bridge" do
      {:ok, registerdocumentextractortraitbridge_pid} =
        E2e.TestStubs.TestStubRegisterDocumentExtractorTraitBridgeGenServer.start_link(nil)

      _result = Kreuzberg.register_document_extractor(registerdocumentextractortraitbridge_pid, "test-extractor")
    end
  end

  describe "register_embedding_backend_trait_bridge" do
    test "register_embedding_backend_trait_bridge" do
      {:ok, registerembeddingbackendtraitbridge_pid} =
        E2e.TestStubs.TestStubRegisterEmbeddingBackendTraitBridgeGenServer.start_link(nil)

      _result = Kreuzberg.register_embedding_backend(registerembeddingbackendtraitbridge_pid, "test-embedding-backend")
    end
  end

  describe "register_ocr_backend_trait_bridge" do
    test "register_ocr_backend_trait_bridge" do
      {:ok, registerocrbackendtraitbridge_pid} =
        E2e.TestStubs.TestStubRegisterOcrBackendTraitBridgeGenServer.start_link(nil)

      _result = Kreuzberg.register_ocr_backend(registerocrbackendtraitbridge_pid, "test-backend")
    end
  end

  describe "register_post_processor_trait_bridge" do
    test "register_post_processor_trait_bridge" do
      {:ok, registerpostprocessortraitbridge_pid} =
        E2e.TestStubs.TestStubRegisterPostProcessorTraitBridgeGenServer.start_link(nil)

      _result = Kreuzberg.register_post_processor(registerpostprocessortraitbridge_pid, "test-processor")
    end
  end

  describe "register_renderer_trait_bridge" do
    test "register_renderer_trait_bridge" do
      {:ok, registerrenderertraitbridge_pid} =
        E2e.TestStubs.TestStubRegisterRendererTraitBridgeGenServer.start_link(nil)

      _result = Kreuzberg.register_renderer(registerrenderertraitbridge_pid, "test-renderer")
    end
  end

  describe "register_validator_trait_bridge" do
    test "register_validator_trait_bridge" do
      {:ok, registervalidatortraitbridge_pid} =
        E2e.TestStubs.TestStubRegisterValidatorTraitBridgeGenServer.start_link(nil)

      _result = Kreuzberg.register_validator(registervalidatortraitbridge_pid, "test-validator")
    end
  end

  describe "unregister_document_extractor_after_register" do
    test "unregister_document_extractor_after_register" do
      _result = Kreuzberg.unregister_document_extractor("test-extractor")
    end
  end

  describe "unregister_embedding_backend_after_register" do
    test "unregister_embedding_backend_after_register" do
      _result = Kreuzberg.unregister_embedding_backend("test-embedding-backend")
    end
  end

  describe "unregister_post_processor_after_register" do
    test "unregister_post_processor_after_register" do
      _result = Kreuzberg.unregister_post_processor("test-processor")
    end
  end

  describe "unregister_renderer_after_register" do
    test "unregister_renderer_after_register" do
      _result = Kreuzberg.unregister_renderer("test-renderer")
    end
  end

  describe "unregister_validator_after_register" do
    test "unregister_validator_after_register" do
      _result = Kreuzberg.unregister_validator("test-validator")
    end
  end
end
|
||||
21
e2e/elixir/test/post_processor_management_test.exs
generated
Normal file
21
e2e/elixir/test/post_processor_management_test.exs
generated
Normal file
@@ -0,0 +1,21 @@
|
||||
# This file is auto-generated by alef — DO NOT EDIT.
|
||||
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
# To regenerate: alef generate
|
||||
# To verify freshness: alef verify --exit-code
|
||||
# Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
# E2e tests for category: post_processor_management
|
||||
defmodule E2e.PostProcessorManagementTest do
  # Smoke tests for post-processor registry management. The calls only have to
  # return without raising; their results are deliberately discarded via
  # `_result`, which also avoids unused-variable compiler warnings.
  use ExUnit.Case, async: false

  describe "post_processors_clear" do
    test "post_processors_clear" do
      _result = Kreuzberg.clear_post_processors()
    end
  end

  describe "post_processors_list" do
    test "post_processors_list" do
      _result = Kreuzberg.list_post_processors()
    end
  end
end
|
||||
27
e2e/elixir/test/registry_operations_test.exs
generated
Normal file
27
e2e/elixir/test/registry_operations_test.exs
generated
Normal file
@@ -0,0 +1,27 @@
|
||||
# This file is auto-generated by alef — DO NOT EDIT.
|
||||
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
# To regenerate: alef generate
|
||||
# To verify freshness: alef verify --exit-code
|
||||
# Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
# E2e tests for category: registry_operations
|
||||
defmodule E2e.RegistryOperationsTest do
  # Checks that the extension lookup succeeds (returns an `{:ok, _}` tuple) for
  # three MIME types. The returned extension list itself is not inspected, so
  # it is matched with `_` instead of being bound to an unused variable.
  use ExUnit.Case, async: false

  describe "extensions_docx" do
    test "extensions_docx" do
      assert {:ok, _} =
               Kreuzberg.get_extensions_for_mime(
                 "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
               )
    end
  end

  describe "extensions_html" do
    test "extensions_html" do
      assert {:ok, _} = Kreuzberg.get_extensions_for_mime("text/html")
    end
  end

  describe "extensions_pdf" do
    test "extensions_pdf" do
      assert {:ok, _} = Kreuzberg.get_extensions_for_mime("application/pdf")
    end
  end
end
|
||||
45
e2e/elixir/test/registry_test.exs
generated
Normal file
45
e2e/elixir/test/registry_test.exs
generated
Normal file
@@ -0,0 +1,45 @@
|
||||
# This file is auto-generated by alef — DO NOT EDIT.
|
||||
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
# To regenerate: alef generate
|
||||
# To verify freshness: alef verify --exit-code
|
||||
# Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
# E2e tests for category: registry
|
||||
defmodule E2e.RegistryTest do
  # Smoke tests for the plugin registry listing functions. Each call only has
  # to return without raising; results are discarded via `_result`, which also
  # avoids unused-variable compiler warnings.
  use ExUnit.Case, async: false

  describe "list_document_extractors" do
    test "list_document_extractors" do
      _result = Kreuzberg.list_document_extractors()
    end
  end

  describe "list_embedding_backends" do
    test "list_embedding_backends" do
      _result = Kreuzberg.list_embedding_backends()
    end
  end

  describe "list_ocr_backends" do
    test "list_ocr_backends" do
      _result = Kreuzberg.list_ocr_backends()
    end
  end

  describe "list_post_processors" do
    test "list_post_processors" do
      _result = Kreuzberg.list_post_processors()
    end
  end

  describe "list_renderers" do
    test "list_renderers" do
      _result = Kreuzberg.list_renderers()
    end
  end

  describe "list_validators" do
    test "list_validators" do
      _result = Kreuzberg.list_validators()
    end
  end
end
|
||||
21
e2e/elixir/test/renderer_management_test.exs
generated
Normal file
21
e2e/elixir/test/renderer_management_test.exs
generated
Normal file
@@ -0,0 +1,21 @@
|
||||
# This file is auto-generated by alef — DO NOT EDIT.
|
||||
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
# To regenerate: alef generate
|
||||
# To verify freshness: alef verify --exit-code
|
||||
# Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
# E2e tests for category: renderer_management
|
||||
defmodule E2e.RendererManagementTest do
  # Smoke tests for renderer registry management. The calls only have to
  # return without raising; their results are deliberately discarded via
  # `_result`, which also avoids unused-variable compiler warnings.
  use ExUnit.Case, async: false

  describe "renderers_clear" do
    test "renderers_clear" do
      _result = Kreuzberg.clear_renderers()
    end
  end

  describe "renderers_list" do
    test "renderers_list" do
      _result = Kreuzberg.list_renderers()
    end
  end
end
|
||||
118
e2e/elixir/test/smoke_test.exs
generated
Normal file
118
e2e/elixir/test/smoke_test.exs
generated
Normal file
@@ -0,0 +1,118 @@
|
||||
# This file is auto-generated by alef — DO NOT EDIT.
|
||||
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
# To regenerate: alef generate
|
||||
# To verify freshness: alef verify --exit-code
|
||||
# Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
# E2e tests for category: smoke
|
||||
defmodule E2e.SmokeTest do
  # Smoke tests that extract one fixture per format and check the reported
  # MIME type, a minimum content size and, where listed, expected substrings.
  use ExUnit.Case, async: false

  # Collects printable texts for one item: a binary is returned as a
  # one-element list; for a map, each present attribute from the fixed key
  # list contributes a capitalized atom name or the `inspect/1` form of the
  # value (nil values contribute nothing).
  # NOTE(review): not referenced by any test in this module.
  defp alef_e2e_item_texts(item) when is_binary(item), do: [item]

  defp alef_e2e_item_texts(item) do
    [:kind, :name, :signature, :path, :alias, :text, :source]
    |> Enum.filter(&Map.has_key?(item, &1))
    |> Enum.flat_map(fn attr ->
      case Map.get(item, attr) do
        nil -> []
        atom when is_atom(atom) -> [atom |> to_string() |> String.capitalize()]
        str -> [inspect(str)]
      end
    end)
  end

  # Turns a format value into a string: binaries pass through; otherwise the
  # image format string is preferred, then "PDF" / "HTML" when the matching
  # metadata field is a map, and finally the `inspect/1` form of the metadata.
  # NOTE(review): not referenced by any test in this module.
  defp alef_e2e_format_to_string(value) when is_binary(value), do: value

  defp alef_e2e_format_to_string(metadata) do
    case metadata.image do
      %{format: fmt} when is_binary(fmt) ->
        fmt

      _ ->
        case metadata.pdf do
          %{} ->
            "PDF"

          _ ->
            case metadata.html do
              %{} -> "HTML"
              _ -> inspect(metadata)
            end
        end
    end
  end

  # True when `value` is a binary of at least `min` bytes or a list of at
  # least `min` elements. Any other value yields false, so the assertion fails
  # with a normal ExUnit failure instead of raising from `String.length/1`.
  defp alef_e2e_min_size?(value, min) when is_binary(value), do: byte_size(value) >= min
  defp alef_e2e_min_size?(value, min) when is_list(value), do: length(value) >= min
  defp alef_e2e_min_size?(_value, _min), do: false

  describe "ocr_image_png" do
    test "ocr_image_png" do
      content = File.read!("../../test_documents/images/test_hello_world.png")
      {:ok, result} = Kreuzberg.extract_bytes_async(content, "image/png", "{}")

      assert String.trim(result.mime_type) == "image/png"
      assert alef_e2e_min_size?(result.content, 1)

      assert Enum.any?(["Hello", "World", "hello", "world"], fn v ->
               String.contains?(to_string(result.content), v)
             end)
    end
  end

  describe "smoke_docx_basic" do
    test "smoke_docx_basic" do
      {:ok, result} =
        Kreuzberg.extract_file_async("../../test_documents/docx/fake.docx",
          mime_type: "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
          config: "{}"
        )

      assert String.trim(result.mime_type) ==
               "application/vnd.openxmlformats-officedocument.wordprocessingml.document"

      assert alef_e2e_min_size?(result.content, 20)

      assert Enum.any?(["Lorem", "ipsum", "document", "text"], fn v ->
               String.contains?(to_string(result.content), v)
             end)
    end
  end

  describe "smoke_html_basic" do
    test "smoke_html_basic" do
      {:ok, result} =
        Kreuzberg.extract_file_async("../../test_documents/html/simple_table.html",
          mime_type: "text/html",
          config: "{}"
        )

      assert String.trim(result.mime_type) == "text/html"
      assert alef_e2e_min_size?(result.content, 10)

      assert Enum.any?(["Sample Data Table", "Laptop", "Electronics", "Product"], fn v ->
               String.contains?(to_string(result.content), v)
             end)
    end
  end

  describe "smoke_image_png" do
    test "smoke_image_png" do
      {:ok, result} =
        Kreuzberg.extract_file_async("../../test_documents/images/sample.png",
          config: "{\"disable_ocr\":true}"
        )

      assert String.trim(result.mime_type) == "image/png"
    end
  end

  describe "smoke_json_basic" do
    test "smoke_json_basic" do
      {:ok, result} =
        Kreuzberg.extract_file_async("../../test_documents/json/simple.json",
          mime_type: "application/json",
          config: "{}"
        )

      assert String.trim(result.mime_type) == "application/json"
      assert alef_e2e_min_size?(result.content, 5)
    end
  end

  describe "smoke_pdf_basic" do
    test "smoke_pdf_basic" do
      {:ok, result} =
        Kreuzberg.extract_file_async("../../test_documents/pdf/fake_memo.pdf",
          mime_type: "application/pdf",
          config: "{}"
        )

      assert String.trim(result.mime_type) == "application/pdf"
      assert alef_e2e_min_size?(result.content, 50)

      assert Enum.any?(["May 5, 2023", "To Whom it May Concern"], fn v ->
               String.contains?(to_string(result.content), v)
             end)
    end
  end

  describe "smoke_txt_basic" do
    test "smoke_txt_basic" do
      {:ok, result} =
        Kreuzberg.extract_file_async("../../test_documents/text/report.txt",
          mime_type: "text/plain",
          config: "{}"
        )

      assert String.trim(result.mime_type) == "text/plain"
      assert alef_e2e_min_size?(result.content, 5)
    end
  end

  describe "smoke_xlsx_basic" do
    test "smoke_xlsx_basic" do
      {:ok, result} =
        Kreuzberg.extract_file_async("../../test_documents/xlsx/stanley_cups.xlsx",
          mime_type: "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
          config: "{}"
        )

      assert String.trim(result.mime_type) ==
               "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"

      assert alef_e2e_min_size?(result.content, 100)
      assert String.contains?(to_string(result.content), "Team")
      assert String.contains?(to_string(result.content), "Location")
      assert String.contains?(to_string(result.content), "Stanley Cups")
      assert String.contains?(to_string(result.content), "Blues")
      assert String.contains?(to_string(result.content), "Flyers")
      assert String.contains?(to_string(result.content), "Maple Leafs")
      assert String.contains?(to_string(result.content), "STL")
      assert String.contains?(to_string(result.content), "PHI")
      assert String.contains?(to_string(result.content), "TOR")
      # skipped: field 'tables' not available on result type
      # skipped: field 'metadata.format.excel.sheet_count' not available on result type
      # skipped: field 'metadata.format.excel.sheet_names' not available on result type
    end
  end
end
|
||||
1
e2e/elixir/test/test_helper.exs
generated
Normal file
1
e2e/elixir/test/test_helper.exs
generated
Normal file
@@ -0,0 +1 @@
|
||||
# Starts the ExUnit test framework for this test run.
ExUnit.start()
|
||||
21
e2e/elixir/test/validator_management_test.exs
generated
Normal file
21
e2e/elixir/test/validator_management_test.exs
generated
Normal file
@@ -0,0 +1,21 @@
|
||||
# This file is auto-generated by alef — DO NOT EDIT.
|
||||
# alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
# To regenerate: alef generate
|
||||
# To verify freshness: alef verify --exit-code
|
||||
# Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
# E2e tests for category: validator_management
|
||||
defmodule E2e.ValidatorManagementTest do
  # Smoke tests for validator registry management. The calls only have to
  # return without raising; their results are deliberately discarded via
  # `_result`, which also avoids unused-variable compiler warnings.
  use ExUnit.Case, async: false

  describe "validators_clear" do
    test "validators_clear" do
      _result = Kreuzberg.clear_validators()
    end
  end

  describe "validators_list" do
    test "validators_list" do
      _result = Kreuzberg.list_validators()
    end
  end
end
|
||||
41
e2e/elixir/test_syntax_check.exs
generated
Normal file
41
e2e/elixir/test_syntax_check.exs
generated
Normal file
@@ -0,0 +1,41 @@
|
||||
# Reduced document-extractor stub used by the syntax-check script.
# The `unless` guard skips the definition when the module is already loaded.
unless Code.ensure_loaded?(E2e.TestStubs.TestStubRegisterDocumentExtractorTraitBridge) do
  defmodule E2e.TestStubs.TestStubRegisterDocumentExtractorTraitBridge do
    def name, do: "test-extractor"
    def initialize, do: :ok

    # Inputs are ignored (underscore-prefixed to avoid unused-variable
    # warnings); extraction always succeeds with an empty map.
    def extract_bytes(_content, _mime_type, _config), do: {:ok, %{}}

    def supported_mime_types, do: []
  end
end
|
||||
|
||||
# GenServer front-end for the reduced document-extractor stub. It receives
# `{:trait_call, method, args_json, reply_id}` messages and answers each one
# through `Kreuzberg.Native.complete_trait_call/2`.
unless Code.ensure_loaded?(E2e.TestStubs.TestStubRegisterDocumentExtractorTraitBridgeGenServer) do
  defmodule E2e.TestStubs.TestStubRegisterDocumentExtractorTraitBridgeGenServer do
    use GenServer

    # Starts an unnamed server; the options are ignored and the state is nil.
    def start_link(_opts) do
      GenServer.start_link(__MODULE__, nil)
    end

    @impl true
    def init(_), do: {:ok, nil}

    # Decodes the JSON arguments, converts them to a positional list, calls
    # the stub function named by `method_atom` and returns its JSON-encoded
    # result under `reply_id`.
    @impl true
    def handle_info({:trait_call, method_atom, args_json, reply_id}, state) do
      ordered_args =
        args_json
        |> Jason.decode!()
        |> ordered_args_for(to_string(method_atom))

      result = apply(E2e.TestStubs.TestStubRegisterDocumentExtractorTraitBridge, method_atom, ordered_args)
      result_json = Jason.encode!(result)
      Kreuzberg.Native.complete_trait_call(reply_id, result_json)
      {:noreply, state}
    end

    # `apply/3` needs a list. A decoded JSON array is already positional and is
    # passed through unchanged; a decoded JSON object (keyed by parameter name)
    # is reordered for the stub's only non-zero-arity function.
    defp ordered_args_for(args, _method) when is_list(args), do: args

    defp ordered_args_for(args, "extract_bytes") when is_map(args),
      do: [args["content"], args["mime_type"], args["config"]]

    # Zero-arity methods are accepted only with an empty argument object.
    defp ordered_args_for(args, _method) when is_map(args) and map_size(args) == 0, do: []
  end
end
|
||||
|
||||
defmodule E2e.PluginApiTest do
  # Single registration smoke test: starts the stub GenServer and passes its
  # pid to Kreuzberg. The return value is not inspected, so it is bound to
  # `_result` to avoid an unused-variable compiler warning.
  use ExUnit.Case

  describe "register_document_extractor_trait_bridge" do
    test "register_document_extractor_trait_bridge" do
      {:ok, registerdocumentextractortraitbridge_pid} =
        E2e.TestStubs.TestStubRegisterDocumentExtractorTraitBridgeGenServer.start_link(nil)

      _result = Kreuzberg.register_document_extractor(registerdocumentextractortraitbridge_pid, "test-extractor")
    end
  end
end
|
||||
58
e2e/go/async_test.go
generated
Normal file
58
e2e/go/async_test.go
generated
Normal file
@@ -0,0 +1,58 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
// E2e tests for category: async
|
||||
package e2e_test
|
||||
|
||||
import (
|
||||
"os"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
|
||||
kreuzberg "github.com/kreuzberg-dev/kreuzberg/v5"
|
||||
)
|
||||
|
||||
func Test_AsyncExtractBytes(t *testing.T) {
|
||||
// Async extract_bytes call on PDF document
|
||||
contentBytes, contentBytesErr := os.ReadFile(`pdf/fake_memo.pdf`)
|
||||
if contentBytesErr != nil {
|
||||
t.Fatalf("read fixture pdf/fake_memo.pdf: %v", contentBytesErr)
|
||||
}
|
||||
result, err := kreuzberg.ExtractBytes(contentBytes, `application/pdf`, kreuzberg.ExtractionConfig{})
|
||||
if err != nil {
|
||||
t.Fatalf("call failed: %v", err)
|
||||
}
|
||||
if strings.TrimSpace(string(result.MimeType)) != `application/pdf` {
|
||||
t.Errorf("equals mismatch: got %v", result.MimeType)
|
||||
}
|
||||
assert.GreaterOrEqual(t, len(result.Content), 50, "expected length >= 50")
|
||||
}
|
||||
|
||||
func Test_AsyncExtractBytesEmptyMime(t *testing.T) {
|
||||
// extract_bytes empty MIME async
|
||||
contentBytes, contentBytesErr := os.ReadFile(`text/plain.txt`)
|
||||
if contentBytesErr != nil {
|
||||
t.Fatalf("read fixture text/plain.txt: %v", contentBytesErr)
|
||||
}
|
||||
_, err := kreuzberg.ExtractBytes(contentBytes, ``, kreuzberg.ExtractionConfig{})
|
||||
if err == nil {
|
||||
t.Errorf("expected an error, but call succeeded")
|
||||
}
|
||||
}
|
||||
|
||||
func Test_AsyncExtractBytesInvalidMime(t *testing.T) {
|
||||
// extract_bytes unsupported MIME async
|
||||
contentBytes, contentBytesErr := os.ReadFile(`text/plain.txt`)
|
||||
if contentBytesErr != nil {
|
||||
t.Fatalf("read fixture text/plain.txt: %v", contentBytesErr)
|
||||
}
|
||||
_, err := kreuzberg.ExtractBytes(contentBytes, `application/x-nonexistent`, kreuzberg.ExtractionConfig{})
|
||||
if err == nil {
|
||||
t.Errorf("expected an error, but call succeeded")
|
||||
}
|
||||
}
|
||||
139
e2e/go/batch_test.go
generated
Normal file
139
e2e/go/batch_test.go
generated
Normal file
@@ -0,0 +1,139 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
// E2e tests for category: batch
|
||||
package e2e_test
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
|
||||
kreuzberg "github.com/kreuzberg-dev/kreuzberg/v5"
|
||||
)
|
||||
|
||||
func Test_BatchBytesInvalidMime(t *testing.T) {
|
||||
// batch_extract_bytes_sync invalid MIME
|
||||
var items []kreuzberg.BatchBytesItem
|
||||
if err := json.Unmarshal([]byte(`[{"content":"SGVsbG8=","mime_type":"application/x-nonexistent"}]`), &items); err != nil {
|
||||
t.Fatalf("config parse failed: %v", err)
|
||||
}
|
||||
_, err := kreuzberg.BatchExtractBytesSync(items, kreuzberg.ExtractionConfig{})
|
||||
if err != nil {
|
||||
t.Fatalf("call failed: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func Test_BatchExtractBytesHappy(t *testing.T) {
|
||||
// batch_extract_bytes: happy path with mixed inputs
|
||||
var items []kreuzberg.BatchBytesItem
|
||||
if err := json.Unmarshal([]byte(`[{"content":"SGVsbG8sIHdvcmxkIQ==","mime_type":"text/plain"},{"content":"PGh0bWw+PGJvZHk+VGVzdDwvYm9keT48L2h0bWw+","mime_type":"text/html"}]`), &items); err != nil {
|
||||
t.Fatalf("config parse failed: %v", err)
|
||||
}
|
||||
result, err := kreuzberg.BatchExtractBytes(items, kreuzberg.ExtractionConfig{})
|
||||
if err != nil {
|
||||
t.Fatalf("call failed: %v", err)
|
||||
}
|
||||
assert.GreaterOrEqual(t, len(result), 1, "expected at least 1 elements")
|
||||
}
|
||||
|
||||
func Test_BatchExtractBytesMixedFormat(t *testing.T) {
|
||||
// batch_extract_bytes: handles unsupported MIME gracefully
|
||||
var items []kreuzberg.BatchBytesItem
|
||||
if err := json.Unmarshal([]byte(`[{"content":"UERGIHBsYWNlaG9sZGVy","mime_type":"application/x-unknown"}]`), &items); err != nil {
|
||||
t.Fatalf("config parse failed: %v", err)
|
||||
}
|
||||
_, err := kreuzberg.BatchExtractBytes(items, kreuzberg.ExtractionConfig{})
|
||||
if err != nil {
|
||||
t.Fatalf("call failed: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func Test_BatchExtractBytesSyncEmptyList(t *testing.T) {
|
||||
// batch_extract_bytes_sync: empty batch
|
||||
var items []kreuzberg.BatchBytesItem
|
||||
if err := json.Unmarshal([]byte(`[]`), &items); err != nil {
|
||||
t.Fatalf("config parse failed: %v", err)
|
||||
}
|
||||
result, err := kreuzberg.BatchExtractBytesSync(items, kreuzberg.ExtractionConfig{})
|
||||
if err != nil {
|
||||
t.Fatalf("call failed: %v", err)
|
||||
}
|
||||
assert.Equal(t, len(result), 0, "expected exactly 0 elements")
|
||||
}
|
||||
|
||||
func Test_BatchExtractBytesSyncInvalidMime(t *testing.T) {
|
||||
// batch_extract_bytes_sync: unsupported MIME
|
||||
var items []kreuzberg.BatchBytesItem
|
||||
if err := json.Unmarshal([]byte(`[{"content":"ZGF0YQ==","mime_type":"application/x-unknown"}]`), &items); err != nil {
|
||||
t.Fatalf("config parse failed: %v", err)
|
||||
}
|
||||
_, err := kreuzberg.BatchExtractBytesSync(items, kreuzberg.ExtractionConfig{})
|
||||
if err != nil {
|
||||
t.Fatalf("call failed: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func Test_BatchFileAsyncBasic(t *testing.T) {
|
||||
// Extract text from multiple files asynchronously
|
||||
var paths []kreuzberg.BatchFileItem
|
||||
if err := json.Unmarshal([]byte(`[{"path":"pdf/fake_memo.pdf"},{"path":"text/fake_text.txt"}]`), &paths); err != nil {
|
||||
t.Fatalf("config parse failed: %v", err)
|
||||
}
|
||||
_, err := kreuzberg.BatchExtractFiles(paths, kreuzberg.ExtractionConfig{})
|
||||
if err != nil {
|
||||
t.Fatalf("call failed: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func Test_BatchFileAsyncNotFound(t *testing.T) {
|
||||
// batch_extract_file async nonexistent
|
||||
var paths []kreuzberg.BatchFileItem
|
||||
if err := json.Unmarshal([]byte(`[{"path":"/nonexistent/a.pdf"}]`), &paths); err != nil {
|
||||
t.Fatalf("config parse failed: %v", err)
|
||||
}
|
||||
_, err := kreuzberg.BatchExtractFiles(paths, kreuzberg.ExtractionConfig{})
|
||||
if err != nil {
|
||||
t.Fatalf("call failed: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func Test_BatchFileNotFound(t *testing.T) {
|
||||
// batch_extract_file_sync nonexistent
|
||||
var paths []kreuzberg.BatchFileItem
|
||||
if err := json.Unmarshal([]byte(`[{"path":"/nonexistent/a.pdf"},{"path":"/nonexistent/b.txt"}]`), &paths); err != nil {
|
||||
t.Fatalf("config parse failed: %v", err)
|
||||
}
|
||||
_, err := kreuzberg.BatchExtractFilesSync(paths, kreuzberg.ExtractionConfig{})
|
||||
if err != nil {
|
||||
t.Fatalf("call failed: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func Test_BatchFilePartial(t *testing.T) {
|
||||
// batch_extract_file_sync mixed
|
||||
var paths []kreuzberg.BatchFileItem
|
||||
if err := json.Unmarshal([]byte(`[{"path":"text/plain.txt"},{"path":"/nonexistent/missing.pdf"}]`), &paths); err != nil {
|
||||
t.Fatalf("config parse failed: %v", err)
|
||||
}
|
||||
_, err := kreuzberg.BatchExtractFilesSync(paths, kreuzberg.ExtractionConfig{})
|
||||
if err != nil {
|
||||
t.Fatalf("call failed: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func Test_BatchFileSyncBasic(t *testing.T) {
|
||||
// Extract text from multiple files synchronously
|
||||
var paths []kreuzberg.BatchFileItem
|
||||
if err := json.Unmarshal([]byte(`[{"path":"pdf/fake_memo.pdf"},{"path":"text/fake_text.txt"}]`), &paths); err != nil {
|
||||
t.Fatalf("config parse failed: %v", err)
|
||||
}
|
||||
_, err := kreuzberg.BatchExtractFilesSync(paths, kreuzberg.ExtractionConfig{})
|
||||
if err != nil {
|
||||
t.Fatalf("call failed: %v", err)
|
||||
}
|
||||
}
|
||||
36
e2e/go/code_test.go
generated
Normal file
36
e2e/go/code_test.go
generated
Normal file
@@ -0,0 +1,36 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
// E2e tests for category: code
|
||||
package e2e_test
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
|
||||
kreuzberg "github.com/kreuzberg-dev/kreuzberg/v5"
|
||||
)
|
||||
|
||||
func Test_CodeShebangDetection(t *testing.T) {
|
||||
// Test language detection from shebang line via bytes input
|
||||
mime_typeVal := `text/x-source-code`
|
||||
result, err := kreuzberg.ExtractFileSync(`code/script.sh`, &mime_typeVal, kreuzberg.ExtractionConfig{})
|
||||
if err != nil {
|
||||
t.Fatalf("call failed: %v", err)
|
||||
}
|
||||
if strings.TrimSpace(string(result.MimeType)) != `text/x-source-code` {
|
||||
t.Errorf("equals mismatch: got %v", result.MimeType)
|
||||
}
|
||||
assert.GreaterOrEqual(t, len(result.Content), 10, "expected length >= 10")
|
||||
if !strings.Contains(string(result.Content), `build`) {
|
||||
t.Errorf("expected to contain %s", `build`)
|
||||
}
|
||||
if !strings.Contains(string(result.Content), `clean`) {
|
||||
t.Errorf("expected to contain %s", `clean`)
|
||||
}
|
||||
}
|
||||
338
e2e/go/contract_test.go
generated
Normal file
338
e2e/go/contract_test.go
generated
Normal file
@@ -0,0 +1,338 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
// E2e tests for category: contract
|
||||
package e2e_test
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"os"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
|
||||
kreuzberg "github.com/kreuzberg-dev/kreuzberg/v5"
|
||||
)
|
||||
|
||||
func Test_ApiBatchBytesAsync(t *testing.T) {
|
||||
// Tests async batch bytes extraction API (batch_extract_bytes)
|
||||
result, err := kreuzberg.ExtractFile(`pdf/fake_memo.pdf`, nil, kreuzberg.ExtractionConfig{})
|
||||
if err != nil {
|
||||
t.Fatalf("call failed: %v", err)
|
||||
}
|
||||
if strings.TrimSpace(string(result.MimeType)) != `application/pdf` {
|
||||
t.Errorf("equals mismatch: got %v", result.MimeType)
|
||||
}
|
||||
assert.GreaterOrEqual(t, len(result.Content), 10, "expected length >= 10")
|
||||
{
|
||||
found := false
|
||||
if strings.Contains(string(result.Content), `May 5, 2023`) { found = true }
|
||||
if strings.Contains(string(result.Content), `Mallori`) { found = true }
|
||||
if !found {
|
||||
t.Errorf("expected to contain at least one of the specified values")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func Test_ApiBatchBytesWithConfigsAsync(t *testing.T) {
|
||||
// Tests async batch bytes extraction with per-file configs (batch_extract_bytes with file_configs parameter)
|
||||
var config kreuzberg.ExtractionConfig
|
||||
if err := json.Unmarshal([]byte(`{"output_format":"markdown"}`), &config); err != nil {
|
||||
t.Fatalf("config parse failed: %v", err)
|
||||
}
|
||||
result, err := kreuzberg.ExtractFile(`pdf/fake_memo.pdf`, nil, config)
|
||||
if err != nil {
|
||||
t.Fatalf("call failed: %v", err)
|
||||
}
|
||||
if strings.TrimSpace(string(result.MimeType)) != `application/pdf` {
|
||||
t.Errorf("equals mismatch: got %v", result.MimeType)
|
||||
}
|
||||
assert.GreaterOrEqual(t, len(result.Content), 10, "expected length >= 10")
|
||||
// skipped: field 'metadata.output_format' not available on result type
|
||||
}
|
||||
|
||||
func Test_ApiBatchFileAsync(t *testing.T) {
|
||||
// Tests async batch file extraction API (batch_extract_file)
|
||||
result, err := kreuzberg.ExtractFile(`pdf/fake_memo.pdf`, nil, kreuzberg.ExtractionConfig{})
|
||||
if err != nil {
|
||||
t.Fatalf("call failed: %v", err)
|
||||
}
|
||||
if strings.TrimSpace(string(result.MimeType)) != `application/pdf` {
|
||||
t.Errorf("equals mismatch: got %v", result.MimeType)
|
||||
}
|
||||
assert.GreaterOrEqual(t, len(result.Content), 10, "expected length >= 10")
|
||||
{
|
||||
found := false
|
||||
if strings.Contains(string(result.Content), `May 5, 2023`) { found = true }
|
||||
if strings.Contains(string(result.Content), `Mallori`) { found = true }
|
||||
if !found {
|
||||
t.Errorf("expected to contain at least one of the specified values")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func Test_ApiBatchFileWithConfigsAsync(t *testing.T) {
|
||||
// Tests async batch file extraction with per-file configs (batch_extract_files with file_configs parameter)
|
||||
var config kreuzberg.ExtractionConfig
|
||||
if err := json.Unmarshal([]byte(`{"output_format":"markdown"}`), &config); err != nil {
|
||||
t.Fatalf("config parse failed: %v", err)
|
||||
}
|
||||
result, err := kreuzberg.ExtractFile(`pdf/fake_memo.pdf`, nil, config)
|
||||
if err != nil {
|
||||
t.Fatalf("call failed: %v", err)
|
||||
}
|
||||
if strings.TrimSpace(string(result.MimeType)) != `application/pdf` {
|
||||
t.Errorf("equals mismatch: got %v", result.MimeType)
|
||||
}
|
||||
assert.GreaterOrEqual(t, len(result.Content), 10, "expected length >= 10")
|
||||
// skipped: field 'metadata.output_format' not available on result type
|
||||
}
|
||||
|
||||
func Test_ApiExtractBytesAsync(t *testing.T) {
|
||||
// Tests async bytes extraction API (extract_bytes)
|
||||
result, err := kreuzberg.ExtractFile(`pdf/fake_memo.pdf`, nil, kreuzberg.ExtractionConfig{})
|
||||
if err != nil {
|
||||
t.Fatalf("call failed: %v", err)
|
||||
}
|
||||
if strings.TrimSpace(string(result.MimeType)) != `application/pdf` {
|
||||
t.Errorf("equals mismatch: got %v", result.MimeType)
|
||||
}
|
||||
assert.GreaterOrEqual(t, len(result.Content), 10, "expected length >= 10")
|
||||
{
|
||||
found := false
|
||||
if strings.Contains(string(result.Content), `May 5, 2023`) { found = true }
|
||||
if strings.Contains(string(result.Content), `Mallori`) { found = true }
|
||||
if !found {
|
||||
t.Errorf("expected to contain at least one of the specified values")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func Test_ApiExtractFileAsync(t *testing.T) {
|
||||
// Tests async file extraction API (extract_file)
|
||||
result, err := kreuzberg.ExtractFile(`pdf/fake_memo.pdf`, nil, kreuzberg.ExtractionConfig{})
|
||||
if err != nil {
|
||||
t.Fatalf("call failed: %v", err)
|
||||
}
|
||||
if strings.TrimSpace(string(result.MimeType)) != `application/pdf` {
|
||||
t.Errorf("equals mismatch: got %v", result.MimeType)
|
||||
}
|
||||
assert.GreaterOrEqual(t, len(result.Content), 10, "expected length >= 10")
|
||||
{
|
||||
found := false
|
||||
if strings.Contains(string(result.Content), `May 5, 2023`) { found = true }
|
||||
if strings.Contains(string(result.Content), `Mallori`) { found = true }
|
||||
if !found {
|
||||
t.Errorf("expected to contain at least one of the specified values")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func Test_ConfigChunkingPrependHeadingContext(t *testing.T) {
|
||||
// Tests markdown chunker prepends heading hierarchy to chunk content
|
||||
var config kreuzberg.ExtractionConfig
|
||||
if err := json.Unmarshal([]byte(`{"chunking":{"chunker_type":"markdown","max_chars":300,"max_overlap":50,"prepend_heading_context":true}}`), &config); err != nil {
|
||||
t.Fatalf("config parse failed: %v", err)
|
||||
}
|
||||
result, err := kreuzberg.ExtractFileSync(`markdown/extraction_test.md`, nil, config)
|
||||
if err != nil {
|
||||
t.Fatalf("call failed: %v", err)
|
||||
}
|
||||
assert.GreaterOrEqual(t, len(result.Content), 10, "expected length >= 10")
|
||||
// skipped: field 'chunks' not available on result type
|
||||
assert.True(t, func() bool { chunks := result.Chunks; if chunks == nil { return false }; for _, c := range chunks { if c.Content == "" { return false } }; return true }(), "expected true")
|
||||
assert.True(t, func() bool { chunks := result.Chunks; if chunks == nil { return false }; for _, c := range chunks { if c.Metadata.HeadingContext == nil { return false } }; return true }(), "expected true")
|
||||
assert.True(t, func() bool { chunks := result.Chunks; if chunks == nil || len(chunks) == 0 { return false }; return chunks[0].Metadata.HeadingContext != nil }(), "expected true")
|
||||
}
|
||||
|
||||
func Test_ConfigDocumentStructureWithHeadings(t *testing.T) {
|
||||
// Tests document structure with DOCX heading-driven nesting
|
||||
var config kreuzberg.ExtractionConfig
|
||||
if err := json.Unmarshal([]byte(`{"include_document_structure":true}`), &config); err != nil {
|
||||
t.Fatalf("config parse failed: %v", err)
|
||||
}
|
||||
result, err := kreuzberg.ExtractFileSync(`docx/fake.docx`, nil, config)
|
||||
if err != nil {
|
||||
t.Fatalf("call failed: %v", err)
|
||||
}
|
||||
if strings.TrimSpace(string(result.MimeType)) != `application/vnd.openxmlformats-officedocument.wordprocessingml.document` {
|
||||
t.Errorf("equals mismatch: got %v", result.MimeType)
|
||||
}
|
||||
// skipped: field 'document' not available on result type
|
||||
// skipped: field 'document.nodes' not available on result type
|
||||
}
|
||||
|
||||
func Test_ConfigElementTypes(t *testing.T) {
|
||||
// Tests element-based result format with element type assertions on DOCX
|
||||
var config kreuzberg.ExtractionConfig
|
||||
if err := json.Unmarshal([]byte(`{"result_format":"element_based"}`), &config); err != nil {
|
||||
t.Fatalf("config parse failed: %v", err)
|
||||
}
|
||||
result, err := kreuzberg.ExtractFileSync(`docx/unit_test_headers.docx`, nil, config)
|
||||
if err != nil {
|
||||
t.Fatalf("call failed: %v", err)
|
||||
}
|
||||
{
|
||||
found := false
|
||||
if strings.Contains(string(result.MimeType), `application/vnd.openxmlformats-officedocument.wordprocessingml.document`) { found = true }
|
||||
if !found {
|
||||
t.Errorf("expected to contain at least one of the specified values")
|
||||
}
|
||||
}
|
||||
// skipped: field 'elements' not available on result type
|
||||
}
|
||||
|
||||
func Test_ConfigExtractionTimeout(t *testing.T) {
|
||||
// Tests that extraction_timeout_secs config field is accepted and does not affect fast extractions
|
||||
var config kreuzberg.ExtractionConfig
|
||||
if err := json.Unmarshal([]byte(`{"extraction_timeout_secs":300}`), &config); err != nil {
|
||||
t.Fatalf("config parse failed: %v", err)
|
||||
}
|
||||
result, err := kreuzberg.ExtractFileSync(`pdf/fake_memo.pdf`, nil, config)
|
||||
if err != nil {
|
||||
t.Fatalf("call failed: %v", err)
|
||||
}
|
||||
if strings.TrimSpace(string(result.MimeType)) != `application/pdf` {
|
||||
t.Errorf("equals mismatch: got %v", result.MimeType)
|
||||
}
|
||||
assert.GreaterOrEqual(t, len(result.Content), 10, "expected length >= 10")
|
||||
}
|
||||
|
||||
func Test_ConfigKeywords(t *testing.T) {
|
||||
// Tests keyword extraction via YAKE algorithm
|
||||
var config kreuzberg.ExtractionConfig
|
||||
if err := json.Unmarshal([]byte(`{"keywords":{"algorithm":"yake","max_keywords":10}}`), &config); err != nil {
|
||||
t.Fatalf("config parse failed: %v", err)
|
||||
}
|
||||
result, err := kreuzberg.ExtractFileSync(`pdf/fake_memo.pdf`, nil, config)
|
||||
if err != nil {
|
||||
t.Fatalf("call failed: %v", err)
|
||||
}
|
||||
if strings.TrimSpace(string(result.MimeType)) != `application/pdf` {
|
||||
t.Errorf("equals mismatch: got %v", result.MimeType)
|
||||
}
|
||||
assert.GreaterOrEqual(t, len(result.Content), 10, "expected length >= 10")
|
||||
// skipped: field 'keywords' not available on Go ExtractionResult
|
||||
// skipped: field 'keywords' not available on Go ExtractionResult
|
||||
}
|
||||
|
||||
func Test_ConfigPages(t *testing.T) {
|
||||
// Tests page extraction and page marker configuration
|
||||
var config kreuzberg.ExtractionConfig
|
||||
if err := json.Unmarshal([]byte(`{"pages":{"extract_pages":true,"insert_page_markers":true}}`), &config); err != nil {
|
||||
t.Fatalf("config parse failed: %v", err)
|
||||
}
|
||||
result, err := kreuzberg.ExtractFileSync(`pdf/fake_memo.pdf`, nil, config)
|
||||
if err != nil {
|
||||
t.Fatalf("call failed: %v", err)
|
||||
}
|
||||
if strings.TrimSpace(string(result.MimeType)) != `application/pdf` {
|
||||
t.Errorf("equals mismatch: got %v", result.MimeType)
|
||||
}
|
||||
assert.GreaterOrEqual(t, len(result.Content), 10, "expected length >= 10")
|
||||
{
|
||||
found := false
|
||||
if strings.Contains(string(result.Content), `PAGE`) { found = true }
|
||||
if !found {
|
||||
t.Errorf("expected to contain at least one of the specified values")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func Test_ConfigQualityEnabled(t *testing.T) {
|
||||
// Tests quality scoring produces a score value in [0.0, 1.0]
|
||||
var config kreuzberg.ExtractionConfig
|
||||
if err := json.Unmarshal([]byte(`{"enable_quality_processing":true}`), &config); err != nil {
|
||||
t.Fatalf("config parse failed: %v", err)
|
||||
}
|
||||
result, err := kreuzberg.ExtractFileSync(`pdf/fake_memo.pdf`, nil, config)
|
||||
if err != nil {
|
||||
t.Fatalf("call failed: %v", err)
|
||||
}
|
||||
if strings.TrimSpace(string(result.MimeType)) != `application/pdf` {
|
||||
t.Errorf("equals mismatch: got %v", result.MimeType)
|
||||
}
|
||||
assert.GreaterOrEqual(t, len(result.Content), 10, "expected length >= 10")
|
||||
// skipped: field 'quality_score' not available on result type
|
||||
// skipped: field 'quality_score' not available on result type
|
||||
// skipped: field 'quality_score' not available on result type
|
||||
}
|
||||
|
||||
func Test_ConfigSecurityLimits(t *testing.T) {
|
||||
// Tests archive extraction with custom security limits
|
||||
var config kreuzberg.ExtractionConfig
|
||||
if err := json.Unmarshal([]byte(`{"security_limits":{"max_archive_size":104857600,"max_compression_ratio":50,"max_files_in_archive":100}}`), &config); err != nil {
|
||||
t.Fatalf("config parse failed: %v", err)
|
||||
}
|
||||
result, err := kreuzberg.ExtractFileSync(`archives/documents.zip`, nil, config)
|
||||
if err != nil {
|
||||
t.Fatalf("call failed: %v", err)
|
||||
}
|
||||
{
|
||||
found := false
|
||||
if strings.Contains(string(result.MimeType), `application/zip`) { found = true }
|
||||
if strings.Contains(string(result.MimeType), `application/x-zip-compressed`) { found = true }
|
||||
if !found {
|
||||
t.Errorf("expected to contain at least one of the specified values")
|
||||
}
|
||||
}
|
||||
assert.GreaterOrEqual(t, len(result.Content), 10, "expected length >= 10")
|
||||
}
|
||||
|
||||
func Test_ConfigTreeSitter(t *testing.T) {
|
||||
// Tests tree-sitter configuration round-trip
|
||||
var config kreuzberg.ExtractionConfig
|
||||
if err := json.Unmarshal([]byte(`{"tree_sitter":{"groups":["web"],"languages":["python","rust"],"process":{"comments":false,"diagnostics":false,"docstrings":false,"exports":true,"imports":true,"structure":true,"symbols":false}}}`), &config); err != nil {
|
||||
t.Fatalf("config parse failed: %v", err)
|
||||
}
|
||||
result, err := kreuzberg.ExtractFileSync(`code/hello.py`, nil, config)
|
||||
if err != nil {
|
||||
t.Fatalf("call failed: %v", err)
|
||||
}
|
||||
if strings.TrimSpace(string(result.MimeType)) != `text/x-source-code` {
|
||||
t.Errorf("equals mismatch: got %v", result.MimeType)
|
||||
}
|
||||
assert.GreaterOrEqual(t, len(result.Content), 5, "expected length >= 5")
|
||||
}
|
||||
|
||||
func Test_OutputFormatBytesMarkdown(t *testing.T) {
|
||||
// Tests markdown output format via bytes extraction API
|
||||
contentBytes, contentBytesErr := os.ReadFile(`pdf/fake_memo.pdf`)
|
||||
if contentBytesErr != nil {
|
||||
t.Fatalf("read fixture pdf/fake_memo.pdf: %v", contentBytesErr)
|
||||
}
|
||||
var config kreuzberg.ExtractionConfig
|
||||
if err := json.Unmarshal([]byte(`{"output_format":"markdown"}`), &config); err != nil {
|
||||
t.Fatalf("config parse failed: %v", err)
|
||||
}
|
||||
result, err := kreuzberg.ExtractBytesSync(contentBytes, `application/pdf`, config)
|
||||
if err != nil {
|
||||
t.Fatalf("call failed: %v", err)
|
||||
}
|
||||
if strings.TrimSpace(string(result.MimeType)) != `application/pdf` {
|
||||
t.Errorf("equals mismatch: got %v", result.MimeType)
|
||||
}
|
||||
assert.GreaterOrEqual(t, len(result.Content), 10, "expected length >= 10")
|
||||
// skipped: field 'metadata.output_format' not available on result type
|
||||
}
|
||||
|
||||
func Test_OutputFormatMarkdown(t *testing.T) {
|
||||
// Tests Markdown output format
|
||||
var config kreuzberg.ExtractionConfig
|
||||
if err := json.Unmarshal([]byte(`{"output_format":"markdown"}`), &config); err != nil {
|
||||
t.Fatalf("config parse failed: %v", err)
|
||||
}
|
||||
result, err := kreuzberg.ExtractFileSync(`pdf/fake_memo.pdf`, nil, config)
|
||||
if err != nil {
|
||||
t.Fatalf("call failed: %v", err)
|
||||
}
|
||||
if strings.TrimSpace(string(result.MimeType)) != `application/pdf` {
|
||||
t.Errorf("equals mismatch: got %v", result.MimeType)
|
||||
}
|
||||
assert.GreaterOrEqual(t, len(result.Content), 10, "expected length >= 10")
|
||||
// skipped: field 'metadata.output_format' not available on result type
|
||||
}
|
||||
59
e2e/go/detection_test.go
generated
Normal file
59
e2e/go/detection_test.go
generated
Normal file
@@ -0,0 +1,59 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
// E2e tests for category: detection
|
||||
package e2e_test
|
||||
|
||||
import (
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
kreuzberg "github.com/kreuzberg-dev/kreuzberg/v5"
|
||||
)
|
||||
|
||||
func Test_DetectMimeBytesHtml(t *testing.T) {
|
||||
// Detect HTML MIME from bytes
|
||||
contentBytes, contentBytesErr := os.ReadFile(`html/html.html`)
|
||||
if contentBytesErr != nil {
|
||||
t.Fatalf("read fixture html/html.html: %v", contentBytesErr)
|
||||
}
|
||||
_, err := kreuzberg.DetectMimeTypeFromBytes(contentBytes)
|
||||
if err != nil {
|
||||
t.Fatalf("call failed: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func Test_DetectMimeBytesPdf(t *testing.T) {
|
||||
// Detect PDF MIME type from bytes
|
||||
contentBytes, contentBytesErr := os.ReadFile(`pdf/fake_memo.pdf`)
|
||||
if contentBytesErr != nil {
|
||||
t.Fatalf("read fixture pdf/fake_memo.pdf: %v", contentBytesErr)
|
||||
}
|
||||
_, err := kreuzberg.DetectMimeTypeFromBytes(contentBytes)
|
||||
if err != nil {
|
||||
t.Fatalf("call failed: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func Test_DetectMimeBytesPng(t *testing.T) {
|
||||
// Detect PNG MIME type from bytes
|
||||
contentBytes, contentBytesErr := os.ReadFile(`images/test_hello_world.png`)
|
||||
if contentBytesErr != nil {
|
||||
t.Fatalf("read fixture images/test_hello_world.png: %v", contentBytesErr)
|
||||
}
|
||||
_, err := kreuzberg.DetectMimeTypeFromBytes(contentBytes)
|
||||
if err != nil {
|
||||
t.Fatalf("call failed: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func Test_GetExtensionsUnknownMime(t *testing.T) {
|
||||
// get_extensions unknown MIME
|
||||
_, err := kreuzberg.GetExtensionsForMime(`application/x-totally-unknown`)
|
||||
if err == nil {
|
||||
t.Errorf("expected an error, but call succeeded")
|
||||
}
|
||||
}
|
||||
27
e2e/go/document_extractor_management_test.go
generated
Normal file
27
e2e/go/document_extractor_management_test.go
generated
Normal file
@@ -0,0 +1,27 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
// E2e tests for category: document_extractor_management
|
||||
package e2e_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
kreuzberg "github.com/kreuzberg-dev/kreuzberg/v5"
|
||||
)
|
||||
|
||||
func Test_DocumentExtractorsClear(t *testing.T) {
|
||||
// Clear all document extractors and verify list is empty
|
||||
_ = kreuzberg.ClearDocumentExtractors()
|
||||
}
|
||||
|
||||
func Test_ExtractorsList(t *testing.T) {
|
||||
// List all registered document extractors
|
||||
_, err := kreuzberg.ListDocumentExtractors()
|
||||
if err != nil {
|
||||
t.Fatalf("call failed: %v", err)
|
||||
}
|
||||
}
|
||||
61
e2e/go/embed_async_pending_test.go
generated
Normal file
61
e2e/go/embed_async_pending_test.go
generated
Normal file
@@ -0,0 +1,61 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
// E2e tests for category: embed_async_pending
|
||||
package e2e_test
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
|
||||
kreuzberg "github.com/kreuzberg-dev/kreuzberg/v5"
|
||||
)
|
||||
|
||||
func Test_EmbedTextsAsyncEmptyInput(t *testing.T) {
|
||||
// embed_texts_async: empty text list
|
||||
var texts []string
|
||||
if err := json.Unmarshal([]byte(`[]`), &texts); err != nil {
|
||||
t.Fatalf("config parse failed: %v", err)
|
||||
}
|
||||
result, err := kreuzberg.EmbedTextsAsync(texts, kreuzberg.EmbeddingConfig{})
|
||||
if err != nil {
|
||||
t.Fatalf("call failed: %v", err)
|
||||
}
|
||||
value := result
|
||||
assert.Equal(t, len(value), 0, "expected exactly 0 elements")
|
||||
}
|
||||
|
||||
func Test_EmbedTextsAsyncHappy(t *testing.T) {
|
||||
// embed_texts_async: basic async embedding
|
||||
var texts []string
|
||||
if err := json.Unmarshal([]byte(`["first","second"]`), &texts); err != nil {
|
||||
t.Fatalf("config parse failed: %v", err)
|
||||
}
|
||||
result, err := kreuzberg.EmbedTextsAsync(texts, kreuzberg.EmbeddingConfig{})
|
||||
if err != nil {
|
||||
t.Fatalf("call failed: %v", err)
|
||||
}
|
||||
value := result
|
||||
assert.GreaterOrEqual(t, len(value), 2, "expected at least 2 elements")
|
||||
}
|
||||
|
||||
func Test_EmbedTextsAsyncPresetSwitch(t *testing.T) {
|
||||
// embed_texts_async: preset override
|
||||
var texts []string
|
||||
if err := json.Unmarshal([]byte(`["text"]`), &texts); err != nil {
|
||||
t.Fatalf("config parse failed: %v", err)
|
||||
}
|
||||
var config kreuzberg.EmbeddingConfig
|
||||
if err := json.Unmarshal([]byte(`{"model":{"name":"balanced","type":"preset"}}`), &config); err != nil {
|
||||
t.Fatalf("config parse failed: %v", err)
|
||||
}
|
||||
_, err := kreuzberg.EmbedTextsAsync(texts, config)
|
||||
if err != nil {
|
||||
t.Fatalf("call failed: %v", err)
|
||||
}
|
||||
}
|
||||
31
e2e/go/embed_extra_test.go
generated
Normal file
31
e2e/go/embed_extra_test.go
generated
Normal file
@@ -0,0 +1,31 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
// E2e tests for category: embed_extra
|
||||
package e2e_test
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"testing"
|
||||
|
||||
kreuzberg "github.com/kreuzberg-dev/kreuzberg/v5"
|
||||
)
|
||||
|
||||
func Test_EmbedTextsBatch(t *testing.T) {
|
||||
// Batch embed texts
|
||||
var texts []string
|
||||
if err := json.Unmarshal([]byte(`["hello","world"]`), &texts); err != nil {
|
||||
t.Fatalf("config parse failed: %v", err)
|
||||
}
|
||||
var config kreuzberg.EmbeddingConfig
|
||||
if err := json.Unmarshal([]byte(`{"model":{"name":"balanced","type":"preset"}}`), &config); err != nil {
|
||||
t.Fatalf("config parse failed: %v", err)
|
||||
}
|
||||
_, err := kreuzberg.EmbedTexts(texts, config)
|
||||
if err != nil {
|
||||
t.Fatalf("call failed: %v", err)
|
||||
}
|
||||
}
|
||||
27
e2e/go/embedding_backend_management_test.go
generated
Normal file
27
e2e/go/embedding_backend_management_test.go
generated
Normal file
@@ -0,0 +1,27 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
// E2e tests for category: embedding_backend_management
|
||||
package e2e_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
kreuzberg "github.com/kreuzberg-dev/kreuzberg/v5"
|
||||
)
|
||||
|
||||
func Test_EmbeddingBackendsClear(t *testing.T) {
|
||||
// Clear all embedding backends and verify list is empty
|
||||
_ = kreuzberg.ClearEmbeddingBackends()
|
||||
}
|
||||
|
||||
func Test_EmbeddingBackendsList(t *testing.T) {
|
||||
// List all registered embedding backends
|
||||
_, err := kreuzberg.ListEmbeddingBackends()
|
||||
if err != nil {
|
||||
t.Fatalf("call failed: %v", err)
|
||||
}
|
||||
}
|
||||
62
e2e/go/embeddings_test.go
generated
Normal file
62
e2e/go/embeddings_test.go
generated
Normal file
@@ -0,0 +1,62 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
// E2e tests for category: embeddings
|
||||
package e2e_test
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
|
||||
kreuzberg "github.com/kreuzberg-dev/kreuzberg/v5"
|
||||
)
|
||||
|
||||
func Test_EmbedTextsDifferentPreset(t *testing.T) {
|
||||
// embed_texts: multilingual preset
|
||||
var texts []string
|
||||
if err := json.Unmarshal([]byte(`["Hello world","test"]`), &texts); err != nil {
|
||||
t.Fatalf("config parse failed: %v", err)
|
||||
}
|
||||
var config kreuzberg.EmbeddingConfig
|
||||
if err := json.Unmarshal([]byte(`{"model":{"name":"multilingual","type":"preset"}}`), &config); err != nil {
|
||||
t.Fatalf("config parse failed: %v", err)
|
||||
}
|
||||
result, err := kreuzberg.EmbedTexts(texts, config)
|
||||
if err != nil {
|
||||
t.Fatalf("call failed: %v", err)
|
||||
}
|
||||
value := result
|
||||
assert.GreaterOrEqual(t, len(value), 2, "expected at least 2 elements")
|
||||
}
|
||||
|
||||
func Test_GetEmbeddingPresetKnown(t *testing.T) {
|
||||
// get_embedding_preset: known preset
|
||||
_ = kreuzberg.GetEmbeddingPreset(`balanced`)
|
||||
}
|
||||
|
||||
func Test_GetEmbeddingPresetNominal(t *testing.T) {
|
||||
// get_embedding_preset: nominal case
|
||||
_ = kreuzberg.GetEmbeddingPreset(`balanced`)
|
||||
}
|
||||
|
||||
func Test_GetEmbeddingPresetUnknown(t *testing.T) {
|
||||
// get_embedding_preset: unknown preset fails
|
||||
result := kreuzberg.GetEmbeddingPreset(`nonexistent-xyz`)
|
||||
if result != nil {
|
||||
t.Errorf("expected empty value, got %v", result)
|
||||
}
|
||||
}
|
||||
|
||||
func Test_ListEmbeddingPresetsSanity(t *testing.T) {
|
||||
// list_embedding_presets: returns at least one
|
||||
result := kreuzberg.ListEmbeddingPresets()
|
||||
value := result
|
||||
if len(value) == 0 {
|
||||
t.Errorf("expected non-empty value")
|
||||
}
|
||||
}
|
||||
80
e2e/go/error_test.go
generated
Normal file
80
e2e/go/error_test.go
generated
Normal file
@@ -0,0 +1,80 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
// E2e tests for category: error
|
||||
package e2e_test
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
kreuzberg "github.com/kreuzberg-dev/kreuzberg/v5"
|
||||
)
|
||||
|
||||
func Test_ErrorEmptyBytes(t *testing.T) {
|
||||
// Graceful handling of empty bytes (should not error)
|
||||
contentBytes, contentBytesErr := os.ReadFile(`text/empty.txt`)
|
||||
if contentBytesErr != nil {
|
||||
t.Fatalf("read fixture text/empty.txt: %v", contentBytesErr)
|
||||
}
|
||||
_, err := kreuzberg.ExtractBytesSync(contentBytes, `text/plain`, kreuzberg.ExtractionConfig{})
|
||||
if err != nil {
|
||||
t.Fatalf("call failed: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func Test_ErrorEmptyMime(t *testing.T) {
|
||||
// Error when extracting with empty MIME type
|
||||
contentBytes, contentBytesErr := os.ReadFile(`text/plain.txt`)
|
||||
if contentBytesErr != nil {
|
||||
t.Fatalf("read fixture text/plain.txt: %v", contentBytesErr)
|
||||
}
|
||||
_, err := kreuzberg.ExtractBytesSync(contentBytes, ``, kreuzberg.ExtractionConfig{})
|
||||
if err == nil {
|
||||
t.Errorf("expected an error, but call succeeded")
|
||||
}
|
||||
}
|
||||
|
||||
func Test_ErrorExtractBytesConflictingOcr(t *testing.T) {
|
||||
// extract_bytes force+disable OCR
|
||||
contentBytes, contentBytesErr := os.ReadFile(`text/fake_text.txt`)
|
||||
if contentBytesErr != nil {
|
||||
t.Fatalf("read fixture text/fake_text.txt: %v", contentBytesErr)
|
||||
}
|
||||
var config kreuzberg.ExtractionConfig
|
||||
if err := json.Unmarshal([]byte(`{"disable_ocr":true,"force_ocr":true}`), &config); err != nil {
|
||||
t.Fatalf("config parse failed: %v", err)
|
||||
}
|
||||
_, err := kreuzberg.ExtractBytesSync(contentBytes, `text/plain`, config)
|
||||
if err == nil {
|
||||
t.Errorf("expected an error, but call succeeded")
|
||||
}
|
||||
}
|
||||
|
||||
func Test_ErrorInvalidMimeFormat(t *testing.T) {
|
||||
// Error when extracting with invalid MIME type format
|
||||
contentBytes, contentBytesErr := os.ReadFile(`text/plain.txt`)
|
||||
if contentBytesErr != nil {
|
||||
t.Fatalf("read fixture text/plain.txt: %v", contentBytesErr)
|
||||
}
|
||||
_, err := kreuzberg.ExtractBytesSync(contentBytes, `not-a-mime`, kreuzberg.ExtractionConfig{})
|
||||
if err == nil {
|
||||
t.Errorf("expected an error, but call succeeded")
|
||||
}
|
||||
}
|
||||
|
||||
func Test_ErrorUnsupportedMime(t *testing.T) {
|
||||
// Error when extracting with unsupported MIME type
|
||||
contentBytes, contentBytesErr := os.ReadFile(`text/plain.txt`)
|
||||
if contentBytesErr != nil {
|
||||
t.Fatalf("read fixture text/plain.txt: %v", contentBytesErr)
|
||||
}
|
||||
_, err := kreuzberg.ExtractBytesSync(contentBytes, `application/x-nonexistent`, kreuzberg.ExtractionConfig{})
|
||||
if err == nil {
|
||||
t.Errorf("expected an error, but call succeeded")
|
||||
}
|
||||
}
|
||||
86
e2e/go/format_specific_test.go
generated
Normal file
86
e2e/go/format_specific_test.go
generated
Normal file
@@ -0,0 +1,86 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
// E2e tests for category: format_specific
|
||||
package e2e_test
|
||||
|
||||
import (
|
||||
"os"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
|
||||
kreuzberg "github.com/kreuzberg-dev/kreuzberg/v5"
|
||||
)
|
||||
|
||||
func Test_FormatDocxStandalone(t *testing.T) {
|
||||
// Standalone DOCX extraction using extract_bytes_sync
|
||||
contentBytes, contentBytesErr := os.ReadFile(`docx/fake.docx`)
|
||||
if contentBytesErr != nil {
|
||||
t.Fatalf("read fixture docx/fake.docx: %v", contentBytesErr)
|
||||
}
|
||||
result, err := kreuzberg.ExtractBytesSync(contentBytes, `application/vnd.openxmlformats-officedocument.wordprocessingml.document`, kreuzberg.ExtractionConfig{})
|
||||
if err != nil {
|
||||
t.Fatalf("call failed: %v", err)
|
||||
}
|
||||
assert.GreaterOrEqual(t, len(result.Content), 20, "expected length >= 20")
|
||||
}
|
||||
|
||||
func Test_FormatHwpxStandalone(t *testing.T) {
|
||||
// Standalone HWPX extraction using extract_bytes_sync
|
||||
contentBytes, contentBytesErr := os.ReadFile(`hwpx/simple.hwpx`)
|
||||
if contentBytesErr != nil {
|
||||
t.Fatalf("read fixture hwpx/simple.hwpx: %v", contentBytesErr)
|
||||
}
|
||||
result, err := kreuzberg.ExtractBytesSync(contentBytes, `application/haansofthwpx`, kreuzberg.ExtractionConfig{})
|
||||
if err != nil {
|
||||
t.Fatalf("call failed: %v", err)
|
||||
}
|
||||
assert.GreaterOrEqual(t, len(result.Content), 20, "expected length >= 20")
|
||||
if !strings.Contains(string(result.Content), `Hello from HWPX`) {
|
||||
t.Errorf("expected to contain %s, got %v", `Hello from HWPX`, result.Content)
|
||||
}
|
||||
}
|
||||
|
||||
func Test_FormatPdfText(t *testing.T) {
|
||||
// Standalone PDF text extraction using extract_bytes_sync
|
||||
contentBytes, contentBytesErr := os.ReadFile(`pdf/fake_memo.pdf`)
|
||||
if contentBytesErr != nil {
|
||||
t.Fatalf("read fixture pdf/fake_memo.pdf: %v", contentBytesErr)
|
||||
}
|
||||
result, err := kreuzberg.ExtractBytesSync(contentBytes, `application/pdf`, kreuzberg.ExtractionConfig{})
|
||||
if err != nil {
|
||||
t.Fatalf("call failed: %v", err)
|
||||
}
|
||||
assert.GreaterOrEqual(t, len(result.Content), 50, "expected length >= 50")
|
||||
{
|
||||
found := false
|
||||
if strings.Contains(string(result.Content), `Mallori`) { found = true }
|
||||
if strings.Contains(string(result.Content), `May`) { found = true }
|
||||
if !found {
|
||||
t.Errorf("expected to contain at least one of the specified values")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func Test_FormatPptx(t *testing.T) {
|
||||
// PPTX presentation extraction using extract_file_sync
|
||||
mime_typeVal := `application/vnd.openxmlformats-officedocument.presentationml.presentation`
|
||||
_, err := kreuzberg.ExtractFileSync(`pptx/simple.pptx`, &mime_typeVal, kreuzberg.ExtractionConfig{})
|
||||
if err != nil {
|
||||
t.Fatalf("call failed: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func Test_FormatXlsx(t *testing.T) {
|
||||
// XLSX spreadsheet extraction using extract_file_sync
|
||||
mime_typeVal := `application/vnd.openxmlformats-officedocument.spreadsheetml.sheet`
|
||||
_, err := kreuzberg.ExtractFileSync(`xlsx/stanley_cups.xlsx`, &mime_typeVal, kreuzberg.ExtractionConfig{})
|
||||
if err != nil {
|
||||
t.Fatalf("call failed: %v", err)
|
||||
}
|
||||
}
|
||||
10
e2e/go/go.mod
generated
Normal file
10
e2e/go/go.mod
generated
Normal file
@@ -0,0 +1,10 @@
|
||||
module e2e_go
|
||||
|
||||
go 1.26
|
||||
|
||||
require (
|
||||
github.com/kreuzberg-dev/kreuzberg/v5 v5.0.0-rc.3
|
||||
github.com/stretchr/testify v1.11.1
|
||||
)
|
||||
|
||||
replace github.com/kreuzberg-dev/kreuzberg/v5 => ../../packages/go/v5
|
||||
13
e2e/go/helpers_test.go
generated
Normal file
13
e2e/go/helpers_test.go
generated
Normal file
@@ -0,0 +1,13 @@
|
||||
package e2e_test
|
||||
|
||||
import "encoding/json"
|
||||
|
||||
// jsonString renders value as compact JSON text.
// It is used for array-valued fields instead of fmt.Sprint so that the
// structure of the value is preserved in the output.
// If the value cannot be marshalled, the empty string is returned.
func jsonString(value any) string {
	if raw, marshalErr := json.Marshal(value); marshalErr == nil {
		return string(raw)
	}
	return ""
}
|
||||
87
e2e/go/main_test.go
generated
Normal file
87
e2e/go/main_test.go
generated
Normal file
@@ -0,0 +1,87 @@
|
||||
package e2e_test
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"encoding/json"
|
||||
"io"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// TestMain prepares the process-wide environment for the e2e suite and then
// runs it, exiting with the suite's status code.
//
// In order, it:
//  1. resolves the directory of this test package;
//  2. changes into ../../test_documents when that directory exists, so fixture
//     paths like "pdf/fake_memo.pdf" resolve;
//  3. runs the tests immediately if MOCK_SERVER_URL is already exported;
//  4. otherwise starts ../rust/target/release/mock-server (when present),
//     exports the MOCK_SERVER_URL / MOCK_SERVERS / MOCK_SERVER_<FIXTURE_ID>
//     values it prints on stdout, runs the tests, and shuts the server down.
//
// Setup failures (chdir, pipe creation, process start) panic.
func TestMain(m *testing.M) {
	// Resolve the package directory BEFORE any chdir. runtime.Caller can report
	// failure, and under -trimpath the recorded file name is not a filesystem
	// path; in both cases fall back to the working directory.
	// NOTE(review): this relies on `go test` running the binary from the
	// package's source directory — confirm for custom runners.
	var dir string
	if _, filename, _, ok := runtime.Caller(0); ok && filepath.IsAbs(filename) {
		dir = filepath.Dir(filename)
	} else {
		wd, err := os.Getwd()
		if err != nil {
			panic(err)
		}
		dir = wd
	}

	// Change to the configured test-documents directory (if it exists) so that fixture
	// file paths like "pdf/fake_memo.pdf" resolve correctly when running go test
	// from e2e/go/. Repos without document fixtures (web crawler, network clients) do
	// not ship this directory — skip chdir and run from e2e/go/.
	testDocumentsDir := filepath.Join(dir, "..", "..", "test_documents")
	if info, err := os.Stat(testDocumentsDir); err == nil && info.IsDir() {
		if err := os.Chdir(testDocumentsDir); err != nil {
			panic(err)
		}
	}

	// If MOCK_SERVER_URL is already set, a parent process (e.g. `alef test-apps run`)
	// started a shared mock-server and exported its URL (plus any MOCK_SERVERS /
	// MOCK_SERVER_<FIXTURE_ID> vars). Use it as-is and do NOT spawn our own server.
	if os.Getenv("MOCK_SERVER_URL") != "" {
		os.Exit(m.Run())
	}

	// Without a built mock-server binary there is nothing to start.
	mockServerBin := filepath.Join(dir, "..", "rust", "target", "release", "mock-server")
	if _, err := os.Stat(mockServerBin); err != nil {
		os.Exit(m.Run())
	}

	fixturesDir := filepath.Join(dir, "..", "..", "fixtures")
	cmd := exec.Command(mockServerBin, fixturesDir)
	cmd.Stderr = os.Stderr
	stdout, err := cmd.StdoutPipe()
	if err != nil {
		panic(err)
	}
	// Keep a writable pipe to the mock-server's stdin so the
	// server does not see EOF and exit immediately. The mock-server
	// blocks reading stdin until the parent closes the pipe.
	stdin, err := cmd.StdinPipe()
	if err != nil {
		panic(err)
	}
	if err := cmd.Start(); err != nil {
		panic(err)
	}

	scanner := bufio.NewScanner(stdout)
	// The MOCK_SERVERS line is a JSON map with one entry per fixture and can
	// exceed the scanner's default 64 KiB token limit, which would end the
	// scan silently. Allow lines of up to 16 MiB.
	scanner.Buffer(make([]byte, 0, bufio.MaxScanTokenSize), 16<<20)
	for scanner.Scan() {
		line := scanner.Text()
		if strings.HasPrefix(line, "MOCK_SERVER_URL=") {
			_ = os.Setenv("MOCK_SERVER_URL", strings.TrimPrefix(line, "MOCK_SERVER_URL="))
		} else if strings.HasPrefix(line, "MOCK_SERVERS=") {
			serversJSON := strings.TrimPrefix(line, "MOCK_SERVERS=")
			_ = os.Setenv("MOCK_SERVERS", serversJSON)
			// Parse the JSON map and set per-fixture env vars (MOCK_SERVER_<FIXTURE_ID>).
			var perFixture map[string]string
			if err := json.Unmarshal([]byte(serversJSON), &perFixture); err == nil {
				for fixtureID, fixtureURL := range perFixture {
					_ = os.Setenv("MOCK_SERVER_"+strings.ToUpper(fixtureID), fixtureURL)
				}
			}
			break
		} else if os.Getenv("MOCK_SERVER_URL") != "" {
			// The URL is known and this line is not MOCK_SERVERS: startup output is over.
			break
		}
	}

	// Keep draining stdout so the server never blocks on a full pipe.
	go func() { _, _ = io.Copy(io.Discard, stdout) }()

	code := m.Run()

	// Closing stdin lets the server observe EOF; the interrupt is best-effort.
	_ = stdin.Close()
	_ = cmd.Process.Signal(os.Interrupt)
	_ = cmd.Wait()
	os.Exit(code)
}
|
||||
58
e2e/go/mime_utilities_test.go
generated
Normal file
58
e2e/go/mime_utilities_test.go
generated
Normal file
@@ -0,0 +1,58 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
// E2e tests for category: mime_utilities
|
||||
package e2e_test
|
||||
|
||||
import (
|
||||
"os"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
kreuzberg "github.com/kreuzberg-dev/kreuzberg/v5"
|
||||
)
|
||||
|
||||
func Test_MimeDetectBytes(t *testing.T) {
|
||||
// Detect MIME type from file bytes
|
||||
contentBytes, contentBytesErr := os.ReadFile(`pdf/fake_memo.pdf`)
|
||||
if contentBytesErr != nil {
|
||||
t.Fatalf("read fixture pdf/fake_memo.pdf: %v", contentBytesErr)
|
||||
}
|
||||
result, err := kreuzberg.DetectMimeTypeFromBytes(contentBytes)
|
||||
if err != nil {
|
||||
t.Fatalf("call failed: %v", err)
|
||||
}
|
||||
if !strings.Contains(string(result), `pdf`) {
|
||||
t.Errorf("expected to contain %s, got %v", `pdf`, result)
|
||||
}
|
||||
}
|
||||
|
||||
func Test_MimeDetectImage(t *testing.T) {
|
||||
// Detect MIME type from PNG image bytes
|
||||
contentBytes, contentBytesErr := os.ReadFile(`images/test_hello_world.png`)
|
||||
if contentBytesErr != nil {
|
||||
t.Fatalf("read fixture images/test_hello_world.png: %v", contentBytesErr)
|
||||
}
|
||||
result, err := kreuzberg.DetectMimeTypeFromBytes(contentBytes)
|
||||
if err != nil {
|
||||
t.Fatalf("call failed: %v", err)
|
||||
}
|
||||
if !strings.Contains(string(result), `png`) {
|
||||
t.Errorf("expected to contain %s, got %v", `png`, result)
|
||||
}
|
||||
}
|
||||
|
||||
func Test_MimeGetExtensions(t *testing.T) {
|
||||
// Get file extensions for a MIME type
|
||||
result, err := kreuzberg.GetExtensionsForMime(`application/pdf`)
|
||||
if err != nil {
|
||||
t.Fatalf("call failed: %v", err)
|
||||
}
|
||||
value := result
|
||||
if !strings.Contains(jsonString(value), `pdf`) {
|
||||
t.Errorf("expected to contain %s, got %v", `pdf`, value)
|
||||
}
|
||||
}
|
||||
32
e2e/go/ocr_backend_management_test.go
generated
Normal file
32
e2e/go/ocr_backend_management_test.go
generated
Normal file
@@ -0,0 +1,32 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
// E2e tests for category: ocr_backend_management
|
||||
package e2e_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
kreuzberg "github.com/kreuzberg-dev/kreuzberg/v5"
|
||||
)
|
||||
|
||||
func Test_OcrBackendsClear(t *testing.T) {
|
||||
// Clear all OCR backends and verify list is empty
|
||||
_ = kreuzberg.ClearOcrBackends()
|
||||
}
|
||||
|
||||
func Test_OcrBackendsList(t *testing.T) {
|
||||
// List all registered OCR backends
|
||||
_, err := kreuzberg.ListOcrBackends()
|
||||
if err != nil {
|
||||
t.Fatalf("call failed: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func Test_OcrBackendsUnregister(t *testing.T) {
|
||||
// Unregister nonexistent OCR backend gracefully
|
||||
_ = kreuzberg.UnregisterOcrBackend(`nonexistent-backend-xyz`)
|
||||
}
|
||||
43
e2e/go/pdf_test.go
generated
Normal file
43
e2e/go/pdf_test.go
generated
Normal file
@@ -0,0 +1,43 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
// E2e tests for category: pdf
|
||||
package e2e_test
|
||||
|
||||
import (
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
|
||||
kreuzberg "github.com/kreuzberg-dev/kreuzberg/v5"
|
||||
)
|
||||
|
||||
func Test_RenderPdfPageFirst(t *testing.T) {
|
||||
// render_pdf_page_to_png: first page
|
||||
pdf_bytesBytes, pdf_bytesBytesErr := os.ReadFile(`pdf/fake_memo.pdf`)
|
||||
if pdf_bytesBytesErr != nil {
|
||||
t.Fatalf("read fixture pdf/fake_memo.pdf: %v", pdf_bytesBytesErr)
|
||||
}
|
||||
result, err := kreuzberg.RenderPdfPageToPng(pdf_bytesBytes, 0, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("call failed: %v", err)
|
||||
}
|
||||
value := result
|
||||
assert.GreaterOrEqual(t, len(value), 100, "expected length >= 100")
|
||||
}
|
||||
|
||||
func Test_RenderPdfPageOutOfRange(t *testing.T) {
|
||||
// render_pdf_page_to_png: page out of range
|
||||
pdf_bytesBytes, pdf_bytesBytesErr := os.ReadFile(`pdf/fake_memo.pdf`)
|
||||
if pdf_bytesBytesErr != nil {
|
||||
t.Fatalf("read fixture pdf/fake_memo.pdf: %v", pdf_bytesBytesErr)
|
||||
}
|
||||
_, err := kreuzberg.RenderPdfPageToPng(pdf_bytesBytes, 999, nil, nil)
|
||||
if err == nil {
|
||||
t.Errorf("expected an error, but call succeeded")
|
||||
}
|
||||
}
|
||||
148
e2e/go/plugin_api_test.go
generated
Normal file
148
e2e/go/plugin_api_test.go
generated
Normal file
@@ -0,0 +1,148 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
// E2e tests for category: plugin_api
|
||||
package e2e_test
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"testing"
|
||||
|
||||
kreuzberg "github.com/kreuzberg-dev/kreuzberg/v5"
|
||||
)
|
||||
|
||||
type testStub_register_document_extractor_trait_bridge struct{}
|
||||
|
||||
func (testStub_register_document_extractor_trait_bridge) ExtractBytes(content []byte, mimeType string, config kreuzberg.ExtractionConfig) (json.RawMessage, error) { return nil, nil }
|
||||
func (testStub_register_document_extractor_trait_bridge) ExtractFile(path string, mimeType string, config kreuzberg.ExtractionConfig) (json.RawMessage, error) { return nil, nil }
|
||||
func (testStub_register_document_extractor_trait_bridge) SupportedMimeTypes() []string { return nil }
|
||||
func (testStub_register_document_extractor_trait_bridge) Priority() int32 { return 0 }
|
||||
func (testStub_register_document_extractor_trait_bridge) CanHandle(path string, mimeType string) bool { return false }
|
||||
func (testStub_register_document_extractor_trait_bridge) Name() string { return "" }
|
||||
func (testStub_register_document_extractor_trait_bridge) Version() string { return "" }
|
||||
func (testStub_register_document_extractor_trait_bridge) Initialize() error { return nil }
|
||||
func (testStub_register_document_extractor_trait_bridge) Shutdown() error { return nil }
|
||||
func (testStub_register_document_extractor_trait_bridge) Description() string { return "" }
|
||||
func (testStub_register_document_extractor_trait_bridge) Author() string { return "" }
|
||||
|
||||
func Test_RegisterDocumentExtractorTraitBridge(t *testing.T) {
|
||||
// register_document_extractor: trait bridge
|
||||
_ = kreuzberg.RegisterDocumentExtractor(testStub_register_document_extractor_trait_bridge{})
|
||||
}
|
||||
|
||||
// testStub_register_embedding_backend_trait_bridge is a stateless stub whose
// methods all return zero values. It exists only to be handed to
// kreuzberg.RegisterEmbeddingBackend.
type testStub_register_embedding_backend_trait_bridge struct{}

// Embedding behaviour: zero dimensions, no vectors, no error.
func (testStub_register_embedding_backend_trait_bridge) Dimensions() uint { return 0 }
func (testStub_register_embedding_backend_trait_bridge) Embed(_ []string) ([][]float32, error) {
	return nil, nil
}

// Plugin metadata and lifecycle: empty strings and successful no-ops.
func (testStub_register_embedding_backend_trait_bridge) Name() string        { return "" }
func (testStub_register_embedding_backend_trait_bridge) Version() string     { return "" }
func (testStub_register_embedding_backend_trait_bridge) Initialize() error   { return nil }
func (testStub_register_embedding_backend_trait_bridge) Shutdown() error     { return nil }
func (testStub_register_embedding_backend_trait_bridge) Description() string { return "" }
func (testStub_register_embedding_backend_trait_bridge) Author() string      { return "" }
|
||||
|
||||
func Test_RegisterEmbeddingBackendTraitBridge(t *testing.T) {
|
||||
// register_embedding_backend: trait bridge
|
||||
_ = kreuzberg.RegisterEmbeddingBackend(testStub_register_embedding_backend_trait_bridge{})
|
||||
}
|
||||
|
||||
type testStub_register_ocr_backend_trait_bridge struct{}
|
||||
|
||||
func (testStub_register_ocr_backend_trait_bridge) ProcessImage(imageBytes []byte, config kreuzberg.OcrConfig) (kreuzberg.ExtractionResult, error) { return kreuzberg.ExtractionResult{}, nil }
|
||||
func (testStub_register_ocr_backend_trait_bridge) ProcessImageFile(path string, config kreuzberg.OcrConfig) (kreuzberg.ExtractionResult, error) { return kreuzberg.ExtractionResult{}, nil }
|
||||
func (testStub_register_ocr_backend_trait_bridge) SupportsLanguage(lang string) bool { return false }
|
||||
func (testStub_register_ocr_backend_trait_bridge) BackendType() kreuzberg.OcrBackendType { return "" }
|
||||
func (testStub_register_ocr_backend_trait_bridge) SupportedLanguages() []string { return nil }
|
||||
func (testStub_register_ocr_backend_trait_bridge) SupportsTableDetection() bool { return false }
|
||||
func (testStub_register_ocr_backend_trait_bridge) SupportsDocumentProcessing() bool { return false }
|
||||
func (testStub_register_ocr_backend_trait_bridge) ProcessDocument(path string, config kreuzberg.OcrConfig) (kreuzberg.ExtractionResult, error) { return kreuzberg.ExtractionResult{}, nil }
|
||||
func (testStub_register_ocr_backend_trait_bridge) Name() string { return "" }
|
||||
func (testStub_register_ocr_backend_trait_bridge) Version() string { return "" }
|
||||
func (testStub_register_ocr_backend_trait_bridge) Initialize() error { return nil }
|
||||
func (testStub_register_ocr_backend_trait_bridge) Shutdown() error { return nil }
|
||||
func (testStub_register_ocr_backend_trait_bridge) Description() string { return "" }
|
||||
func (testStub_register_ocr_backend_trait_bridge) Author() string { return "" }
|
||||
|
||||
func Test_RegisterOcrBackendTraitBridge(t *testing.T) {
|
||||
// register_ocr_backend: trait bridge
|
||||
_ = kreuzberg.RegisterOcrBackend(testStub_register_ocr_backend_trait_bridge{})
|
||||
}
|
||||
|
||||
type testStub_register_post_processor_trait_bridge struct{}
|
||||
|
||||
func (testStub_register_post_processor_trait_bridge) Process(result kreuzberg.ExtractionResult, config kreuzberg.ExtractionConfig) error { return nil }
|
||||
func (testStub_register_post_processor_trait_bridge) ProcessingStage() kreuzberg.ProcessingStage { return "" }
|
||||
func (testStub_register_post_processor_trait_bridge) ShouldProcess(result kreuzberg.ExtractionResult, config kreuzberg.ExtractionConfig) bool { return false }
|
||||
func (testStub_register_post_processor_trait_bridge) EstimatedDurationMs(result kreuzberg.ExtractionResult) uint64 { return 0 }
|
||||
func (testStub_register_post_processor_trait_bridge) Priority() int32 { return 0 }
|
||||
func (testStub_register_post_processor_trait_bridge) Name() string { return "" }
|
||||
func (testStub_register_post_processor_trait_bridge) Version() string { return "" }
|
||||
func (testStub_register_post_processor_trait_bridge) Initialize() error { return nil }
|
||||
func (testStub_register_post_processor_trait_bridge) Shutdown() error { return nil }
|
||||
func (testStub_register_post_processor_trait_bridge) Description() string { return "" }
|
||||
func (testStub_register_post_processor_trait_bridge) Author() string { return "" }
|
||||
|
||||
func Test_RegisterPostProcessorTraitBridge(t *testing.T) {
|
||||
// register_post_processor: trait bridge
|
||||
_ = kreuzberg.RegisterPostProcessor(testStub_register_post_processor_trait_bridge{})
|
||||
}
|
||||
|
||||
// testStub_register_renderer_trait_bridge is a stateless stub whose methods
// all return zero values. It exists only to be handed to
// kreuzberg.RegisterRenderer.
type testStub_register_renderer_trait_bridge struct{}

// Render produces an empty string and no error for any document.
func (testStub_register_renderer_trait_bridge) Render(_ json.RawMessage) (string, error) {
	return "", nil
}

// Plugin metadata and lifecycle: empty strings and successful no-ops.
func (testStub_register_renderer_trait_bridge) Name() string        { return "" }
func (testStub_register_renderer_trait_bridge) Version() string     { return "" }
func (testStub_register_renderer_trait_bridge) Initialize() error   { return nil }
func (testStub_register_renderer_trait_bridge) Shutdown() error     { return nil }
func (testStub_register_renderer_trait_bridge) Description() string { return "" }
func (testStub_register_renderer_trait_bridge) Author() string      { return "" }
|
||||
|
||||
func Test_RegisterRendererTraitBridge(t *testing.T) {
|
||||
// register_renderer: trait bridge
|
||||
_ = kreuzberg.RegisterRenderer(testStub_register_renderer_trait_bridge{})
|
||||
}
|
||||
|
||||
type testStub_register_validator_trait_bridge struct{}
|
||||
|
||||
func (testStub_register_validator_trait_bridge) Validate(result kreuzberg.ExtractionResult, config kreuzberg.ExtractionConfig) error { return nil }
|
||||
func (testStub_register_validator_trait_bridge) ShouldValidate(result kreuzberg.ExtractionResult, config kreuzberg.ExtractionConfig) bool { return false }
|
||||
func (testStub_register_validator_trait_bridge) Priority() int32 { return 0 }
|
||||
func (testStub_register_validator_trait_bridge) Name() string { return "" }
|
||||
func (testStub_register_validator_trait_bridge) Version() string { return "" }
|
||||
func (testStub_register_validator_trait_bridge) Initialize() error { return nil }
|
||||
func (testStub_register_validator_trait_bridge) Shutdown() error { return nil }
|
||||
func (testStub_register_validator_trait_bridge) Description() string { return "" }
|
||||
func (testStub_register_validator_trait_bridge) Author() string { return "" }
|
||||
|
||||
func Test_RegisterValidatorTraitBridge(t *testing.T) {
|
||||
// register_validator: trait bridge
|
||||
_ = kreuzberg.RegisterValidator(testStub_register_validator_trait_bridge{})
|
||||
}
|
||||
|
||||
func Test_UnregisterDocumentExtractorAfterRegister(t *testing.T) {
|
||||
// unregister_document_extractor
|
||||
_ = kreuzberg.UnregisterDocumentExtractor(`test-extractor`)
|
||||
}
|
||||
|
||||
func Test_UnregisterEmbeddingBackendAfterRegister(t *testing.T) {
|
||||
// unregister_embedding_backend
|
||||
_ = kreuzberg.UnregisterEmbeddingBackend(`test-embedding-backend`)
|
||||
}
|
||||
|
||||
func Test_UnregisterPostProcessorAfterRegister(t *testing.T) {
|
||||
// unregister_post_processor
|
||||
_ = kreuzberg.UnregisterPostProcessor(`test-processor`)
|
||||
}
|
||||
|
||||
func Test_UnregisterRendererAfterRegister(t *testing.T) {
|
||||
// unregister_renderer
|
||||
_ = kreuzberg.UnregisterRenderer(`test-renderer`)
|
||||
}
|
||||
|
||||
func Test_UnregisterValidatorAfterRegister(t *testing.T) {
|
||||
// unregister_validator
|
||||
_ = kreuzberg.UnregisterValidator(`test-validator`)
|
||||
}
|
||||
27
e2e/go/post_processor_management_test.go
generated
Normal file
27
e2e/go/post_processor_management_test.go
generated
Normal file
@@ -0,0 +1,27 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
// E2e tests for category: post_processor_management
|
||||
package e2e_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
kreuzberg "github.com/kreuzberg-dev/kreuzberg/v5"
|
||||
)
|
||||
|
||||
func Test_PostProcessorsClear(t *testing.T) {
|
||||
// Clear all post-processors and verify list is empty
|
||||
_ = kreuzberg.ClearPostProcessors()
|
||||
}
|
||||
|
||||
func Test_PostProcessorsList(t *testing.T) {
|
||||
// List all registered post-processors
|
||||
_, err := kreuzberg.ListPostProcessors()
|
||||
if err != nil {
|
||||
t.Fatalf("call failed: %v", err)
|
||||
}
|
||||
}
|
||||
38
e2e/go/registry_operations_test.go
generated
Normal file
38
e2e/go/registry_operations_test.go
generated
Normal file
@@ -0,0 +1,38 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
// E2e tests for category: registry_operations
|
||||
package e2e_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
kreuzberg "github.com/kreuzberg-dev/kreuzberg/v5"
|
||||
)
|
||||
|
||||
func Test_ExtensionsDocx(t *testing.T) {
|
||||
// Get file extensions for DOCX MIME type
|
||||
_, err := kreuzberg.GetExtensionsForMime(`application/vnd.openxmlformats-officedocument.wordprocessingml.document`)
|
||||
if err != nil {
|
||||
t.Fatalf("call failed: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func Test_ExtensionsHtml(t *testing.T) {
|
||||
// Get file extensions for HTML MIME type
|
||||
_, err := kreuzberg.GetExtensionsForMime(`text/html`)
|
||||
if err != nil {
|
||||
t.Fatalf("call failed: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func Test_ExtensionsPdf(t *testing.T) {
|
||||
// Get file extensions for PDF MIME type
|
||||
_, err := kreuzberg.GetExtensionsForMime(`application/pdf`)
|
||||
if err != nil {
|
||||
t.Fatalf("call failed: %v", err)
|
||||
}
|
||||
}
|
||||
62
e2e/go/registry_test.go
generated
Normal file
62
e2e/go/registry_test.go
generated
Normal file
@@ -0,0 +1,62 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
// E2e tests for category: registry
|
||||
package e2e_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
kreuzberg "github.com/kreuzberg-dev/kreuzberg/v5"
|
||||
)
|
||||
|
||||
func Test_ListDocumentExtractors(t *testing.T) {
|
||||
// List document extractors
|
||||
_, err := kreuzberg.ListDocumentExtractors()
|
||||
if err != nil {
|
||||
t.Fatalf("call failed: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func Test_ListEmbeddingBackends(t *testing.T) {
|
||||
// List embedding backends
|
||||
_, err := kreuzberg.ListEmbeddingBackends()
|
||||
if err != nil {
|
||||
t.Fatalf("call failed: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func Test_ListOcrBackends(t *testing.T) {
|
||||
// List OCR backends
|
||||
_, err := kreuzberg.ListOcrBackends()
|
||||
if err != nil {
|
||||
t.Fatalf("call failed: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func Test_ListPostProcessors(t *testing.T) {
|
||||
// List post-processors
|
||||
_, err := kreuzberg.ListPostProcessors()
|
||||
if err != nil {
|
||||
t.Fatalf("call failed: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func Test_ListRenderers(t *testing.T) {
|
||||
// List renderers
|
||||
_, err := kreuzberg.ListRenderers()
|
||||
if err != nil {
|
||||
t.Fatalf("call failed: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func Test_ListValidators(t *testing.T) {
|
||||
// List validators
|
||||
_, err := kreuzberg.ListValidators()
|
||||
if err != nil {
|
||||
t.Fatalf("call failed: %v", err)
|
||||
}
|
||||
}
|
||||
27
e2e/go/renderer_management_test.go
generated
Normal file
27
e2e/go/renderer_management_test.go
generated
Normal file
@@ -0,0 +1,27 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
// E2e tests for category: renderer_management
|
||||
package e2e_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
kreuzberg "github.com/kreuzberg-dev/kreuzberg/v5"
|
||||
)
|
||||
|
||||
func Test_RenderersClear(t *testing.T) {
|
||||
// Clear all renderers and verify list is empty
|
||||
_ = kreuzberg.ClearRenderers()
|
||||
}
|
||||
|
||||
func Test_RenderersList(t *testing.T) {
|
||||
// List all registered renderers
|
||||
_, err := kreuzberg.ListRenderers()
|
||||
if err != nil {
|
||||
t.Fatalf("call failed: %v", err)
|
||||
}
|
||||
}
|
||||
196
e2e/go/smoke_test.go
generated
Normal file
196
e2e/go/smoke_test.go
generated
Normal file
@@ -0,0 +1,196 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
// E2e tests for category: smoke
|
||||
package e2e_test
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"os"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
|
||||
kreuzberg "github.com/kreuzberg-dev/kreuzberg/v5"
|
||||
)
|
||||
|
||||
func Test_OcrImagePng(t *testing.T) {
|
||||
// OCR: PNG image extraction with OCR enabled. In WASM this exercises the Uint8Array bridge parameter and Promise await in the generated OcrBackend bridge.
|
||||
contentBytes, contentBytesErr := os.ReadFile(`images/test_hello_world.png`)
|
||||
if contentBytesErr != nil {
|
||||
t.Fatalf("read fixture images/test_hello_world.png: %v", contentBytesErr)
|
||||
}
|
||||
result, err := kreuzberg.ExtractBytes(contentBytes, `image/png`, kreuzberg.ExtractionConfig{})
|
||||
if err != nil {
|
||||
t.Fatalf("call failed: %v", err)
|
||||
}
|
||||
if strings.TrimSpace(string(result.MimeType)) != `image/png` {
|
||||
t.Errorf("equals mismatch: got %v", result.MimeType)
|
||||
}
|
||||
assert.GreaterOrEqual(t, len(result.Content), 1, "expected length >= 1")
|
||||
{
|
||||
found := false
|
||||
if strings.Contains(string(result.Content), `Hello`) { found = true }
|
||||
if strings.Contains(string(result.Content), `World`) { found = true }
|
||||
if strings.Contains(string(result.Content), `hello`) { found = true }
|
||||
if strings.Contains(string(result.Content), `world`) { found = true }
|
||||
if !found {
|
||||
t.Errorf("expected to contain at least one of the specified values")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func Test_SmokeDocxBasic(t *testing.T) {
|
||||
// Smoke test: DOCX with formatted text
|
||||
mime_typeVal := `application/vnd.openxmlformats-officedocument.wordprocessingml.document`
|
||||
result, err := kreuzberg.ExtractFile(`docx/fake.docx`, &mime_typeVal, kreuzberg.ExtractionConfig{})
|
||||
if err != nil {
|
||||
t.Fatalf("call failed: %v", err)
|
||||
}
|
||||
if strings.TrimSpace(string(result.MimeType)) != `application/vnd.openxmlformats-officedocument.wordprocessingml.document` {
|
||||
t.Errorf("equals mismatch: got %v", result.MimeType)
|
||||
}
|
||||
assert.GreaterOrEqual(t, len(result.Content), 20, "expected length >= 20")
|
||||
{
|
||||
found := false
|
||||
if strings.Contains(string(result.Content), `Lorem`) { found = true }
|
||||
if strings.Contains(string(result.Content), `ipsum`) { found = true }
|
||||
if strings.Contains(string(result.Content), `document`) { found = true }
|
||||
if strings.Contains(string(result.Content), `text`) { found = true }
|
||||
if !found {
|
||||
t.Errorf("expected to contain at least one of the specified values")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func Test_SmokeHtmlBasic(t *testing.T) {
|
||||
// Smoke test: HTML table extraction
|
||||
mime_typeVal := `text/html`
|
||||
result, err := kreuzberg.ExtractFile(`html/simple_table.html`, &mime_typeVal, kreuzberg.ExtractionConfig{})
|
||||
if err != nil {
|
||||
t.Fatalf("call failed: %v", err)
|
||||
}
|
||||
if strings.TrimSpace(string(result.MimeType)) != `text/html` {
|
||||
t.Errorf("equals mismatch: got %v", result.MimeType)
|
||||
}
|
||||
assert.GreaterOrEqual(t, len(result.Content), 10, "expected length >= 10")
|
||||
{
|
||||
found := false
|
||||
if strings.Contains(string(result.Content), `Sample Data Table`) { found = true }
|
||||
if strings.Contains(string(result.Content), `Laptop`) { found = true }
|
||||
if strings.Contains(string(result.Content), `Electronics`) { found = true }
|
||||
if strings.Contains(string(result.Content), `Product`) { found = true }
|
||||
if !found {
|
||||
t.Errorf("expected to contain at least one of the specified values")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func Test_SmokeImagePng(t *testing.T) {
|
||||
// Smoke test: PNG image (without OCR, metadata only)
|
||||
var config kreuzberg.ExtractionConfig
|
||||
if err := json.Unmarshal([]byte(`{"disable_ocr":true}`), &config); err != nil {
|
||||
t.Fatalf("config parse failed: %v", err)
|
||||
}
|
||||
result, err := kreuzberg.ExtractFile(`images/sample.png`, nil, config)
|
||||
if err != nil {
|
||||
t.Fatalf("call failed: %v", err)
|
||||
}
|
||||
if strings.TrimSpace(string(result.MimeType)) != `image/png` {
|
||||
t.Errorf("equals mismatch: got %v", result.MimeType)
|
||||
}
|
||||
}
|
||||
|
||||
func Test_SmokeJsonBasic(t *testing.T) {
|
||||
// Smoke test: JSON file extraction
|
||||
mime_typeVal := `application/json`
|
||||
result, err := kreuzberg.ExtractFile(`json/simple.json`, &mime_typeVal, kreuzberg.ExtractionConfig{})
|
||||
if err != nil {
|
||||
t.Fatalf("call failed: %v", err)
|
||||
}
|
||||
if strings.TrimSpace(string(result.MimeType)) != `application/json` {
|
||||
t.Errorf("equals mismatch: got %v", result.MimeType)
|
||||
}
|
||||
assert.GreaterOrEqual(t, len(result.Content), 5, "expected length >= 5")
|
||||
}
|
||||
|
||||
func Test_SmokePdfBasic(t *testing.T) {
|
||||
// Smoke test: PDF with simple text extraction
|
||||
mime_typeVal := `application/pdf`
|
||||
result, err := kreuzberg.ExtractFile(`pdf/fake_memo.pdf`, &mime_typeVal, kreuzberg.ExtractionConfig{})
|
||||
if err != nil {
|
||||
t.Fatalf("call failed: %v", err)
|
||||
}
|
||||
if strings.TrimSpace(string(result.MimeType)) != `application/pdf` {
|
||||
t.Errorf("equals mismatch: got %v", result.MimeType)
|
||||
}
|
||||
assert.GreaterOrEqual(t, len(result.Content), 50, "expected length >= 50")
|
||||
{
|
||||
found := false
|
||||
if strings.Contains(string(result.Content), `May 5, 2023`) { found = true }
|
||||
if strings.Contains(string(result.Content), `To Whom it May Concern`) { found = true }
|
||||
if !found {
|
||||
t.Errorf("expected to contain at least one of the specified values")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func Test_SmokeTxtBasic(t *testing.T) {
|
||||
// Smoke test: Plain text file
|
||||
mime_typeVal := `text/plain`
|
||||
result, err := kreuzberg.ExtractFile(`text/report.txt`, &mime_typeVal, kreuzberg.ExtractionConfig{})
|
||||
if err != nil {
|
||||
t.Fatalf("call failed: %v", err)
|
||||
}
|
||||
if strings.TrimSpace(string(result.MimeType)) != `text/plain` {
|
||||
t.Errorf("equals mismatch: got %v", result.MimeType)
|
||||
}
|
||||
assert.GreaterOrEqual(t, len(result.Content), 5, "expected length >= 5")
|
||||
}
|
||||
|
||||
func Test_SmokeXlsxBasic(t *testing.T) {
|
||||
// Smoke test: XLSX with basic spreadsheet data including tables
|
||||
mime_typeVal := `application/vnd.openxmlformats-officedocument.spreadsheetml.sheet`
|
||||
result, err := kreuzberg.ExtractFile(`xlsx/stanley_cups.xlsx`, &mime_typeVal, kreuzberg.ExtractionConfig{})
|
||||
if err != nil {
|
||||
t.Fatalf("call failed: %v", err)
|
||||
}
|
||||
if strings.TrimSpace(string(result.MimeType)) != `application/vnd.openxmlformats-officedocument.spreadsheetml.sheet` {
|
||||
t.Errorf("equals mismatch: got %v", result.MimeType)
|
||||
}
|
||||
assert.GreaterOrEqual(t, len(result.Content), 100, "expected length >= 100")
|
||||
if !strings.Contains(string(result.Content), `Team`) {
|
||||
t.Errorf("expected to contain %s", `Team`)
|
||||
}
|
||||
if !strings.Contains(string(result.Content), `Location`) {
|
||||
t.Errorf("expected to contain %s", `Location`)
|
||||
}
|
||||
if !strings.Contains(string(result.Content), `Stanley Cups`) {
|
||||
t.Errorf("expected to contain %s", `Stanley Cups`)
|
||||
}
|
||||
if !strings.Contains(string(result.Content), `Blues`) {
|
||||
t.Errorf("expected to contain %s", `Blues`)
|
||||
}
|
||||
if !strings.Contains(string(result.Content), `Flyers`) {
|
||||
t.Errorf("expected to contain %s", `Flyers`)
|
||||
}
|
||||
if !strings.Contains(string(result.Content), `Maple Leafs`) {
|
||||
t.Errorf("expected to contain %s", `Maple Leafs`)
|
||||
}
|
||||
if !strings.Contains(string(result.Content), `STL`) {
|
||||
t.Errorf("expected to contain %s", `STL`)
|
||||
}
|
||||
if !strings.Contains(string(result.Content), `PHI`) {
|
||||
t.Errorf("expected to contain %s", `PHI`)
|
||||
}
|
||||
if !strings.Contains(string(result.Content), `TOR`) {
|
||||
t.Errorf("expected to contain %s", `TOR`)
|
||||
}
|
||||
// skipped: field 'tables' not available on result type
|
||||
// skipped: field 'metadata.format.excel.sheet_count' not available on result type
|
||||
// skipped: field 'metadata.format.excel.sheet_names' not available on result type
|
||||
}
|
||||
27
e2e/go/validator_management_test.go
generated
Normal file
27
e2e/go/validator_management_test.go
generated
Normal file
@@ -0,0 +1,27 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
|
||||
// E2e tests for category: validator_management
|
||||
package e2e_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
kreuzberg "github.com/kreuzberg-dev/kreuzberg/v5"
|
||||
)
|
||||
|
||||
func Test_ValidatorsClear(t *testing.T) {
|
||||
// Clear all validators and verify list is empty
|
||||
_ = kreuzberg.ClearValidators()
|
||||
}
|
||||
|
||||
func Test_ValidatorsList(t *testing.T) {
|
||||
// List all registered validators
|
||||
_, err := kreuzberg.ListValidators()
|
||||
if err != nil {
|
||||
t.Fatalf("call failed: %v", err)
|
||||
}
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user