Nomad changes
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s

This commit is contained in:
Henrik Jess Nielsen
2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions

373
e2e/zig/build.zig generated Normal file
View File

@@ -0,0 +1,373 @@
const std = @import("std");
const builtin = @import("builtin");
/// Configures the end-to-end test build.
///
/// Declares the shared `kreuzberg` module (linked against the `kreuzberg_ffi`
/// system library) and then, for every entry in `test_categories`, a test
/// executable rooted at `src/<category>_test.zig`. Each test executable is run
/// with `../../test_documents` as its working directory and attached to the
/// `test` step.
///
/// Build options:
///   -Dffi_path=...          library search directory (default "../../target/release")
///   -Dffi_include_path=...  header directory (default "../../crates/kreuzberg-ffi/include")
pub fn build(b: *std.Build) void {
    const target = b.standardTargetOptions(.{});
    const optimize = b.standardOptimizeOption(.{});
    const test_step = b.step("test", "Run tests");
    const ffi_path = b.option([]const u8, "ffi_path", "Path to directory containing libkreuzberg_ffi") orelse "../../target/release";
    const ffi_include = b.option([]const u8, "ffi_include_path", "Path to directory containing FFI header") orelse "../../crates/kreuzberg-ffi/include";

    const kreuzberg_module = b.addModule("kreuzberg", .{
        .root_source_file = b.path("../../packages/zig/src/kreuzberg.zig"),
        .target = target,
        .optimize = optimize,
        .link_libc = true,
    });
    kreuzberg_module.addLibraryPath(.{ .cwd_relative = ffi_path });
    kreuzberg_module.addIncludePath(.{ .cwd_relative = ffi_include });
    kreuzberg_module.linkSystemLibrary("kreuzberg_ffi", .{});

    // One entry per test file; order matches the order in which the run steps
    // are attached to `test_step`.
    const test_categories = [_][]const u8{
        "async",
        "batch",
        "code",
        "contract",
        "detection",
        "document_extractor_management",
        "embed_async_pending",
        "embed_extra",
        "embedding_backend_management",
        "embeddings",
        "error",
        "format_specific",
        "mime_utilities",
        "ocr_backend_management",
        "pdf",
        "plugin_api",
        "post_processor_management",
        "registry",
        "registry_operations",
        "renderer_management",
        "smoke",
        "validator_management",
    };

    for (test_categories) |category| {
        // Strings produced by b.fmt are allocated by the builder, so they may
        // be stored in the build graph.
        const category_module = b.createModule(.{
            .root_source_file = b.path(b.fmt("src/{s}_test.zig", .{category})),
            .target = target,
            .optimize = optimize,
            .link_libc = true,
        });
        category_module.addImport("kreuzberg", kreuzberg_module);

        const category_tests = b.addTest(.{
            .name = b.fmt("{s}_test", .{category}),
            .root_module = category_module,
            .use_llvm = true,
        });

        // Fixture paths inside the tests are relative to test_documents.
        const category_run = b.addRunArtifact(category_tests);
        category_run.setCwd(b.path("../../test_documents"));
        test_step.dependOn(&category_run.step);
    }
}

16
e2e/zig/build.zig.zon generated Normal file
View File

@@ -0,0 +1,16 @@
// Package manifest for the Zig end-to-end test suite (e2e/zig).
.{
.name = .e2e_zig,
.version = "0.1.0",
.fingerprint = 0xf16334c0592376fc,
.minimum_zig_version = "0.16.0",
// Local path dependency on the Zig binding package.
// NOTE(review): build.zig in this directory wires the binding through
// b.addModule with a direct source path instead of b.dependency("kreuzberg", ...);
// confirm this entry is still required.
.dependencies = .{
.kreuzberg = .{
.path = "../../packages/zig",
},
},
// Files and directories that belong to this package.
.paths = .{
"build.zig",
"build.zig.zon",
"src",
},
}

63
e2e/zig/src/async_test.zig generated Normal file
View File

@@ -0,0 +1,63 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
const std = @import("std");
const testing = std.testing;
const kreuzberg = @import("kreuzberg");
// Work around C++ global destructor aborts that break zig's --listen=- IPC.
// `signal` is the libc function; the handler is passed as a raw integer.
extern "c" fn signal(sig: i32, handler: usize) usize;
// Set once the SIGABRT disposition has been changed, so later calls are no-ops.
var _abort_handler_installed: bool = false;
fn suppress_abort() void {
    if (_abort_handler_installed) return;
    _abort_handler_installed = true;
    // 6 is SIGABRT on POSIX and 1 is SIG_IGN: ignore abort signals.
    _ = signal(6, 1);
}
// E2e tests for category: async
test "async_extract_bytes" {
    // Loads pdf/fake_memo.pdf, extracts it from memory as application/pdf and
    // checks the reported MIME type plus a minimum content length of 50 bytes.
    // NOTE(review): the fixture is named "async" but calls extract_bytes_sync.
    suppress_abort();
    var debug_alloc: std.heap.DebugAllocator(.{}) = .init;
    defer _ = debug_alloc.deinit();
    const json_alloc = debug_alloc.allocator();
    const c_alloc = std.heap.c_allocator;

    const pdf_bytes = try std.Io.Dir.cwd().readFileAlloc(std.testing.io, "pdf/fake_memo.pdf", c_alloc, .unlimited);
    defer c_alloc.free(pdf_bytes);

    const raw_json = try kreuzberg.extract_bytes_sync(pdf_bytes, "application/pdf", "{}");
    defer c_alloc.free(raw_json);

    var parsed = try std.json.parseFromSlice(std.json.Value, json_alloc, raw_json, .{});
    defer parsed.deinit();
    const root = &parsed.value;

    const mime = std.mem.trim(u8, root.object.get("mime_type").?.string, " \n\r\t");
    try testing.expectEqualStrings("application/pdf", mime);
    const content = root.object.get("content").?.string;
    try testing.expect(content.len >= 50);
}
test "async_extract_bytes_empty_mime" {
    // Extracting bytes with an empty MIME type is expected to return an error.
    // A successful extraction now fails the test instead of passing silently,
    // and the unexpected result buffer is released instead of leaked.
    suppress_abort();
    const content_bytes = try std.Io.Dir.cwd().readFileAlloc(std.testing.io, "text/plain.txt", std.heap.c_allocator, .unlimited);
    defer std.heap.c_allocator.free(content_bytes);
    if (kreuzberg.extract_bytes_sync(content_bytes, "", "{}")) |unexpected_json| {
        // Freed with the same allocator the other tests in this file use for
        // extract_bytes_sync results.
        std.heap.c_allocator.free(unexpected_json);
        return error.TestExpectedError;
    } else |_| {
        // Any error satisfies the fixture; it does not name a specific one.
    }
}
test "async_extract_bytes_invalid_mime" {
    // Extracting bytes with an unsupported MIME type is expected to return an
    // error. A successful extraction now fails the test instead of passing
    // silently, and the unexpected result buffer is released instead of leaked.
    suppress_abort();
    const content_bytes = try std.Io.Dir.cwd().readFileAlloc(std.testing.io, "text/plain.txt", std.heap.c_allocator, .unlimited);
    defer std.heap.c_allocator.free(content_bytes);
    if (kreuzberg.extract_bytes_sync(content_bytes, "application/x-nonexistent", "{}")) |unexpected_json| {
        // Freed with the same allocator the other tests in this file use for
        // extract_bytes_sync results.
        std.heap.c_allocator.free(unexpected_json);
        return error.TestExpectedError;
    } else |_| {
        // Any error satisfies the fixture; it does not name a specific one.
    }
}

87
e2e/zig/src/batch_test.zig generated Normal file
View File

@@ -0,0 +1,87 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
const std = @import("std");
const testing = std.testing;
const kreuzberg = @import("kreuzberg");
// Work around C++ global destructor aborts that break zig's --listen=- IPC.
// `signal` is the libc function; the handler is passed as a raw integer.
extern "c" fn signal(sig: i32, handler: usize) usize;
// Set once the SIGABRT disposition has been changed, so later calls are no-ops.
var _abort_handler_installed: bool = false;
fn suppress_abort() void {
    if (_abort_handler_installed) return;
    _abort_handler_installed = true;
    // 6 is SIGABRT on POSIX and 1 is SIG_IGN: ignore abort signals.
    _ = signal(6, 1);
}
// E2e tests for category: batch
test "batch_bytes_invalid_mime" {
    // Submits one item (the bytes of "Hello") tagged with an unsupported MIME
    // type; the batch call itself must not return an error.
    // NOTE(review): the returned buffer is discarded without being freed;
    // confirm who owns it.
    suppress_abort();
    const items_json = "[{\"content\":[72,101,108,108,111],\"mime_type\":\"application/x-nonexistent\"}]";
    const config_json = "{}";
    _ = try kreuzberg.batch_extract_bytes_sync(items_json, config_json);
}
test "batch_extract_bytes_happy" {
    // Two items: "Hello, world!" as text/plain and
    // "<html><body>Test</body></html>" as text/html. Only checks that the
    // returned buffer is non-empty.
    suppress_abort();
    const items_json = "[{\"content\":[72,101,108,108,111,44,32,119,111,114,108,100,33],\"mime_type\":\"text/plain\"},{\"content\":[60,104,116,109,108,62,60,98,111,100,121,62,84,101,115,116,60,47,98,111,100,121,62,60,47,104,116,109,108,62],\"mime_type\":\"text/html\"}]";
    const config_json = "{}";
    const batch_json = try kreuzberg.batch_extract_bytes_sync(items_json, config_json);
    try testing.expect(batch_json.len >= 1);
}
test "batch_extract_bytes_mixed_format" {
    // Submits the bytes of "PDF placeholder" tagged application/x-unknown; the
    // batch call itself must not return an error.
    suppress_abort();
    const items_json = "[{\"content\":[80,68,70,32,112,108,97,99,101,104,111,108,100,101,114],\"mime_type\":\"application/x-unknown\"}]";
    const config_json = "{}";
    _ = try kreuzberg.batch_extract_bytes_sync(items_json, config_json);
}
test "batch_extract_bytes_sync_empty_list" {
    // An empty batch must produce a JSON array with zero elements.
    suppress_abort();
    const batch_json = try kreuzberg.batch_extract_bytes_sync("[]", "{}");
    var parsed = try std.json.parseFromSlice(std.json.Value, std.heap.c_allocator, batch_json, .{});
    defer parsed.deinit();
    const entries = parsed.value.array.items;
    try testing.expectEqual(0, entries.len);
}
test "batch_extract_bytes_sync_invalid_mime" {
    // Submits the bytes of "data" tagged application/x-unknown; the batch call
    // itself must not return an error.
    suppress_abort();
    const items_json = "[{\"content\":[100,97,116,97],\"mime_type\":\"application/x-unknown\"}]";
    const config_json = "{}";
    _ = try kreuzberg.batch_extract_bytes_sync(items_json, config_json);
}
test "batch_file_async_basic" {
    // Batch-extracts one PDF and one text fixture; only requires that the call
    // does not return an error.
    // NOTE(review): the fixture is named "async" but calls batch_extract_files_sync.
    suppress_abort();
    const files_json = "[{\"path\":\"pdf/fake_memo.pdf\"},{\"path\":\"text/fake_text.txt\"}]";
    const config_json = "{}";
    _ = try kreuzberg.batch_extract_files_sync(files_json, config_json);
}
test "batch_file_async_not_found" {
    // Batch with a single nonexistent path; the batch call itself must not
    // return an error.
    // NOTE(review): the fixture is named "async" but calls batch_extract_files_sync.
    suppress_abort();
    const files_json = "[{\"path\":\"/nonexistent/a.pdf\"}]";
    const config_json = "{}";
    _ = try kreuzberg.batch_extract_files_sync(files_json, config_json);
}
test "batch_file_not_found" {
    // Batch with two nonexistent paths; the batch call itself must not return
    // an error.
    suppress_abort();
    const files_json = "[{\"path\":\"/nonexistent/a.pdf\"},{\"path\":\"/nonexistent/b.txt\"}]";
    const config_json = "{}";
    _ = try kreuzberg.batch_extract_files_sync(files_json, config_json);
}
test "batch_file_partial" {
    // Batch mixing an existing text fixture with a nonexistent path; the batch
    // call itself must not return an error.
    suppress_abort();
    const files_json = "[{\"path\":\"text/plain.txt\"},{\"path\":\"/nonexistent/missing.pdf\"}]";
    const config_json = "{}";
    _ = try kreuzberg.batch_extract_files_sync(files_json, config_json);
}
test "batch_file_sync_basic" {
    // Batch-extracts one PDF and one text fixture synchronously; only requires
    // that the call does not return an error.
    suppress_abort();
    const files_json = "[{\"path\":\"pdf/fake_memo.pdf\"},{\"path\":\"text/fake_text.txt\"}]";
    const config_json = "{}";
    _ = try kreuzberg.batch_extract_files_sync(files_json, config_json);
}

49
e2e/zig/src/code_test.zig generated Normal file
View File

@@ -0,0 +1,49 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
const std = @import("std");
const testing = std.testing;
const kreuzberg = @import("kreuzberg");
// Work around C++ global destructor aborts that break zig's --listen=- IPC.
// `signal` is the libc function; the handler is passed as a raw integer.
extern "c" fn signal(sig: i32, handler: usize) usize;
// Set once the SIGABRT disposition has been changed, so later calls are no-ops.
var _abort_handler_installed: bool = false;
fn suppress_abort() void {
    if (_abort_handler_installed) return;
    _abort_handler_installed = true;
    // 6 is SIGABRT on POSIX and 1 is SIG_IGN: ignore abort signals.
    _ = signal(6, 1);
}
// E2e tests for category: code
test "code_shebang_detection" {
    // Extracts code/script.sh as text/x-source-code and checks the MIME type,
    // a minimum content length, and that the content mentions both "build"
    // and "clean".
    // NOTE(review): the fixture description mentions bytes input, but this
    // body calls extract_file_sync.
    suppress_abort();
    var debug_alloc: std.heap.DebugAllocator(.{}) = .init;
    defer _ = debug_alloc.deinit();
    const json_alloc = debug_alloc.allocator();

    const raw_json = try kreuzberg.extract_file_sync("code/script.sh", "text/x-source-code", "{}");
    defer std.heap.c_allocator.free(raw_json);

    var parsed = try std.json.parseFromSlice(std.json.Value, json_alloc, raw_json, .{});
    defer parsed.deinit();
    const root = &parsed.value;

    const mime = std.mem.trim(u8, root.object.get("mime_type").?.string, " \n\r\t");
    try testing.expectEqualStrings("text/x-source-code", mime);
    try testing.expect(root.object.get("content").?.string.len >= 10);

    for ([_][]const u8{ "build", "clean" }) |needle| {
        const content_val = root.object.get("content").?;
        // Non-string values are serialized so they can still be searched; the
        // temporary is released at the end of each iteration.
        const haystack = if (content_val == .string) content_val.string else try std.json.Stringify.valueAlloc(std.heap.c_allocator, content_val, .{});
        defer if (content_val != .string) std.heap.c_allocator.free(haystack);
        try testing.expect(std.mem.indexOf(u8, haystack, needle) != null);
    }
}

346
e2e/zig/src/contract_test.zig generated Normal file
View File

@@ -0,0 +1,346 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
const std = @import("std");
const testing = std.testing;
const kreuzberg = @import("kreuzberg");
// Work around C++ global destructor aborts that break zig's --listen=- IPC.
// `signal` is the libc function; the handler is passed as a raw integer.
extern "c" fn signal(sig: i32, handler: usize) usize;
// Set once the SIGABRT disposition has been changed, so later calls are no-ops.
var _abort_handler_installed: bool = false;
fn suppress_abort() void {
    if (_abort_handler_installed) return;
    _abort_handler_installed = true;
    // 6 is SIGABRT on POSIX and 1 is SIG_IGN: ignore abort signals.
    _ = signal(6, 1);
}
// E2e tests for category: contract
test "api_batch_bytes_async" {
    // Extracts pdf/fake_memo.pdf with default config and checks MIME type,
    // minimum content length, and that the content mentions either
    // "May 5, 2023" or "Mallori".
    // NOTE(review): the fixture is named for async batch bytes extraction, but
    // this body exercises extract_file_sync; confirm the generator mapping.
    suppress_abort();
    var debug_alloc: std.heap.DebugAllocator(.{}) = .init;
    defer _ = debug_alloc.deinit();
    const json_alloc = debug_alloc.allocator();

    const raw_json = try kreuzberg.extract_file_sync("pdf/fake_memo.pdf", null, "{}");
    defer std.heap.c_allocator.free(raw_json);

    var parsed = try std.json.parseFromSlice(std.json.Value, json_alloc, raw_json, .{});
    defer parsed.deinit();
    const root = &parsed.value;

    const mime = std.mem.trim(u8, root.object.get("mime_type").?.string, " \n\r\t");
    try testing.expectEqualStrings("application/pdf", mime);
    const content = root.object.get("content").?.string;
    try testing.expect(content.len >= 10);
    const has_date = std.mem.indexOf(u8, content, "May 5, 2023") != null;
    const has_name = std.mem.indexOf(u8, content, "Mallori") != null;
    try testing.expect(has_date or has_name);
}
test "api_batch_bytes_with_configs_async" {
    // Extracts pdf/fake_memo.pdf with output_format=markdown and checks MIME
    // type and minimum content length.
    // NOTE(review): the fixture is named for async batch bytes extraction with
    // per-file configs, but this body exercises extract_file_sync.
    suppress_abort();
    var debug_alloc: std.heap.DebugAllocator(.{}) = .init;
    defer _ = debug_alloc.deinit();
    const json_alloc = debug_alloc.allocator();

    const raw_json = try kreuzberg.extract_file_sync("pdf/fake_memo.pdf", null, "{\"output_format\":\"markdown\"}");
    defer std.heap.c_allocator.free(raw_json);

    var parsed = try std.json.parseFromSlice(std.json.Value, json_alloc, raw_json, .{});
    defer parsed.deinit();
    const root = &parsed.value;

    const mime = std.mem.trim(u8, root.object.get("mime_type").?.string, " \n\r\t");
    try testing.expectEqualStrings("application/pdf", mime);
    const content = root.object.get("content").?.string;
    try testing.expect(content.len >= 10);
    // Not asserted: 'metadata.output_format' (generator reports the field as
    // unavailable on the result type).
}
test "api_batch_file_async" {
    // Extracts pdf/fake_memo.pdf with default config and checks MIME type,
    // minimum content length, and that the content mentions either
    // "May 5, 2023" or "Mallori".
    // NOTE(review): the fixture is named for async batch file extraction, but
    // this body exercises extract_file_sync; confirm the generator mapping.
    suppress_abort();
    var debug_alloc: std.heap.DebugAllocator(.{}) = .init;
    defer _ = debug_alloc.deinit();
    const json_alloc = debug_alloc.allocator();

    const raw_json = try kreuzberg.extract_file_sync("pdf/fake_memo.pdf", null, "{}");
    defer std.heap.c_allocator.free(raw_json);

    var parsed = try std.json.parseFromSlice(std.json.Value, json_alloc, raw_json, .{});
    defer parsed.deinit();
    const root = &parsed.value;

    const mime = std.mem.trim(u8, root.object.get("mime_type").?.string, " \n\r\t");
    try testing.expectEqualStrings("application/pdf", mime);
    const content = root.object.get("content").?.string;
    try testing.expect(content.len >= 10);
    const has_date = std.mem.indexOf(u8, content, "May 5, 2023") != null;
    const has_name = std.mem.indexOf(u8, content, "Mallori") != null;
    try testing.expect(has_date or has_name);
}
test "api_batch_file_with_configs_async" {
    // Extracts pdf/fake_memo.pdf with output_format=markdown and checks MIME
    // type and minimum content length.
    // NOTE(review): the fixture is named for async batch file extraction with
    // per-file configs, but this body exercises extract_file_sync.
    suppress_abort();
    var debug_alloc: std.heap.DebugAllocator(.{}) = .init;
    defer _ = debug_alloc.deinit();
    const json_alloc = debug_alloc.allocator();

    const raw_json = try kreuzberg.extract_file_sync("pdf/fake_memo.pdf", null, "{\"output_format\":\"markdown\"}");
    defer std.heap.c_allocator.free(raw_json);

    var parsed = try std.json.parseFromSlice(std.json.Value, json_alloc, raw_json, .{});
    defer parsed.deinit();
    const root = &parsed.value;

    const mime = std.mem.trim(u8, root.object.get("mime_type").?.string, " \n\r\t");
    try testing.expectEqualStrings("application/pdf", mime);
    const content = root.object.get("content").?.string;
    try testing.expect(content.len >= 10);
    // Not asserted: 'metadata.output_format' (generator reports the field as
    // unavailable on the result type).
}
test "api_extract_bytes_async" {
    // Extracts pdf/fake_memo.pdf with default config and checks MIME type,
    // minimum content length, and that the content mentions either
    // "May 5, 2023" or "Mallori".
    // NOTE(review): the fixture is named for async bytes extraction, but this
    // body exercises extract_file_sync; confirm the generator mapping.
    suppress_abort();
    var debug_alloc: std.heap.DebugAllocator(.{}) = .init;
    defer _ = debug_alloc.deinit();
    const json_alloc = debug_alloc.allocator();

    const raw_json = try kreuzberg.extract_file_sync("pdf/fake_memo.pdf", null, "{}");
    defer std.heap.c_allocator.free(raw_json);

    var parsed = try std.json.parseFromSlice(std.json.Value, json_alloc, raw_json, .{});
    defer parsed.deinit();
    const root = &parsed.value;

    const mime = std.mem.trim(u8, root.object.get("mime_type").?.string, " \n\r\t");
    try testing.expectEqualStrings("application/pdf", mime);
    const content = root.object.get("content").?.string;
    try testing.expect(content.len >= 10);
    const has_date = std.mem.indexOf(u8, content, "May 5, 2023") != null;
    const has_name = std.mem.indexOf(u8, content, "Mallori") != null;
    try testing.expect(has_date or has_name);
}
test "api_extract_file_async" {
    // Extracts pdf/fake_memo.pdf with default config and checks MIME type,
    // minimum content length, and that the content mentions either
    // "May 5, 2023" or "Mallori".
    // NOTE(review): the fixture is named "async" but calls extract_file_sync.
    suppress_abort();
    var debug_alloc: std.heap.DebugAllocator(.{}) = .init;
    defer _ = debug_alloc.deinit();
    const json_alloc = debug_alloc.allocator();

    const raw_json = try kreuzberg.extract_file_sync("pdf/fake_memo.pdf", null, "{}");
    defer std.heap.c_allocator.free(raw_json);

    var parsed = try std.json.parseFromSlice(std.json.Value, json_alloc, raw_json, .{});
    defer parsed.deinit();
    const root = &parsed.value;

    const mime = std.mem.trim(u8, root.object.get("mime_type").?.string, " \n\r\t");
    try testing.expectEqualStrings("application/pdf", mime);
    const content = root.object.get("content").?.string;
    try testing.expect(content.len >= 10);
    const has_date = std.mem.indexOf(u8, content, "May 5, 2023") != null;
    const has_name = std.mem.indexOf(u8, content, "Mallori") != null;
    try testing.expect(has_date or has_name);
}
test "config_chunking_prepend_heading_context" {
    // Extracts markdown/extraction_test.md with the markdown chunker
    // (max_chars 300, max_overlap 50, prepend_heading_context enabled) and
    // checks that "chunks" is a non-empty array whose entries are objects with
    // a non-empty string "content".
    suppress_abort();
    var debug_alloc: std.heap.DebugAllocator(.{}) = .init;
    defer _ = debug_alloc.deinit();
    const json_alloc = debug_alloc.allocator();

    const raw_json = try kreuzberg.extract_file_sync("markdown/extraction_test.md", null, "{\"chunking\":{\"chunker_type\":\"markdown\",\"max_chars\":300,\"max_overlap\":50,\"prepend_heading_context\":true}}");
    defer std.heap.c_allocator.free(raw_json);

    var parsed = try std.json.parseFromSlice(std.json.Value, json_alloc, raw_json, .{});
    defer parsed.deinit();
    const root = &parsed.value;

    try testing.expect(root.object.get("content").?.string.len >= 10);

    const chunks_ok = blk: {
        const chunks_val = root.object.get("chunks") orelse break :blk false;
        if (chunks_val != .array) break :blk false;
        const chunk_items = chunks_val.array.items;
        if (chunk_items.len == 0) break :blk false;
        for (chunk_items) |chunk| {
            if (chunk != .object) break :blk false;
            const text = chunk.object.get("content") orelse break :blk false;
            if (text != .string or text.string.len == 0) break :blk false;
        }
        break :blk true;
    };
    try testing.expect(chunks_ok);
    // Not asserted: synthetic fields 'chunks_have_heading_context' and
    // 'first_chunk_starts_with_heading' (generator reports them as not
    // derivable from the JSON value alone).
}
test "config_document_structure_with_headings" {
    // Extracts docx/fake.docx with include_document_structure enabled and
    // checks only the reported MIME type.
    suppress_abort();
    var debug_alloc: std.heap.DebugAllocator(.{}) = .init;
    defer _ = debug_alloc.deinit();
    const json_alloc = debug_alloc.allocator();

    const raw_json = try kreuzberg.extract_file_sync("docx/fake.docx", null, "{\"include_document_structure\":true}");
    defer std.heap.c_allocator.free(raw_json);

    var parsed = try std.json.parseFromSlice(std.json.Value, json_alloc, raw_json, .{});
    defer parsed.deinit();
    const root = &parsed.value;

    const mime = std.mem.trim(u8, root.object.get("mime_type").?.string, " \n\r\t");
    try testing.expectEqualStrings("application/vnd.openxmlformats-officedocument.wordprocessingml.document", mime);
    // Not asserted: 'document' and 'document.nodes' (generator reports the
    // fields as unavailable on the result type).
}
test "config_element_types" {
    // Extracts docx/unit_test_headers.docx with result_format=element_based
    // and checks that the reported MIME type contains the DOCX MIME string.
    suppress_abort();
    var debug_alloc: std.heap.DebugAllocator(.{}) = .init;
    defer _ = debug_alloc.deinit();
    const json_alloc = debug_alloc.allocator();

    const raw_json = try kreuzberg.extract_file_sync("docx/unit_test_headers.docx", null, "{\"result_format\":\"element_based\"}");
    defer std.heap.c_allocator.free(raw_json);

    var parsed = try std.json.parseFromSlice(std.json.Value, json_alloc, raw_json, .{});
    defer parsed.deinit();
    const root = &parsed.value;

    const mime = root.object.get("mime_type").?.string;
    const mentions_docx = std.mem.indexOf(u8, mime, "application/vnd.openxmlformats-officedocument.wordprocessingml.document") != null;
    try testing.expect(mentions_docx);
    // Not asserted: 'elements' (generator reports the field as unavailable on
    // the result type).
}
test "config_extraction_timeout" {
    // Extracts pdf/fake_memo.pdf with extraction_timeout_secs=300 and checks
    // that the config is accepted: MIME type and minimum content length.
    suppress_abort();
    var debug_alloc: std.heap.DebugAllocator(.{}) = .init;
    defer _ = debug_alloc.deinit();
    const json_alloc = debug_alloc.allocator();

    const raw_json = try kreuzberg.extract_file_sync("pdf/fake_memo.pdf", null, "{\"extraction_timeout_secs\":300}");
    defer std.heap.c_allocator.free(raw_json);

    var parsed = try std.json.parseFromSlice(std.json.Value, json_alloc, raw_json, .{});
    defer parsed.deinit();
    const root = &parsed.value;

    const mime = std.mem.trim(u8, root.object.get("mime_type").?.string, " \n\r\t");
    try testing.expectEqualStrings("application/pdf", mime);
    const content = root.object.get("content").?.string;
    try testing.expect(content.len >= 10);
}
test "config_keywords" {
    // Extracts pdf/fake_memo.pdf with keyword extraction configured
    // (algorithm "yake", max_keywords 10) and checks MIME type and minimum
    // content length.
    suppress_abort();
    var debug_alloc: std.heap.DebugAllocator(.{}) = .init;
    defer _ = debug_alloc.deinit();
    const json_alloc = debug_alloc.allocator();

    const raw_json = try kreuzberg.extract_file_sync("pdf/fake_memo.pdf", null, "{\"keywords\":{\"algorithm\":\"yake\",\"max_keywords\":10}}");
    defer std.heap.c_allocator.free(raw_json);

    var parsed = try std.json.parseFromSlice(std.json.Value, json_alloc, raw_json, .{});
    defer parsed.deinit();
    const root = &parsed.value;

    const mime = std.mem.trim(u8, root.object.get("mime_type").?.string, " \n\r\t");
    try testing.expectEqualStrings("application/pdf", mime);
    const content = root.object.get("content").?.string;
    try testing.expect(content.len >= 10);
    // Not asserted: 'keywords' (generator reports the field as unavailable on
    // the JSON-struct ExtractionResult).
}
test "config_pages" {
    // Extracts pdf/fake_memo.pdf with page extraction and page markers enabled
    // and checks MIME type, minimum content length, and that the content
    // contains "PAGE".
    suppress_abort();
    var debug_alloc: std.heap.DebugAllocator(.{}) = .init;
    defer _ = debug_alloc.deinit();
    const json_alloc = debug_alloc.allocator();

    const raw_json = try kreuzberg.extract_file_sync("pdf/fake_memo.pdf", null, "{\"pages\":{\"extract_pages\":true,\"insert_page_markers\":true}}");
    defer std.heap.c_allocator.free(raw_json);

    var parsed = try std.json.parseFromSlice(std.json.Value, json_alloc, raw_json, .{});
    defer parsed.deinit();
    const root = &parsed.value;

    const mime = std.mem.trim(u8, root.object.get("mime_type").?.string, " \n\r\t");
    try testing.expectEqualStrings("application/pdf", mime);
    const content = root.object.get("content").?.string;
    try testing.expect(content.len >= 10);
    const has_marker = std.mem.indexOf(u8, content, "PAGE") != null;
    try testing.expect(has_marker);
}
test "config_quality_enabled" {
    // Extracts a PDF with quality processing enabled and checks the MIME type
    // and content; the quality score itself is not inspected here.
    suppress_abort();
    // testing.allocator turns a leaked JSON tree into a test failure.
    const allocator = testing.allocator;
    const result_json = try kreuzberg.extract_file_sync("pdf/fake_memo.pdf", null, "{\"enable_quality_processing\":true}");
    defer std.heap.c_allocator.free(result_json);
    const parsed = try std.json.parseFromSlice(std.json.Value, allocator, result_json, .{});
    defer parsed.deinit();

    // Fail the test (instead of panicking) when the payload has an unexpected shape.
    const root = parsed.value;
    if (root != .object) return error.TestUnexpectedResult;
    const mime_type = root.object.get("mime_type") orelse return error.TestUnexpectedResult;
    const content = root.object.get("content") orelse return error.TestUnexpectedResult;
    if (mime_type != .string or content != .string) return error.TestUnexpectedResult;

    try testing.expectEqualStrings("application/pdf", std.mem.trim(u8, mime_type.string, " \n\r\t"));
    try testing.expect(content.string.len >= 10);
    // skipped: field 'quality_score' not available on result type
}
test "config_security_limits" {
    // Extracts a ZIP archive with custom security limits and checks that the
    // result reports a ZIP MIME type and non-trivial content.
    suppress_abort();
    // testing.allocator turns a leaked JSON tree into a test failure.
    const allocator = testing.allocator;
    const result_json = try kreuzberg.extract_file_sync("archives/documents.zip", null, "{\"security_limits\":{\"max_archive_size\":104857600,\"max_compression_ratio\":50,\"max_files_in_archive\":100}}");
    defer std.heap.c_allocator.free(result_json);
    const parsed = try std.json.parseFromSlice(std.json.Value, allocator, result_json, .{});
    defer parsed.deinit();

    // Fail the test (instead of panicking) when the payload has an unexpected shape.
    const root = parsed.value;
    if (root != .object) return error.TestUnexpectedResult;
    const mime_type = root.object.get("mime_type") orelse return error.TestUnexpectedResult;
    const content = root.object.get("content") orelse return error.TestUnexpectedResult;
    if (mime_type != .string or content != .string) return error.TestUnexpectedResult;

    // Either of the two ZIP MIME spellings is accepted.
    const mime = mime_type.string;
    try testing.expect(
        std.mem.indexOf(u8, mime, "application/zip") != null or
            std.mem.indexOf(u8, mime, "application/x-zip-compressed") != null,
    );
    try testing.expect(content.string.len >= 10);
}
test "config_tree_sitter" {
    // Extracts a Python source file with a tree-sitter configuration and checks
    // the reported MIME type and that some content was produced.
    suppress_abort();
    // testing.allocator turns a leaked JSON tree into a test failure.
    const allocator = testing.allocator;
    const result_json = try kreuzberg.extract_file_sync("code/hello.py", null, "{\"tree_sitter\":{\"groups\":[\"web\"],\"languages\":[\"python\",\"rust\"],\"process\":{\"comments\":false,\"diagnostics\":false,\"docstrings\":false,\"exports\":true,\"imports\":true,\"structure\":true,\"symbols\":false}}}");
    defer std.heap.c_allocator.free(result_json);
    const parsed = try std.json.parseFromSlice(std.json.Value, allocator, result_json, .{});
    defer parsed.deinit();

    // Fail the test (instead of panicking) when the payload has an unexpected shape.
    const root = parsed.value;
    if (root != .object) return error.TestUnexpectedResult;
    const mime_type = root.object.get("mime_type") orelse return error.TestUnexpectedResult;
    const content = root.object.get("content") orelse return error.TestUnexpectedResult;
    if (mime_type != .string or content != .string) return error.TestUnexpectedResult;

    try testing.expectEqualStrings("text/x-source-code", std.mem.trim(u8, mime_type.string, " \n\r\t"));
    try testing.expect(content.string.len >= 5);
}
test "output_format_bytes_markdown" {
    // Reads a PDF fixture into memory, extracts it through the bytes API with
    // markdown output requested, and checks the MIME type and content.
    suppress_abort();
    // testing.allocator turns a leaked JSON tree into a test failure.
    const allocator = testing.allocator;
    const content_bytes = try std.Io.Dir.cwd().readFileAlloc(std.testing.io, "pdf/fake_memo.pdf", std.heap.c_allocator, .unlimited);
    defer std.heap.c_allocator.free(content_bytes);
    const result_json = try kreuzberg.extract_bytes_sync(content_bytes, "application/pdf", "{\"output_format\":\"markdown\"}");
    defer std.heap.c_allocator.free(result_json);
    const parsed = try std.json.parseFromSlice(std.json.Value, allocator, result_json, .{});
    defer parsed.deinit();

    // Fail the test (instead of panicking) when the payload has an unexpected shape.
    const root = parsed.value;
    if (root != .object) return error.TestUnexpectedResult;
    const mime_type = root.object.get("mime_type") orelse return error.TestUnexpectedResult;
    const content = root.object.get("content") orelse return error.TestUnexpectedResult;
    if (mime_type != .string or content != .string) return error.TestUnexpectedResult;

    try testing.expectEqualStrings("application/pdf", std.mem.trim(u8, mime_type.string, " \n\r\t"));
    try testing.expect(content.string.len >= 10);
    // skipped: field 'metadata.output_format' not available on result type
}
test "output_format_markdown" {
    // Extracts a PDF from disk with markdown output requested and checks the
    // MIME type and content of the result.
    suppress_abort();
    // testing.allocator turns a leaked JSON tree into a test failure.
    const allocator = testing.allocator;
    const result_json = try kreuzberg.extract_file_sync("pdf/fake_memo.pdf", null, "{\"output_format\":\"markdown\"}");
    defer std.heap.c_allocator.free(result_json);
    const parsed = try std.json.parseFromSlice(std.json.Value, allocator, result_json, .{});
    defer parsed.deinit();

    // Fail the test (instead of panicking) when the payload has an unexpected shape.
    const root = parsed.value;
    if (root != .object) return error.TestUnexpectedResult;
    const mime_type = root.object.get("mime_type") orelse return error.TestUnexpectedResult;
    const content = root.object.get("content") orelse return error.TestUnexpectedResult;
    if (mime_type != .string or content != .string) return error.TestUnexpectedResult;

    try testing.expectEqualStrings("application/pdf", std.mem.trim(u8, mime_type.string, " \n\r\t"));
    try testing.expect(content.string.len >= 10);
    // skipped: field 'metadata.output_format' not available on result type
}

55
e2e/zig/src/detection_test.zig generated Normal file
View File

@@ -0,0 +1,55 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
const std = @import("std");
const testing = std.testing;
const kreuzberg = @import("kreuzberg");
// Ignore SIGABRT so aborts raised by C++ global destructors do not break
// zig's --listen=- IPC.
extern "c" fn signal(sig: i32, handler: usize) usize;
var _abort_handler_installed: bool = false;
fn suppress_abort() void {
    // Only the first call installs the handler; later calls return immediately.
    if (_abort_handler_installed) return;
    // SIGABRT = 6 on POSIX; SIG_IGN = 1
    const sigabrt: i32 = 6;
    const sig_ign: usize = 1;
    _ = signal(sigabrt, sig_ign);
    _abort_handler_installed = true;
}
// E2e tests for category: detection
test "detect_mime_bytes_html" {
    // Runs byte-based MIME detection on an HTML fixture; only the call
    // succeeding is checked.
    suppress_abort();
    const html_bytes = try std.Io.Dir.cwd().readFileAlloc(std.testing.io, "html/html.html", std.heap.c_allocator, .unlimited);
    defer std.heap.c_allocator.free(html_bytes);
    const detected = try kreuzberg.detect_mime_type_from_bytes(html_bytes);
    _ = detected;
}
test "detect_mime_bytes_pdf" {
    // Runs byte-based MIME detection on a PDF fixture; only the call
    // succeeding is checked.
    suppress_abort();
    const pdf_bytes = try std.Io.Dir.cwd().readFileAlloc(std.testing.io, "pdf/fake_memo.pdf", std.heap.c_allocator, .unlimited);
    defer std.heap.c_allocator.free(pdf_bytes);
    const detected = try kreuzberg.detect_mime_type_from_bytes(pdf_bytes);
    _ = detected;
}
test "detect_mime_bytes_png" {
    // Runs byte-based MIME detection on a PNG fixture; only the call
    // succeeding is checked.
    suppress_abort();
    const png_bytes = try std.Io.Dir.cwd().readFileAlloc(std.testing.io, "images/test_hello_world.png", std.heap.c_allocator, .unlimited);
    defer std.heap.c_allocator.free(png_bytes);
    const detected = try kreuzberg.detect_mime_type_from_bytes(png_bytes);
    _ = detected;
}
test "get_extensions_unknown_mime" {
    // Looks up the extensions of a MIME type that is not expected to be known.
    // NOTE(review): a successful lookup is tolerated as well; confirm whether
    // an error should be mandatory here.
    suppress_abort();
    if (kreuzberg.get_extensions_for_mime("application/x-totally-unknown")) |_| {
        // Success path: the value is intentionally ignored.
    } else |_| {
        try testing.expect(true); // Error occurred as expected
    }
}

View File

@@ -0,0 +1,33 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
const std = @import("std");
const testing = std.testing;
const kreuzberg = @import("kreuzberg");
// Ignore SIGABRT so aborts raised by C++ global destructors do not break
// zig's --listen=- IPC.
extern "c" fn signal(sig: i32, handler: usize) usize;
var _abort_handler_installed: bool = false;
fn suppress_abort() void {
    // Only the first call installs the handler; later calls return immediately.
    if (_abort_handler_installed) return;
    // SIGABRT = 6 on POSIX; SIG_IGN = 1
    const sigabrt: i32 = 6;
    const sig_ign: usize = 1;
    _ = signal(sigabrt, sig_ign);
    _abort_handler_installed = true;
}
// E2e tests for category: document_extractor_management
test "document_extractors_clear" {
    // Clears all document extractors; only the call succeeding is checked.
    suppress_abort();
    const cleared = try kreuzberg.clear_document_extractors();
    _ = cleared;
}
test "extractors_list" {
    // Lists the registered document extractors; only the call succeeding is checked.
    suppress_abort();
    const extractors = try kreuzberg.list_document_extractors();
    _ = extractors;
}

39
e2e/zig/src/embed_async_pending_test.zig generated Normal file
View File

@@ -0,0 +1,39 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
const std = @import("std");
const testing = std.testing;
const kreuzberg = @import("kreuzberg");
// Ignore SIGABRT so aborts raised by C++ global destructors do not break
// zig's --listen=- IPC.
extern "c" fn signal(sig: i32, handler: usize) usize;
var _abort_handler_installed: bool = false;
fn suppress_abort() void {
    // Only the first call installs the handler; later calls return immediately.
    if (_abort_handler_installed) return;
    // SIGABRT = 6 on POSIX; SIG_IGN = 1
    const sigabrt: i32 = 6;
    const sig_ign: usize = 1;
    _ = signal(sigabrt, sig_ign);
    _abort_handler_installed = true;
}
// E2e tests for category: embed_async_pending
test "embed_texts_async_empty_input" {
    // Calls embed_texts_async with an empty text list and default config;
    // only the call succeeding is checked.
    suppress_abort();
    const outcome = try kreuzberg.embed_texts_async("[]", "{}");
    _ = outcome;
}
test "embed_texts_async_happy" {
    // Calls embed_texts_async with two texts and default config; only the
    // call succeeding is checked.
    suppress_abort();
    const outcome = try kreuzberg.embed_texts_async("[\"First\",\"Second\"]", "{}");
    _ = outcome;
}
test "embed_texts_async_preset_switch" {
    // Calls embed_texts_async with the "balanced" preset selected in the
    // config; only the call succeeding is checked.
    suppress_abort();
    const outcome = try kreuzberg.embed_texts_async("[\"Text\"]", "{\"model\":{\"name\":\"balanced\",\"type\":\"preset\"}}");
    _ = outcome;
}

28
e2e/zig/src/embed_extra_test.zig generated Normal file
View File

@@ -0,0 +1,28 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
const std = @import("std");
const testing = std.testing;
const kreuzberg = @import("kreuzberg");
// Ignore SIGABRT so aborts raised by C++ global destructors do not break
// zig's --listen=- IPC.
extern "c" fn signal(sig: i32, handler: usize) usize;
var _abort_handler_installed: bool = false;
fn suppress_abort() void {
    // Only the first call installs the handler; later calls return immediately.
    if (_abort_handler_installed) return;
    // SIGABRT = 6 on POSIX; SIG_IGN = 1
    const sigabrt: i32 = 6;
    const sig_ign: usize = 1;
    _ = signal(sigabrt, sig_ign);
    _abort_handler_installed = true;
}
// E2e tests for category: embed_extra
test "embed_texts_batch" {
    // Embeds two texts with the "balanced" preset; the returned JSON buffer is
    // released but not inspected.
    suppress_abort();
    const embeddings_json = try kreuzberg.embed_texts("[\"Hello\",\"World\"]", "{\"model\":{\"name\":\"balanced\",\"type\":\"preset\"}}");
    std.heap.c_allocator.free(embeddings_json);
}

View File

@@ -0,0 +1,33 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
const std = @import("std");
const testing = std.testing;
const kreuzberg = @import("kreuzberg");
// Ignore SIGABRT so aborts raised by C++ global destructors do not break
// zig's --listen=- IPC.
extern "c" fn signal(sig: i32, handler: usize) usize;
var _abort_handler_installed: bool = false;
fn suppress_abort() void {
    // Only the first call installs the handler; later calls return immediately.
    if (_abort_handler_installed) return;
    // SIGABRT = 6 on POSIX; SIG_IGN = 1
    const sigabrt: i32 = 6;
    const sig_ign: usize = 1;
    _ = signal(sigabrt, sig_ign);
    _abort_handler_installed = true;
}
// E2e tests for category: embedding_backend_management
test "embedding_backends_clear" {
    // Clears all embedding backends; only the call succeeding is checked.
    suppress_abort();
    const cleared = try kreuzberg.clear_embedding_backends();
    _ = cleared;
}
test "embedding_backends_list" {
    // Lists the registered embedding backends; only the call succeeding is checked.
    suppress_abort();
    const backends = try kreuzberg.list_embedding_backends();
    _ = backends;
}

54
e2e/zig/src/embeddings_test.zig generated Normal file
View File

@@ -0,0 +1,54 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
const std = @import("std");
const testing = std.testing;
const kreuzberg = @import("kreuzberg");
// Ignore SIGABRT so aborts raised by C++ global destructors do not break
// zig's --listen=- IPC.
extern "c" fn signal(sig: i32, handler: usize) usize;
var _abort_handler_installed: bool = false;
fn suppress_abort() void {
    // Only the first call installs the handler; later calls return immediately.
    if (_abort_handler_installed) return;
    // SIGABRT = 6 on POSIX; SIG_IGN = 1
    const sigabrt: i32 = 6;
    const sig_ign: usize = 1;
    _ = signal(sigabrt, sig_ign);
    _abort_handler_installed = true;
}
// E2e tests for category: embeddings
test "embed_texts_different_preset" {
    // Embeds two texts with the "multilingual" preset; the returned JSON buffer
    // is released but not inspected.
    suppress_abort();
    const embeddings_json = try kreuzberg.embed_texts("[\"Hello world\",\"Test\"]", "{\"model\":{\"name\":\"multilingual\",\"type\":\"preset\"}}");
    std.heap.c_allocator.free(embeddings_json);
}
test "get_embedding_preset_known" {
    // Looks up the "balanced" preset; only the call succeeding is checked.
    suppress_abort();
    const preset = try kreuzberg.get_embedding_preset("balanced");
    _ = preset;
}
test "get_embedding_preset_nominal" {
    // Nominal lookup of the "balanced" preset; only the call succeeding is checked.
    suppress_abort();
    const preset = try kreuzberg.get_embedding_preset("balanced");
    _ = preset;
}
test "get_embedding_preset_unknown" {
    // Looking up a preset name that does not exist must succeed and yield null.
    suppress_abort();
    const preset = try kreuzberg.get_embedding_preset("nonexistent-xyz");
    const is_missing = preset == null;
    try testing.expect(is_missing);
}
test "list_embedding_presets_sanity" {
    // The preset listing must have a non-zero length.
    suppress_abort();
    const presets = try kreuzberg.list_embedding_presets();
    const has_presets = presets.len > 0;
    try testing.expect(has_presets);
}

78
e2e/zig/src/error_test.zig generated Normal file
View File

@@ -0,0 +1,78 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
const std = @import("std");
const testing = std.testing;
const kreuzberg = @import("kreuzberg");
// Ignore SIGABRT so aborts raised by C++ global destructors do not break
// zig's --listen=- IPC.
extern "c" fn signal(sig: i32, handler: usize) usize;
var _abort_handler_installed: bool = false;
fn suppress_abort() void {
    // Only the first call installs the handler; later calls return immediately.
    if (_abort_handler_installed) return;
    // SIGABRT = 6 on POSIX; SIG_IGN = 1
    const sigabrt: i32 = 6;
    const sig_ign: usize = 1;
    _ = signal(sigabrt, sig_ign);
    _abort_handler_installed = true;
}
// E2e tests for category: error
test "error_empty_bytes" {
    // Extracting the empty text fixture as text/plain must succeed; the
    // returned JSON buffer is released but not inspected.
    suppress_abort();
    const empty_bytes = try std.Io.Dir.cwd().readFileAlloc(std.testing.io, "text/empty.txt", std.heap.c_allocator, .unlimited);
    defer std.heap.c_allocator.free(empty_bytes);
    const extraction_json = try kreuzberg.extract_bytes_sync(empty_bytes, "text/plain", "{}");
    std.heap.c_allocator.free(extraction_json);
}
test "error_empty_mime" {
    // Extraction with an empty MIME type must be rejected with an error.
    suppress_abort();
    const content_bytes = try std.Io.Dir.cwd().readFileAlloc(std.testing.io, "text/plain.txt", std.heap.c_allocator, .unlimited);
    defer std.heap.c_allocator.free(content_bytes);
    if (kreuzberg.extract_bytes_sync(content_bytes, "", "{}")) |result_json| {
        // Unexpected success: release the result buffer, then fail the test.
        std.heap.c_allocator.free(result_json);
        return error.TestExpectedError;
    } else |_| {
        // Error occurred as expected.
    }
}
test "error_extract_bytes_conflicting_ocr" {
    // Extraction with both disable_ocr and force_ocr set must be rejected
    // with an error.
    suppress_abort();
    const content_bytes = try std.Io.Dir.cwd().readFileAlloc(std.testing.io, "text/fake_text.txt", std.heap.c_allocator, .unlimited);
    defer std.heap.c_allocator.free(content_bytes);
    if (kreuzberg.extract_bytes_sync(content_bytes, "text/plain", "{\"disable_ocr\":true,\"force_ocr\":true}")) |result_json| {
        // Unexpected success: release the result buffer, then fail the test.
        std.heap.c_allocator.free(result_json);
        return error.TestExpectedError;
    } else |_| {
        // Error occurred as expected.
    }
}
test "error_invalid_mime_format" {
    // Extraction with a malformed MIME type must be rejected with an error.
    suppress_abort();
    const content_bytes = try std.Io.Dir.cwd().readFileAlloc(std.testing.io, "text/plain.txt", std.heap.c_allocator, .unlimited);
    defer std.heap.c_allocator.free(content_bytes);
    if (kreuzberg.extract_bytes_sync(content_bytes, "not-a-mime", "{}")) |result_json| {
        // Unexpected success: release the result buffer, then fail the test.
        std.heap.c_allocator.free(result_json);
        return error.TestExpectedError;
    } else |_| {
        // Error occurred as expected.
    }
}
test "error_unsupported_mime" {
    // Extraction with an unsupported MIME type must be rejected with an error.
    suppress_abort();
    const content_bytes = try std.Io.Dir.cwd().readFileAlloc(std.testing.io, "text/plain.txt", std.heap.c_allocator, .unlimited);
    defer std.heap.c_allocator.free(content_bytes);
    if (kreuzberg.extract_bytes_sync(content_bytes, "application/x-nonexistent", "{}")) |result_json| {
        // Unexpected success: release the result buffer, then fail the test.
        std.heap.c_allocator.free(result_json);
        return error.TestExpectedError;
    } else |_| {
        // Error occurred as expected.
    }
}

96
e2e/zig/src/format_specific_test.zig generated Normal file
View File

@@ -0,0 +1,96 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
const std = @import("std");
const testing = std.testing;
const kreuzberg = @import("kreuzberg");
// Ignore SIGABRT so aborts raised by C++ global destructors do not break
// zig's --listen=- IPC.
extern "c" fn signal(sig: i32, handler: usize) usize;
var _abort_handler_installed: bool = false;
fn suppress_abort() void {
    // Only the first call installs the handler; later calls return immediately.
    if (_abort_handler_installed) return;
    // SIGABRT = 6 on POSIX; SIG_IGN = 1
    const sigabrt: i32 = 6;
    const sig_ign: usize = 1;
    _ = signal(sigabrt, sig_ign);
    _abort_handler_installed = true;
}
// E2e tests for category: format_specific
test "format_docx_standalone" {
    // Extracts a DOCX fixture through extract_bytes_sync and checks that a
    // non-trivial content string is returned.
    suppress_abort();
    // testing.allocator turns a leaked JSON tree into a test failure.
    const allocator = testing.allocator;
    const content_bytes = try std.Io.Dir.cwd().readFileAlloc(std.testing.io, "docx/fake.docx", std.heap.c_allocator, .unlimited);
    defer std.heap.c_allocator.free(content_bytes);
    const result_json = try kreuzberg.extract_bytes_sync(content_bytes, "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "{}");
    defer std.heap.c_allocator.free(result_json);
    const parsed = try std.json.parseFromSlice(std.json.Value, allocator, result_json, .{});
    defer parsed.deinit();

    // Fail the test (instead of panicking) when the payload has an unexpected shape.
    const root = parsed.value;
    if (root != .object) return error.TestUnexpectedResult;
    const content = root.object.get("content") orelse return error.TestUnexpectedResult;
    if (content != .string) return error.TestUnexpectedResult;

    try testing.expect(content.string.len >= 20);
}
test "format_hwpx_standalone" {
    // Extracts an HWPX fixture through extract_bytes_sync and checks that the
    // content contains the expected greeting.
    suppress_abort();
    // testing.allocator turns a leaked JSON tree into a test failure.
    const allocator = testing.allocator;
    const content_bytes = try std.Io.Dir.cwd().readFileAlloc(std.testing.io, "hwpx/simple.hwpx", std.heap.c_allocator, .unlimited);
    defer std.heap.c_allocator.free(content_bytes);
    const result_json = try kreuzberg.extract_bytes_sync(content_bytes, "application/haansofthwpx", "{}");
    defer std.heap.c_allocator.free(result_json);
    const parsed = try std.json.parseFromSlice(std.json.Value, allocator, result_json, .{});
    defer parsed.deinit();

    // Fail the test (instead of panicking) when the payload has an unexpected shape.
    const root = parsed.value;
    if (root != .object) return error.TestUnexpectedResult;
    const content = root.object.get("content") orelse return error.TestUnexpectedResult;
    // "content" is required to be a string, so no re-serialisation fallback is needed.
    if (content != .string) return error.TestUnexpectedResult;

    try testing.expect(content.string.len >= 20);
    try testing.expect(std.mem.indexOf(u8, content.string, "Hello from HWPX") != null);
}
test "format_pdf_text" {
    // Extracts a PDF fixture through extract_bytes_sync and checks that the
    // content is long enough and mentions "Mallori" or "May".
    suppress_abort();
    // testing.allocator turns a leaked JSON tree into a test failure.
    const allocator = testing.allocator;
    const content_bytes = try std.Io.Dir.cwd().readFileAlloc(std.testing.io, "pdf/fake_memo.pdf", std.heap.c_allocator, .unlimited);
    defer std.heap.c_allocator.free(content_bytes);
    const result_json = try kreuzberg.extract_bytes_sync(content_bytes, "application/pdf", "{}");
    defer std.heap.c_allocator.free(result_json);
    const parsed = try std.json.parseFromSlice(std.json.Value, allocator, result_json, .{});
    defer parsed.deinit();

    // Fail the test (instead of panicking) when the payload has an unexpected shape.
    const root = parsed.value;
    if (root != .object) return error.TestUnexpectedResult;
    const content = root.object.get("content") orelse return error.TestUnexpectedResult;
    if (content != .string) return error.TestUnexpectedResult;

    const text = content.string;
    try testing.expect(text.len >= 50);
    try testing.expect(
        std.mem.indexOf(u8, text, "Mallori") != null or
            std.mem.indexOf(u8, text, "May") != null,
    );
}
test "format_pptx" {
    // Extracts a PPTX fixture from disk with an explicit MIME type; the
    // returned JSON buffer is released but not inspected.
    suppress_abort();
    const extraction_json = try kreuzberg.extract_file_sync("pptx/simple.pptx", "application/vnd.openxmlformats-officedocument.presentationml.presentation", "{}");
    std.heap.c_allocator.free(extraction_json);
}
test "format_xlsx" {
    // Extracts an XLSX fixture from disk with an explicit MIME type; the
    // returned JSON buffer is released but not inspected.
    suppress_abort();
    const extraction_json = try kreuzberg.extract_file_sync("xlsx/stanley_cups.xlsx", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "{}");
    std.heap.c_allocator.free(extraction_json);
}

46
e2e/zig/src/mime_utilities_test.zig generated Normal file
View File

@@ -0,0 +1,46 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
const std = @import("std");
const testing = std.testing;
const kreuzberg = @import("kreuzberg");
// Ignore SIGABRT so aborts raised by C++ global destructors do not break
// zig's --listen=- IPC.
extern "c" fn signal(sig: i32, handler: usize) usize;
var _abort_handler_installed: bool = false;
fn suppress_abort() void {
    // Only the first call installs the handler; later calls return immediately.
    if (_abort_handler_installed) return;
    // SIGABRT = 6 on POSIX; SIG_IGN = 1
    const sigabrt: i32 = 6;
    const sig_ign: usize = 1;
    _ = signal(sigabrt, sig_ign);
    _abort_handler_installed = true;
}
// E2e tests for category: mime_utilities
test "mime_detect_bytes" {
    // The MIME type detected from a PDF fixture's bytes must mention "pdf".
    suppress_abort();
    const pdf_bytes = try std.Io.Dir.cwd().readFileAlloc(std.testing.io, "pdf/fake_memo.pdf", std.heap.c_allocator, .unlimited);
    defer std.heap.c_allocator.free(pdf_bytes);
    const detected_mime = try kreuzberg.detect_mime_type_from_bytes(pdf_bytes);
    const match_index = std.mem.indexOf(u8, detected_mime, "pdf");
    try testing.expect(match_index != null);
}
test "mime_detect_image" {
    // The MIME type detected from a PNG fixture's bytes must mention "png".
    suppress_abort();
    const png_bytes = try std.Io.Dir.cwd().readFileAlloc(std.testing.io, "images/test_hello_world.png", std.heap.c_allocator, .unlimited);
    defer std.heap.c_allocator.free(png_bytes);
    const detected_mime = try kreuzberg.detect_mime_type_from_bytes(png_bytes);
    const match_index = std.mem.indexOf(u8, detected_mime, "png");
    try testing.expect(match_index != null);
}
test "mime_get_extensions" {
    // The extensions reported for application/pdf must mention "pdf".
    suppress_abort();
    const extensions = try kreuzberg.get_extensions_for_mime("application/pdf");
    const match_index = std.mem.indexOf(u8, extensions, "pdf");
    try testing.expect(match_index != null);
}

View File

@@ -0,0 +1,39 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
const std = @import("std");
const testing = std.testing;
const kreuzberg = @import("kreuzberg");
// Ignore SIGABRT so aborts raised by C++ global destructors do not break
// zig's --listen=- IPC.
extern "c" fn signal(sig: i32, handler: usize) usize;
var _abort_handler_installed: bool = false;
fn suppress_abort() void {
    // Only the first call installs the handler; later calls return immediately.
    if (_abort_handler_installed) return;
    // SIGABRT = 6 on POSIX; SIG_IGN = 1
    const sigabrt: i32 = 6;
    const sig_ign: usize = 1;
    _ = signal(sigabrt, sig_ign);
    _abort_handler_installed = true;
}
// E2e tests for category: ocr_backend_management
test "ocr_backends_clear" {
    // Clears all OCR backends; only the call succeeding is checked.
    suppress_abort();
    const cleared = try kreuzberg.clear_ocr_backends();
    _ = cleared;
}
test "ocr_backends_list" {
    // Lists the registered OCR backends; only the call succeeding is checked.
    suppress_abort();
    const backends = try kreuzberg.list_ocr_backends();
    _ = backends;
}
test "ocr_backends_unregister" {
    // Unregistering an OCR backend name that was never registered must not
    // return an error.
    suppress_abort();
    const outcome = try kreuzberg.unregister_ocr_backend("nonexistent-backend-xyz");
    _ = outcome;
}

42
e2e/zig/src/pdf_test.zig generated Normal file
View File

@@ -0,0 +1,42 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
const std = @import("std");
const testing = std.testing;
const kreuzberg = @import("kreuzberg");
// Ignore SIGABRT so aborts raised by C++ global destructors do not break
// zig's --listen=- IPC.
extern "c" fn signal(sig: i32, handler: usize) usize;
var _abort_handler_installed: bool = false;
fn suppress_abort() void {
    // Only the first call installs the handler; later calls return immediately.
    if (_abort_handler_installed) return;
    // SIGABRT = 6 on POSIX; SIG_IGN = 1
    const sigabrt: i32 = 6;
    const sig_ign: usize = 1;
    _ = signal(sigabrt, sig_ign);
    _abort_handler_installed = true;
}
// E2e tests for category: pdf
test "render_pdf_page_first" {
    // Renders page index 0 of a PDF fixture to PNG and checks that at least
    // 100 bytes come back.
    suppress_abort();
    const pdf_bytes = try std.Io.Dir.cwd().readFileAlloc(std.testing.io, "pdf/fake_memo.pdf", std.heap.c_allocator, .unlimited);
    defer std.heap.c_allocator.free(pdf_bytes);
    const png = try kreuzberg.render_pdf_page_to_png(pdf_bytes, 0, null, null);
    const is_large_enough = png.len >= 100;
    try testing.expect(is_large_enough);
}
test "render_pdf_page_out_of_range" {
    // Rendering page index 999 of the fixture must be rejected with an error.
    suppress_abort();
    const pdf_bytes = try std.Io.Dir.cwd().readFileAlloc(std.testing.io, "pdf/fake_memo.pdf", std.heap.c_allocator, .unlimited);
    defer std.heap.c_allocator.free(pdf_bytes);
    if (kreuzberg.render_pdf_page_to_png(pdf_bytes, 999, null, null)) |_| {
        // Unexpected success: fail the test instead of passing silently.
        // NOTE(review): ownership of the returned PNG is not visible here, so
        // it is not released - confirm against the binding.
        return error.TestExpectedError;
    } else |_| {
        // Error occurred as expected.
    }
}

141
e2e/zig/src/plugin_api_test.zig generated Normal file
View File

@@ -0,0 +1,141 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
const std = @import("std");
const testing = std.testing;
const kreuzberg = @import("kreuzberg");
// Ignore SIGABRT so aborts raised by C++ global destructors do not break
// zig's --listen=- IPC.
extern "c" fn signal(sig: i32, handler: usize) usize;
var _abort_handler_installed: bool = false;
fn suppress_abort() void {
    // Only the first call installs the handler; later calls return immediately.
    if (_abort_handler_installed) return;
    // SIGABRT = 6 on POSIX; SIG_IGN = 1
    const sigabrt: i32 = 6;
    const sig_ign: usize = 1;
    _ = signal(sigabrt, sig_ign);
    _abort_handler_installed = true;
}
// E2e tests for category: plugin_api
test "register_document_extractor_trait_bridge" {
    // Registers a stub document extractor through the vtable bridge. The
    // registration's return value and error slot are not inspected.
    suppress_abort();
    const Stub = struct {
        pub fn extract_bytes(_: *@This(), _: [*c]const u8, _: [*c]const u8, _: [*c]const u8) ![*c]const u8 { return ""; }
        pub fn extract_file(_: *@This(), _: [*c]const u8, _: [*c]const u8, _: [*c]const u8) ![*c]const u8 { return ""; }
        pub fn supported_mime_types(_: *@This()) [*c]const u8 { return "[]"; }
        pub fn priority(_: *@This()) i32 { return 0; }
        pub fn can_handle(_: *@This(), _: [*c]const u8, _: [*c]const u8) i32 { return 0; }
    };
    var stub = Stub{};
    const vtable = kreuzberg.make_document_extractor_vtable(Stub, &stub);
    var out_err: ?[*c]u8 = null;
    _ = kreuzberg.register_document_extractor("test", vtable, &stub, @ptrCast(&out_err));
    // `stub` lives on this test's stack, so drop the registration again before
    // returning (presumably the registry keeps the pointer - TODO confirm).
    // The outcome is ignored because the registration may not have succeeded.
    if (kreuzberg.unregister_document_extractor("test")) |_| {} else |_| {}
}
test "register_embedding_backend_trait_bridge" {
    // Registers a stub embedding backend through the vtable bridge. The
    // registration's return value and error slot are not inspected.
    suppress_abort();
    const Stub = struct {
        pub fn dimensions(_: *@This()) u64 { return 0; }
        pub fn embed(_: *@This(), _: [*c]const u8) ![*c]const u8 { return ""; }
    };
    var stub = Stub{};
    const vtable = kreuzberg.make_embedding_backend_vtable(Stub, &stub);
    var out_err: ?[*c]u8 = null;
    _ = kreuzberg.register_embedding_backend("test", vtable, &stub, @ptrCast(&out_err));
    // `stub` lives on this test's stack, so drop the registration again before
    // returning (presumably the registry keeps the pointer - TODO confirm).
    // The outcome is ignored because the registration may not have succeeded.
    if (kreuzberg.unregister_embedding_backend("test")) |_| {} else |_| {}
}
test "register_ocr_backend_trait_bridge" {
    // Registers a stub OCR backend through the vtable bridge. The
    // registration's return value and error slot are not inspected.
    suppress_abort();
    const Stub = struct {
        pub fn process_image(_: *@This(), _: [*c]const u8, _: [*c]const u8) ![*c]const u8 { return ""; }
        pub fn process_image_file(_: *@This(), _: [*c]const u8, _: [*c]const u8) ![*c]const u8 { return ""; }
        pub fn supports_language(_: *@This(), _: [*c]const u8) i32 { return 0; }
        pub fn backend_type(_: *@This()) [*c]const u8 { return "{}"; }
        pub fn supported_languages(_: *@This()) [*c]const u8 { return "[]"; }
        pub fn supports_table_detection(_: *@This()) i32 { return 0; }
        pub fn supports_document_processing(_: *@This()) i32 { return 0; }
        pub fn process_document(_: *@This(), _: [*c]const u8, _: [*c]const u8) ![*c]const u8 { return ""; }
    };
    var stub = Stub{};
    const vtable = kreuzberg.make_ocr_backend_vtable(Stub, &stub);
    var out_err: ?[*c]u8 = null;
    _ = kreuzberg.register_ocr_backend("test", vtable, &stub, @ptrCast(&out_err));
    // `stub` lives on this test's stack, so drop the registration again before
    // returning (presumably the registry keeps the pointer - TODO confirm).
    // The outcome is ignored because the registration may not have succeeded.
    if (kreuzberg.unregister_ocr_backend("test")) |_| {} else |_| {}
}
test "register_post_processor_trait_bridge" {
    // Registers a stub post-processor through the vtable bridge. The
    // registration's return value and error slot are not inspected.
    suppress_abort();
    const Stub = struct {
        pub fn process(_: *@This(), _: [*c]const u8, _: [*c]const u8) !void {}
        pub fn processing_stage(_: *@This()) [*c]const u8 { return "{}"; }
        pub fn should_process(_: *@This(), _: [*c]const u8, _: [*c]const u8) i32 { return 0; }
        pub fn estimated_duration_ms(_: *@This(), _: [*c]const u8) u64 { return 0; }
        pub fn priority(_: *@This()) i32 { return 0; }
    };
    var stub = Stub{};
    const vtable = kreuzberg.make_post_processor_vtable(Stub, &stub);
    var out_err: ?[*c]u8 = null;
    _ = kreuzberg.register_post_processor("test", vtable, &stub, @ptrCast(&out_err));
    // `stub` lives on this test's stack, so drop the registration again before
    // returning (presumably the registry keeps the pointer - TODO confirm).
    // The outcome is ignored because the registration may not have succeeded.
    if (kreuzberg.unregister_post_processor("test")) |_| {} else |_| {}
}
test "register_renderer_trait_bridge" {
    // Registers a stub renderer through the vtable bridge. The registration's
    // return value and error slot are not inspected.
    suppress_abort();
    const Stub = struct {
        pub fn render(_: *@This(), _: [*c]const u8) ![*c]const u8 { return ""; }
    };
    var stub = Stub{};
    const vtable = kreuzberg.make_renderer_vtable(Stub, &stub);
    var out_err: ?[*c]u8 = null;
    _ = kreuzberg.register_renderer("test", vtable, &stub, @ptrCast(&out_err));
    // `stub` lives on this test's stack, so drop the registration again before
    // returning (presumably the registry keeps the pointer - TODO confirm).
    // The outcome is ignored because the registration may not have succeeded.
    if (kreuzberg.unregister_renderer("test")) |_| {} else |_| {}
}
test "register_validator_trait_bridge" {
    // Registers a stub validator through the vtable bridge. The registration's
    // return value and error slot are not inspected.
    suppress_abort();
    const Stub = struct {
        pub fn validate(_: *@This(), _: [*c]const u8, _: [*c]const u8) !void {}
        pub fn should_validate(_: *@This(), _: [*c]const u8, _: [*c]const u8) i32 { return 0; }
        pub fn priority(_: *@This()) i32 { return 0; }
    };
    var stub = Stub{};
    const vtable = kreuzberg.make_validator_vtable(Stub, &stub);
    var out_err: ?[*c]u8 = null;
    _ = kreuzberg.register_validator("test", vtable, &stub, @ptrCast(&out_err));
    // `stub` lives on this test's stack, so drop the registration again before
    // returning (presumably the registry keeps the pointer - TODO confirm).
    // The outcome is ignored because the registration may not have succeeded.
    if (kreuzberg.unregister_validator("test")) |_| {} else |_| {}
}
test "unregister_document_extractor_after_register" {
    // unregister_document_extractor: the test fails only if the call returns
    // an error; the success value is discarded.
    // NOTE(review): no registration of this name happens inside this test —
    // presumably it relies on state set up elsewhere; confirm.
    suppress_abort();

    const extractor_name = "test-extractor";
    _ = try kreuzberg.unregister_document_extractor(extractor_name);
}
test "unregister_embedding_backend_after_register" {
    // unregister_embedding_backend: the test fails only if the call returns
    // an error; the success value is discarded.
    // NOTE(review): no registration of this name happens inside this test —
    // presumably it relies on state set up elsewhere; confirm.
    suppress_abort();

    const backend_name = "test-embedding-backend";
    _ = try kreuzberg.unregister_embedding_backend(backend_name);
}
test "unregister_post_processor_after_register" {
    // unregister_post_processor: the test fails only if the call returns an
    // error; the success value is discarded.
    // NOTE(review): the trait-bridge test in this file registers under the
    // name "test", not "test-processor" — confirm which name is intended.
    suppress_abort();

    const processor_name = "test-processor";
    _ = try kreuzberg.unregister_post_processor(processor_name);
}
test "unregister_renderer_after_register" {
    // unregister_renderer: the test fails only if the call returns an error;
    // the success value is discarded.
    // NOTE(review): the trait-bridge test in this file registers under the
    // name "test", not "test-renderer" — confirm which name is intended.
    suppress_abort();

    const renderer_name = "test-renderer";
    _ = try kreuzberg.unregister_renderer(renderer_name);
}
test "unregister_validator_after_register" {
    // unregister_validator: the test fails only if the call returns an error;
    // the success value is discarded.
    // NOTE(review): the trait-bridge test in this file registers under the
    // name "test", not "test-validator" — confirm which name is intended.
    suppress_abort();

    const validator_name = "test-validator";
    _ = try kreuzberg.unregister_validator(validator_name);
}

View File

@@ -0,0 +1,33 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
const std = @import("std");
const testing = std.testing;
const kreuzberg = @import("kreuzberg");
// Suppress C++ global destructor aborts that break zig's --listen=- IPC.
// libc's signal(), declared with integer-sized handler values.
extern "c" fn signal(sig: i32, handler: usize) usize;
// Flips to true after the first successful pass through suppress_abort().
var _abort_handler_installed: bool = false;
// Sets the SIGABRT disposition to "ignore" once; every later call is a no-op.
fn suppress_abort() void {
    if (_abort_handler_installed) return;

    const sigabrt: i32 = 6; // SIGABRT = 6 on POSIX
    const sig_ign: usize = 1; // SIG_IGN = 1
    _ = signal(sigabrt, sig_ign);
    _abort_handler_installed = true;
}
// E2e tests for category: post_processor_management
test "post_processors_clear" {
    // Clears all post-processors. Only an error return fails the test; the
    // success value is discarded and the list is not re-read afterwards.
    suppress_abort();

    _ = try kreuzberg.clear_post_processors();
}
test "post_processors_list" {
    // Lists all registered post-processors. Only an error return fails the
    // test; the listing itself is discarded.
    // NOTE(review): if the binding returns caller-owned memory, it is never
    // freed here — confirm.
    suppress_abort();

    _ = try kreuzberg.list_post_processors();
}

39
e2e/zig/src/registry_operations_test.zig generated Normal file
View File

@@ -0,0 +1,39 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
const std = @import("std");
const testing = std.testing;
const kreuzberg = @import("kreuzberg");
// Suppress C++ global destructor aborts that break zig's --listen=- IPC.
// libc's signal(), declared with integer-sized handler values.
extern "c" fn signal(sig: i32, handler: usize) usize;
// Flips to true after the first successful pass through suppress_abort().
var _abort_handler_installed: bool = false;
// Sets the SIGABRT disposition to "ignore" once; every later call is a no-op.
fn suppress_abort() void {
    if (_abort_handler_installed) return;

    const sigabrt: i32 = 6; // SIGABRT = 6 on POSIX
    const sig_ign: usize = 1; // SIG_IGN = 1
    _ = signal(sigabrt, sig_ign);
    _abort_handler_installed = true;
}
// E2e tests for category: registry_operations
test "extensions_docx" {
    // Looks up the file extensions for the DOCX MIME type. Only an error
    // return fails the test; the returned extensions are discarded.
    suppress_abort();

    const mime_type = "application/vnd.openxmlformats-officedocument.wordprocessingml.document";
    _ = try kreuzberg.get_extensions_for_mime(mime_type);
}
test "extensions_html" {
    // Looks up the file extensions for the HTML MIME type. Only an error
    // return fails the test; the returned extensions are discarded.
    suppress_abort();

    const mime_type = "text/html";
    _ = try kreuzberg.get_extensions_for_mime(mime_type);
}
test "extensions_pdf" {
    // Looks up the file extensions for the PDF MIME type. Only an error
    // return fails the test; the returned extensions are discarded.
    suppress_abort();

    const mime_type = "application/pdf";
    _ = try kreuzberg.get_extensions_for_mime(mime_type);
}

57
e2e/zig/src/registry_test.zig generated Normal file
View File

@@ -0,0 +1,57 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
const std = @import("std");
const testing = std.testing;
const kreuzberg = @import("kreuzberg");
// Suppress C++ global destructor aborts that break zig's --listen=- IPC.
// libc's signal(), declared with integer-sized handler values.
extern "c" fn signal(sig: i32, handler: usize) usize;
// Flips to true after the first successful pass through suppress_abort().
var _abort_handler_installed: bool = false;
// Sets the SIGABRT disposition to "ignore" once; every later call is a no-op.
fn suppress_abort() void {
    if (_abort_handler_installed) return;

    const sigabrt: i32 = 6; // SIGABRT = 6 on POSIX
    const sig_ign: usize = 1; // SIG_IGN = 1
    _ = signal(sigabrt, sig_ign);
    _abort_handler_installed = true;
}
// E2e tests for category: registry
test "list_document_extractors" {
    // Lists document extractors. Passes as long as the call does not return
    // an error; the listing itself is discarded.
    suppress_abort();

    _ = try kreuzberg.list_document_extractors();
}
test "list_embedding_backends" {
    // Lists embedding backends. Passes as long as the call does not return
    // an error; the listing itself is discarded.
    suppress_abort();

    _ = try kreuzberg.list_embedding_backends();
}
test "list_ocr_backends" {
    // Lists OCR backends. Passes as long as the call does not return an
    // error; the listing itself is discarded.
    suppress_abort();

    _ = try kreuzberg.list_ocr_backends();
}
test "list_post_processors" {
    // Lists post-processors. Passes as long as the call does not return an
    // error; the listing itself is discarded.
    suppress_abort();

    _ = try kreuzberg.list_post_processors();
}
test "list_renderers" {
    // Lists renderers. Passes as long as the call does not return an error;
    // the listing itself is discarded.
    suppress_abort();

    _ = try kreuzberg.list_renderers();
}
test "list_validators" {
    // Lists validators. Passes as long as the call does not return an error;
    // the listing itself is discarded.
    suppress_abort();

    _ = try kreuzberg.list_validators();
}

33
e2e/zig/src/renderer_management_test.zig generated Normal file
View File

@@ -0,0 +1,33 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
const std = @import("std");
const testing = std.testing;
const kreuzberg = @import("kreuzberg");
// Suppress C++ global destructor aborts that break zig's --listen=- IPC.
// libc's signal(), declared with integer-sized handler values.
extern "c" fn signal(sig: i32, handler: usize) usize;
// Flips to true after the first successful pass through suppress_abort().
var _abort_handler_installed: bool = false;
// Sets the SIGABRT disposition to "ignore" once; every later call is a no-op.
fn suppress_abort() void {
    if (_abort_handler_installed) return;

    const sigabrt: i32 = 6; // SIGABRT = 6 on POSIX
    const sig_ign: usize = 1; // SIG_IGN = 1
    _ = signal(sigabrt, sig_ign);
    _abort_handler_installed = true;
}
// E2e tests for category: renderer_management
test "renderers_clear" {
    // Clears all renderers. Only an error return fails the test; the success
    // value is discarded and the list is not re-read afterwards.
    suppress_abort();

    _ = try kreuzberg.clear_renderers();
}
test "renderers_list" {
    // Lists all registered renderers. Only an error return fails the test;
    // the listing itself is discarded.
    suppress_abort();

    _ = try kreuzberg.list_renderers();
}

229
e2e/zig/src/smoke_test.zig generated Normal file
View File

@@ -0,0 +1,229 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
const std = @import("std");
const testing = std.testing;
const kreuzberg = @import("kreuzberg");
// Suppress C++ global destructor aborts that break zig's --listen=- IPC.
// libc's signal(), declared with integer-sized handler values.
extern "c" fn signal(sig: i32, handler: usize) usize;
// Flips to true after the first successful pass through suppress_abort().
var _abort_handler_installed: bool = false;
// Sets the SIGABRT disposition to "ignore" once; every later call is a no-op.
fn suppress_abort() void {
    if (_abort_handler_installed) return;

    const sigabrt: i32 = 6; // SIGABRT = 6 on POSIX
    const sig_ign: usize = 1; // SIG_IGN = 1
    _ = signal(sigabrt, sig_ign);
    _abort_handler_installed = true;
}
// E2e tests for category: smoke
test "ocr_image_png" {
    // OCR: PNG image extraction from in-memory bytes with the empty config "{}".
    // Checks the reported MIME type, that the content is non-empty, and that
    // it mentions "Hello"/"World" in either capitalisation.
    // The fixture description also mentions a WASM Uint8Array/Promise bridge;
    // nothing WASM-specific appears in this test.
    suppress_abort();

    var debug_allocator: std.heap.DebugAllocator(.{}) = .init;
    defer _ = debug_allocator.deinit();
    const json_allocator = debug_allocator.allocator();

    // The input file is read relative to the current working directory.
    const image_bytes = try std.Io.Dir.cwd().readFileAlloc(std.testing.io, "images/test_hello_world.png", std.heap.c_allocator, .unlimited);
    defer std.heap.c_allocator.free(image_bytes);

    // The result buffer is released with the C allocator.
    const result_json = try kreuzberg.extract_bytes_sync(image_bytes, "image/png", "{}");
    defer std.heap.c_allocator.free(result_json);

    var parsed = try std.json.parseFromSlice(std.json.Value, json_allocator, result_json, .{});
    defer parsed.deinit();
    const root = &parsed.value;

    const mime_type = std.mem.trim(u8, root.object.get("mime_type").?.string, " \n\r\t");
    try testing.expectEqualStrings("image/png", mime_type);

    const content = root.object.get("content").?.string;
    try testing.expect(content.len >= 1);

    // Any one of these is enough.
    const expected_any = [_][]const u8{ "Hello", "World", "hello", "world" };
    var found = false;
    for (expected_any) |needle| {
        if (std.mem.indexOf(u8, content, needle) != null) {
            found = true;
            break;
        }
    }
    try testing.expect(found);
}
test "smoke_docx_basic" {
    // Smoke test: DOCX with formatted text.
    // Checks the reported MIME type, a minimum content length of 20 bytes,
    // and that at least one expected word appears in the content.
    suppress_abort();

    var debug_allocator: std.heap.DebugAllocator(.{}) = .init;
    defer _ = debug_allocator.deinit();
    const json_allocator = debug_allocator.allocator();

    const docx_mime = "application/vnd.openxmlformats-officedocument.wordprocessingml.document";
    // The result buffer is released with the C allocator.
    const result_json = try kreuzberg.extract_file_sync("docx/fake.docx", docx_mime, "{}");
    defer std.heap.c_allocator.free(result_json);

    var parsed = try std.json.parseFromSlice(std.json.Value, json_allocator, result_json, .{});
    defer parsed.deinit();
    const root = &parsed.value;

    const mime_type = std.mem.trim(u8, root.object.get("mime_type").?.string, " \n\r\t");
    try testing.expectEqualStrings(docx_mime, mime_type);

    const content = root.object.get("content").?.string;
    try testing.expect(content.len >= 20);

    // Any one of these is enough.
    const expected_any = [_][]const u8{ "Lorem", "ipsum", "document", "text" };
    var found = false;
    for (expected_any) |needle| {
        if (std.mem.indexOf(u8, content, needle) != null) {
            found = true;
            break;
        }
    }
    try testing.expect(found);
}
test "smoke_html_basic" {
    // Smoke test: HTML table extraction.
    // Checks the reported MIME type, a minimum content length of 10 bytes,
    // and that at least one expected phrase appears in the content.
    suppress_abort();

    var debug_allocator: std.heap.DebugAllocator(.{}) = .init;
    defer _ = debug_allocator.deinit();
    const json_allocator = debug_allocator.allocator();

    // The result buffer is released with the C allocator.
    const result_json = try kreuzberg.extract_file_sync("html/simple_table.html", "text/html", "{}");
    defer std.heap.c_allocator.free(result_json);

    var parsed = try std.json.parseFromSlice(std.json.Value, json_allocator, result_json, .{});
    defer parsed.deinit();
    const root = &parsed.value;

    const mime_type = std.mem.trim(u8, root.object.get("mime_type").?.string, " \n\r\t");
    try testing.expectEqualStrings("text/html", mime_type);

    const content = root.object.get("content").?.string;
    try testing.expect(content.len >= 10);

    // Any one of these is enough.
    const expected_any = [_][]const u8{ "Sample Data Table", "Laptop", "Electronics", "Product" };
    var found = false;
    for (expected_any) |needle| {
        if (std.mem.indexOf(u8, content, needle) != null) {
            found = true;
            break;
        }
    }
    try testing.expect(found);
}
test "smoke_image_png" {
    // Smoke test: PNG image (without OCR, metadata only).
    // No MIME type is passed in (null) and the config sets disable_ocr; the
    // only assertion is that the reported MIME type is "image/png".
    suppress_abort();

    var debug_allocator: std.heap.DebugAllocator(.{}) = .init;
    defer _ = debug_allocator.deinit();
    const json_allocator = debug_allocator.allocator();

    // The result buffer is released with the C allocator.
    const result_json = try kreuzberg.extract_file_sync("images/sample.png", null, "{\"disable_ocr\":true}");
    defer std.heap.c_allocator.free(result_json);

    var parsed = try std.json.parseFromSlice(std.json.Value, json_allocator, result_json, .{});
    defer parsed.deinit();
    const root = &parsed.value;

    const mime_type = std.mem.trim(u8, root.object.get("mime_type").?.string, " \n\r\t");
    try testing.expectEqualStrings("image/png", mime_type);
}
test "smoke_json_basic" {
    // Smoke test: JSON file extraction.
    // Checks the reported MIME type and a minimum content length of 5 bytes.
    suppress_abort();

    var debug_allocator: std.heap.DebugAllocator(.{}) = .init;
    defer _ = debug_allocator.deinit();
    const json_allocator = debug_allocator.allocator();

    // The result buffer is released with the C allocator.
    const result_json = try kreuzberg.extract_file_sync("json/simple.json", "application/json", "{}");
    defer std.heap.c_allocator.free(result_json);

    var parsed = try std.json.parseFromSlice(std.json.Value, json_allocator, result_json, .{});
    defer parsed.deinit();
    const root = &parsed.value;

    const mime_type = std.mem.trim(u8, root.object.get("mime_type").?.string, " \n\r\t");
    try testing.expectEqualStrings("application/json", mime_type);

    const content = root.object.get("content").?.string;
    try testing.expect(content.len >= 5);
}
test "smoke_pdf_basic" {
    // Smoke test: PDF with simple text extraction.
    // Checks the reported MIME type, a minimum content length of 50 bytes,
    // and that at least one expected phrase appears in the content.
    suppress_abort();

    var debug_allocator: std.heap.DebugAllocator(.{}) = .init;
    defer _ = debug_allocator.deinit();
    const json_allocator = debug_allocator.allocator();

    // The result buffer is released with the C allocator.
    const result_json = try kreuzberg.extract_file_sync("pdf/fake_memo.pdf", "application/pdf", "{}");
    defer std.heap.c_allocator.free(result_json);

    var parsed = try std.json.parseFromSlice(std.json.Value, json_allocator, result_json, .{});
    defer parsed.deinit();
    const root = &parsed.value;

    const mime_type = std.mem.trim(u8, root.object.get("mime_type").?.string, " \n\r\t");
    try testing.expectEqualStrings("application/pdf", mime_type);

    const content = root.object.get("content").?.string;
    try testing.expect(content.len >= 50);

    // Either phrase is enough.
    const expected_any = [_][]const u8{ "May 5, 2023", "To Whom it May Concern" };
    var found = false;
    for (expected_any) |needle| {
        if (std.mem.indexOf(u8, content, needle) != null) {
            found = true;
            break;
        }
    }
    try testing.expect(found);
}
test "smoke_txt_basic" {
    // Smoke test: Plain text file.
    // Checks the reported MIME type and a minimum content length of 5 bytes.
    suppress_abort();

    var debug_allocator: std.heap.DebugAllocator(.{}) = .init;
    defer _ = debug_allocator.deinit();
    const json_allocator = debug_allocator.allocator();

    // The result buffer is released with the C allocator.
    const result_json = try kreuzberg.extract_file_sync("text/report.txt", "text/plain", "{}");
    defer std.heap.c_allocator.free(result_json);

    var parsed = try std.json.parseFromSlice(std.json.Value, json_allocator, result_json, .{});
    defer parsed.deinit();
    const root = &parsed.value;

    const mime_type = std.mem.trim(u8, root.object.get("mime_type").?.string, " \n\r\t");
    try testing.expectEqualStrings("text/plain", mime_type);

    const content = root.object.get("content").?.string;
    try testing.expect(content.len >= 5);
}
test "smoke_xlsx_basic" {
    // Smoke test: XLSX with basic spreadsheet data including tables.
    // Checks the reported MIME type, a minimum content length of 100 bytes,
    // and that every expected substring appears in the content.
    suppress_abort();

    var debug_allocator: std.heap.DebugAllocator(.{}) = .init;
    defer _ = debug_allocator.deinit();
    const json_allocator = debug_allocator.allocator();

    const xlsx_mime = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet";
    // The result buffer is released with the C allocator.
    const result_json = try kreuzberg.extract_file_sync("xlsx/stanley_cups.xlsx", xlsx_mime, "{}");
    defer std.heap.c_allocator.free(result_json);

    var parsed = try std.json.parseFromSlice(std.json.Value, json_allocator, result_json, .{});
    defer parsed.deinit();
    const root = &parsed.value;

    const mime_type = std.mem.trim(u8, root.object.get("mime_type").?.string, " \n\r\t");
    try testing.expectEqualStrings(xlsx_mime, mime_type);
    try testing.expect(root.object.get("content").?.string.len >= 100);

    // All of these must be present, checked in this order.
    const expected_all = [_][]const u8{
        "Team",
        "Location",
        "Stanley Cups",
        "Blues",
        "Flyers",
        "Maple Leafs",
        "STL",
        "PHI",
        "TOR",
    };
    for (expected_all) |needle| {
        const content_value = root.object.get("content").?;
        // A non-string value is serialised to JSON text first; that temporary
        // buffer is freed at the end of each iteration.
        const haystack = switch (content_value) {
            .string => |text| text,
            else => try std.json.Stringify.valueAlloc(std.heap.c_allocator, content_value, .{}),
        };
        defer if (content_value != .string) std.heap.c_allocator.free(haystack);
        try testing.expect(std.mem.indexOf(u8, haystack, needle) != null);
    }
    // skipped: field 'tables' not available on result type
    // skipped: field 'metadata.format.excel.sheet_count' not available on result type
    // skipped: field 'metadata.format.excel.sheet_names' not available on result type
}

View File

@@ -0,0 +1,33 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
const std = @import("std");
const testing = std.testing;
const kreuzberg = @import("kreuzberg");
// Suppress C++ global destructor aborts that break zig's --listen=- IPC.
// libc's signal(), declared with integer-sized handler values.
extern "c" fn signal(sig: i32, handler: usize) usize;
// Flips to true after the first successful pass through suppress_abort().
var _abort_handler_installed: bool = false;
// Sets the SIGABRT disposition to "ignore" once; every later call is a no-op.
fn suppress_abort() void {
    if (_abort_handler_installed) return;

    const sigabrt: i32 = 6; // SIGABRT = 6 on POSIX
    const sig_ign: usize = 1; // SIG_IGN = 1
    _ = signal(sigabrt, sig_ign);
    _abort_handler_installed = true;
}
// E2e tests for category: validator_management
test "validators_clear" {
    // Clears all validators. Only an error return fails the test; the success
    // value is discarded and the list is not re-read afterwards.
    suppress_abort();

    _ = try kreuzberg.clear_validators();
}
test "validators_list" {
    // Lists all registered validators. Only an error return fails the test;
    // the listing itself is discarded.
    suppress_abort();

    _ = try kreuzberg.list_validators();
}