This commit is contained in:
373
e2e/zig/build.zig
generated
Normal file
373
e2e/zig/build.zig
generated
Normal file
@@ -0,0 +1,373 @@
|
||||
const std = @import("std");
|
||||
const builtin = @import("builtin");
|
||||
|
||||
/// Configure the e2e test build.
///
/// Creates the shared `kreuzberg` module (linked against `kreuzberg_ffi`) and,
/// for every test category, one test executable built from
/// `src/<category>_test.zig`. Each executable is run with the working directory
/// set to `../../test_documents` and attached to the `test` step.
///
/// User options:
///   -Dffi_path=<dir>          directory containing libkreuzberg_ffi
///                             (default: ../../target/release)
///   -Dffi_include_path=<dir>  directory containing the FFI header
///                             (default: ../../crates/kreuzberg-ffi/include)
pub fn build(b: *std.Build) void {
    const target = b.standardTargetOptions(.{});
    const optimize = b.standardOptimizeOption(.{});
    const test_step = b.step("test", "Run tests");
    const ffi_path = b.option([]const u8, "ffi_path", "Path to directory containing libkreuzberg_ffi") orelse "../../target/release";
    const ffi_include = b.option([]const u8, "ffi_include_path", "Path to directory containing FFI header") orelse "../../crates/kreuzberg-ffi/include";

    // Binding module shared by every test executable.
    const kreuzberg_module = b.addModule("kreuzberg", .{
        .root_source_file = b.path("../../packages/zig/src/kreuzberg.zig"),
        .target = target,
        .optimize = optimize,
        .link_libc = true,
    });
    kreuzberg_module.addLibraryPath(.{ .cwd_relative = ffi_path });
    kreuzberg_module.addIncludePath(.{ .cwd_relative = ffi_include });
    kreuzberg_module.linkSystemLibrary("kreuzberg_ffi", .{});

    // One entry per `src/<category>_test.zig`; the order matches the order in
    // which the run steps are attached to `test_step`.
    const categories = [_][]const u8{
        "async",
        "batch",
        "code",
        "contract",
        "detection",
        "document_extractor_management",
        "embed_async_pending",
        "embed_extra",
        "embedding_backend_management",
        "embeddings",
        "error",
        "format_specific",
        "mime_utilities",
        "ocr_backend_management",
        "pdf",
        "plugin_api",
        "post_processor_management",
        "registry",
        "registry_operations",
        "renderer_management",
        "smoke",
        "validator_management",
    };

    for (categories) |category| {
        const test_module = b.createModule(.{
            .root_source_file = b.path(b.fmt("src/{s}_test.zig", .{category})),
            .target = target,
            .optimize = optimize,
            .link_libc = true,
        });
        test_module.addImport("kreuzberg", kreuzberg_module);

        const test_artifact = b.addTest(.{
            .name = b.fmt("{s}_test", .{category}),
            .root_module = test_module,
            .use_llvm = true,
        });

        // Fixture paths inside the tests are relative to the shared documents directory.
        const run_step = b.addRunArtifact(test_artifact);
        run_step.setCwd(b.path("../../test_documents"));
        test_step.dependOn(&run_step.step);
    }
}
|
||||
16
e2e/zig/build.zig.zon
generated
Normal file
16
e2e/zig/build.zig.zon
generated
Normal file
@@ -0,0 +1,16 @@
|
||||
.{
|
||||
.name = .e2e_zig,
|
||||
.version = "0.1.0",
|
||||
.fingerprint = 0xf16334c0592376fc,
|
||||
.minimum_zig_version = "0.16.0",
|
||||
.dependencies = .{
|
||||
.kreuzberg = .{
|
||||
.path = "../../packages/zig",
|
||||
},
|
||||
},
|
||||
.paths = .{
|
||||
"build.zig",
|
||||
"build.zig.zon",
|
||||
"src",
|
||||
},
|
||||
}
|
||||
63
e2e/zig/src/async_test.zig
generated
Normal file
63
e2e/zig/src/async_test.zig
generated
Normal file
@@ -0,0 +1,63 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
const std = @import("std");
|
||||
const testing = std.testing;
|
||||
const kreuzberg = @import("kreuzberg");
|
||||
|
||||
// Suppress C++ global destructor aborts that break zig's --listen=- IPC
|
||||
// libc `signal`, declared with the handler as a raw integer so SIG_IGN can be passed directly.
extern "c" fn signal(sig: i32, handler: usize) usize;

// Set once the SIGABRT disposition has been installed by `suppress_abort`.
var _abort_handler_installed: bool = false;

/// Set SIGABRT to "ignore" via libc `signal`, at most once per process.
/// Subsequent calls return immediately.
fn suppress_abort() void {
    if (_abort_handler_installed) return;

    const sigabrt: i32 = 6; // SIGABRT on POSIX
    const sig_ign: usize = 1; // SIG_IGN
    _ = signal(sigabrt, sig_ign);
    _abort_handler_installed = true;
}
|
||||
|
||||
// E2e tests for category: async
|
||||
|
||||
test "async_extract_bytes" {
    // Reads a PDF fixture from the working directory, extracts it through the
    // bytes API, and checks the reported MIME type and a minimum content length.
    suppress_abort();

    var debug_allocator: std.heap.DebugAllocator(.{}) = .init;
    defer _ = debug_allocator.deinit();
    const json_allocator = debug_allocator.allocator();

    const c_alloc = std.heap.c_allocator;
    const pdf_bytes = try std.Io.Dir.cwd().readFileAlloc(std.testing.io, "pdf/fake_memo.pdf", c_alloc, .unlimited);
    defer c_alloc.free(pdf_bytes);

    // The JSON result buffer is released with the C allocator.
    const raw_json = try kreuzberg.extract_bytes_sync(pdf_bytes, "application/pdf", "{}");
    defer c_alloc.free(raw_json);

    var document = try std.json.parseFromSlice(std.json.Value, json_allocator, raw_json, .{});
    defer document.deinit();
    const fields = document.value.object;

    const mime_type = std.mem.trim(u8, fields.get("mime_type").?.string, " \n\r\t");
    try testing.expectEqualStrings("application/pdf", mime_type);

    const content = fields.get("content").?.string;
    try testing.expect(content.len >= 50);
}
|
||||
|
||||
test "async_extract_bytes_empty_mime" {
    // Extraction with an empty MIME type. An error from the call is the
    // expected outcome and ends the test successfully.
    suppress_abort();

    const content_bytes = try std.Io.Dir.cwd().readFileAlloc(std.testing.io, "text/plain.txt", std.heap.c_allocator, .unlimited);
    defer std.heap.c_allocator.free(content_bytes);

    const result_json = kreuzberg.extract_bytes_sync(content_bytes, "", "{}") catch {
        // Error occurred as expected.
        return;
    };

    // The call succeeded: release the result buffer (the same way the
    // "async_extract_bytes" test does) so this path does not leak.
    // NOTE(review): a successful call still passes the test; fail here instead
    // if an error is mandatory for an empty MIME type.
    std.heap.c_allocator.free(result_json);
}
|
||||
|
||||
test "async_extract_bytes_invalid_mime" {
    // Extraction with an unsupported MIME type. An error from the call is the
    // expected outcome and ends the test successfully.
    suppress_abort();

    const content_bytes = try std.Io.Dir.cwd().readFileAlloc(std.testing.io, "text/plain.txt", std.heap.c_allocator, .unlimited);
    defer std.heap.c_allocator.free(content_bytes);

    const result_json = kreuzberg.extract_bytes_sync(content_bytes, "application/x-nonexistent", "{}") catch {
        // Error occurred as expected.
        return;
    };

    // The call succeeded: release the result buffer (the same way the
    // "async_extract_bytes" test does) so this path does not leak.
    // NOTE(review): a successful call still passes the test; fail here instead
    // if an error is mandatory for an unsupported MIME type.
    std.heap.c_allocator.free(result_json);
}
|
||||
87
e2e/zig/src/batch_test.zig
generated
Normal file
87
e2e/zig/src/batch_test.zig
generated
Normal file
@@ -0,0 +1,87 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
const std = @import("std");
|
||||
const testing = std.testing;
|
||||
const kreuzberg = @import("kreuzberg");
|
||||
|
||||
// Suppress C++ global destructor aborts that break zig's --listen=- IPC
|
||||
// libc `signal`, declared with the handler as a raw integer so SIG_IGN can be passed directly.
extern "c" fn signal(sig: i32, handler: usize) usize;

// Set once the SIGABRT disposition has been installed by `suppress_abort`.
var _abort_handler_installed: bool = false;

/// Set SIGABRT to "ignore" via libc `signal`, at most once per process.
/// Subsequent calls return immediately.
fn suppress_abort() void {
    if (_abort_handler_installed) return;

    const sigabrt: i32 = 6; // SIGABRT on POSIX
    const sig_ign: usize = 1; // SIG_IGN
    _ = signal(sigabrt, sig_ign);
    _abort_handler_installed = true;
}
|
||||
|
||||
// E2e tests for category: batch
|
||||
|
||||
test "batch_bytes_invalid_mime" {
    // Submits a one-item batch with an unsupported MIME type; the test only
    // requires the batch call itself to return without an error.
    suppress_abort();

    const batch_request = "[{\"content\":[72,101,108,108,111],\"mime_type\":\"application/x-nonexistent\"}]";
    const batch_json = try kreuzberg.batch_extract_bytes_sync(batch_request, "{}");
    // NOTE(review): the result is discarded without being freed — confirm ownership.
    _ = batch_json;
}
|
||||
|
||||
test "batch_extract_bytes_happy" {
    // Submits a two-item batch (text/plain and text/html payloads) and checks
    // that the returned value is non-empty.
    suppress_abort();

    const batch_request = "[{\"content\":[72,101,108,108,111,44,32,119,111,114,108,100,33],\"mime_type\":\"text/plain\"},{\"content\":[60,104,116,109,108,62,60,98,111,100,121,62,84,101,115,116,60,47,98,111,100,121,62,60,47,104,116,109,108,62],\"mime_type\":\"text/html\"}]";
    const batch_json = try kreuzberg.batch_extract_bytes_sync(batch_request, "{}");
    // NOTE(review): the result is never freed — confirm ownership.
    try testing.expect(batch_json.len > 0);
}
|
||||
|
||||
test "batch_extract_bytes_mixed_format" {
    // Submits a one-item batch with an unknown MIME type; the test only
    // requires the batch call itself to return without an error.
    suppress_abort();

    const batch_request = "[{\"content\":[80,68,70,32,112,108,97,99,101,104,111,108,100,101,114],\"mime_type\":\"application/x-unknown\"}]";
    const batch_json = try kreuzberg.batch_extract_bytes_sync(batch_request, "{}");
    // NOTE(review): the result is discarded without being freed — confirm ownership.
    _ = batch_json;
}
|
||||
|
||||
test "batch_extract_bytes_sync_empty_list" {
    // An empty batch must produce a JSON array with no elements.
    suppress_abort();

    const batch_json = try kreuzberg.batch_extract_bytes_sync("[]", "{}");
    // NOTE(review): `batch_json` itself is never freed — confirm ownership.

    var parsed_batch = try std.json.parseFromSlice(std.json.Value, std.heap.c_allocator, batch_json, .{});
    defer parsed_batch.deinit();

    const entries = parsed_batch.value.array.items;
    try testing.expectEqual(0, entries.len);
}
|
||||
|
||||
test "batch_extract_bytes_sync_invalid_mime" {
    // Submits a one-item batch with an unknown MIME type; the test only
    // requires the batch call itself to return without an error.
    suppress_abort();

    const batch_request = "[{\"content\":[100,97,116,97],\"mime_type\":\"application/x-unknown\"}]";
    const batch_json = try kreuzberg.batch_extract_bytes_sync(batch_request, "{}");
    // NOTE(review): the result is discarded without being freed — confirm ownership.
    _ = batch_json;
}
|
||||
|
||||
test "batch_file_async_basic" {
    // Runs a file batch over a PDF and a text fixture (paths relative to the
    // working directory) and requires the call to return without an error.
    suppress_abort();

    const file_batch = "[{\"path\":\"pdf/fake_memo.pdf\"},{\"path\":\"text/fake_text.txt\"}]";
    const batch_json = try kreuzberg.batch_extract_files_sync(file_batch, "{}");
    // NOTE(review): the result is discarded without being freed — confirm ownership.
    _ = batch_json;
}
|
||||
|
||||
test "batch_file_async_not_found" {
    // Runs a file batch whose only path does not exist; the test only requires
    // the batch call itself to return without an error.
    suppress_abort();

    const file_batch = "[{\"path\":\"/nonexistent/a.pdf\"}]";
    const batch_json = try kreuzberg.batch_extract_files_sync(file_batch, "{}");
    // NOTE(review): the result is discarded without being freed — confirm ownership.
    _ = batch_json;
}
|
||||
|
||||
test "batch_file_not_found" {
    // Runs a file batch in which both paths do not exist; the test only
    // requires the batch call itself to return without an error.
    suppress_abort();

    const file_batch = "[{\"path\":\"/nonexistent/a.pdf\"},{\"path\":\"/nonexistent/b.txt\"}]";
    const batch_json = try kreuzberg.batch_extract_files_sync(file_batch, "{}");
    // NOTE(review): the result is discarded without being freed — confirm ownership.
    _ = batch_json;
}
|
||||
|
||||
test "batch_file_partial" {
    // Runs a file batch mixing an existing text fixture with a missing path;
    // the test only requires the batch call itself to return without an error.
    suppress_abort();

    const file_batch = "[{\"path\":\"text/plain.txt\"},{\"path\":\"/nonexistent/missing.pdf\"}]";
    const batch_json = try kreuzberg.batch_extract_files_sync(file_batch, "{}");
    // NOTE(review): the result is discarded without being freed — confirm ownership.
    _ = batch_json;
}
|
||||
|
||||
test "batch_file_sync_basic" {
    // Runs a file batch over a PDF and a text fixture (paths relative to the
    // working directory) and requires the call to return without an error.
    suppress_abort();

    const file_batch = "[{\"path\":\"pdf/fake_memo.pdf\"},{\"path\":\"text/fake_text.txt\"}]";
    const batch_json = try kreuzberg.batch_extract_files_sync(file_batch, "{}");
    // NOTE(review): the result is discarded without being freed — confirm ownership.
    _ = batch_json;
}
|
||||
49
e2e/zig/src/code_test.zig
generated
Normal file
49
e2e/zig/src/code_test.zig
generated
Normal file
@@ -0,0 +1,49 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
const std = @import("std");
|
||||
const testing = std.testing;
|
||||
const kreuzberg = @import("kreuzberg");
|
||||
|
||||
// Suppress C++ global destructor aborts that break zig's --listen=- IPC
|
||||
// libc `signal`, declared with the handler as a raw integer so SIG_IGN can be passed directly.
extern "c" fn signal(sig: i32, handler: usize) usize;

// Set once the SIGABRT disposition has been installed by `suppress_abort`.
var _abort_handler_installed: bool = false;

/// Set SIGABRT to "ignore" via libc `signal`, at most once per process.
/// Subsequent calls return immediately.
fn suppress_abort() void {
    if (_abort_handler_installed) return;

    const sigabrt: i32 = 6; // SIGABRT on POSIX
    const sig_ign: usize = 1; // SIG_IGN
    _ = signal(sigabrt, sig_ign);
    _abort_handler_installed = true;
}
|
||||
|
||||
// E2e tests for category: code
|
||||
|
||||
test "code_shebang_detection" {
    // Extracts a shell script as source code and checks the reported MIME type,
    // a minimum content length, and that the content mentions "build" and "clean".
    suppress_abort();

    var debug_allocator: std.heap.DebugAllocator(.{}) = .init;
    defer _ = debug_allocator.deinit();
    const json_allocator = debug_allocator.allocator();

    // The JSON result buffer is released with the C allocator.
    const raw_json = try kreuzberg.extract_file_sync("code/script.sh", "text/x-source-code", "{}");
    defer std.heap.c_allocator.free(raw_json);

    var document = try std.json.parseFromSlice(std.json.Value, json_allocator, raw_json, .{});
    defer document.deinit();
    const fields = document.value.object;

    const mime_type = std.mem.trim(u8, fields.get("mime_type").?.string, " \n\r\t");
    try testing.expectEqualStrings("text/x-source-code", mime_type);
    try testing.expect(fields.get("content").?.string.len >= 10);

    for ([_][]const u8{ "build", "clean" }) |needle| {
        const content_value = fields.get("content").?;
        // Search the string directly; any other JSON value is serialized first
        // and the temporary serialization is freed afterwards.
        const haystack = if (content_value == .string)
            content_value.string
        else
            try std.json.Stringify.valueAlloc(std.heap.c_allocator, content_value, .{});
        defer if (content_value != .string) std.heap.c_allocator.free(haystack);

        try testing.expect(std.mem.indexOf(u8, haystack, needle) != null);
    }
}
|
||||
346
e2e/zig/src/contract_test.zig
generated
Normal file
346
e2e/zig/src/contract_test.zig
generated
Normal file
@@ -0,0 +1,346 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
const std = @import("std");
|
||||
const testing = std.testing;
|
||||
const kreuzberg = @import("kreuzberg");
|
||||
|
||||
// Suppress C++ global destructor aborts that break zig's --listen=- IPC
|
||||
// libc `signal`, declared with the handler as a raw integer so SIG_IGN can be passed directly.
extern "c" fn signal(sig: i32, handler: usize) usize;

// Set once the SIGABRT disposition has been installed by `suppress_abort`.
var _abort_handler_installed: bool = false;

/// Set SIGABRT to "ignore" via libc `signal`, at most once per process.
/// Subsequent calls return immediately.
fn suppress_abort() void {
    if (_abort_handler_installed) return;

    const sigabrt: i32 = 6; // SIGABRT on POSIX
    const sig_ign: usize = 1; // SIG_IGN
    _ = signal(sigabrt, sig_ign);
    _abort_handler_installed = true;
}
|
||||
|
||||
// E2e tests for category: contract
|
||||
|
||||
test "api_batch_bytes_async" {
    // Contract check for the batch-bytes category.
    // NOTE(review): despite the test name, this body calls `extract_file_sync`
    // on a single PDF fixture.
    suppress_abort();

    var debug_allocator: std.heap.DebugAllocator(.{}) = .init;
    defer _ = debug_allocator.deinit();
    const json_allocator = debug_allocator.allocator();

    // The JSON result buffer is released with the C allocator.
    const raw_json = try kreuzberg.extract_file_sync("pdf/fake_memo.pdf", null, "{}");
    defer std.heap.c_allocator.free(raw_json);

    var document = try std.json.parseFromSlice(std.json.Value, json_allocator, raw_json, .{});
    defer document.deinit();
    const fields = document.value.object;

    const mime_type = std.mem.trim(u8, fields.get("mime_type").?.string, " \n\r\t");
    try testing.expectEqualStrings("application/pdf", mime_type);

    const content = fields.get("content").?.string;
    try testing.expect(content.len >= 10);

    // Either marker is enough to show the memo text was extracted.
    const mentions_date = std.mem.indexOf(u8, content, "May 5, 2023") != null;
    const mentions_name = std.mem.indexOf(u8, content, "Mallori") != null;
    try testing.expect(mentions_date or mentions_name);
}
|
||||
|
||||
test "api_batch_bytes_with_configs_async" {
    // Contract check for the batch-bytes-with-configs category, using a
    // markdown output-format config.
    // NOTE(review): despite the test name, this body calls `extract_file_sync`
    // on a single PDF fixture.
    suppress_abort();

    var debug_allocator: std.heap.DebugAllocator(.{}) = .init;
    defer _ = debug_allocator.deinit();
    const json_allocator = debug_allocator.allocator();

    // The JSON result buffer is released with the C allocator.
    const raw_json = try kreuzberg.extract_file_sync("pdf/fake_memo.pdf", null, "{\"output_format\":\"markdown\"}");
    defer std.heap.c_allocator.free(raw_json);

    var document = try std.json.parseFromSlice(std.json.Value, json_allocator, raw_json, .{});
    defer document.deinit();
    const fields = document.value.object;

    const mime_type = std.mem.trim(u8, fields.get("mime_type").?.string, " \n\r\t");
    try testing.expectEqualStrings("application/pdf", mime_type);

    const content = fields.get("content").?.string;
    try testing.expect(content.len >= 10);
    // skipped: field 'metadata.output_format' not available on result type
}
|
||||
|
||||
test "api_batch_file_async" {
    // Contract check for the batch-file category.
    // NOTE(review): despite the test name, this body calls `extract_file_sync`
    // on a single PDF fixture.
    suppress_abort();

    var debug_allocator: std.heap.DebugAllocator(.{}) = .init;
    defer _ = debug_allocator.deinit();
    const json_allocator = debug_allocator.allocator();

    // The JSON result buffer is released with the C allocator.
    const raw_json = try kreuzberg.extract_file_sync("pdf/fake_memo.pdf", null, "{}");
    defer std.heap.c_allocator.free(raw_json);

    var document = try std.json.parseFromSlice(std.json.Value, json_allocator, raw_json, .{});
    defer document.deinit();
    const fields = document.value.object;

    const mime_type = std.mem.trim(u8, fields.get("mime_type").?.string, " \n\r\t");
    try testing.expectEqualStrings("application/pdf", mime_type);

    const content = fields.get("content").?.string;
    try testing.expect(content.len >= 10);

    // Either marker is enough to show the memo text was extracted.
    const mentions_date = std.mem.indexOf(u8, content, "May 5, 2023") != null;
    const mentions_name = std.mem.indexOf(u8, content, "Mallori") != null;
    try testing.expect(mentions_date or mentions_name);
}
|
||||
|
||||
test "api_batch_file_with_configs_async" {
    // Contract check for the batch-file-with-configs category, using a
    // markdown output-format config.
    // NOTE(review): despite the test name, this body calls `extract_file_sync`
    // on a single PDF fixture.
    suppress_abort();

    var debug_allocator: std.heap.DebugAllocator(.{}) = .init;
    defer _ = debug_allocator.deinit();
    const json_allocator = debug_allocator.allocator();

    // The JSON result buffer is released with the C allocator.
    const raw_json = try kreuzberg.extract_file_sync("pdf/fake_memo.pdf", null, "{\"output_format\":\"markdown\"}");
    defer std.heap.c_allocator.free(raw_json);

    var document = try std.json.parseFromSlice(std.json.Value, json_allocator, raw_json, .{});
    defer document.deinit();
    const fields = document.value.object;

    const mime_type = std.mem.trim(u8, fields.get("mime_type").?.string, " \n\r\t");
    try testing.expectEqualStrings("application/pdf", mime_type);

    const content = fields.get("content").?.string;
    try testing.expect(content.len >= 10);
    // skipped: field 'metadata.output_format' not available on result type
}
|
||||
|
||||
test "api_extract_bytes_async" {
    // Contract check for the extract-bytes category.
    // NOTE(review): despite the test name, this body calls `extract_file_sync`
    // with a file path rather than a bytes API.
    suppress_abort();

    var debug_allocator: std.heap.DebugAllocator(.{}) = .init;
    defer _ = debug_allocator.deinit();
    const json_allocator = debug_allocator.allocator();

    // The JSON result buffer is released with the C allocator.
    const raw_json = try kreuzberg.extract_file_sync("pdf/fake_memo.pdf", null, "{}");
    defer std.heap.c_allocator.free(raw_json);

    var document = try std.json.parseFromSlice(std.json.Value, json_allocator, raw_json, .{});
    defer document.deinit();
    const fields = document.value.object;

    const mime_type = std.mem.trim(u8, fields.get("mime_type").?.string, " \n\r\t");
    try testing.expectEqualStrings("application/pdf", mime_type);

    const content = fields.get("content").?.string;
    try testing.expect(content.len >= 10);

    // Either marker is enough to show the memo text was extracted.
    const mentions_date = std.mem.indexOf(u8, content, "May 5, 2023") != null;
    const mentions_name = std.mem.indexOf(u8, content, "Mallori") != null;
    try testing.expect(mentions_date or mentions_name);
}
|
||||
|
||||
test "api_extract_file_async" {
    // Extracts a PDF fixture by path and checks the reported MIME type, a
    // minimum content length, and the presence of known memo text.
    suppress_abort();

    var debug_allocator: std.heap.DebugAllocator(.{}) = .init;
    defer _ = debug_allocator.deinit();
    const json_allocator = debug_allocator.allocator();

    // The JSON result buffer is released with the C allocator.
    const raw_json = try kreuzberg.extract_file_sync("pdf/fake_memo.pdf", null, "{}");
    defer std.heap.c_allocator.free(raw_json);

    var document = try std.json.parseFromSlice(std.json.Value, json_allocator, raw_json, .{});
    defer document.deinit();
    const fields = document.value.object;

    const mime_type = std.mem.trim(u8, fields.get("mime_type").?.string, " \n\r\t");
    try testing.expectEqualStrings("application/pdf", mime_type);

    const content = fields.get("content").?.string;
    try testing.expect(content.len >= 10);

    // Either marker is enough to show the memo text was extracted.
    const mentions_date = std.mem.indexOf(u8, content, "May 5, 2023") != null;
    const mentions_name = std.mem.indexOf(u8, content, "Mallori") != null;
    try testing.expect(mentions_date or mentions_name);
}
|
||||
|
||||
test "config_chunking_prepend_heading_context" {
    // Markdown chunking with prepend_heading_context: the result must carry
    // content and a non-empty "chunks" array whose entries all have text.
    suppress_abort();

    var debug_alloc: std.heap.DebugAllocator(.{}) = .init;
    defer _ = debug_alloc.deinit();

    const raw_json = try kreuzberg.extract_file_sync("markdown/extraction_test.md", null, "{\"chunking\":{\"chunker_type\":\"markdown\",\"max_chars\":300,\"max_overlap\":50,\"prepend_heading_context\":true}}");
    defer std.heap.c_allocator.free(raw_json);

    var parsed = try std.json.parseFromSlice(std.json.Value, debug_alloc.allocator(), raw_json, .{});
    defer parsed.deinit();
    const root = &parsed.value;

    try testing.expect(root.object.get("content").?.string.len >= 10);

    // True only when "chunks" exists, is a non-empty array, and every element
    // is an object holding a non-empty string under "content".
    const every_chunk_has_text = blk: {
        const chunks_val = root.object.get("chunks") orelse break :blk false;
        if (chunks_val != .array) break :blk false;
        const chunks = chunks_val.array.items;
        if (chunks.len == 0) break :blk false;
        for (chunks) |chunk| {
            if (chunk != .object) break :blk false;
            const text = chunk.object.get("content") orelse break :blk false;
            if (text != .string or text.string.len == 0) break :blk false;
        }
        break :blk true;
    };
    try testing.expect(every_chunk_has_text);

    // skipped: synthetic field 'chunks_have_heading_context' not derivable from JSON value alone
    // skipped: synthetic field 'first_chunk_starts_with_heading' not derivable from JSON value alone
}
|
||||
|
||||
test "config_document_structure_with_headings" {
    // DOCX extraction with include_document_structure enabled; only the MIME
    // type is asserted because the structure fields are skipped below.
    suppress_abort();

    var debug_alloc: std.heap.DebugAllocator(.{}) = .init;
    defer _ = debug_alloc.deinit();

    const raw_json = try kreuzberg.extract_file_sync("docx/fake.docx", null, "{\"include_document_structure\":true}");
    defer std.heap.c_allocator.free(raw_json);

    var parsed = try std.json.parseFromSlice(std.json.Value, debug_alloc.allocator(), raw_json, .{});
    defer parsed.deinit();
    const root = &parsed.value;

    const mime = std.mem.trim(u8, root.object.get("mime_type").?.string, " \n\r\t");
    try testing.expectEqualStrings("application/vnd.openxmlformats-officedocument.wordprocessingml.document", mime);

    // skipped: field 'document' not available on result type
    // skipped: field 'document.nodes' not available on result type
}
|
||||
|
||||
test "config_element_types" {
    // DOCX extraction with result_format=element_based; asserts that the
    // reported MIME type contains the DOCX media type.
    suppress_abort();

    var debug_alloc: std.heap.DebugAllocator(.{}) = .init;
    defer _ = debug_alloc.deinit();

    const raw_json = try kreuzberg.extract_file_sync("docx/unit_test_headers.docx", null, "{\"result_format\":\"element_based\"}");
    defer std.heap.c_allocator.free(raw_json);

    var parsed = try std.json.parseFromSlice(std.json.Value, debug_alloc.allocator(), raw_json, .{});
    defer parsed.deinit();
    const root = &parsed.value;

    const mime = root.object.get("mime_type").?.string;
    const is_docx = std.mem.indexOf(u8, mime, "application/vnd.openxmlformats-officedocument.wordprocessingml.document") != null;
    try testing.expect(is_docx);

    // skipped: field 'elements' not available on result type
}
|
||||
|
||||
test "config_extraction_timeout" {
    // A generous extraction_timeout_secs must be accepted and leave a fast
    // PDF extraction unaffected.
    suppress_abort();

    var debug_alloc: std.heap.DebugAllocator(.{}) = .init;
    defer _ = debug_alloc.deinit();

    const raw_json = try kreuzberg.extract_file_sync("pdf/fake_memo.pdf", null, "{\"extraction_timeout_secs\":300}");
    defer std.heap.c_allocator.free(raw_json);

    var parsed = try std.json.parseFromSlice(std.json.Value, debug_alloc.allocator(), raw_json, .{});
    defer parsed.deinit();
    const root = &parsed.value;

    const mime = std.mem.trim(u8, root.object.get("mime_type").?.string, " \n\r\t");
    try testing.expectEqualStrings("application/pdf", mime);

    const content = root.object.get("content").?.string;
    try testing.expect(content.len >= 10);
}
|
||||
|
||||
test "config_keywords" {
    // PDF extraction with YAKE keyword extraction configured; only MIME type
    // and content length are asserted, the keyword checks are skipped below.
    suppress_abort();

    var debug_alloc: std.heap.DebugAllocator(.{}) = .init;
    defer _ = debug_alloc.deinit();

    const raw_json = try kreuzberg.extract_file_sync("pdf/fake_memo.pdf", null, "{\"keywords\":{\"algorithm\":\"yake\",\"max_keywords\":10}}");
    defer std.heap.c_allocator.free(raw_json);

    var parsed = try std.json.parseFromSlice(std.json.Value, debug_alloc.allocator(), raw_json, .{});
    defer parsed.deinit();
    const root = &parsed.value;

    const mime = std.mem.trim(u8, root.object.get("mime_type").?.string, " \n\r\t");
    try testing.expectEqualStrings("application/pdf", mime);

    const content = root.object.get("content").?.string;
    try testing.expect(content.len >= 10);

    // skipped: field 'keywords' not available on JSON-struct ExtractionResult
    // skipped: field 'keywords' not available on JSON-struct ExtractionResult
}
|
||||
|
||||
test "config_pages" {
    // Page extraction with page markers enabled: the content must contain
    // the text "PAGE".
    suppress_abort();

    var debug_alloc: std.heap.DebugAllocator(.{}) = .init;
    defer _ = debug_alloc.deinit();

    const raw_json = try kreuzberg.extract_file_sync("pdf/fake_memo.pdf", null, "{\"pages\":{\"extract_pages\":true,\"insert_page_markers\":true}}");
    defer std.heap.c_allocator.free(raw_json);

    var parsed = try std.json.parseFromSlice(std.json.Value, debug_alloc.allocator(), raw_json, .{});
    defer parsed.deinit();
    const root = &parsed.value;

    const mime = std.mem.trim(u8, root.object.get("mime_type").?.string, " \n\r\t");
    try testing.expectEqualStrings("application/pdf", mime);

    const content = root.object.get("content").?.string;
    try testing.expect(content.len >= 10);

    const has_page_marker = std.mem.indexOf(u8, content, "PAGE") != null;
    try testing.expect(has_page_marker);
}
|
||||
|
||||
test "config_quality_enabled" {
    // PDF extraction with enable_quality_processing; only MIME type and
    // content length are asserted, the quality_score checks are skipped below.
    suppress_abort();

    var debug_alloc: std.heap.DebugAllocator(.{}) = .init;
    defer _ = debug_alloc.deinit();

    const raw_json = try kreuzberg.extract_file_sync("pdf/fake_memo.pdf", null, "{\"enable_quality_processing\":true}");
    defer std.heap.c_allocator.free(raw_json);

    var parsed = try std.json.parseFromSlice(std.json.Value, debug_alloc.allocator(), raw_json, .{});
    defer parsed.deinit();
    const root = &parsed.value;

    const mime = std.mem.trim(u8, root.object.get("mime_type").?.string, " \n\r\t");
    try testing.expectEqualStrings("application/pdf", mime);

    const content = root.object.get("content").?.string;
    try testing.expect(content.len >= 10);

    // skipped: field 'quality_score' not available on result type
    // skipped: field 'quality_score' not available on result type
    // skipped: field 'quality_score' not available on result type
}
|
||||
|
||||
test "config_security_limits" {
    // ZIP extraction under custom security limits: the MIME type must be one
    // of the two ZIP media types and some content must be produced.
    suppress_abort();

    var debug_alloc: std.heap.DebugAllocator(.{}) = .init;
    defer _ = debug_alloc.deinit();

    const raw_json = try kreuzberg.extract_file_sync("archives/documents.zip", null, "{\"security_limits\":{\"max_archive_size\":104857600,\"max_compression_ratio\":50,\"max_files_in_archive\":100}}");
    defer std.heap.c_allocator.free(raw_json);

    var parsed = try std.json.parseFromSlice(std.json.Value, debug_alloc.allocator(), raw_json, .{});
    defer parsed.deinit();
    const root = &parsed.value;

    const mime = root.object.get("mime_type").?.string;
    const is_zip = std.mem.indexOf(u8, mime, "application/zip") != null;
    const is_x_zip = std.mem.indexOf(u8, mime, "application/x-zip-compressed") != null;
    try testing.expect(is_zip or is_x_zip);

    const content = root.object.get("content").?.string;
    try testing.expect(content.len >= 10);
}
|
||||
|
||||
test "config_tree_sitter" {
    // Source-code extraction with a tree-sitter configuration: checks the
    // reported MIME type and that some content comes back.
    suppress_abort();

    var debug_alloc: std.heap.DebugAllocator(.{}) = .init;
    defer _ = debug_alloc.deinit();

    const raw_json = try kreuzberg.extract_file_sync("code/hello.py", null, "{\"tree_sitter\":{\"groups\":[\"web\"],\"languages\":[\"python\",\"rust\"],\"process\":{\"comments\":false,\"diagnostics\":false,\"docstrings\":false,\"exports\":true,\"imports\":true,\"structure\":true,\"symbols\":false}}}");
    defer std.heap.c_allocator.free(raw_json);

    var parsed = try std.json.parseFromSlice(std.json.Value, debug_alloc.allocator(), raw_json, .{});
    defer parsed.deinit();
    const root = &parsed.value;

    const mime = std.mem.trim(u8, root.object.get("mime_type").?.string, " \n\r\t");
    try testing.expectEqualStrings("text/x-source-code", mime);

    const content = root.object.get("content").?.string;
    try testing.expect(content.len >= 5);
}
|
||||
|
||||
test "output_format_bytes_markdown" {
    // In-memory PDF extraction with output_format=markdown; asserts MIME type
    // and a minimum content length.
    suppress_abort();

    var debug_alloc: std.heap.DebugAllocator(.{}) = .init;
    defer _ = debug_alloc.deinit();

    const pdf_bytes = try std.Io.Dir.cwd().readFileAlloc(std.testing.io, "pdf/fake_memo.pdf", std.heap.c_allocator, .unlimited);
    defer std.heap.c_allocator.free(pdf_bytes);

    const raw_json = try kreuzberg.extract_bytes_sync(pdf_bytes, "application/pdf", "{\"output_format\":\"markdown\"}");
    defer std.heap.c_allocator.free(raw_json);

    var parsed = try std.json.parseFromSlice(std.json.Value, debug_alloc.allocator(), raw_json, .{});
    defer parsed.deinit();
    const root = &parsed.value;

    const mime = std.mem.trim(u8, root.object.get("mime_type").?.string, " \n\r\t");
    try testing.expectEqualStrings("application/pdf", mime);

    const content = root.object.get("content").?.string;
    try testing.expect(content.len >= 10);

    // skipped: field 'metadata.output_format' not available on result type
}
|
||||
|
||||
test "output_format_markdown" {
    // File-path PDF extraction with output_format=markdown; asserts MIME type
    // and a minimum content length.
    suppress_abort();

    var debug_alloc: std.heap.DebugAllocator(.{}) = .init;
    defer _ = debug_alloc.deinit();

    const raw_json = try kreuzberg.extract_file_sync("pdf/fake_memo.pdf", null, "{\"output_format\":\"markdown\"}");
    defer std.heap.c_allocator.free(raw_json);

    var parsed = try std.json.parseFromSlice(std.json.Value, debug_alloc.allocator(), raw_json, .{});
    defer parsed.deinit();
    const root = &parsed.value;

    const mime = std.mem.trim(u8, root.object.get("mime_type").?.string, " \n\r\t");
    try testing.expectEqualStrings("application/pdf", mime);

    const content = root.object.get("content").?.string;
    try testing.expect(content.len >= 10);

    // skipped: field 'metadata.output_format' not available on result type
}
|
||||
55
e2e/zig/src/detection_test.zig
generated
Normal file
55
e2e/zig/src/detection_test.zig
generated
Normal file
@@ -0,0 +1,55 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
const std = @import("std");
|
||||
const testing = std.testing;
|
||||
const kreuzberg = @import("kreuzberg");
|
||||
|
||||
// Suppress C++ global destructor aborts that break zig's --listen=- IPC
extern "c" fn signal(sig: i32, handler: usize) usize;
var _abort_handler_installed: bool = false;

/// Sets the SIGABRT disposition to SIG_IGN through libc `signal`.
/// Only the first call changes anything; later calls return immediately.
fn suppress_abort() void {
    if (_abort_handler_installed) return;
    // SIGABRT is 6 on POSIX systems but 22 in the Windows C runtime, so the
    // number is chosen per target. SIG_IGN is the handler value 1 on both.
    const sigabrt: i32 = if (@import("builtin").os.tag == .windows) 22 else 6;
    const sig_ign: usize = 1;
    _ = signal(sigabrt, sig_ign);
    _abort_handler_installed = true;
}
|
||||
|
||||
// E2e tests for category: detection
|
||||
|
||||
test "detect_mime_bytes_html" {
    // MIME detection on HTML bytes must succeed; the detected value itself
    // is not asserted.
    // NOTE(review): the detection result is discarded without being freed —
    // confirm who owns it.
    suppress_abort();

    const html_bytes = try std.Io.Dir.cwd().readFileAlloc(std.testing.io, "html/html.html", std.heap.c_allocator, .unlimited);
    defer std.heap.c_allocator.free(html_bytes);

    const detected = try kreuzberg.detect_mime_type_from_bytes(html_bytes);
    _ = detected;
}
|
||||
|
||||
test "detect_mime_bytes_pdf" {
    // MIME detection on PDF bytes must succeed; the detected value itself
    // is not asserted.
    // NOTE(review): the detection result is discarded without being freed —
    // confirm who owns it.
    suppress_abort();

    const pdf_bytes = try std.Io.Dir.cwd().readFileAlloc(std.testing.io, "pdf/fake_memo.pdf", std.heap.c_allocator, .unlimited);
    defer std.heap.c_allocator.free(pdf_bytes);

    const detected = try kreuzberg.detect_mime_type_from_bytes(pdf_bytes);
    _ = detected;
}
|
||||
|
||||
test "detect_mime_bytes_png" {
    // MIME detection on PNG bytes must succeed; the detected value itself
    // is not asserted.
    // NOTE(review): the detection result is discarded without being freed —
    // confirm who owns it.
    suppress_abort();

    const png_bytes = try std.Io.Dir.cwd().readFileAlloc(std.testing.io, "images/test_hello_world.png", std.heap.c_allocator, .unlimited);
    defer std.heap.c_allocator.free(png_bytes);

    const detected = try kreuzberg.detect_mime_type_from_bytes(png_bytes);
    _ = detected;
}
|
||||
|
||||
test "get_extensions_unknown_mime" {
    // Looks up extensions for an unknown MIME type. Both outcomes pass: an
    // error ends the test early, a successful result is simply discarded.
    suppress_abort();

    const extensions = kreuzberg.get_extensions_for_mime("application/x-totally-unknown") catch return;
    _ = extensions;
}
|
||||
33
e2e/zig/src/document_extractor_management_test.zig
generated
Normal file
33
e2e/zig/src/document_extractor_management_test.zig
generated
Normal file
@@ -0,0 +1,33 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
const std = @import("std");
|
||||
const testing = std.testing;
|
||||
const kreuzberg = @import("kreuzberg");
|
||||
|
||||
// Suppress C++ global destructor aborts that break zig's --listen=- IPC
extern "c" fn signal(sig: i32, handler: usize) usize;
var _abort_handler_installed: bool = false;

/// Sets the SIGABRT disposition to SIG_IGN through libc `signal`.
/// Only the first call changes anything; later calls return immediately.
fn suppress_abort() void {
    if (_abort_handler_installed) return;
    // SIGABRT is 6 on POSIX systems but 22 in the Windows C runtime, so the
    // number is chosen per target. SIG_IGN is the handler value 1 on both.
    const sigabrt: i32 = if (@import("builtin").os.tag == .windows) 22 else 6;
    const sig_ign: usize = 1;
    _ = signal(sigabrt, sig_ign);
    _abort_handler_installed = true;
}
|
||||
|
||||
// E2e tests for category: document_extractor_management
|
||||
|
||||
test "document_extractors_clear" {
    // Clearing the document extractors must succeed. The "list is empty"
    // follow-up named in the fixture is not checked here.
    suppress_abort();

    const outcome = try kreuzberg.clear_document_extractors();
    _ = outcome;
}
|
||||
|
||||
test "extractors_list" {
    // Listing the registered document extractors must succeed; the returned
    // list is not inspected.
    suppress_abort();

    const extractors = try kreuzberg.list_document_extractors();
    _ = extractors;
}
|
||||
39
e2e/zig/src/embed_async_pending_test.zig
generated
Normal file
39
e2e/zig/src/embed_async_pending_test.zig
generated
Normal file
@@ -0,0 +1,39 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
const std = @import("std");
|
||||
const testing = std.testing;
|
||||
const kreuzberg = @import("kreuzberg");
|
||||
|
||||
// Suppress C++ global destructor aborts that break zig's --listen=- IPC
extern "c" fn signal(sig: i32, handler: usize) usize;
var _abort_handler_installed: bool = false;

/// Sets the SIGABRT disposition to SIG_IGN through libc `signal`.
/// Only the first call changes anything; later calls return immediately.
fn suppress_abort() void {
    if (_abort_handler_installed) return;
    // SIGABRT is 6 on POSIX systems but 22 in the Windows C runtime, so the
    // number is chosen per target. SIG_IGN is the handler value 1 on both.
    const sigabrt: i32 = if (@import("builtin").os.tag == .windows) 22 else 6;
    const sig_ign: usize = 1;
    _ = signal(sigabrt, sig_ign);
    _abort_handler_installed = true;
}
|
||||
|
||||
// E2e tests for category: embed_async_pending
|
||||
|
||||
test "embed_texts_async_empty_input" {
    // embed_texts_async with an empty text list must not return an error.
    // NOTE(review): the result is discarded without being freed — confirm
    // who owns it.
    suppress_abort();

    const embeddings = try kreuzberg.embed_texts_async("[]", "{}");
    _ = embeddings;
}
|
||||
|
||||
test "embed_texts_async_happy" {
    // embed_texts_async with two texts and the default config must succeed.
    // NOTE(review): the result is discarded without being freed — confirm
    // who owns it.
    suppress_abort();

    const embeddings = try kreuzberg.embed_texts_async("[\"First\",\"Second\"]", "{}");
    _ = embeddings;
}
|
||||
|
||||
test "embed_texts_async_preset_switch" {
    // embed_texts_async with the "balanced" preset selected must succeed.
    // NOTE(review): the result is discarded without being freed — confirm
    // who owns it.
    suppress_abort();

    const embeddings = try kreuzberg.embed_texts_async("[\"Text\"]", "{\"model\":{\"name\":\"balanced\",\"type\":\"preset\"}}");
    _ = embeddings;
}
|
||||
28
e2e/zig/src/embed_extra_test.zig
generated
Normal file
28
e2e/zig/src/embed_extra_test.zig
generated
Normal file
@@ -0,0 +1,28 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
const std = @import("std");
|
||||
const testing = std.testing;
|
||||
const kreuzberg = @import("kreuzberg");
|
||||
|
||||
// Suppress C++ global destructor aborts that break zig's --listen=- IPC
extern "c" fn signal(sig: i32, handler: usize) usize;
var _abort_handler_installed: bool = false;

/// Sets the SIGABRT disposition to SIG_IGN through libc `signal`.
/// Only the first call changes anything; later calls return immediately.
fn suppress_abort() void {
    if (_abort_handler_installed) return;
    // SIGABRT is 6 on POSIX systems but 22 in the Windows C runtime, so the
    // number is chosen per target. SIG_IGN is the handler value 1 on both.
    const sigabrt: i32 = if (@import("builtin").os.tag == .windows) 22 else 6;
    const sig_ign: usize = 1;
    _ = signal(sigabrt, sig_ign);
    _abort_handler_installed = true;
}
|
||||
|
||||
// E2e tests for category: embed_extra
|
||||
|
||||
test "embed_texts_batch" {
    // Embedding two texts with the "balanced" preset must succeed; the
    // returned buffer is released right away and not inspected.
    suppress_abort();

    const raw_json = try kreuzberg.embed_texts("[\"Hello\",\"World\"]", "{\"model\":{\"name\":\"balanced\",\"type\":\"preset\"}}");
    std.heap.c_allocator.free(raw_json);
}
|
||||
33
e2e/zig/src/embedding_backend_management_test.zig
generated
Normal file
33
e2e/zig/src/embedding_backend_management_test.zig
generated
Normal file
@@ -0,0 +1,33 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
const std = @import("std");
|
||||
const testing = std.testing;
|
||||
const kreuzberg = @import("kreuzberg");
|
||||
|
||||
// Suppress C++ global destructor aborts that break zig's --listen=- IPC
extern "c" fn signal(sig: i32, handler: usize) usize;
var _abort_handler_installed: bool = false;

/// Sets the SIGABRT disposition to SIG_IGN through libc `signal`.
/// Only the first call changes anything; later calls return immediately.
fn suppress_abort() void {
    if (_abort_handler_installed) return;
    // SIGABRT is 6 on POSIX systems but 22 in the Windows C runtime, so the
    // number is chosen per target. SIG_IGN is the handler value 1 on both.
    const sigabrt: i32 = if (@import("builtin").os.tag == .windows) 22 else 6;
    const sig_ign: usize = 1;
    _ = signal(sigabrt, sig_ign);
    _abort_handler_installed = true;
}
|
||||
|
||||
// E2e tests for category: embedding_backend_management
|
||||
|
||||
test "embedding_backends_clear" {
    // Clearing the embedding backends must succeed. The "list is empty"
    // follow-up named in the fixture is not checked here.
    suppress_abort();

    const outcome = try kreuzberg.clear_embedding_backends();
    _ = outcome;
}
|
||||
|
||||
test "embedding_backends_list" {
    // Listing the registered embedding backends must succeed; the returned
    // list is not inspected.
    suppress_abort();

    const backends = try kreuzberg.list_embedding_backends();
    _ = backends;
}
|
||||
54
e2e/zig/src/embeddings_test.zig
generated
Normal file
54
e2e/zig/src/embeddings_test.zig
generated
Normal file
@@ -0,0 +1,54 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
const std = @import("std");
|
||||
const testing = std.testing;
|
||||
const kreuzberg = @import("kreuzberg");
|
||||
|
||||
// Suppress C++ global destructor aborts that break zig's --listen=- IPC
extern "c" fn signal(sig: i32, handler: usize) usize;
var _abort_handler_installed: bool = false;

/// Sets the SIGABRT disposition to SIG_IGN through libc `signal`.
/// Only the first call changes anything; later calls return immediately.
fn suppress_abort() void {
    if (_abort_handler_installed) return;
    // SIGABRT is 6 on POSIX systems but 22 in the Windows C runtime, so the
    // number is chosen per target. SIG_IGN is the handler value 1 on both.
    const sigabrt: i32 = if (@import("builtin").os.tag == .windows) 22 else 6;
    const sig_ign: usize = 1;
    _ = signal(sigabrt, sig_ign);
    _abort_handler_installed = true;
}
|
||||
|
||||
// E2e tests for category: embeddings
|
||||
|
||||
test "embed_texts_different_preset" {
    // Embedding with the "multilingual" preset must succeed; the returned
    // buffer is released right away and not inspected.
    suppress_abort();

    const raw_json = try kreuzberg.embed_texts("[\"Hello world\",\"Test\"]", "{\"model\":{\"name\":\"multilingual\",\"type\":\"preset\"}}");
    std.heap.c_allocator.free(raw_json);
}
|
||||
|
||||
test "get_embedding_preset_known" {
    // Looking up the "balanced" preset must not return an error; the preset
    // value is not inspected.
    suppress_abort();

    const preset = try kreuzberg.get_embedding_preset("balanced");
    _ = preset;
}
|
||||
|
||||
test "get_embedding_preset_nominal" {
    // Nominal lookup of the "balanced" preset: only the absence of an error
    // is checked.
    suppress_abort();

    const preset = try kreuzberg.get_embedding_preset("balanced");
    _ = preset;
}
|
||||
|
||||
test "get_embedding_preset_unknown" {
    // An unknown preset name must come back as null rather than as an error.
    suppress_abort();

    const missing_preset = try kreuzberg.get_embedding_preset("nonexistent-xyz");
    const is_absent = missing_preset == null;
    try testing.expect(is_absent);
}
|
||||
|
||||
test "list_embedding_presets_sanity" {
    // The preset listing must come back non-empty.
    suppress_abort();

    const presets = try kreuzberg.list_embedding_presets();
    const has_presets = presets.len > 0;
    try testing.expect(has_presets);
}
|
||||
78
e2e/zig/src/error_test.zig
generated
Normal file
78
e2e/zig/src/error_test.zig
generated
Normal file
@@ -0,0 +1,78 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
const std = @import("std");
|
||||
const testing = std.testing;
|
||||
const kreuzberg = @import("kreuzberg");
|
||||
|
||||
// Suppress C++ global destructor aborts that break zig's --listen=- IPC
extern "c" fn signal(sig: i32, handler: usize) usize;
var _abort_handler_installed: bool = false;

/// Sets the SIGABRT disposition to SIG_IGN through libc `signal`.
/// Only the first call changes anything; later calls return immediately.
fn suppress_abort() void {
    if (_abort_handler_installed) return;
    // SIGABRT is 6 on POSIX systems but 22 in the Windows C runtime, so the
    // number is chosen per target. SIG_IGN is the handler value 1 on both.
    const sigabrt: i32 = if (@import("builtin").os.tag == .windows) 22 else 6;
    const sig_ign: usize = 1;
    _ = signal(sigabrt, sig_ign);
    _abort_handler_installed = true;
}
|
||||
|
||||
// E2e tests for category: error
|
||||
|
||||
test "error_empty_bytes" {
    // An empty text file must extract without an error; the result buffer is
    // released right away and not inspected.
    suppress_abort();

    const empty_input = try std.Io.Dir.cwd().readFileAlloc(std.testing.io, "text/empty.txt", std.heap.c_allocator, .unlimited);
    defer std.heap.c_allocator.free(empty_input);

    const raw_json = try kreuzberg.extract_bytes_sync(empty_input, "text/plain", "{}");
    std.heap.c_allocator.free(raw_json);
}
|
||||
|
||||
test "error_empty_mime" {
    // Error when extracting with empty MIME type
    suppress_abort();
    const content_bytes = try std.Io.Dir.cwd().readFileAlloc(std.testing.io, "text/plain.txt", std.heap.c_allocator, .unlimited);
    defer std.heap.c_allocator.free(content_bytes);

    if (kreuzberg.extract_bytes_sync(content_bytes, "", "{}")) |unexpected_json| {
        // A successful extraction means the expected error never happened:
        // release the buffer so it does not leak, then fail the test.
        std.heap.c_allocator.free(unexpected_json);
        return error.TestExpectedError;
    } else |_| {
        // Error occurred as expected
    }
}
|
||||
|
||||
test "error_extract_bytes_conflicting_ocr" {
    // extract_bytes force+disable OCR
    //
    // Setting disable_ocr and force_ocr together is contradictory, so the
    // extraction is expected to be rejected.
    suppress_abort();
    const content_bytes = try std.Io.Dir.cwd().readFileAlloc(std.testing.io, "text/fake_text.txt", std.heap.c_allocator, .unlimited);
    defer std.heap.c_allocator.free(content_bytes);

    if (kreuzberg.extract_bytes_sync(content_bytes, "text/plain", "{\"disable_ocr\":true,\"force_ocr\":true}")) |unexpected_json| {
        // A successful extraction means the expected error never happened:
        // release the buffer so it does not leak, then fail the test.
        std.heap.c_allocator.free(unexpected_json);
        return error.TestExpectedError;
    } else |_| {
        // Error occurred as expected
    }
}
|
||||
|
||||
test "error_invalid_mime_format" {
    // Error when extracting with invalid MIME type format
    suppress_abort();
    const content_bytes = try std.Io.Dir.cwd().readFileAlloc(std.testing.io, "text/plain.txt", std.heap.c_allocator, .unlimited);
    defer std.heap.c_allocator.free(content_bytes);

    if (kreuzberg.extract_bytes_sync(content_bytes, "not-a-mime", "{}")) |unexpected_json| {
        // A successful extraction means the expected error never happened:
        // release the buffer so it does not leak, then fail the test.
        std.heap.c_allocator.free(unexpected_json);
        return error.TestExpectedError;
    } else |_| {
        // Error occurred as expected
    }
}
|
||||
|
||||
test "error_unsupported_mime" {
    // Error when extracting with unsupported MIME type
    suppress_abort();
    const content_bytes = try std.Io.Dir.cwd().readFileAlloc(std.testing.io, "text/plain.txt", std.heap.c_allocator, .unlimited);
    defer std.heap.c_allocator.free(content_bytes);

    if (kreuzberg.extract_bytes_sync(content_bytes, "application/x-nonexistent", "{}")) |unexpected_json| {
        // A successful extraction means the expected error never happened:
        // release the buffer so it does not leak, then fail the test.
        std.heap.c_allocator.free(unexpected_json);
        return error.TestExpectedError;
    } else |_| {
        // Error occurred as expected
    }
}
|
||||
96
e2e/zig/src/format_specific_test.zig
generated
Normal file
96
e2e/zig/src/format_specific_test.zig
generated
Normal file
@@ -0,0 +1,96 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
const std = @import("std");
|
||||
const testing = std.testing;
|
||||
const kreuzberg = @import("kreuzberg");
|
||||
|
||||
// Suppress C++ global destructor aborts that break zig's --listen=- IPC
extern "c" fn signal(sig: i32, handler: usize) usize;
var _abort_handler_installed: bool = false;

/// Sets the SIGABRT disposition to SIG_IGN through libc `signal`.
/// Only the first call changes anything; later calls return immediately.
fn suppress_abort() void {
    if (_abort_handler_installed) return;
    // SIGABRT is 6 on POSIX systems but 22 in the Windows C runtime, so the
    // number is chosen per target. SIG_IGN is the handler value 1 on both.
    const sigabrt: i32 = if (@import("builtin").os.tag == .windows) 22 else 6;
    const sig_ign: usize = 1;
    _ = signal(sigabrt, sig_ign);
    _abort_handler_installed = true;
}
|
||||
|
||||
// E2e tests for category: format_specific
|
||||
|
||||
test "format_docx_standalone" {
    // In-memory DOCX extraction: the content must be at least 20 bytes long.
    suppress_abort();

    var debug_alloc: std.heap.DebugAllocator(.{}) = .init;
    defer _ = debug_alloc.deinit();

    const docx_bytes = try std.Io.Dir.cwd().readFileAlloc(std.testing.io, "docx/fake.docx", std.heap.c_allocator, .unlimited);
    defer std.heap.c_allocator.free(docx_bytes);

    const raw_json = try kreuzberg.extract_bytes_sync(docx_bytes, "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "{}");
    defer std.heap.c_allocator.free(raw_json);

    var parsed = try std.json.parseFromSlice(std.json.Value, debug_alloc.allocator(), raw_json, .{});
    defer parsed.deinit();
    const root = &parsed.value;

    const content = root.object.get("content").?.string;
    try testing.expect(content.len >= 20);
}
|
||||
|
||||
test "format_hwpx_standalone" {
    // In-memory HWPX extraction: the content must be at least 20 bytes long
    // and contain the fixture's greeting.
    suppress_abort();

    var debug_alloc: std.heap.DebugAllocator(.{}) = .init;
    defer _ = debug_alloc.deinit();

    const hwpx_bytes = try std.Io.Dir.cwd().readFileAlloc(std.testing.io, "hwpx/simple.hwpx", std.heap.c_allocator, .unlimited);
    defer std.heap.c_allocator.free(hwpx_bytes);

    const raw_json = try kreuzberg.extract_bytes_sync(hwpx_bytes, "application/haansofthwpx", "{}");
    defer std.heap.c_allocator.free(raw_json);

    var parsed = try std.json.parseFromSlice(std.json.Value, debug_alloc.allocator(), raw_json, .{});
    defer parsed.deinit();
    const root = &parsed.value;

    try testing.expect(root.object.get("content").?.string.len >= 20);

    // Search the string directly when "content" is a JSON string; any other
    // JSON value is serialized first and the temporary text is freed afterwards.
    const content_val = root.object.get("content").?;
    switch (content_val) {
        .string => |text| try testing.expect(std.mem.indexOf(u8, text, "Hello from HWPX") != null),
        else => {
            const rendered = try std.json.Stringify.valueAlloc(std.heap.c_allocator, content_val, .{});
            defer std.heap.c_allocator.free(rendered);
            try testing.expect(std.mem.indexOf(u8, rendered, "Hello from HWPX") != null);
        },
    }
}
|
||||
|
||||
test "format_pdf_text" {
    // In-memory PDF extraction: the content must be at least 50 bytes long
    // and mention either the author's name or the month from the memo.
    suppress_abort();

    var debug_alloc: std.heap.DebugAllocator(.{}) = .init;
    defer _ = debug_alloc.deinit();

    const pdf_bytes = try std.Io.Dir.cwd().readFileAlloc(std.testing.io, "pdf/fake_memo.pdf", std.heap.c_allocator, .unlimited);
    defer std.heap.c_allocator.free(pdf_bytes);

    const raw_json = try kreuzberg.extract_bytes_sync(pdf_bytes, "application/pdf", "{}");
    defer std.heap.c_allocator.free(raw_json);

    var parsed = try std.json.parseFromSlice(std.json.Value, debug_alloc.allocator(), raw_json, .{});
    defer parsed.deinit();
    const root = &parsed.value;

    const content = root.object.get("content").?.string;
    try testing.expect(content.len >= 50);

    const has_name = std.mem.indexOf(u8, content, "Mallori") != null;
    const has_month = std.mem.indexOf(u8, content, "May") != null;
    try testing.expect(has_name or has_month);
}
|
||||
|
||||
test "format_pptx" {
    // PPTX extraction by file path with an explicit MIME type must succeed;
    // the result buffer is released right away and not inspected.
    suppress_abort();

    const raw_json = try kreuzberg.extract_file_sync("pptx/simple.pptx", "application/vnd.openxmlformats-officedocument.presentationml.presentation", "{}");
    std.heap.c_allocator.free(raw_json);
}
|
||||
|
||||
test "format_xlsx" {
    // XLSX extraction by file path with an explicit MIME type must succeed;
    // the result buffer is released right away and not inspected.
    suppress_abort();

    const raw_json = try kreuzberg.extract_file_sync("xlsx/stanley_cups.xlsx", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "{}");
    std.heap.c_allocator.free(raw_json);
}
|
||||
46
e2e/zig/src/mime_utilities_test.zig
generated
Normal file
46
e2e/zig/src/mime_utilities_test.zig
generated
Normal file
@@ -0,0 +1,46 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
const std = @import("std");
|
||||
const testing = std.testing;
|
||||
const kreuzberg = @import("kreuzberg");
|
||||
|
||||
// Suppress C++ global destructor aborts that break zig's --listen=- IPC
extern "c" fn signal(sig: i32, handler: usize) usize;
var _abort_handler_installed: bool = false;

/// Sets the SIGABRT disposition to SIG_IGN through libc `signal`.
/// Only the first call changes anything; later calls return immediately.
fn suppress_abort() void {
    if (_abort_handler_installed) return;
    // SIGABRT is 6 on POSIX systems but 22 in the Windows C runtime, so the
    // number is chosen per target. SIG_IGN is the handler value 1 on both.
    const sigabrt: i32 = if (@import("builtin").os.tag == .windows) 22 else 6;
    const sig_ign: usize = 1;
    _ = signal(sigabrt, sig_ign);
    _abort_handler_installed = true;
}
|
||||
|
||||
// E2e tests for category: mime_utilities
|
||||
|
||||
test "mime_detect_bytes" {
    // Detect MIME type from file bytes: the type reported for the PDF fixture
    // must contain "pdf".
    suppress_abort();

    const fixture_bytes = try std.Io.Dir.cwd().readFileAlloc(std.testing.io, "pdf/fake_memo.pdf", std.heap.c_allocator, .unlimited);
    defer std.heap.c_allocator.free(fixture_bytes);

    // NOTE(review): the detected string is never released here — confirm who owns it.
    const detected = try kreuzberg.detect_mime_type_from_bytes(fixture_bytes);
    const pdf_at = std.mem.indexOf(u8, detected, "pdf");
    try testing.expect(pdf_at != null);
}
|
||||
|
||||
test "mime_detect_image" {
    // Detect MIME type from PNG image bytes: the reported type must contain "png".
    suppress_abort();

    const fixture_bytes = try std.Io.Dir.cwd().readFileAlloc(std.testing.io, "images/test_hello_world.png", std.heap.c_allocator, .unlimited);
    defer std.heap.c_allocator.free(fixture_bytes);

    // NOTE(review): the detected string is never released here — confirm who owns it.
    const detected = try kreuzberg.detect_mime_type_from_bytes(fixture_bytes);
    const png_at = std.mem.indexOf(u8, detected, "png");
    try testing.expect(png_at != null);
}
|
||||
|
||||
test "mime_get_extensions" {
    // Get file extensions for a MIME type: the answer for application/pdf
    // must contain "pdf".
    suppress_abort();

    const extensions = try kreuzberg.get_extensions_for_mime("application/pdf");
    const pdf_at = std.mem.indexOf(u8, extensions, "pdf");
    try testing.expect(pdf_at != null);
}
|
||||
39
e2e/zig/src/ocr_backend_management_test.zig
generated
Normal file
39
e2e/zig/src/ocr_backend_management_test.zig
generated
Normal file
@@ -0,0 +1,39 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
const std = @import("std");
|
||||
const testing = std.testing;
|
||||
const kreuzberg = @import("kreuzberg");
|
||||
|
||||
// C++ global destructors may abort, which breaks zig's --listen=- IPC.
// Ignoring SIGABRT keeps the test process alive in that case.
extern "c" fn signal(sig: i32, handler: usize) usize;
var _abort_handler_installed: bool = false;

/// Sets the SIGABRT disposition to "ignore" the first time it is called;
/// every later call returns immediately.
fn suppress_abort() void {
    if (_abort_handler_installed) return;

    const sigabrt: i32 = 6; // SIGABRT on POSIX
    const sig_ign: usize = 1; // SIG_IGN
    _ = signal(sigabrt, sig_ign);
    _abort_handler_installed = true;
}
|
||||
|
||||
// E2e tests for category: ocr_backend_management
|
||||
|
||||
test "ocr_backends_clear" {
    // Clear all OCR backends. Only the absence of an error is checked; the
    // backend list is not inspected afterwards.
    suppress_abort();

    const cleared = try kreuzberg.clear_ocr_backends();
    _ = cleared;
}
|
||||
|
||||
test "ocr_backends_list" {
    // List all registered OCR backends. The listing itself is discarded; the
    // test passes when the call returns without error.
    suppress_abort();

    const listed = try kreuzberg.list_ocr_backends();
    _ = listed;
}
|
||||
|
||||
test "ocr_backends_unregister" {
    // Unregistering an OCR backend name that does not exist must not return
    // an error.
    suppress_abort();

    const missing_backend = "nonexistent-backend-xyz";
    const outcome = try kreuzberg.unregister_ocr_backend(missing_backend);
    _ = outcome;
}
|
||||
42
e2e/zig/src/pdf_test.zig
generated
Normal file
42
e2e/zig/src/pdf_test.zig
generated
Normal file
@@ -0,0 +1,42 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
const std = @import("std");
|
||||
const testing = std.testing;
|
||||
const kreuzberg = @import("kreuzberg");
|
||||
|
||||
// C++ global destructors may abort, which breaks zig's --listen=- IPC.
// Ignoring SIGABRT keeps the test process alive in that case.
extern "c" fn signal(sig: i32, handler: usize) usize;
var _abort_handler_installed: bool = false;

/// Sets the SIGABRT disposition to "ignore" the first time it is called;
/// every later call returns immediately.
fn suppress_abort() void {
    if (_abort_handler_installed) return;

    const sigabrt: i32 = 6; // SIGABRT on POSIX
    const sig_ign: usize = 1; // SIG_IGN
    _ = signal(sigabrt, sig_ign);
    _abort_handler_installed = true;
}
|
||||
|
||||
// E2e tests for category: pdf
|
||||
|
||||
test "render_pdf_page_first" {
    // render_pdf_page_to_png: first page (index 0) of the PDF fixture must
    // yield at least 100 bytes of output.
    suppress_abort();

    const pdf_bytes = try std.Io.Dir.cwd().readFileAlloc(std.testing.io, "pdf/fake_memo.pdf", std.heap.c_allocator, .unlimited);
    defer std.heap.c_allocator.free(pdf_bytes);

    // NOTE(review): the rendered buffer is never released here — confirm who owns it.
    const png = try kreuzberg.render_pdf_page_to_png(pdf_bytes, 0, null, null);
    const min_png_len = 100;
    try testing.expect(png.len >= min_png_len);
}
|
||||
|
||||
test "render_pdf_page_out_of_range" {
    // render_pdf_page_to_png: page out of range.
    // Requesting page 999 of the PDF fixture must return an error. A
    // successful render fails the test instead of passing silently.
    suppress_abort();

    const pdf_bytes_bytes = try std.Io.Dir.cwd().readFileAlloc(std.testing.io, "pdf/fake_memo.pdf", std.heap.c_allocator, .unlimited);
    defer std.heap.c_allocator.free(pdf_bytes_bytes);

    // Any error is the expected outcome, so the test ends successfully here.
    _ = kreuzberg.render_pdf_page_to_png(pdf_bytes_bytes, 999, null, null) catch return;

    // Reaching this point means the out-of-range page was rendered.
    return error.TestExpectedError;
}
|
||||
141
e2e/zig/src/plugin_api_test.zig
generated
Normal file
141
e2e/zig/src/plugin_api_test.zig
generated
Normal file
@@ -0,0 +1,141 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
const std = @import("std");
|
||||
const testing = std.testing;
|
||||
const kreuzberg = @import("kreuzberg");
|
||||
|
||||
// C++ global destructors may abort, which breaks zig's --listen=- IPC.
// Ignoring SIGABRT keeps the test process alive in that case.
extern "c" fn signal(sig: i32, handler: usize) usize;
var _abort_handler_installed: bool = false;

/// Sets the SIGABRT disposition to "ignore" the first time it is called;
/// every later call returns immediately.
fn suppress_abort() void {
    if (_abort_handler_installed) return;

    const sigabrt: i32 = 6; // SIGABRT on POSIX
    const sig_ign: usize = 1; // SIG_IGN
    _ = signal(sigabrt, sig_ign);
    _abort_handler_installed = true;
}
|
||||
|
||||
// E2e tests for category: plugin_api
|
||||
|
||||
test "register_document_extractor_trait_bridge" {
    // register_document_extractor: trait bridge.
    // Builds a vtable from a do-nothing stub and registers it under the name
    // "test". The registration result and the error out-parameter are not
    // inspected.
    suppress_abort();

    const Stub = struct {
        pub fn extract_bytes(_: *@This(), _: [*c]const u8, _: [*c]const u8, _: [*c]const u8) ![*c]const u8 {
            return "";
        }
        pub fn extract_file(_: *@This(), _: [*c]const u8, _: [*c]const u8, _: [*c]const u8) ![*c]const u8 {
            return "";
        }
        pub fn supported_mime_types(_: *@This()) [*c]const u8 {
            return "[]";
        }
        pub fn priority(_: *@This()) i32 {
            return 0;
        }
        pub fn can_handle(_: *@This(), _: [*c]const u8, _: [*c]const u8) i32 {
            return 0;
        }
    };

    // NOTE(review): the stub lives on this test's stack — confirm the registry
    // does not keep the pointer after the test returns.
    var stub = Stub{};
    const vtable = kreuzberg.make_document_extractor_vtable(Stub, &stub);
    var out_err: ?[*c]u8 = null;
    _ = kreuzberg.register_document_extractor("test", vtable, &stub, @ptrCast(&out_err));
}
|
||||
|
||||
test "register_embedding_backend_trait_bridge" {
    // register_embedding_backend: trait bridge.
    // Builds a vtable from a do-nothing stub and registers it under the name
    // "test". The registration result and the error out-parameter are not
    // inspected.
    suppress_abort();

    const Stub = struct {
        pub fn dimensions(_: *@This()) u64 {
            return 0;
        }
        pub fn embed(_: *@This(), _: [*c]const u8) ![*c]const u8 {
            return "";
        }
    };

    // NOTE(review): the stub lives on this test's stack — confirm the registry
    // does not keep the pointer after the test returns.
    var stub = Stub{};
    const vtable = kreuzberg.make_embedding_backend_vtable(Stub, &stub);
    var out_err: ?[*c]u8 = null;
    _ = kreuzberg.register_embedding_backend("test", vtable, &stub, @ptrCast(&out_err));
}
|
||||
|
||||
test "register_ocr_backend_trait_bridge" {
    // register_ocr_backend: trait bridge.
    // Builds a vtable from a do-nothing stub and registers it under the name
    // "test". The registration result and the error out-parameter are not
    // inspected.
    suppress_abort();

    const Stub = struct {
        pub fn process_image(_: *@This(), _: [*c]const u8, _: [*c]const u8) ![*c]const u8 {
            return "";
        }
        pub fn process_image_file(_: *@This(), _: [*c]const u8, _: [*c]const u8) ![*c]const u8 {
            return "";
        }
        pub fn supports_language(_: *@This(), _: [*c]const u8) i32 {
            return 0;
        }
        pub fn backend_type(_: *@This()) [*c]const u8 {
            return "{}";
        }
        pub fn supported_languages(_: *@This()) [*c]const u8 {
            return "[]";
        }
        pub fn supports_table_detection(_: *@This()) i32 {
            return 0;
        }
        pub fn supports_document_processing(_: *@This()) i32 {
            return 0;
        }
        pub fn process_document(_: *@This(), _: [*c]const u8, _: [*c]const u8) ![*c]const u8 {
            return "";
        }
    };

    // NOTE(review): the stub lives on this test's stack — confirm the registry
    // does not keep the pointer after the test returns.
    var stub = Stub{};
    const vtable = kreuzberg.make_ocr_backend_vtable(Stub, &stub);
    var out_err: ?[*c]u8 = null;
    _ = kreuzberg.register_ocr_backend("test", vtable, &stub, @ptrCast(&out_err));
}
|
||||
|
||||
test "register_post_processor_trait_bridge" {
    // register_post_processor: trait bridge.
    // Builds a vtable from a do-nothing stub and registers it under the name
    // "test". The registration result and the error out-parameter are not
    // inspected.
    suppress_abort();

    const Stub = struct {
        pub fn process(_: *@This(), _: [*c]const u8, _: [*c]const u8) !void {}
        pub fn processing_stage(_: *@This()) [*c]const u8 {
            return "{}";
        }
        pub fn should_process(_: *@This(), _: [*c]const u8, _: [*c]const u8) i32 {
            return 0;
        }
        pub fn estimated_duration_ms(_: *@This(), _: [*c]const u8) u64 {
            return 0;
        }
        pub fn priority(_: *@This()) i32 {
            return 0;
        }
    };

    // NOTE(review): the stub lives on this test's stack — confirm the registry
    // does not keep the pointer after the test returns.
    var stub = Stub{};
    const vtable = kreuzberg.make_post_processor_vtable(Stub, &stub);
    var out_err: ?[*c]u8 = null;
    _ = kreuzberg.register_post_processor("test", vtable, &stub, @ptrCast(&out_err));
}
|
||||
|
||||
test "register_renderer_trait_bridge" {
    // register_renderer: trait bridge.
    // Builds a vtable from a do-nothing stub and registers it under the name
    // "test". The registration result and the error out-parameter are not
    // inspected.
    suppress_abort();

    const Stub = struct {
        pub fn render(_: *@This(), _: [*c]const u8) ![*c]const u8 {
            return "";
        }
    };

    // NOTE(review): the stub lives on this test's stack — confirm the registry
    // does not keep the pointer after the test returns.
    var stub = Stub{};
    const vtable = kreuzberg.make_renderer_vtable(Stub, &stub);
    var out_err: ?[*c]u8 = null;
    _ = kreuzberg.register_renderer("test", vtable, &stub, @ptrCast(&out_err));
}
|
||||
|
||||
test "register_validator_trait_bridge" {
    // register_validator: trait bridge.
    // Builds a vtable from a do-nothing stub and registers it under the name
    // "test". The registration result and the error out-parameter are not
    // inspected.
    suppress_abort();

    const Stub = struct {
        pub fn validate(_: *@This(), _: [*c]const u8, _: [*c]const u8) !void {}
        pub fn should_validate(_: *@This(), _: [*c]const u8, _: [*c]const u8) i32 {
            return 0;
        }
        pub fn priority(_: *@This()) i32 {
            return 0;
        }
    };

    // NOTE(review): the stub lives on this test's stack — confirm the registry
    // does not keep the pointer after the test returns.
    var stub = Stub{};
    const vtable = kreuzberg.make_validator_vtable(Stub, &stub);
    var out_err: ?[*c]u8 = null;
    _ = kreuzberg.register_validator("test", vtable, &stub, @ptrCast(&out_err));
}
|
||||
|
||||
test "unregister_document_extractor_after_register" {
    // unregister_document_extractor: removing "test-extractor" must not
    // return an error.
    // NOTE(review): nothing in this test registers that name first — confirm
    // the intended precondition.
    suppress_abort();

    const extractor_name = "test-extractor";
    const outcome = try kreuzberg.unregister_document_extractor(extractor_name);
    _ = outcome;
}
|
||||
|
||||
test "unregister_embedding_backend_after_register" {
    // unregister_embedding_backend: removing "test-embedding-backend" must
    // not return an error.
    // NOTE(review): nothing in this test registers that name first — confirm
    // the intended precondition.
    suppress_abort();

    const backend_name = "test-embedding-backend";
    const outcome = try kreuzberg.unregister_embedding_backend(backend_name);
    _ = outcome;
}
|
||||
|
||||
test "unregister_post_processor_after_register" {
    // unregister_post_processor: removing "test-processor" must not return
    // an error.
    // NOTE(review): nothing in this test registers that name first — confirm
    // the intended precondition.
    suppress_abort();

    const processor_name = "test-processor";
    const outcome = try kreuzberg.unregister_post_processor(processor_name);
    _ = outcome;
}
|
||||
|
||||
test "unregister_renderer_after_register" {
    // unregister_renderer: removing "test-renderer" must not return an error.
    // NOTE(review): nothing in this test registers that name first — confirm
    // the intended precondition.
    suppress_abort();

    const renderer_name = "test-renderer";
    const outcome = try kreuzberg.unregister_renderer(renderer_name);
    _ = outcome;
}
|
||||
|
||||
test "unregister_validator_after_register" {
    // unregister_validator: removing "test-validator" must not return an error.
    // NOTE(review): nothing in this test registers that name first — confirm
    // the intended precondition.
    suppress_abort();

    const validator_name = "test-validator";
    const outcome = try kreuzberg.unregister_validator(validator_name);
    _ = outcome;
}
|
||||
33
e2e/zig/src/post_processor_management_test.zig
generated
Normal file
33
e2e/zig/src/post_processor_management_test.zig
generated
Normal file
@@ -0,0 +1,33 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
const std = @import("std");
|
||||
const testing = std.testing;
|
||||
const kreuzberg = @import("kreuzberg");
|
||||
|
||||
// C++ global destructors may abort, which breaks zig's --listen=- IPC.
// Ignoring SIGABRT keeps the test process alive in that case.
extern "c" fn signal(sig: i32, handler: usize) usize;
var _abort_handler_installed: bool = false;

/// Sets the SIGABRT disposition to "ignore" the first time it is called;
/// every later call returns immediately.
fn suppress_abort() void {
    if (_abort_handler_installed) return;

    const sigabrt: i32 = 6; // SIGABRT on POSIX
    const sig_ign: usize = 1; // SIG_IGN
    _ = signal(sigabrt, sig_ign);
    _abort_handler_installed = true;
}
|
||||
|
||||
// E2e tests for category: post_processor_management
|
||||
|
||||
test "post_processors_clear" {
    // Clear all post-processors. Only the absence of an error is checked; the
    // post-processor list is not inspected afterwards.
    suppress_abort();

    const cleared = try kreuzberg.clear_post_processors();
    _ = cleared;
}
|
||||
|
||||
test "post_processors_list" {
    // List all registered post-processors. The listing itself is discarded;
    // the test passes when the call returns without error.
    suppress_abort();

    const listed = try kreuzberg.list_post_processors();
    _ = listed;
}
|
||||
39
e2e/zig/src/registry_operations_test.zig
generated
Normal file
39
e2e/zig/src/registry_operations_test.zig
generated
Normal file
@@ -0,0 +1,39 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
const std = @import("std");
|
||||
const testing = std.testing;
|
||||
const kreuzberg = @import("kreuzberg");
|
||||
|
||||
// C++ global destructors may abort, which breaks zig's --listen=- IPC.
// Ignoring SIGABRT keeps the test process alive in that case.
extern "c" fn signal(sig: i32, handler: usize) usize;
var _abort_handler_installed: bool = false;

/// Sets the SIGABRT disposition to "ignore" the first time it is called;
/// every later call returns immediately.
fn suppress_abort() void {
    if (_abort_handler_installed) return;

    const sigabrt: i32 = 6; // SIGABRT on POSIX
    const sig_ign: usize = 1; // SIG_IGN
    _ = signal(sigabrt, sig_ign);
    _abort_handler_installed = true;
}
|
||||
|
||||
// E2e tests for category: registry_operations
|
||||
|
||||
test "extensions_docx" {
    // Get file extensions for the DOCX MIME type. The returned value is
    // discarded; the test passes when the lookup returns without error.
    suppress_abort();

    const docx_mime = "application/vnd.openxmlformats-officedocument.wordprocessingml.document";
    const extensions = try kreuzberg.get_extensions_for_mime(docx_mime);
    _ = extensions;
}
|
||||
|
||||
test "extensions_html" {
    // Get file extensions for the HTML MIME type. The returned value is
    // discarded; the test passes when the lookup returns without error.
    suppress_abort();

    const html_mime = "text/html";
    const extensions = try kreuzberg.get_extensions_for_mime(html_mime);
    _ = extensions;
}
|
||||
|
||||
test "extensions_pdf" {
    // Get file extensions for the PDF MIME type. The returned value is
    // discarded; the test passes when the lookup returns without error.
    suppress_abort();

    const pdf_mime = "application/pdf";
    const extensions = try kreuzberg.get_extensions_for_mime(pdf_mime);
    _ = extensions;
}
|
||||
57
e2e/zig/src/registry_test.zig
generated
Normal file
57
e2e/zig/src/registry_test.zig
generated
Normal file
@@ -0,0 +1,57 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
const std = @import("std");
|
||||
const testing = std.testing;
|
||||
const kreuzberg = @import("kreuzberg");
|
||||
|
||||
// C++ global destructors may abort, which breaks zig's --listen=- IPC.
// Ignoring SIGABRT keeps the test process alive in that case.
extern "c" fn signal(sig: i32, handler: usize) usize;
var _abort_handler_installed: bool = false;

/// Sets the SIGABRT disposition to "ignore" the first time it is called;
/// every later call returns immediately.
fn suppress_abort() void {
    if (_abort_handler_installed) return;

    const sigabrt: i32 = 6; // SIGABRT on POSIX
    const sig_ign: usize = 1; // SIG_IGN
    _ = signal(sigabrt, sig_ign);
    _abort_handler_installed = true;
}
|
||||
|
||||
// E2e tests for category: registry
|
||||
|
||||
test "list_document_extractors" {
    // List document extractors. The listing is discarded; the test passes
    // when the call returns without error.
    suppress_abort();

    const listed = try kreuzberg.list_document_extractors();
    _ = listed;
}
|
||||
|
||||
test "list_embedding_backends" {
    // List embedding backends. The listing is discarded; the test passes
    // when the call returns without error.
    suppress_abort();

    const listed = try kreuzberg.list_embedding_backends();
    _ = listed;
}
|
||||
|
||||
test "list_ocr_backends" {
    // List OCR backends. The listing is discarded; the test passes when the
    // call returns without error.
    suppress_abort();

    const listed = try kreuzberg.list_ocr_backends();
    _ = listed;
}
|
||||
|
||||
test "list_post_processors" {
    // List post-processors. The listing is discarded; the test passes when
    // the call returns without error.
    suppress_abort();

    const listed = try kreuzberg.list_post_processors();
    _ = listed;
}
|
||||
|
||||
test "list_renderers" {
    // List renderers. The listing is discarded; the test passes when the
    // call returns without error.
    suppress_abort();

    const listed = try kreuzberg.list_renderers();
    _ = listed;
}
|
||||
|
||||
test "list_validators" {
    // List validators. The listing is discarded; the test passes when the
    // call returns without error.
    suppress_abort();

    const listed = try kreuzberg.list_validators();
    _ = listed;
}
|
||||
33
e2e/zig/src/renderer_management_test.zig
generated
Normal file
33
e2e/zig/src/renderer_management_test.zig
generated
Normal file
@@ -0,0 +1,33 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
const std = @import("std");
|
||||
const testing = std.testing;
|
||||
const kreuzberg = @import("kreuzberg");
|
||||
|
||||
// C++ global destructors may abort, which breaks zig's --listen=- IPC.
// Ignoring SIGABRT keeps the test process alive in that case.
extern "c" fn signal(sig: i32, handler: usize) usize;
var _abort_handler_installed: bool = false;

/// Sets the SIGABRT disposition to "ignore" the first time it is called;
/// every later call returns immediately.
fn suppress_abort() void {
    if (_abort_handler_installed) return;

    const sigabrt: i32 = 6; // SIGABRT on POSIX
    const sig_ign: usize = 1; // SIG_IGN
    _ = signal(sigabrt, sig_ign);
    _abort_handler_installed = true;
}
|
||||
|
||||
// E2e tests for category: renderer_management
|
||||
|
||||
test "renderers_clear" {
    // Clear all renderers. Only the absence of an error is checked; the
    // renderer list is not inspected afterwards.
    suppress_abort();

    const cleared = try kreuzberg.clear_renderers();
    _ = cleared;
}
|
||||
|
||||
test "renderers_list" {
    // List all registered renderers. The listing itself is discarded; the
    // test passes when the call returns without error.
    suppress_abort();

    const listed = try kreuzberg.list_renderers();
    _ = listed;
}
|
||||
229
e2e/zig/src/smoke_test.zig
generated
Normal file
229
e2e/zig/src/smoke_test.zig
generated
Normal file
@@ -0,0 +1,229 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
const std = @import("std");
|
||||
const testing = std.testing;
|
||||
const kreuzberg = @import("kreuzberg");
|
||||
|
||||
// C++ global destructors may abort, which breaks zig's --listen=- IPC.
// Ignoring SIGABRT keeps the test process alive in that case.
extern "c" fn signal(sig: i32, handler: usize) usize;
var _abort_handler_installed: bool = false;

/// Sets the SIGABRT disposition to "ignore" the first time it is called;
/// every later call returns immediately.
fn suppress_abort() void {
    if (_abort_handler_installed) return;

    const sigabrt: i32 = 6; // SIGABRT on POSIX
    const sig_ign: usize = 1; // SIG_IGN
    _ = signal(sigabrt, sig_ign);
    _abort_handler_installed = true;
}
|
||||
|
||||
// E2e tests for category: smoke
|
||||
|
||||
test "ocr_image_png" {
    // OCR: PNG image extraction with OCR enabled. In WASM this exercises the
    // Uint8Array bridge parameter and Promise await in the generated OcrBackend bridge.
    // Checks the reported MIME type, that content is non-empty, and that it
    // contains "Hello"/"World" in either capitalisation.
    suppress_abort();

    const content_bytes = try std.Io.Dir.cwd().readFileAlloc(std.testing.io, "images/test_hello_world.png", std.heap.c_allocator, .unlimited);
    defer std.heap.c_allocator.free(content_bytes);

    const _result_json = try kreuzberg.extract_bytes_sync(content_bytes, "image/png", "{}");
    defer std.heap.c_allocator.free(_result_json);

    // testing.allocator makes a leaked parse tree fail the test; the previous
    // per-test DebugAllocator discarded its deinit() leak report.
    var _parsed = try std.json.parseFromSlice(std.json.Value, testing.allocator, _result_json, .{});
    defer _parsed.deinit();
    const result = &_parsed.value;

    try testing.expectEqualStrings("image/png", std.mem.trim(u8, result.object.get("mime_type").?.string, " \n\r\t"));

    const content = result.object.get("content").?.string;
    try testing.expect(content.len >= 1);
    try testing.expect(
        std.mem.indexOf(u8, content, "Hello") != null or
            std.mem.indexOf(u8, content, "World") != null or
            std.mem.indexOf(u8, content, "hello") != null or
            std.mem.indexOf(u8, content, "world") != null,
    );
}
|
||||
|
||||
test "smoke_docx_basic" {
    // Smoke test: DOCX with formatted text.
    // Checks the reported MIME type, a minimum content length of 20 bytes and
    // that at least one of the expected words appears.
    suppress_abort();

    const docx_mime = "application/vnd.openxmlformats-officedocument.wordprocessingml.document";
    const _result_json = try kreuzberg.extract_file_sync("docx/fake.docx", docx_mime, "{}");
    defer std.heap.c_allocator.free(_result_json);

    // testing.allocator makes a leaked parse tree fail the test; the previous
    // per-test DebugAllocator discarded its deinit() leak report.
    var _parsed = try std.json.parseFromSlice(std.json.Value, testing.allocator, _result_json, .{});
    defer _parsed.deinit();
    const result = &_parsed.value;

    try testing.expectEqualStrings(docx_mime, std.mem.trim(u8, result.object.get("mime_type").?.string, " \n\r\t"));

    const content = result.object.get("content").?.string;
    try testing.expect(content.len >= 20);
    try testing.expect(
        std.mem.indexOf(u8, content, "Lorem") != null or
            std.mem.indexOf(u8, content, "ipsum") != null or
            std.mem.indexOf(u8, content, "document") != null or
            std.mem.indexOf(u8, content, "text") != null,
    );
}
|
||||
|
||||
test "smoke_html_basic" {
    // Smoke test: HTML table extraction.
    // Checks the reported MIME type, a minimum content length of 10 bytes and
    // that at least one of the expected table strings appears.
    suppress_abort();

    const _result_json = try kreuzberg.extract_file_sync("html/simple_table.html", "text/html", "{}");
    defer std.heap.c_allocator.free(_result_json);

    // testing.allocator makes a leaked parse tree fail the test; the previous
    // per-test DebugAllocator discarded its deinit() leak report.
    var _parsed = try std.json.parseFromSlice(std.json.Value, testing.allocator, _result_json, .{});
    defer _parsed.deinit();
    const result = &_parsed.value;

    try testing.expectEqualStrings("text/html", std.mem.trim(u8, result.object.get("mime_type").?.string, " \n\r\t"));

    const content = result.object.get("content").?.string;
    try testing.expect(content.len >= 10);
    try testing.expect(
        std.mem.indexOf(u8, content, "Sample Data Table") != null or
            std.mem.indexOf(u8, content, "Laptop") != null or
            std.mem.indexOf(u8, content, "Electronics") != null or
            std.mem.indexOf(u8, content, "Product") != null,
    );
}
|
||||
|
||||
test "smoke_image_png" {
    // Smoke test: PNG image (without OCR, metadata only).
    // No MIME type is passed to the extractor; the result must still report
    // image/png.
    suppress_abort();

    const _result_json = try kreuzberg.extract_file_sync("images/sample.png", null, "{\"disable_ocr\":true}");
    defer std.heap.c_allocator.free(_result_json);

    // testing.allocator makes a leaked parse tree fail the test; the previous
    // per-test DebugAllocator discarded its deinit() leak report.
    var _parsed = try std.json.parseFromSlice(std.json.Value, testing.allocator, _result_json, .{});
    defer _parsed.deinit();
    const result = &_parsed.value;

    try testing.expectEqualStrings("image/png", std.mem.trim(u8, result.object.get("mime_type").?.string, " \n\r\t"));
}
|
||||
|
||||
test "smoke_json_basic" {
    // Smoke test: JSON file extraction.
    // Checks the reported MIME type and a minimum content length of 5 bytes.
    suppress_abort();

    const _result_json = try kreuzberg.extract_file_sync("json/simple.json", "application/json", "{}");
    defer std.heap.c_allocator.free(_result_json);

    // testing.allocator makes a leaked parse tree fail the test; the previous
    // per-test DebugAllocator discarded its deinit() leak report.
    var _parsed = try std.json.parseFromSlice(std.json.Value, testing.allocator, _result_json, .{});
    defer _parsed.deinit();
    const result = &_parsed.value;

    try testing.expectEqualStrings("application/json", std.mem.trim(u8, result.object.get("mime_type").?.string, " \n\r\t"));
    try testing.expect(result.object.get("content").?.string.len >= 5);
}
|
||||
|
||||
test "smoke_pdf_basic" {
    // Smoke test: PDF with simple text extraction.
    // Checks the reported MIME type, a minimum content length of 50 bytes and
    // that one of the two expected phrases appears.
    suppress_abort();

    const _result_json = try kreuzberg.extract_file_sync("pdf/fake_memo.pdf", "application/pdf", "{}");
    defer std.heap.c_allocator.free(_result_json);

    // testing.allocator makes a leaked parse tree fail the test; the previous
    // per-test DebugAllocator discarded its deinit() leak report.
    var _parsed = try std.json.parseFromSlice(std.json.Value, testing.allocator, _result_json, .{});
    defer _parsed.deinit();
    const result = &_parsed.value;

    try testing.expectEqualStrings("application/pdf", std.mem.trim(u8, result.object.get("mime_type").?.string, " \n\r\t"));

    const content = result.object.get("content").?.string;
    try testing.expect(content.len >= 50);
    try testing.expect(
        std.mem.indexOf(u8, content, "May 5, 2023") != null or
            std.mem.indexOf(u8, content, "To Whom it May Concern") != null,
    );
}
|
||||
|
||||
test "smoke_txt_basic" {
    // Smoke test: Plain text file.
    // Checks the reported MIME type and a minimum content length of 5 bytes.
    suppress_abort();

    const _result_json = try kreuzberg.extract_file_sync("text/report.txt", "text/plain", "{}");
    defer std.heap.c_allocator.free(_result_json);

    // testing.allocator makes a leaked parse tree fail the test; the previous
    // per-test DebugAllocator discarded its deinit() leak report.
    var _parsed = try std.json.parseFromSlice(std.json.Value, testing.allocator, _result_json, .{});
    defer _parsed.deinit();
    const result = &_parsed.value;

    try testing.expectEqualStrings("text/plain", std.mem.trim(u8, result.object.get("mime_type").?.string, " \n\r\t"));
    try testing.expect(result.object.get("content").?.string.len >= 5);
}
|
||||
|
||||
test "smoke_xlsx_basic" {
    // Smoke test: XLSX with basic spreadsheet data including tables.
    // Checks the reported MIME type, a minimum content length of 100 bytes and
    // that every expected header, team name and abbreviation appears.
    suppress_abort();

    const xlsx_mime = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet";
    const _result_json = try kreuzberg.extract_file_sync("xlsx/stanley_cups.xlsx", xlsx_mime, "{}");
    defer std.heap.c_allocator.free(_result_json);

    // testing.allocator makes a leaked parse tree fail the test; the previous
    // per-test DebugAllocator discarded its deinit() leak report.
    var _parsed = try std.json.parseFromSlice(std.json.Value, testing.allocator, _result_json, .{});
    defer _parsed.deinit();
    const result = &_parsed.value;

    try testing.expectEqualStrings(xlsx_mime, std.mem.trim(u8, result.object.get("mime_type").?.string, " \n\r\t"));

    // `content` is read as a JSON string here, so no stringify fallback is
    // needed for the substring checks below.
    const content = result.object.get("content").?.string;
    try testing.expect(content.len >= 100);

    // Every fragment must be present; they are checked in the original order.
    const expected_fragments = [_][]const u8{
        "Team",
        "Location",
        "Stanley Cups",
        "Blues",
        "Flyers",
        "Maple Leafs",
        "STL",
        "PHI",
        "TOR",
    };
    for (expected_fragments) |fragment| {
        try testing.expect(std.mem.indexOf(u8, content, fragment) != null);
    }

    // skipped: field 'tables' not available on result type
    // skipped: field 'metadata.format.excel.sheet_count' not available on result type
    // skipped: field 'metadata.format.excel.sheet_names' not available on result type
}
|
||||
33
e2e/zig/src/validator_management_test.zig
generated
Normal file
33
e2e/zig/src/validator_management_test.zig
generated
Normal file
@@ -0,0 +1,33 @@
|
||||
// This file is auto-generated by alef — DO NOT EDIT.
|
||||
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
|
||||
// To regenerate: alef generate
|
||||
// To verify freshness: alef verify --exit-code
|
||||
// Issues & docs: https://github.com/kreuzberg-dev/alef
|
||||
const std = @import("std");
|
||||
const testing = std.testing;
|
||||
const kreuzberg = @import("kreuzberg");
|
||||
|
||||
// C++ global destructors may abort, which breaks zig's --listen=- IPC.
// Ignoring SIGABRT keeps the test process alive in that case.
extern "c" fn signal(sig: i32, handler: usize) usize;
var _abort_handler_installed: bool = false;

/// Sets the SIGABRT disposition to "ignore" the first time it is called;
/// every later call returns immediately.
fn suppress_abort() void {
    if (_abort_handler_installed) return;

    const sigabrt: i32 = 6; // SIGABRT on POSIX
    const sig_ign: usize = 1; // SIG_IGN
    _ = signal(sigabrt, sig_ign);
    _abort_handler_installed = true;
}
|
||||
|
||||
// E2e tests for category: validator_management
|
||||
|
||||
test "validators_clear" {
    // Clear all validators. Only the absence of an error is checked; the
    // validator list is not inspected afterwards.
    suppress_abort();

    const cleared = try kreuzberg.clear_validators();
    _ = cleared;
}
|
||||
|
||||
test "validators_list" {
    // List all registered validators. The listing itself is discarded; the
    // test passes when the call returns without error.
    suppress_abort();

    const listed = try kreuzberg.list_validators();
    _ = listed;
}
|
||||
Reference in New Issue
Block a user