Nomad changes
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s

This commit is contained in:
Henrik Jess Nielsen
2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions

3
e2e/wasm/.mocharc.json generated Normal file
View File

@@ -0,0 +1,3 @@
{
"loader": "./wasm-loader.mjs"
}

42
e2e/wasm/node.loader.mjs generated Normal file
View File

@@ -0,0 +1,42 @@
// Custom Node.js loader for WASM imports
import Module from "module";
// CommonJS side of the loader. `_resolveFilename` is a static method on the
// `Module` constructor, not a prototype method, so the hook must be installed
// on `Module` itself; a property assigned to `Module.prototype` is never
// consulted by `require()`.
const originalResolveFilename = Module._resolveFilename;

/**
 * Resolves the bare WASM host-import names to themselves and defers every
 * other request to Node's original resolver.
 *
 * @param {string} request - Specifier passed to `require()`.
 * @param {...unknown} rest - Remaining resolver arguments, forwarded untouched.
 * @returns {string} The resolved filename, or `request` itself for the shims.
 */
Module._resolveFilename = function (request, ...rest) {
  if (request === "env" || request === "wasi_snapshot_preview1") {
    // Hand back the bare name so the lookup does not fail on a module that
    // does not exist on disk.
    // NOTE(review): nothing in this file supplies a module body for these
    // names on the CommonJS path; presumably a `Module._load` hook is needed
    // as well — confirm.
    return request;
  }
  return originalResolveFilename.call(this, request, ...rest);
};
/**
 * ESM `resolve` hook. The two bare WASM host-import specifiers are mapped to
 * the placeholder URL "node:vm" and the chain is short-circuited; anything
 * else is passed to the next resolver with the specifier only.
 *
 * @param {string} specifier - Import specifier being resolved.
 * @param {object} context - Hook context (unused here).
 * @param {Function} nextResolve - Next `resolve` hook in the chain.
 * @returns {Promise<object>} Resolution result.
 */
export async function resolve(specifier, context, nextResolve) {
  const isWasmHostImport = ["env", "wasi_snapshot_preview1"].includes(specifier);
  if (!isWasmHostImport) {
    return nextResolve(specifier);
  }
  return { url: "node:vm", shortCircuit: true };
}
/**
 * Pass-through `getFormat` hook: asks the next hook for the format of `url`
 * and returns its answer unchanged.
 * NOTE(review): current Node versions appear to fold this hook into `load` —
 * confirm whether it is still invoked.
 *
 * @param {string} url - Module URL.
 * @param {object} context - Hook context (unused here).
 * @param {Function} nextGetFormat - Next hook in the chain.
 */
export async function getFormat(url, context, nextGetFormat) {
  const format = await nextGetFormat(url);
  return format;
}
/**
 * Pass-through `getSource` hook: asks the next hook for the source of `url`
 * and returns its answer unchanged.
 * NOTE(review): current Node versions appear to fold this hook into `load` —
 * confirm whether it is still invoked.
 *
 * @param {string} url - Module URL.
 * @param {object} context - Hook context (unused here).
 * @param {Function} nextGetSource - Next hook in the chain.
 */
export async function getSource(url, context, nextGetSource) {
  const source = await nextGetSource(url);
  return source;
}
/**
 * ESM `load` hook. Supplies a stub module body for the placeholder URL
 * "node:vm" and defers every other URL to the next loader.
 *
 * @param {string} url - Resolved module URL.
 * @param {object} context - Hook context (unused here).
 * @param {Function} nextLoad - Next `load` hook in the chain.
 * @returns {Promise<object>} Load result.
 */
export async function load(url, context, nextLoad) {
  if (url === "node:vm") {
    return {
      format: "module",
      // A hook that answers without calling `nextLoad` has to say so
      // explicitly; otherwise Node reports an incomplete hook chain.
      shortCircuit: true,
      source: "export default {}; export const wasi_snapshot_preview1 = {};",
    };
  }
  return nextLoad(url);
}

1748
e2e/wasm/package-lock.json generated Normal file

File diff suppressed because it is too large Load Diff

14
e2e/wasm/package.json generated Normal file
View File

@@ -0,0 +1,14 @@
{
"name": "@kreuzberg/wasm-e2e-wasm",
"version": "0.1.0",
"private": true,
"type": "module",
"scripts": {
"test": "NODE_OPTIONS=--max-old-space-size=4096 vitest run"
},
"devDependencies": {
"@kreuzberg/wasm": "file:../../crates/kreuzberg-wasm/pkg/nodejs",
"rollup": "^4.53.3",
"vitest": "^4.1.5"
}
}

1013
e2e/wasm/pnpm-lock.yaml generated Normal file

File diff suppressed because it is too large Load Diff

5
e2e/wasm/pnpm-workspace.yaml generated Normal file
View File

@@ -0,0 +1,5 @@
packages:
- "."
allowBuilds:
esbuild: true
tree-sitter: true

43
e2e/wasm/register-wasm-loader.cjs generated Normal file
View File

@@ -0,0 +1,43 @@
// Register WASI imports for WASM modules in Node.js
// This file is loaded before vitest runs
const Module = require("module");
const path = require("path");
// Mock host objects handed to WASM modules that import from `env` and
// `wasi_snapshot_preview1`. Every entry is a no-op stub that returns a fixed
// status code; none of them touch WASM memory.
const env = {};
const wasi_snapshot_preview1 = {
  proc_exit: () => {},
  environ_get: () => 0,
  environ_sizes_get: () => 0,
  fd_write: () => 0,
  fd_read: () => 0,
  fd_seek: () => 0,
  fd_close: () => 0,
  // WASI names this import `fd_prestat_get`; the shorter `fd_prestat` key is
  // kept so existing lookups by that name keep working.
  fd_prestat_get: () => 8,
  fd_prestat: () => 8,
  fd_prestat_dir_name: () => 0,
  path_open: () => 8,
  path_create_directory: () => 0,
  path_remove_directory: () => 0,
  path_unlink_file: () => 0,
  path_filestat_get: () => 0,
  path_rename: () => 0,
  sys_info: () => 0,
  clock_time_get: () => 0,
  random_get: (buf, buflen) => 0,
  thread_spawn: () => 0,
  args_get: () => 0,
  args_sizes_get: () => 0,
};
// Intercept `require()` so the two WASM host-import names yield the mock
// objects; every other id goes to Node's original implementation.
const originalRequire = Module.prototype.require;
Module.prototype.require = function (id, ...rest) {
  switch (id) {
    case "env":
      return env;
    case "wasi_snapshot_preview1":
      return wasi_snapshot_preview1;
    default:
      return originalRequire.call(this, id, ...rest);
  }
};
// ES modules do not go through the patched `require`, so the same mock
// objects are also published as properties of `globalThis`.
Object.assign(globalThis, { env, wasi_snapshot_preview1 });

173
e2e/wasm/setup.ts generated Normal file
View File

@@ -0,0 +1,173 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
import { createRequire } from 'module';
import { readFileSync } from 'fs';
import { fileURLToPath } from 'url';
import { dirname, join } from 'path';
// Pre-initialize the wasm-bindgen module so that exports are callable
// in every vitest worker. The async default export uses fetch() which
// does not support file:// URLs in Node.js; use initSync with a
// readFileSync buffer instead.
try {
  const _require = createRequire(import.meta.url);
  // `require.resolve` already yields the absolute filesystem path of the
  // package's JS entry file, so it is used directly. Round-tripping it through
  // `new URL(path, 'file://')` would mis-parse `#`, `?` and `%xx` sequences in
  // the path and point at the wrong file.
  const wasmEntryPath = _require.resolve('@kreuzberg/wasm');
  const wasmModule = (await import(/* @vite-ignore */ wasmEntryPath)) as unknown as Record<string, unknown>;
  const initSync = wasmModule.initSync as
    | ((mod: { module: WebAssembly.Module | BufferSource } | WebAssembly.Module | BufferSource) => unknown)
    | undefined;
  if (typeof initSync === 'function') {
    // Locate the .wasm binary next to the JS entry.
    const wasmBinPath = wasmEntryPath.replace(/\.js$/, '_bg.wasm');
    const wasmBytes = readFileSync(wasmBinPath);
    // Pass as object form to avoid wasm-bindgen deprecation warning.
    initSync({ module: wasmBytes });
  } else {
    // Fallback: try the async default init (wasm-pack --target nodejs bundles).
    const initDefault = wasmModule.default as (() => Promise<unknown>) | undefined;
    if (typeof initDefault === 'function') await initDefault();
  }
} catch (err) {
  // Module may not require explicit init — continue anyway. Non-Error throws
  // are stringified so the warning never prints `undefined`.
  const reason = err instanceof Error ? err.message : String(err);
  console.warn('[alef wasm setup] init skipped:', reason);
}
// Patch CommonJS `require('env')` and `require('wasi_snapshot_preview1')` to
// return shim objects. wasm-pack `--target nodejs` emits bare `require()`
// calls for these from getrandom/wasi transitives, but they are not real
// Node modules — the WASM module imports them by name and the host is
// expected to satisfy them. Patch Module._load BEFORE the wasm bundle is
// imported by any test file.
// Note: setupFiles run per-test-worker; vitest imports the test files
// AFTER setupFiles complete, so this hook installs in time.
{
const _require = createRequire(import.meta.url);
const Module = _require('module');
// Host object for the WASM `env` import namespace. `system` and `mkstemp`
// are C-runtime entry points pulled in by some WASM-compiled deps (e.g.
// tesseract-wasm); they only need to be callable for instantiation, so both
// simply report failure with -1.
const env = {
  system(_cmd: number): number {
    return -1;
  },
  mkstemp(_template: number): number {
    return -1;
  },
};
// WASI shim support. The shims must hand realistic values back to the guest:
// an all-zero clock makes WASM-side timing loops spin forever (e.g.
// getrandom's spin-until-elapsed retry), and zero-filled random buffers can
// stall deps that expect non-zero entropy.
//
// Returns a fresh DataView over the WASM linear memory stored on
// `globalThis.__alef_wasm_memory__`, or null while no memory has been stored
// there yet. The lookup happens on every call because the imports are wired
// before the WASM module is instantiated.
const _wasiMemoryView = (): DataView | null => {
  const memory = (globalThis as unknown as { __alef_wasm_memory__?: WebAssembly.Memory }).__alef_wasm_memory__;
  if (!memory) {
    return null;
  }
  return new DataView(memory.buffer);
};
// Fills `buf` with random bytes, preferring Web Crypto and falling back to
// Math.random() when `globalThis.crypto.getRandomValues` is unavailable.
// getRandomValues rejects views larger than 65,536 bytes, so the buffer is
// filled in chunks of at most that size.
const _cryptoFill = (buf: Uint8Array) => {
  const c = globalThis.crypto;
  if (c && typeof c.getRandomValues === 'function') {
    const maxChunkBytes = 65536;
    for (let offset = 0; offset < buf.length; offset += maxChunkBytes) {
      // subarray() clamps the end index and shares storage with `buf`.
      c.getRandomValues(buf.subarray(offset, offset + maxChunkBytes));
    }
    return;
  }
  for (let i = 0; i < buf.length; i++) buf[i] = Math.floor(Math.random() * 256);
};
// Host implementations for the `wasi_snapshot_preview1` import namespace.
// Each shim returns a WASI errno (0 = success). Shims that report results
// through out-pointers write them via `_wasiMemoryView()` and quietly skip
// the write while no WASM memory has been captured yet.
const wasi_snapshot_preview1 = {
  proc_exit: () => {},
  environ_get: () => 0,
  // Report an empty environment. Both out-params are written: leaving the
  // buffer-size slot untouched would hand the guest whatever bytes happen to
  // be at that address.
  environ_sizes_get: (countOut: number, sizeOut: number) => {
    const v = _wasiMemoryView();
    if (v) {
      v.setUint32(countOut, 0, true);
      v.setUint32(sizeOut, 0, true);
    }
    return 0;
  },
  // WASI fd_write must update `nwritten_ptr` with the total bytes consumed,
  // otherwise libc-style callers (e.g. tesseract-compiled-to-wasm fputs)
  // see 0 of N bytes written and retry forever, hanging the host.
  fd_write: (_fd: number, iovsPtr: number, iovsLen: number, nwrittenPtr: number) => {
    const v = _wasiMemoryView();
    if (!v) return 0;
    let total = 0;
    for (let i = 0; i < iovsLen; i++) {
      // Each iovec entry is 8 bytes; the length is the u32 at offset 4.
      const off = iovsPtr + i * 8;
      total += v.getUint32(off + 4, true);
    }
    v.setUint32(nwrittenPtr, total, true);
    return 0;
  },
  // Mirror fd_write: callers retry on partial reads. Reporting 0 bytes
  // read (EOF) is fine; just make sure `nread_ptr` is written.
  fd_read: (_fd: number, _iovsPtr: number, _iovsLen: number, nreadPtr: number) => {
    const v = _wasiMemoryView();
    if (v) v.setUint32(nreadPtr, 0, true);
    return 0;
  },
  fd_seek: () => 0,
  fd_close: () => 0,
  fd_prestat_get: () => 8, // EBADF — no preopens.
  fd_prestat_dir_name: () => 0,
  fd_fdstat_get: () => 0,
  fd_fdstat_set_flags: () => 0,
  path_open: () => 44, // ENOENT.
  path_create_directory: () => 0,
  path_remove_directory: () => 0,
  path_unlink_file: () => 0,
  path_filestat_get: () => 44, // ENOENT.
  path_rename: () => 0,
  // Writes the current time in nanoseconds. The value is derived from
  // `performance.timeOrigin + performance.now()` so it is anchored to wall
  // time, carries sub-millisecond resolution, and never steps backwards
  // between two calls. The clock id and precision arguments are ignored.
  clock_time_get: (_clockId: number, _precision: bigint, timeOut: number) => {
    const ns = BigInt(Math.round((performance.timeOrigin + performance.now()) * 1_000_000));
    const v = _wasiMemoryView();
    if (v) v.setBigUint64(timeOut, ns, true);
    return 0;
  },
  clock_res_get: (_clockId: number, resOut: number) => {
    const v = _wasiMemoryView();
    if (v) v.setBigUint64(resOut, 1_000n, true);
    return 0;
  },
  // Fills the guest buffer with random bytes; a no-op until memory is known.
  random_get: (bufPtr: number, bufLen: number) => {
    const g = globalThis as unknown as { __alef_wasm_memory__?: WebAssembly.Memory };
    if (!g.__alef_wasm_memory__) return 0;
    _cryptoFill(new Uint8Array(g.__alef_wasm_memory__.buffer, bufPtr, bufLen));
    return 0;
  },
  args_get: () => 0,
  // Report an empty argv; both out-params are written (see environ_sizes_get).
  args_sizes_get: (countOut: number, sizeOut: number) => {
    const v = _wasiMemoryView();
    if (v) {
      v.setUint32(countOut, 0, true);
      v.setUint32(sizeOut, 0, true);
    }
    return 0;
  },
  poll_oneoff: () => 0,
  sched_yield: () => 0,
};
// Resolution hook: the two shim names resolve to themselves so the lookup
// never searches the filesystem for them; all other requests are delegated
// to the resolver that was installed before this patch.
const originalResolveFilename = Module._resolveFilename;
Module._resolveFilename = function (request: string, parent: unknown, ...extra: unknown[]) {
  const isHostShim = request === 'env' || request === 'wasi_snapshot_preview1';
  return isHostShim ? request : originalResolveFilename.call(this, request, parent, ...extra);
};
// Load hook: hands out the shim objects for the two host-import names and
// delegates every other request to the loader that was installed before
// this patch.
const originalLoad = Module._load;
Module._load = function (request: string, parent: unknown, ...extra: unknown[]) {
  switch (request) {
    case 'env':
      return env;
    case 'wasi_snapshot_preview1':
      return wasi_snapshot_preview1;
    default:
      return originalLoad.call(this, request, parent, ...extra);
  }
};
// Capture the WASM linear memory at instantiation time so the WASI shims
// can read/write into it. Without this, every shim that needs memory
// (fd_write nwritten, clock_time_get, random_get, etc.) silently no-ops
// and the host-side C runtime hangs in a retry loop.
//
// Only the synchronous `new WebAssembly.Instance(...)` path is wrapped here,
// and only an export literally named `memory` is recorded. Each instantiation
// that exports such a memory overwrites the previously stored one.
// NOTE(review): the pre-init step earlier in this file imports the wasm
// bundle before this wrapper is installed — confirm that the bundle is
// instantiated again afterwards so its memory is actually captured.
const _OrigInstance = WebAssembly.Instance;
// Plain function used as a constructor: it builds the real instance and
// returns it, so `new PatchedInstance(...)` evaluates to the genuine
// WebAssembly.Instance rather than to `this`.
const PatchedInstance = function(this: WebAssembly.Instance, mod: WebAssembly.Module, imports?: WebAssembly.Imports) {
const inst = new _OrigInstance(mod, imports);
const exportsMem = (inst.exports as Record<string, unknown>).memory;
if (exportsMem instanceof WebAssembly.Memory) {
// Publish the memory where the WASI shims look for it.
(globalThis as unknown as { __alef_wasm_memory__?: WebAssembly.Memory }).__alef_wasm_memory__ = exportsMem;
}
return inst;
} as unknown as typeof WebAssembly.Instance;
// Share the original prototype so `instanceof WebAssembly.Instance` checks
// still succeed after the global has been replaced.
PatchedInstance.prototype = _OrigInstance.prototype;
(WebAssembly as unknown as { Instance: typeof WebAssembly.Instance }).Instance = PatchedInstance;
}
// Make the shared fixtures directory the working directory so that relative
// fixture paths such as "pdf/fake_memo.pdf" resolve when vitest is started
// from e2e/wasm/. This file sits in e2e/wasm/, and the fixtures live in
// `test_documents` at the repository root, i.e. two levels above it.
const setupDir = dirname(fileURLToPath(import.meta.url));
process.chdir(join(setupDir, '..', '..', 'test_documents'));

44
e2e/wasm/tests/async.test.ts generated Normal file
View File

@@ -0,0 +1,44 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
import { describe, expect, it } from 'vitest';import { extractFile, extractBytes, WasmExtractionConfig, WasmAccelerationConfig, WasmChunkingConfig, WasmContentFilterConfig, WasmEmailConfig, WasmEmbeddingConfig, WasmHierarchyConfig, WasmHtmlOutputConfig, WasmImageExtractionConfig, WasmImagePreprocessingConfig, WasmKeywordConfig, WasmLanguageDetectionConfig, WasmLayoutDetectionConfig, WasmLlmConfig, WasmOcrConfig, WasmOcrElementConfig, WasmOcrPipelineConfig, WasmOcrPipelineStage, WasmOcrQualityThresholds, WasmPageConfig, WasmPdfConfig, WasmPostProcessorConfig, WasmRakeParams, WasmSecurityLimits, WasmStructuredExtractionConfig, WasmTesseractConfig, WasmTokenReductionOptions, WasmTreeSitterConfig, WasmTreeSitterProcessConfig, WasmYakeParams } from '@kreuzberg/wasm';
/** Stringifies a value for text comparison; `null` and `undefined` become "". */
function _alefE2eText(value: unknown): string {
  if (value === null || value === undefined) {
    return "";
  }
  return String(value);
}
/**
 * Collects the searchable text fragments of an item. Non-objects yield a
 * single-element list; objects yield the item's own string form, its `kind`,
 * `name`, `source`, `alias`, `text` and `signature` fields, and finally the
 * space-joined `items` array (or "" when `items` is not an array).
 */
function _alefE2eItemTexts(item: unknown): string[] {
  const isObject = item != null && typeof item === "object";
  if (!isObject) {
    return [_alefE2eText(item)];
  }
  const fields = item as Record<string, unknown>;
  const nested = fields.items;
  const nestedText = Array.isArray(nested) ? nested.map((entry) => _alefE2eText(entry)).join(" ") : "";
  const fieldNames = ["kind", "name", "source", "alias", "text", "signature"];
  return [_alefE2eText(item), ...fieldNames.map((key) => _alefE2eText(fields[key])), nestedText];
}
/**
 * Produces the display string for a format-metadata value.
 *
 * - `null`/`undefined` -> ""
 * - non-object -> `String(fm)`
 * - `{ format_type: "image", image: { format: "PNG" } }` -> the image format
 * - otherwise the `format_type` string, or "" when it is not a string
 */
function _alefE2eFormatMetadataDisplay(fm: unknown): string {
  if (fm == null) return "";
  if (typeof fm !== "object") return String(fm);
  const record = fm as Record<string, unknown>;
  const formatType = record.format_type;
  // FormatMetadata is a tagged union: { format_type: 'image', image: { format: 'PNG', ... }, ... }
  // `typeof null` is "object", so null is excluded explicitly before the
  // nested payload is dereferenced.
  const image = record.image;
  if (formatType === "image" && image !== null && typeof image === "object") {
    const format = (image as Record<string, unknown>).format;
    if (typeof format === "string") return format;
  }
  // Fallback: return format_type variant name
  return typeof formatType === "string" ? formatType : "";
}
// extractBytes must reject when the MIME type is empty or unsupported.
// The fixture is read outside the closure that is expected to reject, so a
// missing or unreadable fixture fails the test instead of being mistaken for
// the rejection under test. Fixture paths are relative to the working
// directory.
describe('async', () => {
  it('async_extract_bytes_empty_mime: extract_bytes empty MIME async', async () => {
    const { readFile } = await import('node:fs/promises');
    const _content_content = await readFile('text/plain.txt');
    await expect(async () => {
      await extractBytes(_content_content, "", undefined);
    }).rejects.toThrow();
  }, 30000);

  it('async_extract_bytes_invalid_mime: extract_bytes unsupported MIME async', async () => {
    const { readFile } = await import('node:fs/promises');
    const _content_content = await readFile('text/plain.txt');
    await expect(async () => {
      await extractBytes(_content_content, "application/x-nonexistent", undefined);
    }).rejects.toThrow();
  }, 30000);
});

48
e2e/wasm/tests/detection.test.ts generated Normal file
View File

@@ -0,0 +1,48 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
import { describe, expect, it } from 'vitest';import { extractFile, detectMimeTypeFromBytes, getExtensionsForMime, WasmAccelerationConfig, WasmChunkingConfig, WasmContentFilterConfig, WasmEmailConfig, WasmEmbeddingConfig, WasmHierarchyConfig, WasmHtmlOutputConfig, WasmImageExtractionConfig, WasmImagePreprocessingConfig, WasmKeywordConfig, WasmLanguageDetectionConfig, WasmLayoutDetectionConfig, WasmLlmConfig, WasmOcrConfig, WasmOcrElementConfig, WasmOcrPipelineConfig, WasmOcrPipelineStage, WasmOcrQualityThresholds, WasmPageConfig, WasmPdfConfig, WasmPostProcessorConfig, WasmRakeParams, WasmSecurityLimits, WasmStructuredExtractionConfig, WasmTesseractConfig, WasmTokenReductionOptions, WasmTreeSitterConfig, WasmTreeSitterProcessConfig, WasmYakeParams } from '@kreuzberg/wasm';
/** Stringifies a value for text comparison; `null` and `undefined` become "". */
function _alefE2eText(value: unknown): string {
  if (value === null || value === undefined) {
    return "";
  }
  return String(value);
}
/**
 * Collects the searchable text fragments of an item. Non-objects yield a
 * single-element list; objects yield the item's own string form, its `kind`,
 * `name`, `source`, `alias`, `text` and `signature` fields, and finally the
 * space-joined `items` array (or "" when `items` is not an array).
 */
function _alefE2eItemTexts(item: unknown): string[] {
  const isObject = item != null && typeof item === "object";
  if (!isObject) {
    return [_alefE2eText(item)];
  }
  const fields = item as Record<string, unknown>;
  const nested = fields.items;
  const nestedText = Array.isArray(nested) ? nested.map((entry) => _alefE2eText(entry)).join(" ") : "";
  const fieldNames = ["kind", "name", "source", "alias", "text", "signature"];
  return [_alefE2eText(item), ...fieldNames.map((key) => _alefE2eText(fields[key])), nestedText];
}
/**
 * Produces the display string for a format-metadata value.
 *
 * - `null`/`undefined` -> ""
 * - non-object -> `String(fm)`
 * - `{ format_type: "image", image: { format: "PNG" } }` -> the image format
 * - otherwise the `format_type` string, or "" when it is not a string
 */
function _alefE2eFormatMetadataDisplay(fm: unknown): string {
  if (fm == null) return "";
  if (typeof fm !== "object") return String(fm);
  const record = fm as Record<string, unknown>;
  const formatType = record.format_type;
  // FormatMetadata is a tagged union: { format_type: 'image', image: { format: 'PNG', ... }, ... }
  // `typeof null` is "object", so null is excluded explicitly before the
  // nested payload is dereferenced.
  const image = record.image;
  if (formatType === "image" && image !== null && typeof image === "object") {
    const format = (image as Record<string, unknown>).format;
    if (typeof format === "string") return format;
  }
  // Fallback: return format_type variant name
  return typeof formatType === "string" ? formatType : "";
}
// Smoke tests for MIME detection. The three byte-based cases only check that
// detectMimeTypeFromBytes runs on the fixture without throwing; the last case
// expects getExtensionsForMime to fail for an unknown MIME type. Fixture
// paths are relative to the working directory.
describe('detection', () => {
  it('detect_mime_bytes_html: Detect HTML MIME from bytes', async () => {
    const { readFile } = await import('node:fs/promises');
    const _content_content = await readFile('html/html.html');
    detectMimeTypeFromBytes(_content_content);
  }, 30000);

  it('detect_mime_bytes_pdf: Detect PDF MIME type from bytes', async () => {
    const { readFile } = await import('node:fs/promises');
    const _content_content = await readFile('pdf/fake_memo.pdf');
    detectMimeTypeFromBytes(_content_content);
  }, 30000);

  it('detect_mime_bytes_png: Detect PNG MIME type from bytes', async () => {
    const { readFile } = await import('node:fs/promises');
    const _content_content = await readFile('images/test_hello_world.png');
    detectMimeTypeFromBytes(_content_content);
  }, 30000);

  it('get_extensions_unknown_mime: get_extensions unknown MIME', async () => {
    const lookup = async () => {
      await getExtensionsForMime("application/x-totally-unknown");
    };
    await expect(lookup).rejects.toThrow();
  }, 30000);
});

View File

@@ -0,0 +1,44 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
import { describe, expect, it } from 'vitest';import { extractFile, clearDocumentExtractors, listDocumentExtractors, WasmAccelerationConfig, WasmChunkingConfig, WasmContentFilterConfig, WasmEmailConfig, WasmEmbeddingConfig, WasmHierarchyConfig, WasmHtmlOutputConfig, WasmImageExtractionConfig, WasmImagePreprocessingConfig, WasmKeywordConfig, WasmLanguageDetectionConfig, WasmLayoutDetectionConfig, WasmLlmConfig, WasmOcrConfig, WasmOcrElementConfig, WasmOcrPipelineConfig, WasmOcrPipelineStage, WasmOcrQualityThresholds, WasmPageConfig, WasmPdfConfig, WasmPostProcessorConfig, WasmRakeParams, WasmSecurityLimits, WasmStructuredExtractionConfig, WasmTesseractConfig, WasmTokenReductionOptions, WasmTreeSitterConfig, WasmTreeSitterProcessConfig, WasmYakeParams } from '@kreuzberg/wasm';
/** Stringifies a value for text comparison; `null` and `undefined` become "". */
function _alefE2eText(value: unknown): string {
  if (value === null || value === undefined) {
    return "";
  }
  return String(value);
}
/**
 * Collects the searchable text fragments of an item. Non-objects yield a
 * single-element list; objects yield the item's own string form, its `kind`,
 * `name`, `source`, `alias`, `text` and `signature` fields, and finally the
 * space-joined `items` array (or "" when `items` is not an array).
 */
function _alefE2eItemTexts(item: unknown): string[] {
  const isObject = item != null && typeof item === "object";
  if (!isObject) {
    return [_alefE2eText(item)];
  }
  const fields = item as Record<string, unknown>;
  const nested = fields.items;
  const nestedText = Array.isArray(nested) ? nested.map((entry) => _alefE2eText(entry)).join(" ") : "";
  const fieldNames = ["kind", "name", "source", "alias", "text", "signature"];
  return [_alefE2eText(item), ...fieldNames.map((key) => _alefE2eText(fields[key])), nestedText];
}
/**
 * Produces the display string for a format-metadata value.
 *
 * - `null`/`undefined` -> ""
 * - non-object -> `String(fm)`
 * - `{ format_type: "image", image: { format: "PNG" } }` -> the image format
 * - otherwise the `format_type` string, or "" when it is not a string
 */
function _alefE2eFormatMetadataDisplay(fm: unknown): string {
  if (fm == null) return "";
  if (typeof fm !== "object") return String(fm);
  const record = fm as Record<string, unknown>;
  const formatType = record.format_type;
  // FormatMetadata is a tagged union: { format_type: 'image', image: { format: 'PNG', ... }, ... }
  // `typeof null` is "object", so null is excluded explicitly before the
  // nested payload is dereferenced.
  const image = record.image;
  if (formatType === "image" && image !== null && typeof image === "object") {
    const format = (image as Record<string, unknown>).format;
    if (typeof format === "string") return format;
  }
  // Fallback: return format_type variant name
  return typeof formatType === "string" ? formatType : "";
}
// Smoke tests for the document-extractor registry: each case only checks
// that the call completes without throwing; no return value is asserted.
describe('document_extractor_management', () => {
  const timeoutMs = 30000;

  it('document_extractors_clear: Clear all document extractors and verify list is empty', () => {
    clearDocumentExtractors();
  }, timeoutMs);

  it('extractors_list: List all registered document extractors', () => {
    listDocumentExtractors();
  }, timeoutMs);
});

View File

@@ -0,0 +1,44 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
import { describe, expect, it } from 'vitest';import { extractFile, clearEmbeddingBackends, listEmbeddingBackends, WasmAccelerationConfig, WasmChunkingConfig, WasmContentFilterConfig, WasmEmailConfig, WasmEmbeddingConfig, WasmHierarchyConfig, WasmHtmlOutputConfig, WasmImageExtractionConfig, WasmImagePreprocessingConfig, WasmKeywordConfig, WasmLanguageDetectionConfig, WasmLayoutDetectionConfig, WasmLlmConfig, WasmOcrConfig, WasmOcrElementConfig, WasmOcrPipelineConfig, WasmOcrPipelineStage, WasmOcrQualityThresholds, WasmPageConfig, WasmPdfConfig, WasmPostProcessorConfig, WasmRakeParams, WasmSecurityLimits, WasmStructuredExtractionConfig, WasmTesseractConfig, WasmTokenReductionOptions, WasmTreeSitterConfig, WasmTreeSitterProcessConfig, WasmYakeParams } from '@kreuzberg/wasm';
/** Stringifies a value for text comparison; `null` and `undefined` become "". */
function _alefE2eText(value: unknown): string {
  if (value === null || value === undefined) {
    return "";
  }
  return String(value);
}
/**
 * Collects the searchable text fragments of an item. Non-objects yield a
 * single-element list; objects yield the item's own string form, its `kind`,
 * `name`, `source`, `alias`, `text` and `signature` fields, and finally the
 * space-joined `items` array (or "" when `items` is not an array).
 */
function _alefE2eItemTexts(item: unknown): string[] {
  const isObject = item != null && typeof item === "object";
  if (!isObject) {
    return [_alefE2eText(item)];
  }
  const fields = item as Record<string, unknown>;
  const nested = fields.items;
  const nestedText = Array.isArray(nested) ? nested.map((entry) => _alefE2eText(entry)).join(" ") : "";
  const fieldNames = ["kind", "name", "source", "alias", "text", "signature"];
  return [_alefE2eText(item), ...fieldNames.map((key) => _alefE2eText(fields[key])), nestedText];
}
/**
 * Produces the display string for a format-metadata value.
 *
 * - `null`/`undefined` -> ""
 * - non-object -> `String(fm)`
 * - `{ format_type: "image", image: { format: "PNG" } }` -> the image format
 * - otherwise the `format_type` string, or "" when it is not a string
 */
function _alefE2eFormatMetadataDisplay(fm: unknown): string {
  if (fm == null) return "";
  if (typeof fm !== "object") return String(fm);
  const record = fm as Record<string, unknown>;
  const formatType = record.format_type;
  // FormatMetadata is a tagged union: { format_type: 'image', image: { format: 'PNG', ... }, ... }
  // `typeof null` is "object", so null is excluded explicitly before the
  // nested payload is dereferenced.
  const image = record.image;
  if (formatType === "image" && image !== null && typeof image === "object") {
    const format = (image as Record<string, unknown>).format;
    if (typeof format === "string") return format;
  }
  // Fallback: return format_type variant name
  return typeof formatType === "string" ? formatType : "";
}
// Smoke tests for the embedding-backend registry: each case only checks that
// the call completes without throwing; no return value is asserted.
describe('embedding_backend_management', () => {
  const timeoutMs = 600000;

  it('embedding_backends_clear: Clear all embedding backends and verify list is empty', () => {
    clearEmbeddingBackends();
  }, timeoutMs);

  it('embedding_backends_list: List all registered embedding backends', () => {
    listEmbeddingBackends();
  }, timeoutMs);
});

49
e2e/wasm/tests/mime_utilities.test.ts generated Normal file
View File

@@ -0,0 +1,49 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
import { describe, expect, it } from 'vitest';import { extractFile, detectMimeTypeFromBytes, getExtensionsForMime, WasmAccelerationConfig, WasmChunkingConfig, WasmContentFilterConfig, WasmEmailConfig, WasmEmbeddingConfig, WasmHierarchyConfig, WasmHtmlOutputConfig, WasmImageExtractionConfig, WasmImagePreprocessingConfig, WasmKeywordConfig, WasmLanguageDetectionConfig, WasmLayoutDetectionConfig, WasmLlmConfig, WasmOcrConfig, WasmOcrElementConfig, WasmOcrPipelineConfig, WasmOcrPipelineStage, WasmOcrQualityThresholds, WasmPageConfig, WasmPdfConfig, WasmPostProcessorConfig, WasmRakeParams, WasmSecurityLimits, WasmStructuredExtractionConfig, WasmTesseractConfig, WasmTokenReductionOptions, WasmTreeSitterConfig, WasmTreeSitterProcessConfig, WasmYakeParams } from '@kreuzberg/wasm';
/** Stringifies a value for text comparison; `null` and `undefined` become "". */
function _alefE2eText(value: unknown): string {
  if (value === null || value === undefined) {
    return "";
  }
  return String(value);
}
/**
 * Collects the searchable text fragments of an item. Non-objects yield a
 * single-element list; objects yield the item's own string form, its `kind`,
 * `name`, `source`, `alias`, `text` and `signature` fields, and finally the
 * space-joined `items` array (or "" when `items` is not an array).
 */
function _alefE2eItemTexts(item: unknown): string[] {
  const isObject = item != null && typeof item === "object";
  if (!isObject) {
    return [_alefE2eText(item)];
  }
  const fields = item as Record<string, unknown>;
  const nested = fields.items;
  const nestedText = Array.isArray(nested) ? nested.map((entry) => _alefE2eText(entry)).join(" ") : "";
  const fieldNames = ["kind", "name", "source", "alias", "text", "signature"];
  return [_alefE2eText(item), ...fieldNames.map((key) => _alefE2eText(fields[key])), nestedText];
}
/**
 * Produces the display string for a format-metadata value.
 *
 * - `null`/`undefined` -> ""
 * - non-object -> `String(fm)`
 * - `{ format_type: "image", image: { format: "PNG" } }` -> the image format
 * - otherwise the `format_type` string, or "" when it is not a string
 */
function _alefE2eFormatMetadataDisplay(fm: unknown): string {
  if (fm == null) return "";
  if (typeof fm !== "object") return String(fm);
  const record = fm as Record<string, unknown>;
  const formatType = record.format_type;
  // FormatMetadata is a tagged union: { format_type: 'image', image: { format: 'PNG', ... }, ... }
  // `typeof null` is "object", so null is excluded explicitly before the
  // nested payload is dereferenced.
  const image = record.image;
  if (formatType === "image" && image !== null && typeof image === "object") {
    const format = (image as Record<string, unknown>).format;
    if (typeof format === "string") return format;
  }
  // Fallback: return format_type variant name
  return typeof formatType === "string" ? formatType : "";
}
// Smoke tests for the MIME helpers. Each case only checks that the call
// completes without throwing; the returned value is captured but not
// asserted. Fixture paths are relative to the working directory.
describe('mime_utilities', () => {
  it('mime_detect_bytes: Detect MIME type from file bytes', async () => {
    const { readFile } = await import('node:fs/promises');
    const _content_content = await readFile('pdf/fake_memo.pdf');
    // skipped: field 'result' not applicable for simple result type
    const result = detectMimeTypeFromBytes(_content_content);
  }, 30000);

  it('mime_detect_image: Detect MIME type from PNG image bytes', async () => {
    const { readFile } = await import('node:fs/promises');
    const _content_content = await readFile('images/test_hello_world.png');
    // skipped: field 'result' not applicable for simple result type
    const result = detectMimeTypeFromBytes(_content_content);
  }, 30000);

  it('mime_get_extensions: Get file extensions for a MIME type', () => {
    // skipped: field 'result' not applicable for simple result type
    const result = getExtensionsForMime("application/pdf");
  }, 30000);
});

View File

@@ -0,0 +1,46 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
import { describe, expect, it } from 'vitest';import { extractFile, clearOcrBackends, listOcrBackends, unregisterOcrBackend, WasmAccelerationConfig, WasmChunkingConfig, WasmContentFilterConfig, WasmEmailConfig, WasmEmbeddingConfig, WasmHierarchyConfig, WasmHtmlOutputConfig, WasmImageExtractionConfig, WasmImagePreprocessingConfig, WasmKeywordConfig, WasmLanguageDetectionConfig, WasmLayoutDetectionConfig, WasmLlmConfig, WasmOcrConfig, WasmOcrElementConfig, WasmOcrPipelineConfig, WasmOcrPipelineStage, WasmOcrQualityThresholds, WasmPageConfig, WasmPdfConfig, WasmPostProcessorConfig, WasmRakeParams, WasmSecurityLimits, WasmStructuredExtractionConfig, WasmTesseractConfig, WasmTokenReductionOptions, WasmTreeSitterConfig, WasmTreeSitterProcessConfig, WasmYakeParams } from '@kreuzberg/wasm';
/** Stringifies a value for text comparison; `null` and `undefined` become "". */
function _alefE2eText(value: unknown): string {
  if (value === null || value === undefined) {
    return "";
  }
  return String(value);
}
/**
 * Collects the searchable text fragments of an item. Non-objects yield a
 * single-element list; objects yield the item's own string form, its `kind`,
 * `name`, `source`, `alias`, `text` and `signature` fields, and finally the
 * space-joined `items` array (or "" when `items` is not an array).
 */
function _alefE2eItemTexts(item: unknown): string[] {
  const isObject = item != null && typeof item === "object";
  if (!isObject) {
    return [_alefE2eText(item)];
  }
  const fields = item as Record<string, unknown>;
  const nested = fields.items;
  const nestedText = Array.isArray(nested) ? nested.map((entry) => _alefE2eText(entry)).join(" ") : "";
  const fieldNames = ["kind", "name", "source", "alias", "text", "signature"];
  return [_alefE2eText(item), ...fieldNames.map((key) => _alefE2eText(fields[key])), nestedText];
}
/**
 * Produces the display string for a format-metadata value.
 *
 * - `null`/`undefined` -> ""
 * - non-object -> `String(fm)`
 * - `{ format_type: "image", image: { format: "PNG" } }` -> the image format
 * - otherwise the `format_type` string, or "" when it is not a string
 */
function _alefE2eFormatMetadataDisplay(fm: unknown): string {
  if (fm == null) return "";
  if (typeof fm !== "object") return String(fm);
  const record = fm as Record<string, unknown>;
  const formatType = record.format_type;
  // FormatMetadata is a tagged union: { format_type: 'image', image: { format: 'PNG', ... }, ... }
  // `typeof null` is "object", so null is excluded explicitly before the
  // nested payload is dereferenced.
  const image = record.image;
  if (formatType === "image" && image !== null && typeof image === "object") {
    const format = (image as Record<string, unknown>).format;
    if (typeof format === "string") return format;
  }
  // Fallback: return format_type variant name
  return typeof formatType === "string" ? formatType : "";
}
// Smoke tests for the OCR-backend registry: each case only checks that the
// call completes without throwing; no return value is asserted.
describe('ocr_backend_management', () => {
  const timeoutMs = 30000;

  it('ocr_backends_clear: Clear all OCR backends and verify list is empty', () => {
    clearOcrBackends();
  }, timeoutMs);

  it('ocr_backends_list: List all registered OCR backends', () => {
    listOcrBackends();
  }, timeoutMs);

  it('ocr_backends_unregister: Unregister nonexistent OCR backend gracefully', () => {
    unregisterOcrBackend("nonexistent-backend-xyz");
  }, timeoutMs);
});

104
e2e/wasm/tests/plugin_api.test.ts generated Normal file
View File

@@ -0,0 +1,104 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
import { describe, expect, it } from 'vitest';import { extractFile, registerDocumentExtractor, registerEmbeddingBackend, registerOcrBackend, registerPostProcessor, registerRenderer, registerValidator, unregisterDocumentExtractor, unregisterEmbeddingBackend, unregisterPostProcessor, unregisterRenderer, unregisterValidator, WasmAccelerationConfig, WasmChunkingConfig, WasmContentFilterConfig, WasmEmailConfig, WasmEmbeddingConfig, WasmHierarchyConfig, WasmHtmlOutputConfig, WasmImageExtractionConfig, WasmImagePreprocessingConfig, WasmKeywordConfig, WasmLanguageDetectionConfig, WasmLayoutDetectionConfig, WasmLlmConfig, WasmOcrConfig, WasmOcrElementConfig, WasmOcrPipelineConfig, WasmOcrPipelineStage, WasmOcrQualityThresholds, WasmPageConfig, WasmPdfConfig, WasmPostProcessorConfig, WasmRakeParams, WasmSecurityLimits, WasmStructuredExtractionConfig, WasmTesseractConfig, WasmTokenReductionOptions, WasmTreeSitterConfig, WasmTreeSitterProcessConfig, WasmYakeParams } from '@kreuzberg/wasm';
/** Stringifies a value for text comparison; `null` and `undefined` become "". */
function _alefE2eText(value: unknown): string {
  if (value === null || value === undefined) {
    return "";
  }
  return String(value);
}
/**
 * Collects the searchable text fragments of an item. Non-objects yield a
 * single-element list; objects yield the item's own string form, its `kind`,
 * `name`, `source`, `alias`, `text` and `signature` fields, and finally the
 * space-joined `items` array (or "" when `items` is not an array).
 */
function _alefE2eItemTexts(item: unknown): string[] {
  const isObject = item != null && typeof item === "object";
  if (!isObject) {
    return [_alefE2eText(item)];
  }
  const fields = item as Record<string, unknown>;
  const nested = fields.items;
  const nestedText = Array.isArray(nested) ? nested.map((entry) => _alefE2eText(entry)).join(" ") : "";
  const fieldNames = ["kind", "name", "source", "alias", "text", "signature"];
  return [_alefE2eText(item), ...fieldNames.map((key) => _alefE2eText(fields[key])), nestedText];
}
/**
 * Produces the display string for a format-metadata value.
 *
 * - `null`/`undefined` -> ""
 * - non-object -> `String(fm)`
 * - `{ format_type: "image", image: { format: "PNG" } }` -> the image format
 * - otherwise the `format_type` string, or "" when it is not a string
 */
function _alefE2eFormatMetadataDisplay(fm: unknown): string {
  if (fm == null) return "";
  if (typeof fm !== "object") return String(fm);
  const record = fm as Record<string, unknown>;
  const formatType = record.format_type;
  // FormatMetadata is a tagged union: { format_type: 'image', image: { format: 'PNG', ... }, ... }
  // `typeof null` is "object", so null is excluded explicitly before the
  // nested payload is dereferenced.
  const image = record.image;
  if (formatType === "image" && image !== null && typeof image === "object") {
    const format = (image as Record<string, unknown>).format;
    if (typeof format === "string") return format;
  }
  // Fallback: return format_type variant name
  return typeof formatType === "string" ? formatType : "";
}
// Trait-bridge tests for the plugin registration API. Each register_* case defines a minimal
// stub class and passes an instance to the matching register function; each unregister_* case
// calls the matching unregister function with the name one of the stubs reports. No case asserts
// on a result, so a case passes as long as the call does not throw.
// NOTE(review): the unregister_* cases appear to rely on the register_* cases above having run
// first in the same module instance — confirm. No unregister call for "test-backend" exists here.
describe('plugin_api', () => {
  it('register_document_extractor_trait_bridge: register_document_extractor: trait bridge', () => {
    class _TestStub_register_document_extractor_trait_bridge {
      name(): string { return "test-extractor"; }
      async extract_bytes(_p0?: any, _p1?: any, _p2?: any): Promise<string> { return "{}"; }
      async extract_file(_p0?: any, _p1?: any, _p2?: any): Promise<string> { return "{}"; }
      // Returns an empty list, so the annotation is an array type (a `string` annotation does not type-check).
      supported_mime_types(): string[] { return []; }
      priority(): number { return 1; }
      can_handle(_p0?: any, _p1?: any): boolean { return false; }
    }
    registerDocumentExtractor(new _TestStub_register_document_extractor_trait_bridge());
  }, 30000);

  it('register_embedding_backend_trait_bridge: register_embedding_backend: trait bridge', () => {
    class _TestStub_register_embedding_backend_trait_bridge {
      name(): string { return "test-embedding-backend"; }
      dimensions(): number { return 1; }
      // Resolves to an empty list; the element type is not visible here, hence `unknown[]`.
      async embed(_p0?: any): Promise<unknown[]> { return []; }
    }
    registerEmbeddingBackend(new _TestStub_register_embedding_backend_trait_bridge());
  }, 600000);

  it('register_ocr_backend_trait_bridge: register_ocr_backend: trait bridge', () => {
    class _TestStub_register_ocr_backend_trait_bridge {
      name(): string { return "test-backend"; }
      async process_image(_p0?: any, _p1?: any): Promise<string> { return "{}"; }
      async process_image_file(_p0?: any, _p1?: any): Promise<string> { return "{}"; }
      supports_language(_p0?: any): boolean { return false; }
      backend_type(): string { return "{}"; }
      // Returns an empty list, so the annotation is an array type (a `string` annotation does not type-check).
      supported_languages(): string[] { return []; }
      supports_table_detection(): boolean { return false; }
      supports_document_processing(): boolean { return false; }
      async process_document(_p0?: any, _p1?: any): Promise<string> { return "{}"; }
    }
    registerOcrBackend(new _TestStub_register_ocr_backend_trait_bridge());
  }, 30000);

  it('register_post_processor_trait_bridge: register_post_processor: trait bridge', () => {
    class _TestStub_register_post_processor_trait_bridge {
      name(): string { return "test-processor"; }
      async process(_p0?: any, _p1?: any): Promise<void> { return undefined; }
      processing_stage(): string { return "{}"; }
      should_process(_p0?: any, _p1?: any): boolean { return false; }
      estimated_duration_ms(_p0?: any): number { return 1; }
      priority(): number { return 1; }
    }
    registerPostProcessor(new _TestStub_register_post_processor_trait_bridge());
  }, 30000);

  it('register_renderer_trait_bridge: register_renderer: trait bridge', () => {
    class _TestStub_register_renderer_trait_bridge {
      name(): string { return "test-renderer"; }
      render(_p0?: any): string { return ""; }
    }
    registerRenderer(new _TestStub_register_renderer_trait_bridge());
  }, 30000);

  it('register_validator_trait_bridge: register_validator: trait bridge', () => {
    class _TestStub_register_validator_trait_bridge {
      name(): string { return "test-validator"; }
      async validate(_p0?: any, _p1?: any): Promise<void> { return undefined; }
      should_validate(_p0?: any, _p1?: any): boolean { return false; }
      priority(): number { return 1; }
    }
    registerValidator(new _TestStub_register_validator_trait_bridge());
  }, 30000);

  it('unregister_document_extractor_after_register: unregister_document_extractor', () => {
    unregisterDocumentExtractor("test-extractor");
  }, 30000);

  it('unregister_embedding_backend_after_register: unregister_embedding_backend', () => {
    unregisterEmbeddingBackend("test-embedding-backend");
  }, 600000);

  it('unregister_post_processor_after_register: unregister_post_processor', () => {
    unregisterPostProcessor("test-processor");
  }, 30000);

  it('unregister_renderer_after_register: unregister_renderer', () => {
    unregisterRenderer("test-renderer");
  }, 30000);

  it('unregister_validator_after_register: unregister_validator', () => {
    unregisterValidator("test-validator");
  }, 30000);
});

View File

@@ -0,0 +1,44 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
import { describe, expect, it } from 'vitest';import { extractFile, clearPostProcessors, listPostProcessors, WasmAccelerationConfig, WasmChunkingConfig, WasmContentFilterConfig, WasmEmailConfig, WasmEmbeddingConfig, WasmHierarchyConfig, WasmHtmlOutputConfig, WasmImageExtractionConfig, WasmImagePreprocessingConfig, WasmKeywordConfig, WasmLanguageDetectionConfig, WasmLayoutDetectionConfig, WasmLlmConfig, WasmOcrConfig, WasmOcrElementConfig, WasmOcrPipelineConfig, WasmOcrPipelineStage, WasmOcrQualityThresholds, WasmPageConfig, WasmPdfConfig, WasmPostProcessorConfig, WasmRakeParams, WasmSecurityLimits, WasmStructuredExtractionConfig, WasmTesseractConfig, WasmTokenReductionOptions, WasmTreeSitterConfig, WasmTreeSitterProcessConfig, WasmYakeParams } from '@kreuzberg/wasm';
/**
 * Converts an arbitrary value to text for e2e assertions.
 * `null` and `undefined` become the empty string; every other value goes through `String()`.
 */
function _alefE2eText(value: unknown): string {
  if (value === null || value === undefined) {
    return "";
  }
  return String(value);
}
/**
 * Lists the text fragments of an item for e2e assertions.
 *
 * A non-object value (including null/undefined) yields a one-element array with its text form.
 * An object yields, in order: its own string form, the text of its `kind`, `name`, `source`,
 * `alias`, `text` and `signature` properties, and the space-joined text of `items`
 * ("" when `items` is not an array).
 */
function _alefE2eItemTexts(item: unknown): string[] {
  if (item === null || item === undefined || typeof item !== "object") {
    return [_alefE2eText(item)];
  }
  const fields = item as Record<string, unknown>;
  const nestedText = Array.isArray(fields.items) ? fields.items.map(_alefE2eText).join(" ") : "";
  const fieldNames = ["kind", "name", "source", "alias", "text", "signature"];
  return [
    _alefE2eText(item),
    ...fieldNames.map((fieldName) => _alefE2eText(fields[fieldName])),
    nestedText,
  ];
}
/**
 * Produces the display string for a FormatMetadata value.
 *
 * FormatMetadata is a tagged union: { format_type: 'image', image: { format: 'PNG', ... }, ... }
 *
 * @param fm - metadata value; may be null/undefined, a primitive, or an object.
 * @returns "" for null/undefined; `String(fm)` for non-objects; the `image.format` string for the
 *   image variant when it is present; otherwise the `format_type` tag when it is a string, else "".
 */
function _alefE2eFormatMetadataDisplay(fm: unknown): string {
  if (fm == null) return "";
  if (typeof fm !== "object") return String(fm);
  const record = fm as Record<string, unknown>;
  const formatType = record.format_type;
  // `typeof null` is "object", so null is excluded explicitly; otherwise reading `.format` would throw.
  if (formatType === "image" && typeof record.image === "object" && record.image !== null) {
    const imageData = record.image as Record<string, unknown>;
    if (typeof imageData.format === "string") return imageData.format;
  }
  // Fallback: return the format_type variant name
  return typeof formatType === "string" ? formatType : "";
}
// Calls the post-processor clear and list functions; each case passes when the call does not throw.
// NOTE(review): the first title says "verify list is empty", but no assertion is made here.
describe('post_processor_management', () => {
  it('post_processors_clear: Clear all post-processors and verify list is empty', () => {
    clearPostProcessors();
  }, 30000);

  it('post_processors_list: List all registered post-processors', () => {
    listPostProcessors();
  }, 30000);
});

52
e2e/wasm/tests/registry.test.ts generated Normal file
View File

@@ -0,0 +1,52 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
import { describe, expect, it } from 'vitest';import { extractFile, listDocumentExtractors, listEmbeddingBackends, listOcrBackends, listPostProcessors, listRenderers, listValidators, WasmAccelerationConfig, WasmChunkingConfig, WasmContentFilterConfig, WasmEmailConfig, WasmEmbeddingConfig, WasmHierarchyConfig, WasmHtmlOutputConfig, WasmImageExtractionConfig, WasmImagePreprocessingConfig, WasmKeywordConfig, WasmLanguageDetectionConfig, WasmLayoutDetectionConfig, WasmLlmConfig, WasmOcrConfig, WasmOcrElementConfig, WasmOcrPipelineConfig, WasmOcrPipelineStage, WasmOcrQualityThresholds, WasmPageConfig, WasmPdfConfig, WasmPostProcessorConfig, WasmRakeParams, WasmSecurityLimits, WasmStructuredExtractionConfig, WasmTesseractConfig, WasmTokenReductionOptions, WasmTreeSitterConfig, WasmTreeSitterProcessConfig, WasmYakeParams } from '@kreuzberg/wasm';
/**
 * Converts an arbitrary value to text for e2e assertions.
 * `null` and `undefined` become the empty string; every other value goes through `String()`.
 */
function _alefE2eText(value: unknown): string {
  if (value === null || value === undefined) {
    return "";
  }
  return String(value);
}
/**
 * Lists the text fragments of an item for e2e assertions.
 *
 * A non-object value (including null/undefined) yields a one-element array with its text form.
 * An object yields, in order: its own string form, the text of its `kind`, `name`, `source`,
 * `alias`, `text` and `signature` properties, and the space-joined text of `items`
 * ("" when `items` is not an array).
 */
function _alefE2eItemTexts(item: unknown): string[] {
  if (item === null || item === undefined || typeof item !== "object") {
    return [_alefE2eText(item)];
  }
  const fields = item as Record<string, unknown>;
  const nestedText = Array.isArray(fields.items) ? fields.items.map(_alefE2eText).join(" ") : "";
  const fieldNames = ["kind", "name", "source", "alias", "text", "signature"];
  return [
    _alefE2eText(item),
    ...fieldNames.map((fieldName) => _alefE2eText(fields[fieldName])),
    nestedText,
  ];
}
/**
 * Produces the display string for a FormatMetadata value.
 *
 * FormatMetadata is a tagged union: { format_type: 'image', image: { format: 'PNG', ... }, ... }
 *
 * @param fm - metadata value; may be null/undefined, a primitive, or an object.
 * @returns "" for null/undefined; `String(fm)` for non-objects; the `image.format` string for the
 *   image variant when it is present; otherwise the `format_type` tag when it is a string, else "".
 */
function _alefE2eFormatMetadataDisplay(fm: unknown): string {
  if (fm == null) return "";
  if (typeof fm !== "object") return String(fm);
  const record = fm as Record<string, unknown>;
  const formatType = record.format_type;
  // `typeof null` is "object", so null is excluded explicitly; otherwise reading `.format` would throw.
  if (formatType === "image" && typeof record.image === "object" && record.image !== null) {
    const imageData = record.image as Record<string, unknown>;
    if (typeof imageData.format === "string") return imageData.format;
  }
  // Fallback: return the format_type variant name
  return typeof formatType === "string" ? formatType : "";
}
// Calls each registry listing function once; a case passes when the call does not throw.
// The embedding-backend case is given a 600000 timeout, the others 30000.
describe('registry', () => {
  it('list_document_extractors: List document extractors', () => {
    listDocumentExtractors();
  }, 30000);

  it('list_embedding_backends: List embedding backends', () => {
    listEmbeddingBackends();
  }, 600000);

  it('list_ocr_backends: List OCR backends', () => {
    listOcrBackends();
  }, 30000);

  it('list_post_processors: List post-processors', () => {
    listPostProcessors();
  }, 30000);

  it('list_renderers: List renderers', () => {
    listRenderers();
  }, 30000);

  it('list_validators: List validators', () => {
    listValidators();
  }, 30000);
});

View File

@@ -0,0 +1,46 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
import { describe, expect, it } from 'vitest';import { extractFile, getExtensionsForMime, WasmAccelerationConfig, WasmChunkingConfig, WasmContentFilterConfig, WasmEmailConfig, WasmEmbeddingConfig, WasmHierarchyConfig, WasmHtmlOutputConfig, WasmImageExtractionConfig, WasmImagePreprocessingConfig, WasmKeywordConfig, WasmLanguageDetectionConfig, WasmLayoutDetectionConfig, WasmLlmConfig, WasmOcrConfig, WasmOcrElementConfig, WasmOcrPipelineConfig, WasmOcrPipelineStage, WasmOcrQualityThresholds, WasmPageConfig, WasmPdfConfig, WasmPostProcessorConfig, WasmRakeParams, WasmSecurityLimits, WasmStructuredExtractionConfig, WasmTesseractConfig, WasmTokenReductionOptions, WasmTreeSitterConfig, WasmTreeSitterProcessConfig, WasmYakeParams } from '@kreuzberg/wasm';
/**
 * Converts an arbitrary value to text for e2e assertions.
 * `null` and `undefined` become the empty string; every other value goes through `String()`.
 */
function _alefE2eText(value: unknown): string {
  if (value === null || value === undefined) {
    return "";
  }
  return String(value);
}
/**
 * Lists the text fragments of an item for e2e assertions.
 *
 * A non-object value (including null/undefined) yields a one-element array with its text form.
 * An object yields, in order: its own string form, the text of its `kind`, `name`, `source`,
 * `alias`, `text` and `signature` properties, and the space-joined text of `items`
 * ("" when `items` is not an array).
 */
function _alefE2eItemTexts(item: unknown): string[] {
  if (item === null || item === undefined || typeof item !== "object") {
    return [_alefE2eText(item)];
  }
  const fields = item as Record<string, unknown>;
  const nestedText = Array.isArray(fields.items) ? fields.items.map(_alefE2eText).join(" ") : "";
  const fieldNames = ["kind", "name", "source", "alias", "text", "signature"];
  return [
    _alefE2eText(item),
    ...fieldNames.map((fieldName) => _alefE2eText(fields[fieldName])),
    nestedText,
  ];
}
/**
 * Produces the display string for a FormatMetadata value.
 *
 * FormatMetadata is a tagged union: { format_type: 'image', image: { format: 'PNG', ... }, ... }
 *
 * @param fm - metadata value; may be null/undefined, a primitive, or an object.
 * @returns "" for null/undefined; `String(fm)` for non-objects; the `image.format` string for the
 *   image variant when it is present; otherwise the `format_type` tag when it is a string, else "".
 */
function _alefE2eFormatMetadataDisplay(fm: unknown): string {
  if (fm == null) return "";
  if (typeof fm !== "object") return String(fm);
  const record = fm as Record<string, unknown>;
  const formatType = record.format_type;
  // `typeof null` is "object", so null is excluded explicitly; otherwise reading `.format` would throw.
  if (formatType === "image" && typeof record.image === "object" && record.image !== null) {
    const imageData = record.image as Record<string, unknown>;
    if (typeof imageData.format === "string") return imageData.format;
  }
  // Fallback: return the format_type variant name
  return typeof formatType === "string" ? formatType : "";
}
// Calls getExtensionsForMime with three MIME type strings; a case passes when the call does not
// throw (the returned value is not inspected).
describe('registry_operations', () => {
  it('extensions_docx: Get file extensions for DOCX MIME type', () => {
    getExtensionsForMime("application/vnd.openxmlformats-officedocument.wordprocessingml.document");
  }, 30000);

  it('extensions_html: Get file extensions for HTML MIME type', () => {
    getExtensionsForMime("text/html");
  }, 30000);

  it('extensions_pdf: Get file extensions for PDF MIME type', () => {
    getExtensionsForMime("application/pdf");
  }, 30000);
});

View File

@@ -0,0 +1,44 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
import { describe, expect, it } from 'vitest';import { extractFile, clearRenderers, listRenderers, WasmAccelerationConfig, WasmChunkingConfig, WasmContentFilterConfig, WasmEmailConfig, WasmEmbeddingConfig, WasmHierarchyConfig, WasmHtmlOutputConfig, WasmImageExtractionConfig, WasmImagePreprocessingConfig, WasmKeywordConfig, WasmLanguageDetectionConfig, WasmLayoutDetectionConfig, WasmLlmConfig, WasmOcrConfig, WasmOcrElementConfig, WasmOcrPipelineConfig, WasmOcrPipelineStage, WasmOcrQualityThresholds, WasmPageConfig, WasmPdfConfig, WasmPostProcessorConfig, WasmRakeParams, WasmSecurityLimits, WasmStructuredExtractionConfig, WasmTesseractConfig, WasmTokenReductionOptions, WasmTreeSitterConfig, WasmTreeSitterProcessConfig, WasmYakeParams } from '@kreuzberg/wasm';
/**
 * Converts an arbitrary value to text for e2e assertions.
 * `null` and `undefined` become the empty string; every other value goes through `String()`.
 */
function _alefE2eText(value: unknown): string {
  if (value === null || value === undefined) {
    return "";
  }
  return String(value);
}
/**
 * Lists the text fragments of an item for e2e assertions.
 *
 * A non-object value (including null/undefined) yields a one-element array with its text form.
 * An object yields, in order: its own string form, the text of its `kind`, `name`, `source`,
 * `alias`, `text` and `signature` properties, and the space-joined text of `items`
 * ("" when `items` is not an array).
 */
function _alefE2eItemTexts(item: unknown): string[] {
  if (item === null || item === undefined || typeof item !== "object") {
    return [_alefE2eText(item)];
  }
  const fields = item as Record<string, unknown>;
  const nestedText = Array.isArray(fields.items) ? fields.items.map(_alefE2eText).join(" ") : "";
  const fieldNames = ["kind", "name", "source", "alias", "text", "signature"];
  return [
    _alefE2eText(item),
    ...fieldNames.map((fieldName) => _alefE2eText(fields[fieldName])),
    nestedText,
  ];
}
/**
 * Produces the display string for a FormatMetadata value.
 *
 * FormatMetadata is a tagged union: { format_type: 'image', image: { format: 'PNG', ... }, ... }
 *
 * @param fm - metadata value; may be null/undefined, a primitive, or an object.
 * @returns "" for null/undefined; `String(fm)` for non-objects; the `image.format` string for the
 *   image variant when it is present; otherwise the `format_type` tag when it is a string, else "".
 */
function _alefE2eFormatMetadataDisplay(fm: unknown): string {
  if (fm == null) return "";
  if (typeof fm !== "object") return String(fm);
  const record = fm as Record<string, unknown>;
  const formatType = record.format_type;
  // `typeof null` is "object", so null is excluded explicitly; otherwise reading `.format` would throw.
  if (formatType === "image" && typeof record.image === "object" && record.image !== null) {
    const imageData = record.image as Record<string, unknown>;
    if (typeof imageData.format === "string") return imageData.format;
  }
  // Fallback: return the format_type variant name
  return typeof formatType === "string" ? formatType : "";
}
// Calls the renderer clear and list functions; each case passes when the call does not throw.
// NOTE(review): the first title says "verify list is empty", but no assertion is made here.
describe('renderer_management', () => {
  it('renderers_clear: Clear all renderers and verify list is empty', () => {
    clearRenderers();
  }, 30000);

  it('renderers_list: List all registered renderers', () => {
    listRenderers();
  }, 30000);
});

45
e2e/wasm/tests/smoke.test.ts generated Normal file
View File

@@ -0,0 +1,45 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
import { describe, expect, it } from 'vitest';import { extractFile, extractBytes, WasmExtractionConfig, WasmAccelerationConfig, WasmChunkingConfig, WasmContentFilterConfig, WasmEmailConfig, WasmEmbeddingConfig, WasmHierarchyConfig, WasmHtmlOutputConfig, WasmImageExtractionConfig, WasmImagePreprocessingConfig, WasmKeywordConfig, WasmLanguageDetectionConfig, WasmLayoutDetectionConfig, WasmLlmConfig, WasmOcrConfig, WasmOcrElementConfig, WasmOcrPipelineConfig, WasmOcrPipelineStage, WasmOcrQualityThresholds, WasmPageConfig, WasmPdfConfig, WasmPostProcessorConfig, WasmRakeParams, WasmSecurityLimits, WasmStructuredExtractionConfig, WasmTesseractConfig, WasmTokenReductionOptions, WasmTreeSitterConfig, WasmTreeSitterProcessConfig, WasmYakeParams } from '@kreuzberg/wasm';
/**
 * Converts an arbitrary value to text for e2e assertions.
 * `null` and `undefined` become the empty string; every other value goes through `String()`.
 */
function _alefE2eText(value: unknown): string {
  if (value === null || value === undefined) {
    return "";
  }
  return String(value);
}
/**
 * Lists the text fragments of an item for e2e assertions.
 *
 * A non-object value (including null/undefined) yields a one-element array with its text form.
 * An object yields, in order: its own string form, the text of its `kind`, `name`, `source`,
 * `alias`, `text` and `signature` properties, and the space-joined text of `items`
 * ("" when `items` is not an array).
 */
function _alefE2eItemTexts(item: unknown): string[] {
  if (item === null || item === undefined || typeof item !== "object") {
    return [_alefE2eText(item)];
  }
  const fields = item as Record<string, unknown>;
  const nestedText = Array.isArray(fields.items) ? fields.items.map(_alefE2eText).join(" ") : "";
  const fieldNames = ["kind", "name", "source", "alias", "text", "signature"];
  return [
    _alefE2eText(item),
    ...fieldNames.map((fieldName) => _alefE2eText(fields[fieldName])),
    nestedText,
  ];
}
/**
 * Produces the display string for a FormatMetadata value.
 *
 * FormatMetadata is a tagged union: { format_type: 'image', image: { format: 'PNG', ... }, ... }
 *
 * @param fm - metadata value; may be null/undefined, a primitive, or an object.
 * @returns "" for null/undefined; `String(fm)` for non-objects; the `image.format` string for the
 *   image variant when it is present; otherwise the `format_type` tag when it is a string, else "".
 */
function _alefE2eFormatMetadataDisplay(fm: unknown): string {
  if (fm == null) return "";
  if (typeof fm !== "object") return String(fm);
  const record = fm as Record<string, unknown>;
  const formatType = record.format_type;
  // `typeof null` is "object", so null is excluded explicitly; otherwise reading `.format` would throw.
  if (formatType === "image" && typeof record.image === "object" && record.image !== null) {
    const imageData = record.image as Record<string, unknown>;
    if (typeof imageData.format === "string") return imageData.format;
  }
  // Fallback: return the format_type variant name
  return typeof formatType === "string" ? formatType : "";
}
// Reads a PNG from disk, passes its bytes to extractBytes with the "image/png" MIME type, and
// checks the reported MIME type, that some content was produced, and that the content contains
// one of the expected words.
// NOTE(review): the image path is relative, so it resolves against the process working directory.
describe('smoke', () => {
  it('ocr_image_png: OCR: PNG image extraction with OCR enabled. In WASM this exercises the Uint8Array bridge parameter and Promise await in the generated OcrBackend bridge.', async () => {
    const { readFile } = await import('node:fs/promises');
    const imageBytes = await readFile('images/test_hello_world.png');
    const result = await extractBytes(imageBytes, "image/png", undefined);

    expect(result.mimeType.trim()).toBe("image/png");
    expect(result.content.length).toBeGreaterThanOrEqual(1);

    const expectedWords = ["Hello", "World", "hello", "world"];
    const containsExpectedWord = expectedWords.some((word) => result.content.includes(word));
    expect(containsExpectedWord).toBe(true);
  }, 30000);
});

View File

@@ -0,0 +1,44 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
import { describe, expect, it } from 'vitest';import { extractFile, clearValidators, listValidators, WasmAccelerationConfig, WasmChunkingConfig, WasmContentFilterConfig, WasmEmailConfig, WasmEmbeddingConfig, WasmHierarchyConfig, WasmHtmlOutputConfig, WasmImageExtractionConfig, WasmImagePreprocessingConfig, WasmKeywordConfig, WasmLanguageDetectionConfig, WasmLayoutDetectionConfig, WasmLlmConfig, WasmOcrConfig, WasmOcrElementConfig, WasmOcrPipelineConfig, WasmOcrPipelineStage, WasmOcrQualityThresholds, WasmPageConfig, WasmPdfConfig, WasmPostProcessorConfig, WasmRakeParams, WasmSecurityLimits, WasmStructuredExtractionConfig, WasmTesseractConfig, WasmTokenReductionOptions, WasmTreeSitterConfig, WasmTreeSitterProcessConfig, WasmYakeParams } from '@kreuzberg/wasm';
/**
 * Converts an arbitrary value to text for e2e assertions.
 * `null` and `undefined` become the empty string; every other value goes through `String()`.
 */
function _alefE2eText(value: unknown): string {
  if (value === null || value === undefined) {
    return "";
  }
  return String(value);
}
/**
 * Lists the text fragments of an item for e2e assertions.
 *
 * A non-object value (including null/undefined) yields a one-element array with its text form.
 * An object yields, in order: its own string form, the text of its `kind`, `name`, `source`,
 * `alias`, `text` and `signature` properties, and the space-joined text of `items`
 * ("" when `items` is not an array).
 */
function _alefE2eItemTexts(item: unknown): string[] {
  if (item === null || item === undefined || typeof item !== "object") {
    return [_alefE2eText(item)];
  }
  const fields = item as Record<string, unknown>;
  const nestedText = Array.isArray(fields.items) ? fields.items.map(_alefE2eText).join(" ") : "";
  const fieldNames = ["kind", "name", "source", "alias", "text", "signature"];
  return [
    _alefE2eText(item),
    ...fieldNames.map((fieldName) => _alefE2eText(fields[fieldName])),
    nestedText,
  ];
}
/**
 * Produces the display string for a FormatMetadata value.
 *
 * FormatMetadata is a tagged union: { format_type: 'image', image: { format: 'PNG', ... }, ... }
 *
 * @param fm - metadata value; may be null/undefined, a primitive, or an object.
 * @returns "" for null/undefined; `String(fm)` for non-objects; the `image.format` string for the
 *   image variant when it is present; otherwise the `format_type` tag when it is a string, else "".
 */
function _alefE2eFormatMetadataDisplay(fm: unknown): string {
  if (fm == null) return "";
  if (typeof fm !== "object") return String(fm);
  const record = fm as Record<string, unknown>;
  const formatType = record.format_type;
  // `typeof null` is "object", so null is excluded explicitly; otherwise reading `.format` would throw.
  if (formatType === "image" && typeof record.image === "object" && record.image !== null) {
    const imageData = record.image as Record<string, unknown>;
    if (typeof imageData.format === "string") return imageData.format;
  }
  // Fallback: return the format_type variant name
  return typeof formatType === "string" ? formatType : "";
}
// Calls the validator clear and list functions; each case passes when the call does not throw.
// NOTE(review): the first title says "verify list is empty", but no assertion is made here.
describe('validator_management', () => {
  it('validators_clear: Clear all validators and verify list is empty', () => {
    clearValidators();
  }, 30000);

  it('validators_list: List all registered validators', () => {
    listValidators();
  }, 30000);
});

12
e2e/wasm/tsconfig.json generated Normal file
View File

@@ -0,0 +1,12 @@
{
"compilerOptions": {
"target": "ES2022",
"module": "ESNext",
"moduleResolution": "bundler",
"strict": true,
"strictNullChecks": false,
"esModuleInterop": true,
"skipLibCheck": true
},
"include": ["tests/**/*.ts", "vitest.config.ts"]
}

6
e2e/wasm/vitest-setup.ts generated Normal file
View File

@@ -0,0 +1,6 @@
// Set up WASI polyfills before kreuzberg is imported.
import { createWasiPreview1 } from "jco";

// Runs when this module is loaded: builds the WASI preview1 imports from an empty options
// object and publishes them on the global object as `__WASI__`.
// NOTE(review): intended to run before any tests — confirm this file is wired in as a setup file.
(globalThis as any).__WASI__ = createWasiPreview1({});

25
e2e/wasm/vitest.config.local.ts generated Normal file
View File

@@ -0,0 +1,25 @@
// Local vitest configuration to handle WASM imports
import { fileURLToPath } from "node:url";
import { defineConfig, mergeConfig } from "vitest/config";
import baseConfig from "./vitest.config";

// The `env` and `wasi_snapshot_preview1` module specifiers are both aliased to this one file.
// fileURLToPath (rather than URL#pathname) yields a real filesystem path: it decodes
// percent-encoded characters such as spaces and produces a valid drive path on Windows.
const wasiPolyfillPath = fileURLToPath(new URL("./wasi-polyfill.js", import.meta.url));

// Merges the local overrides below on top of the base configuration.
export default mergeConfig(
  baseConfig,
  defineConfig({
    test: {
      // NOTE(review): vitest's `test.env` populates environment variables, so this sets a
      // variable named `env` to "{}" — confirm that is what "mock out WASI imports" intended.
      env: {
        env: JSON.stringify({}),
      },
      // Limit parallelism to reduce memory spikes from V8 WASM isolates
      fileParallelism: false,
      isolate: false,
    },
    resolve: {
      alias: {
        env: wasiPolyfillPath,
        wasi_snapshot_preview1: wasiPolyfillPath,
      },
    },
  }),
);

16
e2e/wasm/vitest.config.ts generated Normal file
View File

@@ -0,0 +1,16 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
import { defineConfig } from 'vitest/config';
// Options for the e2e suite: the glob that selects test files, the test / hook / teardown
// timeouts (milliseconds), and the setup file listed for the run.
const e2eTestOptions = {
  include: ['tests/**/*.test.ts'],
  testTimeout: 30000,
  hookTimeout: 120000,
  teardownTimeout: 30000,
  setupFiles: ['./setup.ts'],
};

export default defineConfig({ test: e2eTestOptions });

27
e2e/wasm/wasi-polyfill.js generated Normal file
View File

@@ -0,0 +1,27 @@
// WASI and env polyfill for Node.js WASM testing
// Empty stand-in for the `env` import object.
const env = {};

// [import name, value the stub returns], in property order. Every stub ignores its arguments.
// `undefined` marks a stub that returns nothing.
// NOTE(review): 8 presumably mirrors the WASI errno for a bad file descriptor — confirm.
const wasiStubResults = [
  ["proc_exit", undefined],
  ["environ_get", 0],
  ["environ_sizes_get", 0],
  ["fd_write", 0],
  ["fd_read", 0],
  ["fd_seek", 0],
  ["fd_close", 0],
  ["fd_prestat", 8],
  ["fd_prestat_dir_name", 0],
  ["path_open", 8],
  ["path_create_directory", 0],
  ["path_remove_directory", 0],
  ["path_unlink_file", 0],
  ["path_filestat_get", 0],
  ["path_rename", 0],
  ["sys_info", 0],
  ["clock_time_get", 0],
  ["random_get", 0],
  ["thread_spawn", 0],
];
const wasi_snapshot_preview1 = Object.fromEntries(
  wasiStubResults.map(([name, result]) => [name, () => result]),
);

// Register these globally for WASM imports
globalThis.env = env;
globalThis.wasi_snapshot_preview1 = wasi_snapshot_preview1;

44
e2e/wasm/wasm-loader.mjs generated Normal file
View File

@@ -0,0 +1,44 @@
// WASM loader that provides WASI imports
import Module from "module";
import { fileURLToPath } from "url";
import { dirname } from "path";
// Directory containing this module, derived from import.meta.url.
const __dirname = dirname(fileURLToPath(import.meta.url));
// Mock `env` object: no-op stand-ins that ignore their arguments.
// `free` returns nothing; every other stub returns 0.
const mockEnv = {
  // Standard C library functions
  malloc() {
    return 0;
  },
  free() {},
  memcpy() {
    return 0;
  },
  memset() {
    return 0;
  },
  strlen() {
    return 0;
  },
};
// Mock WASI import object built from [import name, value the stub returns] pairs, in property
// order. Every stub ignores its arguments; `undefined` marks a stub that returns nothing.
// NOTE(review): 8 presumably mirrors the WASI errno for a bad file descriptor — confirm.
const mockWasi = Object.fromEntries(
  [
    ["proc_exit", undefined],
    ["environ_get", 0],
    ["environ_sizes_get", 0],
    ["fd_write", 0],
    ["fd_read", 0],
    ["fd_seek", 0],
    ["fd_close", 0],
    ["fd_prestat", 8],
    ["fd_prestat_dir_name", 0],
    ["path_open", 8],
    ["path_create_directory", 0],
    ["path_remove_directory", 0],
    ["path_unlink_file", 0],
    ["path_filestat_get", 0],
    ["path_rename", 0],
    ["sys_info", 0],
    ["clock_time_get", 0],
    ["random_get", 0],
    ["thread_spawn", 0],
    ["args_get", 0],
    ["args_sizes_get", 0],
  ].map(([name, result]) => [name, () => result]),
);
// Publish both mock objects on the global object under the names `env` and `wasi_snapshot_preview1`.
Object.assign(globalThis, {
  env: mockEnv,
  wasi_snapshot_preview1: mockWasi,
});