Nomad changes
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s

This commit is contained in:
Henrik Jess Nielsen
2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions

173
e2e/wasm/setup.ts generated Normal file
View File

@@ -0,0 +1,173 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
import { createRequire } from 'module';
import { readFileSync } from 'fs';
import { fileURLToPath } from 'url';
import { dirname, join } from 'path';
// Pre-initialize the wasm-bindgen module so that its exports are callable in
// every vitest worker. The bundle's async default export uses fetch(), which
// does not support file:// URLs in Node.js, so when the bundle exposes
// `initSync` we feed it the `.wasm` bytes read from disk instead.
// The whole step is best-effort: any failure is logged and setup continues.
try {
  const _require = createRequire(import.meta.url);
  // `require.resolve` returns the absolute filesystem path of the package's JS
  // entry file (not a directory and not a URL).
  const wasmEntryPath = _require.resolve('@kreuzberg/wasm');
  const wasmModule = (await import(/* @vite-ignore */ wasmEntryPath)) as unknown as Record<string, unknown>;
  // wasm-bindgen accepts either the raw module/bytes or the `{ module }` object
  // form; the type below covers both so the call further down type-checks.
  type InitSyncInput = WebAssembly.Module | BufferSource;
  const initSync = wasmModule.initSync as
    | ((input: InitSyncInput | { module: InitSyncInput }) => unknown)
    | undefined;
  if (typeof initSync === 'function') {
    // Locate the .wasm binary next to the JS entry. The resolved path is used
    // as-is: round-tripping it through `new URL(path, 'file://')` would mangle
    // paths containing `%`, `#` or `?` and fails for Windows drive letters.
    const wasmBinPath = wasmEntryPath.replace(/\.js$/, '_bg.wasm');
    const wasmBytes = readFileSync(wasmBinPath);
    // Pass as object form to avoid the wasm-bindgen deprecation warning.
    initSync({ module: wasmBytes });
  } else {
    // Fallback: try the async default init (wasm-pack --target nodejs bundles).
    const initDefault = wasmModule.default as (() => Promise<unknown>) | undefined;
    if (typeof initDefault === 'function') await initDefault();
  }
} catch (err) {
  // Module may not require explicit init — continue anyway. Anything can be
  // thrown, so only read `.message` from genuine Error instances.
  const reason = err instanceof Error ? err.message : String(err);
  console.warn('[alef wasm setup] init skipped:', reason);
}
// Patch CommonJS `require('env')` and `require('wasi_snapshot_preview1')` to
// return shim objects. wasm-pack `--target nodejs` emits bare `require()`
// calls for these from getrandom/wasi transitives, but they are not real
// Node modules — the WASM module imports them by name and the host is
// expected to satisfy them.
// Note: setupFiles run per-test-worker; vitest imports the test files
// AFTER setupFiles complete, so the hooks below are installed before any test
// file requires the bundle.
{
  const _require = createRequire(import.meta.url);
  const Module = _require('module');

  type MemoryHolder = { __alef_wasm_memory__?: WebAssembly.Memory };
  /** Returns the linear memory captured by the patched `WebAssembly.Instance`, if any. */
  const _wasmMemory = (): WebAssembly.Memory | undefined =>
    (globalThis as unknown as MemoryHolder).__alef_wasm_memory__;

  // env.system / env.mkstemp come from C-runtime calls embedded in some
  // WASM-compiled deps (e.g. tesseract-wasm). Tests that don't exercise
  // those paths only need the imports to be callable for module instantiation.
  const env = {
    system: (_cmd: number) => -1,
    mkstemp: (_template: number) => -1,
  };

  // WASI shims. Critical: clock_time_get and random_get must produce realistic
  // values — returning 0 for all clock calls causes WASM-side timing loops to
  // spin forever (e.g. getrandom's spin-until-elapsed retry), and zero-filled
  // random buffers can cause init loops in deps expecting non-zero entropy.

  /**
   * Builds a DataView over the captured WASM memory, or returns null when no
   * instance has been captured yet. A fresh view is created on every call
   * because `memory.buffer` is replaced whenever the memory grows.
   */
  const _wasiMemoryView = (): DataView | null => {
    const mem = _wasmMemory();
    return mem ? new DataView(mem.buffer) : null;
  };

  // Web Crypto rejects requests larger than 65536 bytes with a
  // QuotaExceededError, so large buffers are filled one chunk at a time.
  const CRYPTO_MAX_CHUNK = 65536;
  /** Fills `buf` with random bytes (Web Crypto when available, Math.random otherwise). */
  const _cryptoFill = (buf: Uint8Array) => {
    const c = globalThis.crypto;
    if (c && typeof c.getRandomValues === 'function') {
      for (let off = 0; off < buf.length; off += CRYPTO_MAX_CHUNK) {
        // subarray() shares the underlying buffer, so this writes in place.
        c.getRandomValues(buf.subarray(off, off + CRYPTO_MAX_CHUNK));
      }
    } else {
      for (let i = 0; i < buf.length; i++) buf[i] = Math.floor(Math.random() * 256);
    }
  };

  /**
   * Current time in nanoseconds since the Unix epoch, derived from the
   * high-resolution clock (`timeOrigin + now()`, both in fractional ms) so the
   * sub-millisecond digits are real rather than a truncated millisecond count.
   */
  const _nowNs = (): bigint => {
    const ms = performance.timeOrigin + performance.now();
    const wholeMs = Math.floor(ms);
    const fracNs = Math.floor((ms - wholeMs) * 1e6);
    return BigInt(wholeMs) * 1_000_000n + BigInt(fracNs);
  };

  const wasi_snapshot_preview1 = {
    proc_exit: () => {},
    environ_get: () => 0,
    environ_sizes_get: (countOut: number, _sizeOut: number) => {
      const v = _wasiMemoryView();
      if (v) v.setUint32(countOut, 0, true);
      return 0;
    },
    // WASI fd_write must update `nwritten_ptr` with the total bytes consumed,
    // otherwise libc-style callers (e.g. tesseract-compiled-to-wasm fputs)
    // see 0 of N bytes written and retry forever, hanging the host.
    fd_write: (_fd: number, iovsPtr: number, iovsLen: number, nwrittenPtr: number) => {
      const v = _wasiMemoryView();
      if (!v) return 0;
      let total = 0;
      for (let i = 0; i < iovsLen; i++) {
        // Each iovec is 8 bytes: u32 buffer pointer followed by u32 length.
        const off = iovsPtr + i * 8;
        total += v.getUint32(off + 4, true);
      }
      v.setUint32(nwrittenPtr, total, true);
      return 0;
    },
    // Mirror fd_write: callers retry on partial reads. Reporting 0 bytes
    // read (EOF) is fine; just make sure `nread_ptr` is written.
    fd_read: (_fd: number, _iovsPtr: number, _iovsLen: number, nreadPtr: number) => {
      const v = _wasiMemoryView();
      if (v) v.setUint32(nreadPtr, 0, true);
      return 0;
    },
    fd_seek: () => 0,
    fd_close: () => 0,
    fd_prestat_get: () => 8, // EBADF — no preopens.
    fd_prestat_dir_name: () => 0,
    fd_fdstat_get: () => 0,
    fd_fdstat_set_flags: () => 0,
    path_open: () => 44, // ENOENT.
    path_create_directory: () => 0,
    path_remove_directory: () => 0,
    path_unlink_file: () => 0,
    path_filestat_get: () => 44, // ENOENT.
    path_rename: () => 0,
    // Writes the current time as a little-endian u64 nanosecond count. The
    // clock id and precision arguments are ignored: every clock reads the
    // same source.
    clock_time_get: (_clockId: number, _precision: bigint, timeOut: number) => {
      const v = _wasiMemoryView();
      if (v) v.setBigUint64(timeOut, _nowNs(), true);
      return 0;
    },
    // Reports a fixed resolution of 1000 ns for every clock.
    clock_res_get: (_clockId: number, resOut: number) => {
      const v = _wasiMemoryView();
      if (v) v.setBigUint64(resOut, 1_000n, true);
      return 0;
    },
    // Fills `bufLen` bytes of WASM memory at `bufPtr` with random data; a
    // no-op (still reporting success) while no memory has been captured.
    random_get: (bufPtr: number, bufLen: number) => {
      const mem = _wasmMemory();
      if (!mem) return 0;
      _cryptoFill(new Uint8Array(mem.buffer, bufPtr, bufLen));
      return 0;
    },
    args_get: () => 0,
    args_sizes_get: (countOut: number, _sizeOut: number) => {
      const v = _wasiMemoryView();
      if (v) v.setUint32(countOut, 0, true);
      return 0;
    },
    poll_oneoff: () => 0,
    sched_yield: () => 0,
  };

  // Make the two virtual module names resolve to themselves instead of
  // failing the filesystem lookup...
  const _origResolve = Module._resolveFilename;
  Module._resolveFilename = function (this: unknown, request: string, parent: unknown, ...rest: unknown[]) {
    if (request === 'env' || request === 'wasi_snapshot_preview1') return request;
    return _origResolve.call(this, request, parent, ...rest);
  };
  // ...and hand back the shim objects when they are loaded.
  const _origLoad = Module._load;
  Module._load = function (this: unknown, request: string, parent: unknown, ...rest: unknown[]) {
    if (request === 'env') return env;
    if (request === 'wasi_snapshot_preview1') return wasi_snapshot_preview1;
    return _origLoad.call(this, request, parent, ...rest);
  };

  // Capture the WASM linear memory at instantiation time so the WASI shims
  // can read/write into it. Without this, every shim that needs memory
  // (fd_write nwritten, clock_time_get, random_get, etc.) silently no-ops
  // and the host-side C runtime hangs in a retry loop.
  // Only an exported `memory` is captured, and the most recently constructed
  // instance wins because the global slot is overwritten each time.
  const _OrigInstance = WebAssembly.Instance;
  const PatchedInstance = function (this: WebAssembly.Instance, mod: WebAssembly.Module, imports?: WebAssembly.Imports) {
    const inst = new _OrigInstance(mod, imports);
    const exportsMem = (inst.exports as Record<string, unknown>).memory;
    if (exportsMem instanceof WebAssembly.Memory) {
      (globalThis as unknown as MemoryHolder).__alef_wasm_memory__ = exportsMem;
    }
    // Returning an object from a constructor call replaces `this`, so callers
    // receive the genuine instance.
    return inst;
  } as unknown as typeof WebAssembly.Instance;
  // Share the prototype so `instanceof WebAssembly.Instance` keeps working.
  PatchedInstance.prototype = _OrigInstance.prototype;
  (WebAssembly as unknown as { Instance: typeof WebAssembly.Instance }).Instance = PatchedInstance;
}
// Fixture paths such as "pdf/fake_memo.pdf" are written relative to the
// test-documents directory, so make that directory the working directory
// before any test runs. This file sits in e2e/wasm/, and test_documents is
// addressed two levels above it (e2e/wasm/ -> e2e/ -> repo root).
const setupDir = dirname(fileURLToPath(import.meta.url));
process.chdir(join(setupDir, '..', '..', 'test_documents'));