Nomad changes
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s

This commit is contained in:
Henrik Jess Nielsen
2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions

30
docs/snippets/wasm/cache/ocr-cache.ts vendored Normal file
View File

@@ -0,0 +1,30 @@
import { extractBytes, initWasm, TesseractWasmBackend } from "@kreuzberg/wasm";
/**
 * Runs OCR on two images back to back and prints the wall-clock time of each
 * `extractBytes` call so the two runs can be compared on the console.
 *
 * NOTE(review): the `TesseractWasmBackend` instance is initialised here but is
 * never passed to `extractBytes` or registered anywhere in this snippet —
 * presumably the library picks it up internally; confirm against the
 * `@kreuzberg/wasm` documentation.
 *
 * @throws Error when either image cannot be downloaded (non-2xx HTTP status).
 */
async function demonstrateOcrCaching(): Promise<void> {
  // Downloads an image and returns its bytes. `fetch` only rejects on network
  // failure, so the status is checked explicitly; otherwise a 404 error page
  // would be handed to the OCR engine as if it were PNG data.
  const loadImage = async (url: string) => {
    const response = await fetch(url);
    if (!response.ok) {
      throw new Error(`Failed to fetch ${url}: HTTP ${response.status}`);
    }
    return new Uint8Array(await response.arrayBuffer());
  };

  await initWasm();
  const backend = new TesseractWasmBackend();
  await backend.initialize();
  console.log("Tesseract WASM backend loaded - models cached");

  // The download happens before the timer starts so only extraction is timed.
  const firstPage = await loadImage("page1.png");
  console.time("First OCR (with model load)");
  await extractBytes(firstPage, "image/png", {
    ocr: { backend: "tesseract-wasm", language: "eng" },
  });
  console.timeEnd("First OCR (with model load)");
  console.log("Model cached in memory");

  const secondPage = await loadImage("page2.png");
  console.time("Second OCR (model cached)");
  await extractBytes(secondPage, "image/png", {
    ocr: { backend: "tesseract-wasm", language: "eng" },
  });
  console.timeEnd("Second OCR (model cached)");
}

// Entry point: report any failure on the console rather than leaving the promise unhandled.
demonstrateOcrCaching().catch(console.error);

View File

@@ -0,0 +1,57 @@
import type { ExtractionResult } from "@kreuzberg/wasm";
import { extractBytes, initWasm } from "@kreuzberg/wasm";
/**
 * In-memory cache of extraction results keyed by the SHA-256 digest of a
 * file's bytes, so two `File` objects with identical content share one entry.
 *
 * Cached results are returned by reference; callers that mutate a result will
 * affect what later cache hits see.
 */
class ExtractionCache {
  /** Extraction results keyed by lowercase hex SHA-256 content digest. */
  private readonly cache = new Map<string, ExtractionResult>();

  /**
   * Per-`File` digest memo. Weakly keyed so that memoising a digest never
   * keeps a `File` (and the data behind it) alive after the caller drops it.
   */
  private fileHashes = new WeakMap<File, string>();

  /**
   * Returns the lowercase hex SHA-256 digest of the file's content.
   * The digest is memoised per `File` object, so the file is read at most
   * once per object for hashing.
   *
   * @param file File whose content is hashed.
   * @returns 64-character hex string.
   */
  async getHash(file: File): Promise<string> {
    const known = this.fileHashes.get(file);
    if (known !== undefined) {
      return known;
    }
    return this.rememberHash(file, await file.arrayBuffer());
  }

  /**
   * Returns the extraction result for `file`, running `extractBytes` only when
   * no result for the same content digest is cached yet.
   *
   * @param file File to extract; `file.type` is forwarded as the MIME type.
   * @returns The cached or freshly computed extraction result.
   */
  async extract(file: File): Promise<ExtractionResult> {
    // Keep the buffer read for hashing so a cache miss does not read the file twice.
    let buffer: ArrayBuffer | undefined;
    let hash = this.fileHashes.get(file);
    if (hash === undefined) {
      buffer = await file.arrayBuffer();
      hash = await this.rememberHash(file, buffer);
    }

    const cached = this.cache.get(hash);
    if (cached !== undefined) {
      console.log("Cache hit for", file.name);
      return cached;
    }

    console.log("Cache miss for", file.name);
    if (buffer === undefined) {
      // Digest was already memoised (e.g. via getHash), so the bytes are read here.
      buffer = await file.arrayBuffer();
    }
    const result = await extractBytes(new Uint8Array(buffer), file.type);
    this.cache.set(hash, result);
    return result;
  }

  /** Drops every cached result and every memoised digest. */
  clear(): void {
    this.cache.clear();
    // WeakMap has no clear(); replacing the instance discards all entries.
    this.fileHashes = new WeakMap<File, string>();
  }

  /** Number of cached extraction results. */
  getSize(): number {
    return this.cache.size;
  }

  /** Hashes `buffer` with SHA-256, memoises the hex digest for `file`, and returns it. */
  private async rememberHash(file: File, buffer: ArrayBuffer): Promise<string> {
    const digest = await crypto.subtle.digest("SHA-256", buffer);
    const hex = Array.from(new Uint8Array(digest))
      .map((byte) => byte.toString(16).padStart(2, "0"))
      .join("");
    this.fileHashes.set(file, hex);
    return hex;
  }
}
/**
 * Initialises the WASM module, sends one placeholder file through an
 * `ExtractionCache`, and prints how many results the cache holds afterwards.
 */
async function demonstrateCaching(): Promise<void> {
  await initWasm();
  const cache = new ExtractionCache();
  // `new File(bits, name)` leaves `type` as "" unless it is given in the
  // options, and ExtractionCache.extract forwards `file.type` to extractBytes
  // as the MIME type — so the type is stated explicitly here.
  // The content is still an empty placeholder; substitute real PDF bytes in practice.
  const placeholder = new File([], "test.pdf", { type: "application/pdf" });
  await cache.extract(placeholder);
  console.log("Cache size:", cache.getSize());
}

// Entry point: report any failure on the console rather than leaving the promise unhandled.
demonstrateCaching().catch(console.error);

View File

@@ -0,0 +1,43 @@
import type { ExtractionResult } from "@kreuzberg/wasm";
import { extractFromFile, initWasm } from "@kreuzberg/wasm";
/**
 * Extracts `file` with `extractFromFile`, using `sessionStorage` as a cache of
 * JSON-serialised results.
 *
 * The cache is best-effort: an unreadable entry is discarded and recomputed,
 * and a failure to store the result never fails the extraction itself.
 *
 * NOTE(review): a cached result has been through a JSON round trip and is not
 * validated against `ExtractionResult`; any field that JSON cannot represent
 * would come back altered — confirm this is acceptable for the result shape.
 *
 * @param file File to extract.
 * @returns The cached or freshly extracted result.
 */
async function _cacheResultInSessionStorage(file: File): Promise<ExtractionResult> {
  await initWasm();

  // `lastModified` is part of the key so a file that was edited without
  // changing its name or size does not return a stale result. The
  // "extraction_" prefix is kept so prefix-based cleanup still matches.
  const cacheKey = `extraction_${file.name}_${file.size}_${file.lastModified}`;

  const cached = sessionStorage.getItem(cacheKey);
  if (cached !== null) {
    try {
      const parsed: ExtractionResult = JSON.parse(cached);
      console.log("Loading from session storage");
      return parsed;
    } catch {
      // A corrupt entry would otherwise make every call for this file throw;
      // drop it and fall through to a fresh extraction.
      console.warn("Discarding unreadable cache entry", cacheKey);
      sessionStorage.removeItem(cacheKey);
    }
  }

  console.log("Extracting and caching result");
  const result = await extractFromFile(file);
  try {
    sessionStorage.setItem(cacheKey, JSON.stringify(result));
  } catch (error: unknown) {
    if (error instanceof Error && error.name === "QuotaExceededError") {
      console.warn("Session storage full, skipping cache");
    } else {
      // Caching stays best-effort, but the reason is no longer swallowed silently.
      console.warn("Could not cache extraction result", error);
    }
  }
  return result;
}
/**
 * Removes every `sessionStorage` entry whose key starts with "extraction_"
 * and logs how many entries were removed.
 */
async function clearExtractionCache(): Promise<void> {
  // Snapshot the matching keys first so removal does not affect the enumeration.
  const staleKeys = Object.keys(sessionStorage).filter((name) => name.startsWith("extraction_"));
  staleKeys.forEach((name) => sessionStorage.removeItem(name));
  console.log(`Cleared ${staleKeys.length} cached results`);
}
// Entry point: the function returns a promise, so attach a handler that
// reports a rejection on the console instead of leaving it unhandled.
clearExtractionCache().catch(console.error);