This commit is contained in:
30
docs/snippets/wasm/cache/ocr-cache.ts
vendored
Normal file
30
docs/snippets/wasm/cache/ocr-cache.ts
vendored
Normal file
@@ -0,0 +1,30 @@
|
||||
import { extractBytes, initWasm, TesseractWasmBackend } from "@kreuzberg/wasm";
|
||||
|
||||
/**
 * Runs OCR on two images back to back and times each call.
 *
 * The Tesseract WASM backend is constructed and initialized once up front;
 * the two `console.time` labels then let the reader compare the duration of
 * the first and second `extractBytes` call.
 *
 * NOTE(review): the `backend` instance is never handed to `extractBytes`;
 * the calls select OCR by the name "tesseract-wasm". Presumably initializing
 * the backend is enough for that name to resolve — confirm against the
 * library's registration API.
 *
 * @throws Error when either image cannot be downloaded successfully.
 */
async function demonstrateOcrCaching() {
  // Download an image and fail fast on HTTP errors, so that an error page is
  // never passed to OCR as if it were PNG data.
  const fetchImageBytes = async (url: string): Promise<Uint8Array> => {
    const response = await fetch(url);
    if (!response.ok) {
      throw new Error(`Failed to fetch ${url}: ${response.status} ${response.statusText}`);
    }
    return new Uint8Array(await response.arrayBuffer());
  };

  await initWasm();

  const backend = new TesseractWasmBackend();
  await backend.initialize();

  console.log("Tesseract WASM backend loaded - models cached");

  const imageBytes = await fetchImageBytes("page1.png");

  console.time("First OCR (with model load)");
  await extractBytes(imageBytes, "image/png", {
    ocr: { backend: "tesseract-wasm", language: "eng" },
  });
  console.timeEnd("First OCR (with model load)");

  console.log("Model cached in memory");

  const imageBytes2 = await fetchImageBytes("page2.png");

  console.time("Second OCR (model cached)");
  await extractBytes(imageBytes2, "image/png", {
    ocr: { backend: "tesseract-wasm", language: "eng" },
  });
  console.timeEnd("Second OCR (model cached)");
}

demonstrateOcrCaching().catch(console.error);
|
||||
57
docs/snippets/wasm/cache/result-caching.ts
vendored
Normal file
57
docs/snippets/wasm/cache/result-caching.ts
vendored
Normal file
@@ -0,0 +1,57 @@
|
||||
import type { ExtractionResult } from "@kreuzberg/wasm";
|
||||
import { extractBytes, initWasm } from "@kreuzberg/wasm";
|
||||
|
||||
/**
 * In-memory cache of extraction results.
 *
 * Results are keyed by the file's MIME type together with the SHA-256 of its
 * bytes, so two distinct `File` objects with equal content and type share one
 * entry, regardless of their names.
 */
class ExtractionCache {
  /** Extraction results keyed by `${file.type}:${sha256Hex}`. */
  private readonly cache = new Map<string, ExtractionResult>();

  /**
   * Memoized content hashes per `File` object. A WeakMap is used so that
   * remembering a hash never keeps the `File` itself reachable.
   */
  private fileHashes = new WeakMap<File, string>();

  /**
   * Returns the lowercase hex SHA-256 digest of the file's bytes.
   *
   * The digest is memoized per `File` object, so repeated calls with the same
   * object do not re-read or re-hash its contents.
   *
   * @param file File whose content should be hashed.
   * @returns 64-character hexadecimal digest.
   */
  async getHash(file: File): Promise<string> {
    const known = this.fileHashes.get(file);
    if (known !== undefined) {
      return known;
    }
    return this.hashAndRemember(file, await file.arrayBuffer());
  }

  /**
   * Returns the extraction result for `file`, running `extractBytes` only when
   * no result for the same content and MIME type is cached yet.
   *
   * @param file File to extract.
   * @returns The cached or freshly computed extraction result.
   */
  async extract(file: File): Promise<ExtractionResult> {
    // Keep the buffer that was read for hashing so a cache miss does not have
    // to read the file a second time.
    let buffer: ArrayBuffer | undefined;
    let hash = this.fileHashes.get(file);
    if (hash === undefined) {
      buffer = await file.arrayBuffer();
      hash = await this.hashAndRemember(file, buffer);
    }

    // The MIME type is part of the key because it is passed to extractBytes:
    // equal bytes declared as a different type must not reuse the result.
    const key = `${file.type}:${hash}`;

    const cached = this.cache.get(key);
    if (cached !== undefined) {
      console.log("Cache hit for", file.name);
      return cached;
    }

    console.log("Cache miss for", file.name);
    if (buffer === undefined) {
      // Hash was already memoized, so the bytes have not been read in this call.
      buffer = await file.arrayBuffer();
    }
    const result = await extractBytes(new Uint8Array(buffer), file.type);

    this.cache.set(key, result);
    return result;
  }

  /** Drops every cached result and every memoized hash. */
  clear(): void {
    this.cache.clear();
    // WeakMap has no clear(); replacing the instance discards all entries.
    this.fileHashes = new WeakMap<File, string>();
  }

  /** Number of extraction results currently cached. */
  getSize(): number {
    return this.cache.size;
  }

  /** Hashes `buffer` with SHA-256, memoizes the hex digest for `file`, and returns it. */
  private async hashAndRemember(file: File, buffer: ArrayBuffer): Promise<string> {
    const digest = await crypto.subtle.digest("SHA-256", buffer);
    const hex = Array.from(new Uint8Array(digest), (byte) => byte.toString(16).padStart(2, "0")).join("");
    this.fileHashes.set(file, hex);
    return hex;
  }
}
|
||||
|
||||
/**
 * Minimal walkthrough of ExtractionCache: initialize the WASM module, push a
 * single file through the cache, then report how many results it holds.
 */
async function demonstrateCaching() {
  await initWasm();

  const extractionCache = new ExtractionCache();

  // NOTE(review): the placeholder is a zero-byte File created without a
  // `type` option, so its `type` is the empty string — confirm the extractor
  // accepts this, or substitute a real document when adapting the snippet.
  const placeholder = new File([], "test.pdf");
  await extractionCache.extract(placeholder);

  console.log("Cache size:", extractionCache.getSize());
}

demonstrateCaching().catch((reason: unknown) => console.error(reason));
|
||||
43
docs/snippets/wasm/cache/session-storage.ts
vendored
Normal file
43
docs/snippets/wasm/cache/session-storage.ts
vendored
Normal file
@@ -0,0 +1,43 @@
|
||||
import type { ExtractionResult } from "@kreuzberg/wasm";
|
||||
import { extractFromFile, initWasm } from "@kreuzberg/wasm";
|
||||
|
||||
/**
 * Extracts `file`, reusing a result previously stored in `sessionStorage`
 * when one exists for the same name, size and last-modified timestamp.
 *
 * Caching is best effort: an unreadable cached entry is discarded and the
 * file is extracted again, and a failure to store the result is logged but
 * never prevents the result from being returned.
 *
 * @param file File to extract.
 * @returns The cached or freshly extracted result.
 */
async function _cacheResultInSessionStorage(file: File): Promise<ExtractionResult> {
  await initWasm();

  // lastModified is part of the key so that an edited file which kept its
  // name and size is not served the result of the previous version.
  const cacheKey = `extraction_${file.name}_${file.size}_${file.lastModified}`;

  const cached = sessionStorage.getItem(cacheKey);
  if (cached !== null) {
    try {
      // The entry was written by this function via JSON.stringify, so the
      // parsed value is trusted to have the ExtractionResult shape.
      const parsed = JSON.parse(cached) as ExtractionResult;
      console.log("Loading from session storage");
      return parsed;
    } catch {
      // Corrupt or truncated entry: drop it and fall through to re-extract.
      sessionStorage.removeItem(cacheKey);
    }
  }

  console.log("Extracting and caching result");
  const result = await extractFromFile(file);

  try {
    sessionStorage.setItem(cacheKey, JSON.stringify(result));
  } catch (error) {
    if (error instanceof Error && error.name === "QuotaExceededError") {
      console.warn("Session storage full, skipping cache");
    } else {
      // Still best effort, but do not hide unexpected failures.
      console.warn("Could not cache extraction result:", error);
    }
  }

  return result;
}
|
||||
|
||||
/**
 * Removes every sessionStorage entry whose key begins with "extraction_"
 * and logs how many entries were removed.
 */
async function clearExtractionCache() {
  // Snapshot the matching keys first; removal happens on the snapshot.
  const staleKeys = Object.keys(sessionStorage).filter((name) => name.startsWith("extraction_"));

  staleKeys.forEach((name) => {
    sessionStorage.removeItem(name);
  });

  console.log(`Cleared ${staleKeys.length} cached results`);
}
|
||||
|
||||
// Report failures (for example, storage access being denied) instead of
// leaving the returned promise's rejection unhandled.
clearExtractionCache().catch(console.error);
|
||||
Reference in New Issue
Block a user