Files
fil/docs/snippets/wasm/config/basic-config.ts
Henrik Jess Nielsen b4c07d3693
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s
Nomad changes
2026-06-01 23:40:55 +02:00

30 lines
733 B
TypeScript

import type { ExtractionConfig } from "@kreuzberg/wasm";
import { extractBytes, initWasm } from "@kreuzberg/wasm";
/**
 * Downloads `document.pdf`, runs it through `extractBytes` with an explicit
 * extraction configuration, and logs the length of the extracted content.
 *
 * @returns A promise that resolves once the result has been logged.
 * @throws Error when the download responds with a non-2xx HTTP status.
 *   Rejections from `initWasm`, `fetch`, or `extractBytes` propagate unchanged.
 */
async function extractWithConfig(): Promise<void> {
  // Initialise the WASM module before any extraction call is made.
  await initWasm();

  const response = await fetch("document.pdf");
  // fetch() only rejects on network failure; an HTTP error such as 404 still
  // resolves. Without this guard the error body would be handed to the
  // extractor as if it were the PDF.
  if (!response.ok) {
    throw new Error(
      `Failed to fetch document.pdf: ${response.status} ${response.statusText}`,
    );
  }
  const bytes = new Uint8Array(await response.arrayBuffer());

  // Annotated (rather than inferred) so the literal is checked against
  // ExtractionConfig, including excess-property checks.
  const config: ExtractionConfig = {
    ocr: {
      backend: "tesseract-wasm",
      language: "eng",
    },
    images: {
      extractImages: true,
      targetDpi: 200,
    },
    chunking: {
      maxChars: 1000,
      chunkOverlap: 100,
    },
  };

  // The MIME type is passed explicitly alongside the raw bytes.
  const result = await extractBytes(bytes, "application/pdf", config);

  console.log("Extraction complete");
  console.log("Content length:", result.content.length);
}
// Run the example; any rejection is reported on stderr instead of being left
// as an unhandled promise rejection.
extractWithConfig().catch((error: unknown) => {
  console.error(error);
});