This commit is contained in:
26
docs/snippets/wasm/getting-started/async-extraction.ts
Normal file
26
docs/snippets/wasm/getting-started/async-extraction.ts
Normal file
@@ -0,0 +1,26 @@
|
||||
import { extractBytes, getWasmCapabilities, initWasm } from "@kreuzberg/wasm";
|
||||
|
||||
/**
 * Extracts several in-memory documents in parallel.
 *
 * `files[i]` is paired with `mimeTypes[i]`, so both arrays must have the same length.
 *
 * @param files - Raw bytes of each document.
 * @param mimeTypes - MIME type for each entry of `files`, in the same order.
 * @returns One `{ content, pageCount }` summary per input, in input order.
 * @throws Error if the arrays differ in length, or if the capability probe reports no WebAssembly support.
 */
async function extractDocuments(files: Uint8Array[], mimeTypes: string[]) {
  // Without this guard a missing MIME type would reach extractBytes as `undefined`.
  if (files.length !== mimeTypes.length) {
    throw new Error(
      `Expected one MIME type per file, got ${files.length} file(s) and ${mimeTypes.length} MIME type(s)`,
    );
  }

  const caps = getWasmCapabilities();
  if (!caps.hasWasm) {
    throw new Error("WebAssembly not supported");
  }

  await initWasm();

  // Promise.all keeps the results aligned with the order of `files`.
  const results = await Promise.all(
    files.map((bytes, index) => extractBytes(bytes, mimeTypes[index])),
  );

  return results.map((r) => ({
    content: r.content,
    pageCount: r.metadata?.pageCount,
  }));
}
|
||||
|
||||
// Demo invocation: one placeholder buffer, labelled as a PDF.
const mimes = ["application/pdf"];
const fileBytes = [new Uint8Array([1, 2, 3])];

extractDocuments(fileBytes, mimes)
  .then((summaries) => {
    console.log(summaries);
  })
  .catch((error) => console.error(error));
|
||||
17
docs/snippets/wasm/getting-started/basic-extract.ts
Normal file
17
docs/snippets/wasm/getting-started/basic-extract.ts
Normal file
@@ -0,0 +1,17 @@
|
||||
import { extractBytes, initWasm } from "@kreuzberg/wasm";
|
||||
|
||||
/**
 * Downloads `document.pdf`, extracts it as a PDF, and logs the content,
 * MIME type and metadata of the result.
 *
 * @throws Error if the download does not return a successful HTTP status.
 */
async function main() {
  await initWasm();

  const response = await fetch("document.pdf");
  // fetch() only rejects on network failure; without this check the body of an
  // HTTP error response would be handed to the extractor as if it were the PDF.
  if (!response.ok) {
    throw new Error(`Failed to fetch document.pdf: ${response.status} ${response.statusText}`);
  }
  const bytes = new Uint8Array(await response.arrayBuffer());

  const result = await extractBytes(bytes, "application/pdf");

  console.log("Extracted content:");
  console.log(result.content);
  console.log("MIME type:", result.mimeType);
  console.log("Metadata:", result.metadata);
}
|
||||
|
||||
// Run the example; a rejection is reported on stderr instead of going unhandled.
main().catch((error) => console.error(error));
|
||||
12
docs/snippets/wasm/getting-started/basic_usage.md
Normal file
12
docs/snippets/wasm/getting-started/basic_usage.md
Normal file
@@ -0,0 +1,12 @@
|
||||
```typescript title="WASM"
|
||||
import init, { extractBytes } from "kreuzberg-wasm";
|
||||
|
||||
// The module must be initialized before any extraction call.
await init();

const pdfResponse = await fetch("document.pdf");
const pdfBuffer = await pdfResponse.arrayBuffer();
const pdfBytes = new Uint8Array(pdfBuffer);

// Third argument is passed as `undefined` (nothing supplied in that slot).
const extraction = await extractBytes(pdfBytes, "application/pdf", undefined);

console.log(extraction.content);
console.log(`MIME Type: ${extraction.mime_type}`);
|
||||
```
|
||||
33
docs/snippets/wasm/getting-started/batch-processing.ts
Normal file
33
docs/snippets/wasm/getting-started/batch-processing.ts
Normal file
@@ -0,0 +1,33 @@
|
||||
import { extractBytes, initWasm } from "@kreuzberg/wasm";
|
||||
|
||||
/** One document to be extracted as part of a batch. */
interface DocumentJob {
  /** Identifier for the document; used as the key of its extracted text. */
  name: string;
  /** Raw bytes of the document. */
  bytes: Uint8Array;
  /** MIME type passed to the extractor together with `bytes`. */
  mimeType: string;
}
|
||||
|
||||
/**
 * Extracts a batch of documents with a bounded number of extractions in flight.
 *
 * Documents that fail to extract are logged with `console.error` and left out
 * of the result; a single failure does not reject the whole batch.
 *
 * @param documents - Jobs to process; the array itself is not modified.
 * @param concurrency - Maximum number of simultaneous extractions. Values
 *   below 1 (or NaN) are treated as 1 and fractions are rounded down.
 * @returns Extracted text keyed by `DocumentJob.name`. If two jobs share a
 *   name, whichever finishes last overwrites the other.
 */
async function _processBatch(documents: DocumentJob[], concurrency: number = 3) {
  await initWasm();

  const results: Record<string, string> = {};
  // Work on a copy so the caller's array is left untouched by shift().
  const queue = [...documents];

  // `Array(concurrency)` threw a RangeError for negative or fractional values
  // and created no workers at all for 0, silently returning an empty result.
  // Clamp instead, and never start more workers than there are documents.
  const requested = Number.isNaN(concurrency) ? 1 : Math.max(1, Math.floor(concurrency));
  const workerCount = Math.min(requested, queue.length);

  // Each worker keeps pulling from the shared queue until it is drained.
  const runWorker = async (): Promise<void> => {
    for (let doc = queue.shift(); doc !== undefined; doc = queue.shift()) {
      try {
        const result = await extractBytes(doc.bytes, doc.mimeType);
        results[doc.name] = result.content;
      } catch (error) {
        console.error(`Failed to process ${doc.name}:`, error);
      }
    }
  };

  await Promise.all(Array.from({ length: workerCount }, () => runWorker()));
  return results;
}
|
||||
@@ -0,0 +1,14 @@
|
||||
```typescript title="WASM"
|
||||
import { extractFromFile, initWasm } from "@kreuzberg/wasm";
|
||||
|
||||
await initWasm();

// Collect whatever the user selected in the `#files` input (possibly nothing).
const picker = document.getElementById("files") as HTMLInputElement;
const selectedFiles = picker.files ? Array.from(picker.files) : [];

// Start all extractions at once and wait for every one of them.
const extractions = await Promise.all(
  selectedFiles.map((selected) => extractFromFile(selected)),
);

extractions.forEach((extraction, position) => {
  const charCount = extraction.content.length;
  console.log(`File ${position + 1}: ${charCount} characters`);
});
|
||||
```
|
||||
29
docs/snippets/wasm/getting-started/browser-file-input.ts
Normal file
29
docs/snippets/wasm/getting-started/browser-file-input.ts
Normal file
@@ -0,0 +1,29 @@
|
||||
import { extractFromFile, initWasm } from "@kreuzberg/wasm";
|
||||
|
||||
/**
 * Initializes the WASM module, then extracts whichever file the user picks in
 * the `#file-input` element and shows the result.
 *
 * Extraction failures are logged with `console.error`; they are not rethrown.
 */
async function setupFileInput() {
  await initWasm();

  const onFileChosen = async (event: Event) => {
    const input = event.target as HTMLInputElement;
    const chosen = input.files?.[0];
    if (!chosen) {
      // Selection was cleared or the dialog was cancelled.
      return;
    }

    try {
      const extraction = await extractFromFile(chosen);
      console.log("Extracted text:", extraction.content);
      displayResults(extraction);
    } catch (error) {
      console.error("Extraction failed:", error);
    }
  };

  const picker = document.getElementById("file-input") as HTMLInputElement;
  picker.addEventListener("change", onFileChosen);
}
|
||||
|
||||
/**
 * Shows a preview (at most 500 characters) of the extracted text in the
 * `#output` element. Does nothing if the page has no such element.
 *
 * @param result - Extraction result; only its `content` string is read.
 */
function displayResults(result: { content: string }) {
  const output = document.getElementById("output");
  if (!output) {
    return;
  }

  const previewLength = 500;
  const { content } = result;
  // Append the ellipsis only when text was actually cut off; previously short
  // content was shown with a trailing "..." as if it had been truncated.
  output.textContent =
    content.length > previewLength ? `${content.substring(0, previewLength)}...` : content;
}
|
||||
|
||||
// Wire up the input on load; a setup failure is reported on stderr.
setupFileInput().catch((error) => console.error(error));
|
||||
12
docs/snippets/wasm/getting-started/extract_bytes_sync.md
Normal file
12
docs/snippets/wasm/getting-started/extract_bytes_sync.md
Normal file
@@ -0,0 +1,12 @@
|
||||
```typescript title="WASM"
|
||||
import { extractBytes, initWasm } from "@kreuzberg/wasm";
|
||||
|
||||
await initWasm();

// Download the document and view its body as raw bytes.
const pdfResponse = await fetch("document.pdf");
const pdfBytes = new Uint8Array(await pdfResponse.arrayBuffer());

const extraction = await extractBytes(pdfBytes, "application/pdf");
console.log(extraction.content);
|
||||
```
|
||||
10
docs/snippets/wasm/getting-started/extract_file.md
Normal file
10
docs/snippets/wasm/getting-started/extract_file.md
Normal file
@@ -0,0 +1,10 @@
|
||||
```typescript title="WASM"
|
||||
import init, { extractFile } from "kreuzberg-wasm";
|
||||
|
||||
await init();

const extraction = await extractFile("document.pdf", undefined, undefined);

// `tables` and `metadata.format` may be absent, so fall back to defaults.
const tableCount = extraction.tables?.length ?? 0;
const format = extraction.metadata?.format ?? "unknown";

console.log(`Extracted content: ${extraction.content}`);
console.log(`Tables found: ${tableCount}`);
console.log(`Format: ${format}`);
|
||||
```
|
||||
17
docs/snippets/wasm/getting-started/extract_file_async.md
Normal file
17
docs/snippets/wasm/getting-started/extract_file_async.md
Normal file
@@ -0,0 +1,17 @@
|
||||
```typescript title="WASM"
|
||||
import { extractFromFile, initWasm } from "@kreuzberg/wasm";
|
||||
|
||||
await initWasm();

const picker = document.getElementById("file") as HTMLInputElement;
const selected = picker.files?.[0];

// Nothing happens unless a file has been selected.
if (selected) {
  const extraction = await extractFromFile(selected);
  const { content, tables } = extraction;

  console.log(`Content length: ${content.length} characters`);
  console.log(`Tables: ${tables.length}`);
}
|
||||
```
|
||||
15
docs/snippets/wasm/getting-started/extract_file_sync.md
Normal file
15
docs/snippets/wasm/getting-started/extract_file_sync.md
Normal file
@@ -0,0 +1,15 @@
|
||||
```typescript title="WASM"
|
||||
import { extractFromFile, initWasm } from "@kreuzberg/wasm";
|
||||
|
||||
await initWasm();

const picker = document.getElementById("file") as HTMLInputElement;
const selected = picker.files?.[0];

// Nothing happens unless a file has been selected.
if (selected) {
  const extraction = await extractFromFile(selected);
  const metadataJson = JSON.stringify(extraction.metadata);

  console.log(extraction.content);
  console.log(`Tables: ${extraction.tables.length}`);
  console.log(`Metadata: ${metadataJson}`);
}
|
||||
```
|
||||
20
docs/snippets/wasm/getting-started/extract_with_ocr.md
Normal file
20
docs/snippets/wasm/getting-started/extract_with_ocr.md
Normal file
@@ -0,0 +1,20 @@
|
||||
```typescript title="WASM"
|
||||
import init, { extractBytes } from "kreuzberg-wasm";
|
||||
|
||||
await init();

// Extraction config: force OCR, using the "tesseract" backend with language "eng".
const ocrSettings = { backend: "tesseract", language: "eng" };
const config = { force_ocr: true, ocr: ocrSettings };

const scanResponse = await fetch("scanned.pdf");
const scanBuffer = await scanResponse.arrayBuffer();
const data = new Uint8Array(scanBuffer);

const result = await extractBytes(data, "application/pdf", config);
// `detected_languages` may be absent; report "unknown" in that case.
const languages = result.detected_languages?.join(", ") ?? "unknown";

console.log(result.content);
console.log(`Detected languages: ${languages}`);
|
||||
```
|
||||
9
docs/snippets/wasm/getting-started/hello_world.md
Normal file
9
docs/snippets/wasm/getting-started/hello_world.md
Normal file
@@ -0,0 +1,9 @@
|
||||
```typescript title="WASM"
|
||||
import init, { extractBytes } from "kreuzberg-wasm";
|
||||
|
||||
await init();

// PDF magic bytes: the ASCII characters "%PDF".
const magicBytes = Uint8Array.of(0x25, 0x50, 0x44, 0x46);

const extraction = await extractBytes(magicBytes, "application/pdf", undefined);
console.log(extraction.content);
|
||||
```
|
||||
25
docs/snippets/wasm/getting-started/initialization.ts
Normal file
25
docs/snippets/wasm/getting-started/initialization.ts
Normal file
@@ -0,0 +1,25 @@
|
||||
import { getVersion, getWasmCapabilities, initWasm, isInitialized } from "@kreuzberg/wasm";
|
||||
|
||||
/**
 * Initializes the WASM module if that has not happened yet, then logs the
 * library version and the detected capabilities.
 *
 * Returns early with an error log when the capability probe reports no
 * WebAssembly support. Initialization errors are logged, not rethrown.
 */
async function initializeKreuzberg() {
  const { hasWasm, hasWorkers, hasSharedArrayBuffer } = getWasmCapabilities();

  if (!hasWasm) {
    console.error("WebAssembly not supported");
    return;
  }

  try {
    // Skip the init call when the module is already loaded.
    const alreadyInitialized = isInitialized();
    if (!alreadyInitialized) {
      await initWasm();
    }

    console.log(`Kreuzberg ${getVersion()} initialized successfully`);
    console.log("Workers available:", hasWorkers);
    console.log("SharedArrayBuffer available:", hasSharedArrayBuffer);
  } catch (error) {
    console.error("Initialization failed:", error);
  }
}
|
||||
|
||||
// Fire-and-forget: the returned promise is intentionally not awaited.
void initializeKreuzberg();
|
||||
6
docs/snippets/wasm/getting-started/install_verify.md
Normal file
6
docs/snippets/wasm/getting-started/install_verify.md
Normal file
@@ -0,0 +1,6 @@
|
||||
```typescript title="WASM"
|
||||
import init from "kreuzberg-wasm";
|
||||
|
||||
// If initialization rejects, the success message below is never printed.
await init();

console.log("Kreuzberg WASM loaded successfully");
|
||||
```
|
||||
26
docs/snippets/wasm/getting-started/read_content.md
Normal file
26
docs/snippets/wasm/getting-started/read_content.md
Normal file
@@ -0,0 +1,26 @@
|
||||
```typescript title="WASM"
|
||||
import init, { extractBytes } from "kreuzberg-wasm";
|
||||
|
||||
await init();

const pdfResponse = await fetch("document.pdf");
const pdfBuffer = await pdfResponse.arrayBuffer();
const pdfBytes = new Uint8Array(pdfBuffer);

const extraction = await extractBytes(pdfBytes, "application/pdf", undefined);
const { content, tables, chunks } = extraction;

console.log(`Content: ${content}`);
console.log(`Success: true`);
console.log(`Content length: ${content.length} characters`);

// `tables` and `chunks` may be absent; optional chaining skips them in that case.
tables?.forEach((table, index) => {
  const rowCount = table.rows?.length ?? 0;
  console.log(`Table ${index}: ${rowCount} rows`);
});

chunks?.forEach((chunk, index) => {
  const charCount = chunk.text?.length ?? 0;
  console.log(`Chunk ${index}: ${charCount} characters`);
});
|
||||
```
|
||||
32
docs/snippets/wasm/getting-started/runtime-detection.ts
Normal file
32
docs/snippets/wasm/getting-started/runtime-detection.ts
Normal file
@@ -0,0 +1,32 @@
|
||||
import {
|
||||
detectRuntime,
|
||||
getWasmCapabilities,
|
||||
initWasm,
|
||||
isBrowser,
|
||||
isBun,
|
||||
isDeno,
|
||||
isNode,
|
||||
} from "@kreuzberg/wasm";
|
||||
|
||||
/**
 * Logs the detected runtime and its capabilities, reports which runtime
 * predicate matched, and then initializes the WASM module.
 */
async function setupForRuntime() {
  const runtimeName = detectRuntime();
  const capabilities = getWasmCapabilities();

  console.log(`Running in ${runtimeName} environment`);
  console.log(`Workers: ${capabilities.hasWorkers}`);
  console.log(`SharedArrayBuffer: ${capabilities.hasSharedArrayBuffer}`);

  // Predicates are tried in this order; only the first match is reported.
  const runtimeMessages: Array<[() => boolean, string]> = [
    [isBrowser, "Browser features available"],
    [isNode, "Node.js features available"],
    [isDeno, "Deno features available"],
    [isBun, "Bun features available"],
  ];
  const matched = runtimeMessages.find(([matchesRuntime]) => matchesRuntime());
  if (matched) {
    console.log(matched[1]);
  }

  await initWasm();
}
|
||||
|
||||
// Run the example; a rejection is reported on stderr instead of going unhandled.
setupForRuntime().catch((error) => console.error(error));
|
||||
Reference in New Issue
Block a user