```typescript title="WASM - Token Counting and Cost Estimation" import init, { extractBytes } from "kreuzberg-wasm"; await init(); const config = { tokenReduction: { mode: "balanced", preserveImportantWords: true, }, }; const bytes = new Uint8Array(buffer); const result = await extractBytes(bytes, "application/pdf", config); // Simple token counting (approximation: 1 token ≈ 4 chars) function estimateTokenCount(text: string): number { return Math.ceil(text.length / 4); } // LLM pricing (example: GPT-4 Turbo) interface PricingEstimate { tokenCount: number; inputCost: number; outputCostEstimate: number; totalEstimate: number; } const tokenCount = estimateTokenCount(result.content); const inputPricePerToken = 0.00001; // $0.01/1K tokens const outputPricePerToken = 0.00003; // $0.03/1K tokens const costEstimate: PricingEstimate = { tokenCount, inputCost: tokenCount * inputPricePerToken, outputCostEstimate: tokenCount * outputPricePerToken * 0.5, // Assume output is ~50% of input totalEstimate: tokenCount * inputPricePerToken + tokenCount * outputPricePerToken * 0.5, }; console.log("Token and Cost Analysis:"); console.log(` Estimated tokens: ${costEstimate.tokenCount}`); console.log(` Input cost: $${costEstimate.inputCost.toFixed(6)}`); console.log(` Output cost (est.): $${costEstimate.outputCostEstimate.toFixed(6)}`); console.log(` Total cost (est.): $${costEstimate.totalEstimate.toFixed(6)}`); ``` ```typescript title="WASM - Token Reduction for Context Windows" import init, { extractBytes } from "kreuzberg-wasm"; await init(); interface ContextWindowFit { contentLength: number; estimatedTokens: number; fitsInWindow: boolean; utilization: number; } function checkContextWindowFit( content: string, contextWindowSize: number = 4096, ): ContextWindowFit { const estimatedTokens = Math.ceil(content.length / 4); const fitsInWindow = estimatedTokens < contextWindowSize; const utilization = estimatedTokens / contextWindowSize; return { contentLength: content.length, estimatedTokens, fitsInWindow, utilization, }; } const config = { tokenReduction: { mode: "aggressive", // Use aggressive mode for large documents preserveImportantWords: true, }, }; const bytes = new Uint8Array(buffer); const result = await extractBytes(bytes, "application/pdf", config); const contextFit = checkContextWindowFit(result.content, 4096); console.log("Context Window Analysis:"); console.log(` Content: ${contextFit.contentLength} characters`); console.log(` Tokens (est.): ${contextFit.estimatedTokens}`); console.log(` Fits in 4K context: ${contextFit.fitsInWindow ? "YES" : "NO"}`); console.log(` Utilization: ${(contextFit.utilization * 100).toFixed(1)}%`); if (!contextFit.fitsInWindow) { console.log(" Note: Consider chunking or more aggressive token reduction"); } ``` ```typescript title="WASM - Selective Token Preservation" import init, { extractBytes } from "kreuzberg-wasm"; await init(); const config = { tokenReduction: { mode: "balanced", preserveImportantWords: true, }, }; const bytes = new Uint8Array(buffer); const result = await extractBytes(bytes, "application/pdf", config); // Extract important terms manually interface ImportantTerm { term: string; frequency: number; importance: number; } function extractImportantTerms(content: string, threshold: number = 3): ImportantTerm[] { const words = content.toLowerCase().split(/\s+/); const frequencyMap = new Map(); words.forEach((word) => { const cleaned = word.replace(/[^\w]/g, ""); if (cleaned.length > 5) { // Only consider longer words frequencyMap.set(cleaned, (frequencyMap.get(cleaned) || 0) + 1); } }); return Array.from(frequencyMap.entries()) .filter(([_, freq]) => freq >= threshold) .map(([term, freq]) => ({ term, frequency: freq, importance: Math.log(freq) * (term.length / 10), })) .sort((a, b) => b.importance - a.importance) .slice(0, 20); } const importantTerms = extractImportantTerms(result.content); console.log("Important Terms (likely preserved by token reduction):"); importantTerms.forEach((t) => { console.log(` "${t.term}": ${t.frequency} occurrences (importance: ${t.importance.toFixed(2)})`); }); ```