```typescript title="WASM - Extract and Score Keywords"
import init, { extractBytes } from "kreuzberg-wasm";

await init();

const config = {
  outputFormat: "markdown",
};

// `buffer` is not defined in this snippet; it must already hold the
// document's raw bytes before this point.
const bytes = new Uint8Array(buffer);
const result = await extractBytes(bytes, "application/pdf", config);

// TF-IDF style keyword extraction (simplified)
interface Keyword {
  term: string;
  frequency: number;
  uniquePositions: number[];
  score: number;
}

const text = result.content.toLowerCase();
const words = text.split(/\s+/);

// Maps each cleaned term to the indexes in `words` where it occurs.
const tokenMap = new Map<string, number[]>();

// Record word positions
for (const [idx, word] of words.entries()) {
  // Drop everything that is not a letter, digit or underscore.
  const cleaned = word.replace(/[^\w]/g, "");
  // Only terms longer than three characters are kept.
  if (cleaned.length <= 3) {
    continue;
  }
  const positions = tokenMap.get(cleaned);
  if (positions === undefined) {
    tokenMap.set(cleaned, [idx]);
  } else {
    positions.push(idx);
  }
}

// Calculate keyword scores: frequency * ln(total tokens / frequency).
// With a single document there is no corpus, so this only approximates TF-IDF.
const keywords: Keyword[] = Array.from(tokenMap.entries()).map(([term, positions]) => ({
  term,
  frequency: positions.length,
  uniquePositions: positions,
  score: positions.length * Math.log(words.length / positions.length),
}));

// Sort by score (TF-IDF approximation), highest first
keywords.sort((a, b) => b.score - a.score);

// Top 15 keywords
const topKeywords = keywords.slice(0, 15);

console.log("Top Keywords:");
topKeywords.forEach((kw) => {
  console.log(` ${kw.term}: ${kw.frequency} occurrences (score: ${kw.score.toFixed(2)})`);
});
```

```typescript title="WASM - Keyword Context Window"
import init, { extractBytes } from "kreuzberg-wasm";

await init();

const config = {
  outputFormat: "markdown",
  chunking: {
    maxChars: 1000,
    chunkOverlap: 200,
  },
};

// `buffer` is not defined in this snippet; it must already hold the
// document's raw bytes before this point.
const bytes = new Uint8Array(buffer);
const result = await extractBytes(bytes, "application/pdf", config);

interface KeywordContext {
  keyword: string;
  contexts: string[];
}

/**
 * Find keyword occurrences with surrounding context.
 *
 * The keyword is matched literally and case-insensitively.
 *
 * @param text - Text to search.
 * @param keyword - Literal keyword to look for; an empty keyword yields no contexts.
 * @param contextWindow - Number of characters to include on each side of a match.
 * @returns One excerpt per occurrence, clamped to the bounds of `text`.
 */
function extractKeywordContexts(
  text: string,
  keyword: string,
  contextWindow: number = 50,
): string[] {
  // An empty pattern matches at every position without consuming input,
  // so there is nothing meaningful to report for it.
  if (keyword.length === 0) {
    return [];
  }

  // Escape regex metacharacters so keywords such as "c++" or "a.b" are
  // searched for literally instead of being interpreted as a pattern.
  const escaped = keyword.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const regex = new RegExp(escaped, "gi");

  const contexts: string[] = [];
  for (const match of text.matchAll(regex)) {
    const matchStart = match.index ?? 0;
    // Use the length of the text that actually matched rather than the keyword's.
    const matchEnd = matchStart + match[0].length;
    const start = Math.max(0, matchStart - contextWindow);
    const end = Math.min(text.length, matchEnd + contextWindow);
    contexts.push(text.substring(start, end));
  }
  return contexts;
}

// Extract context for top keywords
const topKeywords = ["document", "analysis", "results"];
const keywordContexts: KeywordContext[] = topKeywords.map((kw) => ({
  keyword: kw,
  contexts: extractKeywordContexts(result.content, kw, 40),
}));

keywordContexts.forEach((kc) => {
  console.log(`\n"${kc.keyword}" appears ${kc.contexts.length} times:`);
  // Print at most the first two excerpts per keyword.
  kc.contexts.slice(0, 2).forEach((ctx, idx) => {
    console.log(` [${idx + 1}] ...${ctx}...`);
  });
});
```

- Keyword extraction without native YAKE/RAKE requires manual text processing.