import type { ExtractionConfig } from "@kreuzberg/wasm"; import { extractBytes, initWasm } from "@kreuzberg/wasm"; async function extractWithChunking() { await initWasm(); const bytes = new Uint8Array(await fetch("book.pdf").then((r) => r.arrayBuffer())); const config: ExtractionConfig = { chunking: { maxChars: 800, chunkOverlap: 150, splitOnNewlines: true, splitOnSentences: true, }, }; // Example: prepend heading context so each chunk carries its heading breadcrumb const configWithHeadings: ExtractionConfig = { chunking: { chunkerType: "markdown", maxChars: 800, prependHeadingContext: true, }, }; const result = await extractBytes(bytes, "application/pdf", config); if (result.chunks) { console.log(`Total chunks: ${result.chunks.length}`); result.chunks.slice(0, 3).forEach((chunk, i) => { console.log(`\nChunk ${i}:`); console.log(`Chars: ${chunk.metadata.charStart}-${chunk.metadata.charEnd}`); console.log(`Content: ${chunk.content.substring(0, 100)}...`); }); } } async function extractWithPrependHeadingContext() { await initWasm(); const bytes = new Uint8Array(await fetch("document.md").then((r) => r.arrayBuffer())); const config: ExtractionConfig = { chunking: { chunkerType: "markdown", maxChars: 800, prependHeadingContext: true, }, }; const result = await extractBytes(bytes, "text/markdown", config); if (result.chunks) { console.log(`Total chunks: ${result.chunks.length}`); result.chunks.slice(0, 3).forEach((chunk, i) => { // Each chunk's content is prefixed with its heading breadcrumb console.log(`\nChunk ${i}: ${chunk.content.substring(0, 100)}...`); }); } } extractWithChunking().catch(console.error); extractWithPrependHeadingContext().catch(console.error);