Files
fil/docs/snippets/wasm/advanced/chunking_config.md
Henrik Jess Nielsen b4c07d3693
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s
Nomad changes
2026-06-01 23:40:55 +02:00

1.8 KiB

import init, { extractBytes } from "kreuzberg-wasm";

// Initialize the kreuzberg-wasm module before calling extractBytes.
await init();

// Chunking options handed to extractBytes as part of the extraction config.
const config = {
  chunking: {
    maxChars: 2000,
    chunkOverlap: 400,
    trim: true,
  },
};

// `buffer` is not defined in this snippet; the surrounding code must supply it
// (presumably an ArrayBuffer holding the PDF bytes — confirm against the caller).
const bytes = new Uint8Array(buffer);
const result = await extractBytes(bytes, "application/pdf", config);

// `chunks` may be absent on the result, in which case nothing is printed.
for (const chunk of result.chunks ?? []) {
  const meta = chunk.metadata;
  // NOTE(review): if chunkIndex is zero-based this prints 0/N .. (N-1)/N — confirm.
  console.log(`Chunk ${meta?.chunkIndex}/${meta?.totalChunks}`);
  console.log(`  Position: ${meta?.byteStart}-${meta?.byteEnd}`);
  // Preview only the first 50 characters of the chunk text.
  console.log(`  Content: "${chunk.content.substring(0, 50)}..."`);
}
import init, { extractBytes } from "kreuzberg-wasm";

// Initialize the kreuzberg-wasm module before calling extractBytes.
await init();

// Select the markdown chunker and ask for heading context to be prepended to each chunk.
const config = {
  chunking: {
    chunkerType: "markdown",
    maxChars: 1500,
    prependHeadingContext: true,
  },
};

// `buffer` is not defined in this snippet; the surrounding code must supply it
// (presumably an ArrayBuffer holding the markdown document — confirm against the caller).
const bytes = new Uint8Array(buffer);
const result = await extractBytes(bytes, "text/markdown", config);

// `chunks` may be absent on the result, in which case nothing is printed.
for (const chunk of result.chunks ?? []) {
  // Content already includes heading context prepended
  console.log(chunk.content.substring(0, 80));

  // A chunk may carry no heading context; default to an empty list so the
  // output never interpolates the literal string "undefined".
  const headings = chunk.metadata?.headingContext?.headings ?? [];
  const headingPath = headings
    .map((h) => `${"#".repeat(h.level)} ${h.text}`)
    .join(" > ");
  console.log(`  Heading path: ${headings.length > 0 ? headingPath : "(none)"}`);
}
import init, { extractBytes } from "kreuzberg-wasm";

// Initialize the kreuzberg-wasm module before calling extractBytes.
await init();

// Select the semantic chunker with a topic-change threshold and chunk overlap.
const config = {
  chunking: {
    chunkerType: "semantic",
    maxChars: 1000,
    topicThreshold: 0.5, // Boundary detection at 50% topic change
    chunkOverlap: 100,
  },
};

// `buffer` is not defined in this snippet; the surrounding code must supply it
// (presumably an ArrayBuffer holding the markdown document — confirm against the caller).
const bytes = new Uint8Array(buffer);
const result = await extractBytes(bytes, "text/markdown", config);

// Normalise once so the summary reports 0 (not "undefined") when `chunks` is absent,
// and so the count and the loop read from the same list.
const chunks = result.chunks ?? [];
console.log(`Generated ${chunks.length} semantic chunks`);
for (const chunk of chunks) {
  console.log(`Chunk ${chunk.metadata?.chunkIndex}: ${chunk.content.length} chars`);
}