Files
fil/docs/snippets/wasm/config/chunking_config.md
Henrik Jess Nielsen b4c07d3693
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s
Nomad changes
2026-06-01 23:40:55 +02:00

1.7 KiB

import { initWasm, extractBytes } from "@kreuzberg/wasm";

// Initialise the WASM module first; the snippet awaits this before any
// extraction call is made.
await initWasm();

// Chunking options handed to extractBytes via the `chunking` key.
const chunking = { maxChars: 1000, chunkOverlap: 100 };
const config = { chunking };

// NOTE(review): `buffer` is not defined in this snippet; it must be supplied
// by the surrounding code (presumably the raw PDF bytes — confirm).
const bytes = new Uint8Array(buffer);
const result = await extractBytes(bytes, "application/pdf", config);

/**
 * Log a 50-character preview of one chunk followed by its token count.
 *
 * NOTE(review): `token_count` is snake_case while other snippets in this
 * file read camelCase metadata (`headingContext`) — confirm the key against
 * the @kreuzberg/wasm typings.
 */
const logChunk = ({ content, metadata }, idx) => {
  const preview = content.substring(0, 50);
  console.log(`Chunk ${idx}: ${preview}...`);
  console.log(`Tokens: ${metadata?.token_count}`);
};

// `chunks` may be absent on the result, in which case nothing is logged.
(result.chunks ?? []).forEach(logChunk);
import { initWasm, extractBytes } from "@kreuzberg/wasm";

// Initialise the WASM module first; the snippet awaits this before any
// extraction call is made.
await initWasm();

const config = {
  chunking: {
    chunkerType: "markdown",
    maxChars: 2000,
    // Note: Token-based sizing is not available in WASM builds.
    // Use character-based sizing instead.
  },
};

// NOTE(review): `buffer` is not defined in this snippet; it must be supplied
// by the surrounding code (presumably the raw markdown bytes — confirm).
const bytes = new Uint8Array(buffer);
const result = await extractBytes(bytes, "text/markdown", config);

/**
 * Log a 50-character preview of one chunk and, when the chunk metadata
 * carries a heading context, every heading's level and text.
 */
const logChunkWithHeadings = (chunk, idx) => {
  const preview = chunk.content.substring(0, 50);
  console.log(`Chunk ${idx}: ${preview}...`);

  // Any link in metadata -> headingContext -> headings may be missing.
  const headings = chunk.metadata?.headingContext?.headings;
  if (!headings) {
    return;
  }

  console.log("Headings:");
  headings.forEach(({ level, text }) => {
    console.log(`  Level ${level}: ${text}`);
  });
};

// `chunks` may be absent on the result, in which case nothing is logged.
(result.chunks ?? []).forEach(logChunkWithHeadings);
import { initWasm, extractBytes } from "@kreuzberg/wasm";

// Initialise the WASM module first; the snippet awaits this before any
// extraction call is made.
await initWasm();

// Markdown chunker options; `prependHeadingContext` is switched on here.
const chunking = {
  chunkerType: "markdown",
  maxChars: 2000,
  prependHeadingContext: true,
};
const config = { chunking };

// NOTE(review): `buffer` is not defined in this snippet; it must be supplied
// by the surrounding code (presumably the raw markdown bytes — confirm).
const bytes = new Uint8Array(buffer);
const result = await extractBytes(bytes, "text/markdown", config);

/**
 * Log the first 80 characters of one chunk.
 * Each chunk's content is prefixed with its heading breadcrumb.
 */
const logChunkPreview = ({ content }, idx) => {
  const preview = content.substring(0, 80);
  console.log(`Chunk ${idx}: ${preview}...`);
};

// `chunks` may be absent on the result, in which case nothing is logged.
(result.chunks ?? []).forEach(logChunkPreview);