Files
fil/docs/snippets/wasm/plugins/stateful_plugin.md
Henrik Jess Nielsen b4c07d3693
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s
Nomad changes
2026-06-01 23:40:55 +02:00

1.9 KiB

Stateful Post-Processor Plugin

Create a stateful post-processor that maintains state across multiple extraction calls.

import init, { registerPostProcessor, extractBytes } from "kreuzberg-wasm";

// Await the package's default export before using the other imports
// (presumably this loads the WebAssembly module — confirm against the kreuzberg-wasm docs).
await init();

/**
 * Create a stateful post-processor using a closure.
 *
 * Every processor returned by this factory owns its own counters; they are
 * captured by the returned methods and therefore survive between calls to
 * `process`.
 *
 * @returns {{
 *   processingStage: () => string,
 *   process: (extractionResult: object) => object,
 *   getState: () => { extractionCount: number, totalChars: number, lastResult: (object|null) }
 * }} The processor object: `processingStage` returns "post-extraction",
 *   `process` returns an enriched shallow copy of its argument, and
 *   `getState` returns the live (not copied) state object.
 */
function createStatefulProcessor() {
  // Closure-private bookkeeping shared by all calls on this processor.
  const stats = {
    extractionCount: 0,
    totalChars: 0,
    lastResult: null,
  };

  // Length of the `text` field, or 0 when the field is missing or empty.
  const textLengthOf = (result) => result.text?.length || 0;

  return {
    processingStage() {
      return "post-extraction";
    },

    /**
     * Record one extraction and return a copy of it with running statistics.
     *
     * The argument itself is not modified: the top-level object and its
     * `metadata` are copied with spread (shallow), and the statistics are
     * written to the copy. A reference to the argument is kept as
     * `lastResult` in the state.
     *
     * @param {object} extractionResult - Result to process; `text` and
     *   `metadata` are read if present.
     * @returns {object} Shallow copy with `metadata.extractionIndex`,
     *   `metadata.cumulativeChars` and `metadata.averageDocLength` added.
     */
    process(extractionResult) {
      // Update the counters first so the statistics include this call.
      stats.extractionCount += 1;
      stats.totalChars += textLengthOf(extractionResult);
      stats.lastResult = extractionResult;

      const { extractionCount, totalChars } = stats;

      const enriched = {
        ...extractionResult,
        metadata: {
          ...extractionResult.metadata,
          extractionIndex: extractionCount,
          cumulativeChars: totalChars,
          averageDocLength: Math.round(totalChars / extractionCount),
        },
      };

      console.log(
        `[Extraction ${extractionCount}] ${textLengthOf(enriched)} chars, cumulative: ${totalChars}`,
      );

      return enriched;
    },

    // Optional: hands out the live state object for inspection.
    getState() {
      return stats;
    },
  };
}

// Register the stateful processor.
// A single processor object is created and handed to registerPostProcessor, so
// its closure-held counters are shared by every call made on that object.
// Keeping it in `statefulProcessor` leaves getState() reachable for inspection.
const statefulProcessor = createStatefulProcessor();
registerPostProcessor(statefulProcessor);

/**
 * Multiple extractions use the same state.
 *
 * Passes three placeholder byte arrays to `extractBytes`, one at a time, as
 * "application/pdf" with an empty options object. Each call is awaited before
 * the next one starts, so the extractions happen in array order.
 *
 * @returns {Promise<Array>} The values resolved by `extractBytes`, in input order.
 */
async function processMultipleDocs() {
  const mimeType = "application/pdf";

  // Placeholders: fill each array with the bytes of a real document.
  const inputs = [
    new Uint8Array([
      /* Doc 1 */
    ]),
    new Uint8Array([
      /* Doc 2 */
    ]),
    new Uint8Array([
      /* Doc 3 */
    ]),
  ];

  const extracted = [];
  for (let index = 0; index < inputs.length; index += 1) {
    // Deliberately sequential: the next extraction starts only after this one settles.
    const outcome = await extractBytes(inputs[index], mimeType, {});
    extracted.push(outcome);
  }

  return extracted;
}

// Run the example; the array of results returned by processMultipleDocs() is not used here.
await processMultipleDocs();

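Because the state lives in the closure created by `createStatefulProcessor()`, a single registered processor can track metrics across multiple extractions or carry context from one document to the next.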