Nomad changes

2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions
--- a/docs/snippets/wasm/plugins/pdf_metadata_extractor.md
+++ b/docs/snippets/wasm/plugins/pdf_metadata_extractor.md
@@ -0,0 +1,49 @@
+# PDF Metadata Post-Processor
+
+Register a post-processor that enriches extraction results with additional metadata: the processor name, a processing timestamp, and a word count.
+
+```typescript title="WASM"
+import init, { registerPostProcessor, extractBytes } from "kreuzberg-wasm";
+
+await init();
+
+// Define a PDF metadata post-processor that annotates each extraction result
+const pdfMetadataProcessor = {
+  // Returns the stage identifier string for this processor
+  processingStage: () => "post-extraction",
+  /**
+   * Return a copy of the extraction result with extra metadata fields.
+   *
+   * @param extractionResult - Result object; its `text` and `metadata` are read.
+   * @returns A new object with `processorName`, `processedAt`, and `wordCount`
+   *   merged into `metadata`. The input object is not mutated.
+   */
+  process: (extractionResult) => {
+    // Count runs of non-whitespace so that empty or whitespace-only text
+    // yields 0 and leading/trailing whitespace does not inflate the count
+    // (splitting "" on /\s+/ would report one word).
+    const words = (extractionResult.text || "").match(/\S+/g);
+
+    return {
+      ...extractionResult,
+      metadata: {
+        ...extractionResult.metadata,
+        processorName: "pdf-metadata",
+        processedAt: new Date().toISOString(),
+        wordCount: words ? words.length : 0,
+      },
+    };
+  },
+};
+
+// Attempt registration; a thrown error is logged instead of propagating
+try {
+  registerPostProcessor(pdfMetadataProcessor);
+  console.log("PDF metadata post-processor registered");
+} catch (registrationError) {
+  console.error("Failed to register post-processor:", registrationError);
+}
+
+// Run an extraction with OCR and chunking unset, then print the result's metadata
+const pdfBytes = new Uint8Array([
+  /* PDF content */
+]);
+const config = { ocr: null, chunking: null };
+
+const { metadata } = await extractBytes(pdfBytes, "application/pdf", config);
+console.log("Enriched metadata:", metadata);
+```
+
+The post-processor runs after extraction to enrich or transform the results.