Nomad changes
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s

This commit is contained in:
Henrik Jess Nielsen
2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions

View File

@@ -0,0 +1,69 @@
# Register Custom Validator Plugin
Register a custom validator that checks extraction results for quality or correctness.
```typescript title="WASM"
import init, { registerValidator, extractBytes } from "kreuzberg-wasm";
await init();
// Define a custom validator
const customValidator = {
validate: (extractionResult) => {
const text = extractionResult.text || "";
// Check for minimum content
if (text.length === 0) {
return {
valid: false,
error: "No text extracted from document",
};
}
// Check for suspicious patterns
const hasRepeatingChars = /(.)\1{5,}/.test(text);
if (hasRepeatingChars) {
return {
valid: false,
error: "Text contains excessive repeating characters (possible OCR error)",
};
}
// Check if text is mostly whitespace
if (text.trim().length < text.length * 0.5) {
return {
valid: false,
error: "Text is mostly whitespace",
};
}
return {
valid: true,
error: null,
};
},
};
try {
registerValidator(customValidator);
console.log("Custom validator registered");
} catch (error) {
console.error("Failed to register validator:", error);
}
// Extract and validate
async function extractAndValidate(fileBytes, mimeType) {
const result = await extractBytes(fileBytes, mimeType, {});
const validation = customValidator.validate(result);
if (!validation.valid) {
console.warn("Validation failed:", validation.error);
} else {
console.log("✓ Extraction passed validation");
}
return result;
}
```
Validators run after extraction to ensure results meet quality standards.