This commit is contained in:
51
docs/snippets/wasm/plugins/min_length_validator.md
Normal file
51
docs/snippets/wasm/plugins/min_length_validator.md
Normal file
@@ -0,0 +1,51 @@
|
||||
# Minimum Length Text Validator
|
||||
|
||||
Register a validator that filters out extraction results with text below a minimum length threshold.
|
||||
|
||||
```typescript title="WASM"
|
||||
import init, { registerValidator, extractBytes } from "kreuzberg-wasm";
|
||||
|
||||
await init();
|
||||
|
||||
const MIN_LENGTH = 10;
|
||||
|
||||
// Define a minimum length validator
|
||||
const minLengthValidator = {
|
||||
validate: (extractionResult) => {
|
||||
const textLength = extractionResult.text?.length || 0;
|
||||
|
||||
if (textLength < MIN_LENGTH) {
|
||||
return {
|
||||
valid: false,
|
||||
error: `Text too short: ${textLength} < ${MIN_LENGTH}`,
|
||||
};
|
||||
}
|
||||
|
||||
return {
|
||||
valid: true,
|
||||
error: null,
|
||||
};
|
||||
},
|
||||
};
|
||||
|
||||
try {
|
||||
registerValidator(minLengthValidator);
|
||||
console.log(`Min length validator registered (threshold: ${MIN_LENGTH})`);
|
||||
} catch (error) {
|
||||
console.error("Failed to register validator:", error);
|
||||
}
|
||||
|
||||
// Now extract with validation enabled
|
||||
const pdfBytes = new Uint8Array([
|
||||
/* PDF content */
|
||||
]);
|
||||
const config = {
|
||||
ocr: null,
|
||||
chunking: null,
|
||||
};
|
||||
|
||||
const result = await extractBytes(pdfBytes, "application/pdf", config);
|
||||
console.log("Validated result:", result);
|
||||
```
|
||||
|
||||
This validator ensures extracted text meets minimum quality standards by checking length.
|
||||
Reference in New Issue
Block a user