Files
fil/docs/snippets/wasm/advanced/quality_processing_config.md
Henrik Jess Nielsen b4c07d3693
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s
Nomad changes
2026-06-01 23:40:55 +02:00

104 lines
2.8 KiB
Markdown

```typescript title="WASM - Enable Quality Processing"
import init, { extractBytes } from "kreuzberg-wasm";
// Load the WASM module before calling any extraction function.
await init();

// Ask the extractor to attach a quality score to the result.
const config = {
  enableQualityProcessing: true,
};

// `buffer` is not defined in this snippet; it is expected to be supplied by
// the surrounding code and hold the raw document bytes.
const bytes = new Uint8Array(buffer);
const result = await extractBytes(bytes, "application/pdf", config);

// Quality score indicates text extraction quality (0.0-1.0)
const score = result.qualityScore;
console.log(`Quality score: ${score?.toFixed(3) ?? "N/A"}`);
console.log(`Content: ${result.content.substring(0, 100)}...`);

// Compare against null explicitly: a plain truthiness check would skip the
// warning for a score of exactly 0, which is the worst possible result.
if (score != null && score < 0.5) {
  console.warn("Low quality extraction detected - consider OCR or alternative processing");
}
```
```typescript title="WASM - Quality Monitoring"
import init, { extractBytes } from "kreuzberg-wasm";
// Load the WASM module before calling any extraction function.
await init();

/** Summary of a single extraction, as printed in the report below. */
interface ExtractionQuality {
  /** Length of the extracted text, taken from `content.length`. */
  contentLength: number;
  /** Score reported for the extraction, or `null` when the result has none. */
  qualityScore: number | null;
  /** Bucket derived from `qualityScore`: "high", "medium", "low" or "unknown". */
  assessedAs: string;
}

// Ask the extractor to attach a quality score to the result.
const config = {
  enableQualityProcessing: true,
};

// `buffer` is not defined in this snippet; it is expected to be supplied by
// the surrounding code and hold the raw document bytes.
const bytes = new Uint8Array(buffer);
const result = await extractBytes(bytes, "application/pdf", config);

// `??` keeps a measured score of 0; `||` would have turned it into null and
// reported the worst possible extraction as "unknown".
const score: number | null = result.qualityScore ?? null;

// Thresholds: above 0.8 is "high", above 0.5 is "medium", anything else "low".
// Only a missing score is "unknown".
let assessedAs = "unknown";
if (score !== null) {
  assessedAs = score > 0.8 ? "high" : score > 0.5 ? "medium" : "low";
}

const quality: ExtractionQuality = {
  contentLength: result.content.length,
  qualityScore: score,
  assessedAs,
};

console.log("Extraction Quality Report:");
// `content.length` counts string characters (UTF-16 code units), not bytes.
console.log(` Content size: ${quality.contentLength} characters`);
console.log(` Quality score: ${quality.qualityScore?.toFixed(3) ?? "N/A"}`);
console.log(` Assessment: ${quality.assessedAs}`);
if (quality.assessedAs === "low") {
  console.log(
    " Recommendation: Review raw text for encoding issues or consider alternative extraction",
  );
}
```
```typescript title="WASM - Quality with OCR Fallback"
import init, { extractBytes } from "kreuzberg-wasm";
// Load the WASM module before calling any extraction function.
await init();

/**
 * Extracts text with quality processing enabled and labels the result by how
 * trustworthy the extraction looks.
 *
 * This helper does not run OCR itself. It only tags weak results as
 * "degraded-text-extraction" so the caller can decide whether to retry with
 * OCR or another pipeline.
 *
 * @param bytes - Raw document bytes passed to `extractBytes`.
 * @param mimeType - MIME type of the document, e.g. "application/pdf".
 * @returns The extracted content, the reported quality score (`null` when the
 *   result carries no score), and the label assigned to the extraction.
 */
async function extractWithQualityCheck(
  bytes: Uint8Array,
  mimeType: string,
): Promise<{ content: string; quality: number | null; method: string }> {
  const config = {
    enableQualityProcessing: true,
  };
  const result = await extractBytes(bytes, mimeType, config);

  // Keep "no score" (null) distinct from a measured score of 0 instead of
  // fabricating a 0 for results that carry no score.
  const quality: number | null = result.qualityScore ?? null;

  // Only a score of at least 0.5 counts as reliable. A missing score cannot
  // vouch for the text, so it takes the same path as a low score.
  const isReliable = quality !== null && quality >= 0.5;
  if (!isReliable) {
    console.warn("Low quality text extraction - alternative processing recommended");
    return {
      content: result.content,
      quality,
      method: "degraded-text-extraction",
    };
  }

  return {
    content: result.content,
    quality,
    method: "text-extraction",
  };
}

// `buffer` is not defined in this snippet; it is expected to be supplied by
// the surrounding code and hold the raw document bytes.
const bytes = new Uint8Array(buffer);
const extracted = await extractWithQualityCheck(bytes, "application/pdf");
console.log(`Extraction method: ${extracted.method}`);
// `quality` may be null, so fall back to "N/A" rather than printing "undefined".
console.log(`Quality score: ${extracted.quality?.toFixed(3) ?? "N/A"}`);
console.log(`Content preview: ${extracted.content.substring(0, 80)}...`);
```