Files
fil/docs/snippets/wasm/advanced/quality_processing_config.md
Henrik Jess Nielsen b4c07d3693
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s
Nomad changes
2026-06-01 23:40:55 +02:00

2.8 KiB

import init, { extractBytes } from "kreuzberg-wasm";

await init();

const config = {
  enableQualityProcessing: true,
};

const bytes = new Uint8Array(buffer);
const result = await extractBytes(bytes, "application/pdf", config);

console.log(`Quality score: ${result.qualityScore?.toFixed(3) || "N/A"}`);
console.log(`Content: ${result.content.substring(0, 100)}...`);

// Quality score indicates text extraction quality (0.0-1.0)
if (result.qualityScore && result.qualityScore < 0.5) {
  console.warn("Low quality extraction detected - consider OCR or alternative processing");
}
import init, { extractBytes } from "kreuzberg-wasm";

await init();

interface ExtractionQuality {
  contentLength: number;
  qualityScore: number | null;
  assessedAs: string;
}

const config = {
  enableQualityProcessing: true,
};

const bytes = new Uint8Array(buffer);
const result = await extractBytes(bytes, "application/pdf", config);

const quality: ExtractionQuality = {
  contentLength: result.content.length,
  qualityScore: result.qualityScore || null,
  assessedAs: result.qualityScore
    ? result.qualityScore > 0.8
      ? "high"
      : result.qualityScore > 0.5
        ? "medium"
        : "low"
    : "unknown",
};

console.log("Extraction Quality Report:");
console.log(`  Content size: ${quality.contentLength} bytes`);
console.log(`  Quality score: ${quality.qualityScore?.toFixed(3) || "N/A"}`);
console.log(`  Assessment: ${quality.assessedAs}`);

if (quality.assessedAs === "low") {
  console.log(
    "  Recommendation: Review raw text for encoding issues or consider alternative extraction",
  );
}
import init, { extractBytes } from "kreuzberg-wasm";

await init();

async function extractWithQualityCheck(
  bytes: Uint8Array,
  mimeType: string,
): Promise<{ content: string; quality: number | null; method: string }> {
  const config = {
    enableQualityProcessing: true,
  };

  const result = await extractBytes(bytes, mimeType, config);
  const qualityScore = result.qualityScore || 0;

  // If quality is low, consider text extraction failed or use OCR
  if (qualityScore < 0.5) {
    console.warn("Low quality text extraction - alternative processing recommended");
    return {
      content: result.content,
      quality: qualityScore,
      method: "degraded-text-extraction",
    };
  }

  return {
    content: result.content,
    quality: qualityScore,
    method: "text-extraction",
  };
}

const bytes = new Uint8Array(buffer);
const extracted = await extractWithQualityCheck(bytes, "application/pdf");

console.log(`Extraction method: ${extracted.method}`);
console.log(`Quality score: ${extracted.quality?.toFixed(3)}`);
console.log(`Content preview: ${extracted.content.substring(0, 80)}...`);