Files
fil/docs/snippets/wasm/metadata/page_boundaries.md
Henrik Jess Nielsen b4c07d3693
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s
Nomad changes
2026-06-01 23:40:55 +02:00

1.1 KiB

import init, { extractBytes } from "kreuzberg-wasm";
import { PageConfig, ExtractionConfig } from "kreuzberg-wasm";

// Load the WASM module; this runs ahead of the extraction call below.
await init();

// Input document: the MIME type and the raw bytes handed to extractBytes.
const mimeType = "application/pdf";
const fileBuffer = new Uint8Array(/* your file bytes */);

// Turn on page extraction through the `pages` section of the config.
const pageOptions = new PageConfig({ extract_pages: true });
const config = new ExtractionConfig({ pages: pageOptions });

const result = await extractBytes(fileBuffer, mimeType, config);

// Print the page count and, for each page boundary, its byte range and a short
// text preview taken from the extracted content.
if (result.metadata && result.metadata.pages) {
  const pageStructure = result.metadata.pages;
  console.log(`Total pages: ${pageStructure.total_count}`);

  if (pageStructure.boundaries) {
    // Upper bound, in bytes, on the preview printed for each page.
    const previewByteLimit = 100;

    // The boundaries are byte offsets (assumed UTF-8 offsets into
    // `result.content` -- confirm against the PageBoundary reference), while
    // JavaScript strings are indexed in UTF-16 code units. Slicing the string
    // directly with these offsets drifts as soon as the content contains a
    // non-ASCII character, so encode once and slice the bytes instead.
    const contentBytes = new TextEncoder().encode(result.content);
    const decoder = new TextDecoder();

    // Iterate through page boundaries to map content to pages
    pageStructure.boundaries.forEach((boundary) => {
      const start = boundary.byte_start;
      let previewEnd = Math.min(boundary.byte_end, start + previewByteLimit);

      // If the cut lands on a UTF-8 continuation byte (0b10xxxxxx), step back
      // to the start of that character so the preview never ends on a
      // half-decoded sequence. An out-of-range index reads `undefined`, which
      // fails the mask test and leaves `previewEnd` untouched.
      while (previewEnd > start && (contentBytes[previewEnd] & 0xc0) === 0x80) {
        previewEnd -= 1;
      }

      const pageText = decoder.decode(contentBytes.subarray(start, previewEnd));

      console.log(`Page ${boundary.page_number}:`);
      console.log(`  Byte range: ${boundary.byte_start}-${boundary.byte_end}`);
      console.log(`  Preview: ${pageText}...`);
    });
  }
}