Files
fil/docs/snippets/wasm/metadata/page_tracking_basic.md

39 lines
1006 B
Markdown
Raw Normal View History

2026-06-01 23:40:55 +02:00
```typescript title="WASM"
import init, { extractBytes } from "kreuzberg-wasm";
import { PageConfig, ExtractionConfig } from "kreuzberg-wasm";
await init();
const fileBuffer = new Uint8Array(/* your file bytes */);
const mimeType = "application/pdf";
const config = new ExtractionConfig({
pages: new PageConfig({
extract_pages: true,
}),
});
const result = await extractBytes(fileBuffer, mimeType, config);
if (result.pages) {
console.log(`Total pages extracted: ${result.pages.length}`);
result.pages.forEach((page) => {
console.log(`Page ${page.pageNumber}:`);
console.log(` Content length: ${page.content.length} chars`);
console.log(` Tables: ${page.tables.length}`);
console.log(` Images: ${page.images.length}`);
// Check if page is blank
if (page.isBlank) {
console.log(" This page is blank");
}
// Access page hierarchy if available
if (page.hierarchy) {
console.log(` Hierarchy level: ${page.hierarchy}`);
}
});
}
```