Files
fil/docs/snippets/wasm/metadata/metadata-with-chunks.ts

34 lines
899 B
TypeScript
Raw Normal View History

2026-06-01 23:40:55 +02:00
import type { ExtractionConfig } from "@kreuzberg/wasm";
import { extractBytes, initWasm } from "@kreuzberg/wasm";
/**
 * Example: extract a PDF with chunking enabled, then log the document-level
 * metadata followed by the metadata of each produced chunk.
 *
 * Steps: initialise the WASM module, download `document.pdf`, call
 * `extractBytes` with a chunking configuration, and print the results.
 *
 * @returns A promise that resolves once all metadata has been logged.
 * @throws Error when the HTTP request for `document.pdf` returns a non-OK status.
 */
async function extractWithChunkMetadata(): Promise<void> {
  await initWasm();

  const response = await fetch("document.pdf");
  // fetch() resolves even for 4xx/5xx responses; without this check the body
  // of an error page would be passed to the extractor as if it were a PDF.
  if (!response.ok) {
    throw new Error(
      `Failed to fetch document.pdf: ${response.status} ${response.statusText}`,
    );
  }
  const bytes = new Uint8Array(await response.arrayBuffer());

  const config: ExtractionConfig = {
    chunking: {
      maxChars: 500,
      chunkOverlap: 50,
    },
  };

  const result = await extractBytes(bytes, "application/pdf", config);
  console.log("Document Metadata:", result.metadata);

  // Only walk the chunks when the result actually carries them.
  if (result.chunks) {
    for (const chunk of result.chunks) {
      const { charStart, charEnd, chunkIndex, totalChunks, tokenCount } = chunk.metadata;
      console.log("Chunk Metadata:", {
        charStart,
        charEnd,
        index: chunkIndex,
        total: totalChunks,
        tokens: tokenCount,
      });
    }
  }
}
// Run the example; a rejection from any step is reported through console.error.
extractWithChunkMetadata().catch((error: unknown) => {
  console.error(error);
});