This commit is contained in:
33
docs/snippets/wasm/metadata/metadata-with-chunks.ts
Normal file
33
docs/snippets/wasm/metadata/metadata-with-chunks.ts
Normal file
@@ -0,0 +1,33 @@
|
||||
import type { ExtractionConfig } from "@kreuzberg/wasm";
|
||||
import { extractBytes, initWasm } from "@kreuzberg/wasm";
|
||||
|
||||
async function extractWithChunkMetadata() {
|
||||
await initWasm();
|
||||
|
||||
const bytes = new Uint8Array(await fetch("document.pdf").then((r) => r.arrayBuffer()));
|
||||
|
||||
const config: ExtractionConfig = {
|
||||
chunking: {
|
||||
maxChars: 500,
|
||||
chunkOverlap: 50,
|
||||
},
|
||||
};
|
||||
|
||||
const result = await extractBytes(bytes, "application/pdf", config);
|
||||
|
||||
console.log("Document Metadata:", result.metadata);
|
||||
|
||||
if (result.chunks) {
|
||||
result.chunks.forEach((chunk) => {
|
||||
console.log("Chunk Metadata:", {
|
||||
charStart: chunk.metadata.charStart,
|
||||
charEnd: chunk.metadata.charEnd,
|
||||
index: chunk.metadata.chunkIndex,
|
||||
total: chunk.metadata.totalChunks,
|
||||
tokens: chunk.metadata.tokenCount,
|
||||
});
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
extractWithChunkMetadata().catch(console.error);
|
||||
Reference in New Issue
Block a user