Nomad changes
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s

This commit is contained in:
Henrik Jess Nielsen
2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions

View File

@@ -0,0 +1,66 @@
import type { ExtractionConfig } from "@kreuzberg/wasm";
import { extractBytes, initWasm } from "@kreuzberg/wasm";
/**
 * Extracts `book.pdf` with chunking enabled and logs a preview of the first
 * three chunks.
 *
 * The chunking config sets `maxChars: 800`, `chunkOverlap: 150`, and enables
 * both `splitOnNewlines` and `splitOnSentences`. The heading-breadcrumb variant
 * of the chunking config is demonstrated in `extractWithPrependHeadingContext`.
 *
 * @returns A promise that settles once the preview has been logged (or
 *   immediately after extraction when the result carries no chunks).
 * @throws Error when the request for `book.pdf` does not return a success status.
 */
async function extractWithChunking(): Promise<void> {
  await initWasm();

  // Fail fast on HTTP errors so an error page is never handed to the extractor
  // as if it were PDF bytes.
  const response = await fetch("book.pdf");
  if (!response.ok) {
    throw new Error(`Failed to fetch book.pdf: ${response.status} ${response.statusText}`);
  }
  const bytes = new Uint8Array(await response.arrayBuffer());

  const config: ExtractionConfig = {
    chunking: {
      maxChars: 800,
      chunkOverlap: 150,
      splitOnNewlines: true,
      splitOnSentences: true,
    },
  };

  const result = await extractBytes(bytes, "application/pdf", config);
  if (!result.chunks) {
    return;
  }

  console.log(`Total chunks: ${result.chunks.length}`);
  // Only the first three chunks are previewed to keep the output short.
  result.chunks.slice(0, 3).forEach((chunk, i) => {
    console.log(`\nChunk ${i}:`);
    console.log(`Chars: ${chunk.metadata.charStart}-${chunk.metadata.charEnd}`);
    console.log(`Content: ${chunk.content.substring(0, 100)}...`);
  });
}
/**
 * Extracts `document.md` with the markdown chunker and `prependHeadingContext`
 * enabled, then logs a preview of the first three chunks.
 *
 * The chunking config sets `chunkerType: "markdown"`, `maxChars: 800`, and
 * `prependHeadingContext: true`.
 *
 * @returns A promise that settles once the preview has been logged (or
 *   immediately after extraction when the result carries no chunks).
 * @throws Error when the request for `document.md` does not return a success status.
 */
async function extractWithPrependHeadingContext(): Promise<void> {
  await initWasm();

  // Fail fast on HTTP errors so an error page is never handed to the extractor
  // as if it were the markdown document.
  const response = await fetch("document.md");
  if (!response.ok) {
    throw new Error(`Failed to fetch document.md: ${response.status} ${response.statusText}`);
  }
  const bytes = new Uint8Array(await response.arrayBuffer());

  const config: ExtractionConfig = {
    chunking: {
      chunkerType: "markdown",
      maxChars: 800,
      prependHeadingContext: true,
    },
  };

  const result = await extractBytes(bytes, "text/markdown", config);
  if (!result.chunks) {
    return;
  }

  console.log(`Total chunks: ${result.chunks.length}`);
  result.chunks.slice(0, 3).forEach((chunk, i) => {
    // Each chunk's content is prefixed with its heading breadcrumb
    console.log(`\nChunk ${i}: ${chunk.content.substring(0, 100)}...`);
  });
}
// Start both examples in declaration order without awaiting one another;
// a rejection from either one is reported through console.error.
for (const runExample of [extractWithChunking, extractWithPrependHeadingContext]) {
  runExample().catch(console.error);
}