Nomad changes
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s

This commit is contained in:
Henrik Jess Nielsen
2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions

View File

@@ -0,0 +1,50 @@
import type { ExtractionResult } from "@kreuzberg/wasm";
import { extractBytes, initWasm } from "@kreuzberg/wasm";
/**
 * One stage of the post-extraction pipeline run by `createExtractionPipeline`.
 */
interface ProcessingStep {
/** Label for the stage; `createExtractionPipeline` logs it just before the stage runs. */
name: string;
/**
 * Asynchronous transform for the stage: receives the current extraction
 * result and resolves to the result handed to the next stage.
 */
process: (result: ExtractionResult) => Promise<ExtractionResult>;
}
/**
 * Extracts a document and then feeds the extraction result through a list of
 * post-processing stages, strictly one after another.
 *
 * `initWasm()` is awaited first, then `extractBytes(bytes, mimeType)` produces
 * the initial result. Each stage's name is logged immediately before the stage
 * is invoked, and the value a stage resolves to becomes the input of the next.
 *
 * @param steps - Stages to apply, in array order. An empty array yields the
 *   raw extraction result.
 * @param bytes - Raw document content passed to `extractBytes`.
 * @param mimeType - MIME type passed to `extractBytes` alongside the bytes.
 * @returns The result produced by the last stage, or the extraction result
 *   itself when no stages are given.
 */
async function createExtractionPipeline(
  steps: ProcessingStep[],
  bytes: Uint8Array,
  mimeType: string,
) {
  await initWasm();

  let current = await extractBytes(bytes, mimeType);

  for (const stage of steps) {
    console.log(`Executing step: ${stage.name}`);
    // Await each stage before starting the next: stages depend on the
    // previous stage's output, so they cannot run concurrently.
    const next = await stage.process(current);
    current = next;
  }

  return current;
}
/**
 * Example stages, listed in the order they are applied.
 *
 * Only "Text Normalization" changes the result; "Language Detection" and
 * "Chunking" are pass-through placeholders that resolve to the very object
 * they were given.
 */
const pipeline: ProcessingStep[] = [
  {
    name: "Text Normalization",
    // Collapses every run of whitespace into a single space and strips both
    // ends. Returns a shallow copy, leaving the incoming result untouched.
    async process(result) {
      const content = result.content.replace(/\s+/g, " ").trim();
      return { ...result, content };
    },
  },
  {
    name: "Language Detection",
    // Placeholder: hands the result back unchanged.
    async process(result) {
      return result;
    },
  },
  {
    name: "Chunking",
    // Placeholder: hands the result back unchanged.
    async process(result) {
      return result;
    },
  },
];
// Entry point: download the sample PDF, run it through the pipeline, and log
// the outcome. Everything lives in one try/catch so that a failed download or
// body read is reported through console.error just like a pipeline failure,
// instead of escaping as an unhandled top-level rejection.
try {
  const response = await fetch("doc.pdf");
  // fetch() only rejects on network-level failures; an HTTP error status still
  // resolves, and its error body must not be handed to the extractor as a PDF.
  if (!response.ok) {
    throw new Error(
      `Failed to fetch doc.pdf: ${response.status} ${response.statusText}`,
    );
  }

  const bytes = new Uint8Array(await response.arrayBuffer());
  const result = await createExtractionPipeline(
    pipeline,
    bytes,
    "application/pdf",
  );
  console.log("Pipeline complete:", result);
} catch (error: unknown) {
  console.error(error);
}