This commit is contained in:
@@ -0,0 +1,35 @@
|
||||
```typescript title="WASM"
|
||||
import init, { extractBytes } from "kreuzberg-wasm";
|
||||
import { LanguageDetectionConfig, ExtractionConfig } from "kreuzberg-wasm";
|
||||
|
||||
await init();
|
||||
|
||||
const fileBuffer = new Uint8Array(/* your file bytes */);
|
||||
const mimeType = "text/plain";
|
||||
|
||||
const config = new ExtractionConfig({
|
||||
language_detection: new LanguageDetectionConfig({
|
||||
enable_detection: true,
|
||||
target_languages: ["en", "de", "fr", "es", "it", "ja", "zh"],
|
||||
confidence_threshold: 0.5,
|
||||
}),
|
||||
});
|
||||
|
||||
const result = await extractBytes(fileBuffer, mimeType, config);
|
||||
|
||||
if (result.detected_languages && result.detected_languages.length > 0) {
|
||||
console.log("Document languages:", result.detected_languages.join(", "));
|
||||
|
||||
// Process multi-language content
|
||||
result.detected_languages.forEach((lang) => {
|
||||
console.log(`Language detected: ${lang}`);
|
||||
});
|
||||
|
||||
// Access metadata for language info
|
||||
if (result.metadata && result.metadata.language) {
|
||||
console.log(`Primary metadata language: ${result.metadata.language}`);
|
||||
}
|
||||
} else {
|
||||
console.log("No languages detected");
|
||||
}
|
||||
```
|
||||
Reference in New Issue
Block a user