Nomad changes
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s

This commit is contained in:
Henrik Jess Nielsen
2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions

48
e2e/node/tests/async.test.ts generated Normal file
View File

@@ -0,0 +1,48 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
import { describe, expect, it } from 'vitest';import { extractFile, extractBytes, type ExtractionConfig } from '@kreuzberg/node';
/**
 * Converts an arbitrary value to text for assertion matching.
 * `null` and `undefined` become the empty string; anything else goes through `String()`.
 */
function _alefE2eText(value: unknown): string {
  if (value === null || value === undefined) {
    return "";
  }
  return String(value);
}
/**
 * Collects the text fragments of an item that assertions may match against.
 *
 * Nullish or non-object items yield a one-element array holding their text form.
 * Objects yield, in order: the stringified item itself, its `kind`, `name`,
 * `source`, `alias`, `text` and `signature` properties, and finally the
 * space-joined text of `items` (empty string when `items` is not an array).
 */
function _alefE2eItemTexts(item: unknown): string[] {
  if (typeof item !== "object" || item === null) {
    return [_alefE2eText(item)];
  }
  const fields = item as Record<string, unknown>;
  const nested = fields.items;
  const joinedItems = Array.isArray(nested) ? nested.map(_alefE2eText).join(" ") : "";
  const texts = [_alefE2eText(item)];
  for (const key of ["kind", "name", "source", "alias", "text", "signature"]) {
    texts.push(_alefE2eText(fields[key]));
  }
  texts.push(joinedItems);
  return texts;
}
/**
 * Produces the display string for a FormatMetadata value.
 *
 * FormatMetadata is a tagged union, e.g.
 * `{ format_type: 'image', image: { format: 'PNG', ... }, ... }`.
 *
 * @param fm - The metadata value; may be nullish, a primitive, or a record.
 * @returns `""` for nullish input, `String(fm)` for primitives, the image
 *   `format` string for the image variant when it is a string, otherwise the
 *   `format_type` tag (or `""` when the tag is not a string).
 */
function _alefE2eFormatMetadataDisplay(fm: unknown): string {
  if (fm == null) return "";
  if (typeof fm !== "object") return String(fm);
  const record = fm as Record<string, unknown>;
  const formatType = record.format_type;
  // `typeof null` is "object", so rule out null before reading `image.format`;
  // otherwise `{ format_type: "image", image: null }` throws a TypeError.
  if (formatType === "image" && typeof record.image === "object" && record.image !== null) {
    const imageData = record.image as Record<string, unknown>;
    if (typeof imageData.format === "string") return imageData.format;
  }
  // Fallback: return the format_type variant name.
  return typeof formatType === "string" ? formatType : "";
}
// Async extractBytes cases: one happy path on a PDF and two MIME-type rejections.
describe('async', () => {
  it('async_extract_bytes: Async extract_bytes call on PDF document', async () => {
    const fs = await import('node:fs/promises');
    const pdfBytes = await fs.readFile('pdf/fake_memo.pdf');
    const result = await extractBytes(pdfBytes, "application/pdf", undefined);
    expect(result.mimeType.trim()).toBe("application/pdf");
    expect(result.content.length).toBeGreaterThanOrEqual(50);
  }, 30000);

  it('async_extract_bytes_empty_mime: extract_bytes empty MIME async', async () => {
    // The file read and the extraction both run inside the callback, so a
    // rejection from either one satisfies the assertion.
    const attempt = async () => {
      const fs = await import('node:fs/promises');
      const textBytes = await fs.readFile('text/plain.txt');
      await extractBytes(textBytes, "", undefined);
    };
    await expect(attempt).rejects.toThrow();
  }, 30000);

  it('async_extract_bytes_invalid_mime: extract_bytes unsupported MIME async', async () => {
    const attempt = async () => {
      const fs = await import('node:fs/promises');
      const textBytes = await fs.readFile('text/plain.txt');
      await extractBytes(textBytes, "application/x-nonexistent", undefined);
    };
    await expect(attempt).rejects.toThrow();
  }, 30000);
});

62
e2e/node/tests/batch.test.ts generated Normal file
View File

@@ -0,0 +1,62 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
import { describe, expect, it } from 'vitest';import { extractFile, batchExtractBytesSync, batchExtractBytes, batchExtractFiles, batchExtractFilesSync, BatchBytesItem, BatchFileItem, type ExtractionConfig } from '@kreuzberg/node';
/**
 * Converts an arbitrary value to text for assertion matching.
 * `null` and `undefined` become the empty string; anything else goes through `String()`.
 */
function _alefE2eText(value: unknown): string {
  if (value === null || value === undefined) {
    return "";
  }
  return String(value);
}
/**
 * Collects the text fragments of an item that assertions may match against.
 *
 * Nullish or non-object items yield a one-element array holding their text form.
 * Objects yield, in order: the stringified item itself, its `kind`, `name`,
 * `source`, `alias`, `text` and `signature` properties, and finally the
 * space-joined text of `items` (empty string when `items` is not an array).
 */
function _alefE2eItemTexts(item: unknown): string[] {
  if (typeof item !== "object" || item === null) {
    return [_alefE2eText(item)];
  }
  const fields = item as Record<string, unknown>;
  const nested = fields.items;
  const joinedItems = Array.isArray(nested) ? nested.map(_alefE2eText).join(" ") : "";
  const texts = [_alefE2eText(item)];
  for (const key of ["kind", "name", "source", "alias", "text", "signature"]) {
    texts.push(_alefE2eText(fields[key]));
  }
  texts.push(joinedItems);
  return texts;
}
/**
 * Produces the display string for a FormatMetadata value.
 *
 * FormatMetadata is a tagged union, e.g.
 * `{ format_type: 'image', image: { format: 'PNG', ... }, ... }`.
 *
 * @param fm - The metadata value; may be nullish, a primitive, or a record.
 * @returns `""` for nullish input, `String(fm)` for primitives, the image
 *   `format` string for the image variant when it is a string, otherwise the
 *   `format_type` tag (or `""` when the tag is not a string).
 */
function _alefE2eFormatMetadataDisplay(fm: unknown): string {
  if (fm == null) return "";
  if (typeof fm !== "object") return String(fm);
  const record = fm as Record<string, unknown>;
  const formatType = record.format_type;
  // `typeof null` is "object", so rule out null before reading `image.format`;
  // otherwise `{ format_type: "image", image: null }` throws a TypeError.
  if (formatType === "image" && typeof record.image === "object" && record.image !== null) {
    const imageData = record.image as Record<string, unknown>;
    if (typeof imageData.format === "string") return imageData.format;
  }
  // Fallback: return the format_type variant name.
  return typeof formatType === "string" ? formatType : "";
}
// Batch extraction cases. Most of them carry no `expect`, so they pass as long
// as the call under test does not throw (or, for async cases, does not reject).
describe('batch', () => {
  it('batch_bytes_invalid_mime: batch_extract_bytes_sync invalid MIME', () => {
    batchExtractBytesSync(
      [
        // Bytes spell "Hello".
        { content: new Uint8Array([72, 101, 108, 108, 111]), mimeType: "application/x-nonexistent" },
      ],
      undefined,
    );
  }, 30000);

  it('batch_extract_bytes_happy: batch_extract_bytes: happy path with mixed inputs', async () => {
    const result = await batchExtractBytes(
      [
        // Bytes spell "Hello, world!".
        { content: new Uint8Array([72, 101, 108, 108, 111, 44, 32, 119, 111, 114, 108, 100, 33]), mimeType: "text/plain" },
        // Bytes spell "<html><body>Test</body></html>".
        { content: new Uint8Array([60, 104, 116, 109, 108, 62, 60, 98, 111, 100, 121, 62, 84, 101, 115, 116, 60, 47, 98, 111, 100, 121, 62, 60, 47, 104, 116, 109, 108, 62]), mimeType: "text/html" },
      ],
      undefined,
    );
    expect(result.length).toBeGreaterThanOrEqual(1);
  }, 30000);

  it('batch_extract_bytes_mixed_format: batch_extract_bytes: handles unsupported MIME gracefully', async () => {
    await batchExtractBytes(
      [
        // Bytes spell "PDF placeholder".
        { content: new Uint8Array([80, 68, 70, 32, 112, 108, 97, 99, 101, 104, 111, 108, 100, 101, 114]), mimeType: "application/x-unknown" },
      ],
      undefined,
    );
  }, 30000);

  it('batch_extract_bytes_sync_empty_list: batch_extract_bytes_sync: empty batch', () => {
    const result = batchExtractBytesSync([], undefined);
    expect(result.length).toBe(0);
  }, 30000);

  it('batch_extract_bytes_sync_invalid_mime: batch_extract_bytes_sync: unsupported MIME', () => {
    batchExtractBytesSync(
      [
        // Bytes spell "data".
        { content: new Uint8Array([100, 97, 116, 97]), mimeType: "application/x-unknown" },
      ],
      undefined,
    );
  }, 30000);

  it('batch_file_async_basic: Extract text from multiple files asynchronously', async () => {
    await batchExtractFiles(
      [{ path: "pdf/fake_memo.pdf" }, { path: "text/fake_text.txt" }],
      undefined,
    );
  }, 30000);

  it('batch_file_async_not_found: batch_extract_file async nonexistent', async () => {
    await batchExtractFiles([{ path: "/nonexistent/a.pdf" }], undefined);
  }, 30000);

  it('batch_file_not_found: batch_extract_file_sync nonexistent', () => {
    batchExtractFilesSync(
      [{ path: "/nonexistent/a.pdf" }, { path: "/nonexistent/b.txt" }],
      undefined,
    );
  }, 30000);

  it('batch_file_partial: batch_extract_file_sync mixed', () => {
    batchExtractFilesSync(
      [{ path: "text/plain.txt" }, { path: "/nonexistent/missing.pdf" }],
      undefined,
    );
  }, 30000);

  it('batch_file_sync_basic: Extract text from multiple files synchronously', () => {
    batchExtractFilesSync(
      [{ path: "pdf/fake_memo.pdf" }, { path: "text/fake_text.txt" }],
      undefined,
    );
  }, 30000);
});

46
e2e/node/tests/code.test.ts generated Normal file
View File

@@ -0,0 +1,46 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
import { describe, expect, it } from 'vitest';import { extractFile, extractFileSync } from '@kreuzberg/node';
/**
 * Converts an arbitrary value to text for assertion matching.
 * `null` and `undefined` become the empty string; anything else goes through `String()`.
 */
function _alefE2eText(value: unknown): string {
  if (value === null || value === undefined) {
    return "";
  }
  return String(value);
}
/**
 * Collects the text fragments of an item that assertions may match against.
 *
 * Nullish or non-object items yield a one-element array holding their text form.
 * Objects yield, in order: the stringified item itself, its `kind`, `name`,
 * `source`, `alias`, `text` and `signature` properties, and finally the
 * space-joined text of `items` (empty string when `items` is not an array).
 */
function _alefE2eItemTexts(item: unknown): string[] {
  if (typeof item !== "object" || item === null) {
    return [_alefE2eText(item)];
  }
  const fields = item as Record<string, unknown>;
  const nested = fields.items;
  const joinedItems = Array.isArray(nested) ? nested.map(_alefE2eText).join(" ") : "";
  const texts = [_alefE2eText(item)];
  for (const key of ["kind", "name", "source", "alias", "text", "signature"]) {
    texts.push(_alefE2eText(fields[key]));
  }
  texts.push(joinedItems);
  return texts;
}
/**
 * Produces the display string for a FormatMetadata value.
 *
 * FormatMetadata is a tagged union, e.g.
 * `{ format_type: 'image', image: { format: 'PNG', ... }, ... }`.
 *
 * @param fm - The metadata value; may be nullish, a primitive, or a record.
 * @returns `""` for nullish input, `String(fm)` for primitives, the image
 *   `format` string for the image variant when it is a string, otherwise the
 *   `format_type` tag (or `""` when the tag is not a string).
 */
function _alefE2eFormatMetadataDisplay(fm: unknown): string {
  if (fm == null) return "";
  if (typeof fm !== "object") return String(fm);
  const record = fm as Record<string, unknown>;
  const formatType = record.format_type;
  // `typeof null` is "object", so rule out null before reading `image.format`;
  // otherwise `{ format_type: "image", image: null }` throws a TypeError.
  if (formatType === "image" && typeof record.image === "object" && record.image !== null) {
    const imageData = record.image as Record<string, unknown>;
    if (typeof imageData.format === "string") return imageData.format;
  }
  // Fallback: return the format_type variant name.
  return typeof formatType === "string" ? formatType : "";
}
// Source-code extraction case: a shell script extracted with an explicit MIME type.
describe('code', () => {
  it('code_shebang_detection: Test language detection from shebang line via bytes input', () => {
    const result = extractFileSync("code/script.sh", "text/x-source-code", undefined);
    expect(result.mimeType.trim()).toBe("text/x-source-code");
    expect(result.content.length).toBeGreaterThanOrEqual(10);
    // Both words must appear in the extracted content.
    for (const expectedWord of ["build", "clean"]) {
      expect(result.content).toContain(expectedWord);
    }
  }, 30000);
});

123
e2e/node/tests/contract.test.ts generated Normal file
View File

@@ -0,0 +1,123 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
import { describe, expect, it } from 'vitest';import { extractFile, extractFileSync, extractBytesSync, type ExtractionConfig } from '@kreuzberg/node';
/**
 * Converts an arbitrary value to text for assertion matching.
 * `null` and `undefined` become the empty string; anything else goes through `String()`.
 */
function _alefE2eText(value: unknown): string {
  if (value === null || value === undefined) {
    return "";
  }
  return String(value);
}
/**
 * Collects the text fragments of an item that assertions may match against.
 *
 * Nullish or non-object items yield a one-element array holding their text form.
 * Objects yield, in order: the stringified item itself, its `kind`, `name`,
 * `source`, `alias`, `text` and `signature` properties, and finally the
 * space-joined text of `items` (empty string when `items` is not an array).
 */
function _alefE2eItemTexts(item: unknown): string[] {
  if (typeof item !== "object" || item === null) {
    return [_alefE2eText(item)];
  }
  const fields = item as Record<string, unknown>;
  const nested = fields.items;
  const joinedItems = Array.isArray(nested) ? nested.map(_alefE2eText).join(" ") : "";
  const texts = [_alefE2eText(item)];
  for (const key of ["kind", "name", "source", "alias", "text", "signature"]) {
    texts.push(_alefE2eText(fields[key]));
  }
  texts.push(joinedItems);
  return texts;
}
/**
 * Produces the display string for a FormatMetadata value.
 *
 * FormatMetadata is a tagged union, e.g.
 * `{ format_type: 'image', image: { format: 'PNG', ... }, ... }`.
 *
 * @param fm - The metadata value; may be nullish, a primitive, or a record.
 * @returns `""` for nullish input, `String(fm)` for primitives, the image
 *   `format` string for the image variant when it is a string, otherwise the
 *   `format_type` tag (or `""` when the tag is not a string).
 */
function _alefE2eFormatMetadataDisplay(fm: unknown): string {
  if (fm == null) return "";
  if (typeof fm !== "object") return String(fm);
  const record = fm as Record<string, unknown>;
  const formatType = record.format_type;
  // `typeof null` is "object", so rule out null before reading `image.format`;
  // otherwise `{ format_type: "image", image: null }` throws a TypeError.
  if (formatType === "image" && typeof record.image === "object" && record.image !== null) {
    const imageData = record.image as Record<string, unknown>;
    if (typeof imageData.format === "string") return imageData.format;
  }
  // Fallback: return the format_type variant name.
  return typeof formatType === "string" ? formatType : "";
}
// Contract cases: each one runs a single extraction call and asserts on the
// returned MIME type and content. "skipped" comments mark assertions the
// generator did not emit for this binding.
describe('contract', () => {
  // NOTE(review): the first five cases are named after batch/bytes APIs, yet
  // every one of them calls extractFile — confirm this is the intended mapping.
  it('api_batch_bytes_async: Tests async batch bytes extraction API (batch_extract_bytes)', async () => {
    const result = await extractFile("pdf/fake_memo.pdf", undefined, undefined);
    expect(result.mimeType.trim()).toBe("application/pdf");
    expect(result.content.length).toBeGreaterThanOrEqual(10);
    const anyOf = ["May 5, 2023", "Mallori"];
    expect(anyOf.some((needle) => result.content.includes(needle))).toBe(true);
  }, 30000);

  it('api_batch_bytes_with_configs_async: Tests async batch bytes extraction with per-file configs (batch_extract_bytes with file_configs parameter)', async () => {
    const config = { outputFormat: "markdown" } as ExtractionConfig;
    const result = await extractFile("pdf/fake_memo.pdf", undefined, config);
    expect(result.mimeType.trim()).toBe("application/pdf");
    expect(result.content.length).toBeGreaterThanOrEqual(10);
    // skipped: field 'metadata.output_format' not available on result type
  }, 30000);

  it('api_batch_file_async: Tests async batch file extraction API (batch_extract_file)', async () => {
    const result = await extractFile("pdf/fake_memo.pdf", undefined, undefined);
    expect(result.mimeType.trim()).toBe("application/pdf");
    expect(result.content.length).toBeGreaterThanOrEqual(10);
    const anyOf = ["May 5, 2023", "Mallori"];
    expect(anyOf.some((needle) => result.content.includes(needle))).toBe(true);
  }, 30000);

  it('api_batch_file_with_configs_async: Tests async batch file extraction with per-file configs (batch_extract_files with file_configs parameter)', async () => {
    const config = { outputFormat: "markdown" } as ExtractionConfig;
    const result = await extractFile("pdf/fake_memo.pdf", undefined, config);
    expect(result.mimeType.trim()).toBe("application/pdf");
    expect(result.content.length).toBeGreaterThanOrEqual(10);
    // skipped: field 'metadata.output_format' not available on result type
  }, 30000);

  it('api_extract_bytes_async: Tests async bytes extraction API (extract_bytes)', async () => {
    const result = await extractFile("pdf/fake_memo.pdf", undefined, undefined);
    expect(result.mimeType.trim()).toBe("application/pdf");
    expect(result.content.length).toBeGreaterThanOrEqual(10);
    const anyOf = ["May 5, 2023", "Mallori"];
    expect(anyOf.some((needle) => result.content.includes(needle))).toBe(true);
  }, 30000);

  it('api_extract_file_async: Tests async file extraction API (extract_file)', async () => {
    const result = await extractFile("pdf/fake_memo.pdf", undefined, undefined);
    expect(result.mimeType.trim()).toBe("application/pdf");
    expect(result.content.length).toBeGreaterThanOrEqual(10);
    const anyOf = ["May 5, 2023", "Mallori"];
    expect(anyOf.some((needle) => result.content.includes(needle))).toBe(true);
  }, 30000);

  it('config_chunking_prepend_heading_context: Tests markdown chunker prepends heading hierarchy to chunk content', () => {
    const config = {
      chunking: { chunkerType: "markdown", maxChars: 300, maxOverlap: 50, prependHeadingContext: true },
    } as ExtractionConfig;
    const result = extractFileSync("markdown/extraction_test.md", undefined, config);
    expect(result.content.length).toBeGreaterThanOrEqual(10);
    // NOTE(review): the generator emitted "skipped: field 'chunks' not available
    // on result type" here, yet the assertions below do read result.chunks.
    const chunks = result.chunks ?? [];
    expect(chunks.every((chunk: { content?: string }) => !!chunk.content)).toBe(true);
    expect(chunks.every((chunk: { metadata?: { headingContext?: string } }) => chunk.metadata?.headingContext != null)).toBe(true);
    expect(chunks.at(0)?.metadata?.headingContext != null).toBe(true);
  }, 30000);

  it('config_document_structure_with_headings: Tests document structure with DOCX heading-driven nesting', () => {
    const config = { includeDocumentStructure: true } as ExtractionConfig;
    const result = extractFileSync("docx/fake.docx", undefined, config);
    expect(result.mimeType.trim()).toBe("application/vnd.openxmlformats-officedocument.wordprocessingml.document");
    // skipped: field 'document' not available on result type
    // skipped: field 'document.nodes' not available on result type
  }, 30000);

  it('config_element_types: Tests element-based result format with element type assertions on DOCX', () => {
    const config = { resultFormat: "element_based" } as ExtractionConfig;
    const result = extractFileSync("docx/unit_test_headers.docx", undefined, config);
    const acceptedMimes = ["application/vnd.openxmlformats-officedocument.wordprocessingml.document"];
    expect(acceptedMimes.some((mime) => result.mimeType.includes(mime))).toBe(true);
    // skipped: field 'elements' not available on result type
  }, 30000);

  it('config_extraction_timeout: Tests that extraction_timeout_secs config field is accepted and does not affect fast extractions', () => {
    const config = { extractionTimeoutSecs: 300 } as ExtractionConfig;
    const result = extractFileSync("pdf/fake_memo.pdf", undefined, config);
    expect(result.mimeType.trim()).toBe("application/pdf");
    expect(result.content.length).toBeGreaterThanOrEqual(10);
  }, 30000);

  it('config_keywords: Tests keyword extraction via YAKE algorithm', () => {
    const config = { keywords: { algorithm: "yake", maxKeywords: 10 } } as ExtractionConfig;
    const result = extractFileSync("pdf/fake_memo.pdf", undefined, config);
    expect(result.mimeType.trim()).toBe("application/pdf");
    expect(result.content.length).toBeGreaterThanOrEqual(10);
    // skipped: field 'keywords' not available on Node JsExtractionResult
    // skipped: field 'keywords' not available on Node JsExtractionResult
  }, 30000);

  it('config_pages: Tests page extraction and page marker configuration', () => {
    const config = { pages: { extractPages: true, insertPageMarkers: true } } as ExtractionConfig;
    const result = extractFileSync("pdf/fake_memo.pdf", undefined, config);
    expect(result.mimeType.trim()).toBe("application/pdf");
    expect(result.content.length).toBeGreaterThanOrEqual(10);
    const anyOf = ["PAGE"];
    expect(anyOf.some((needle) => result.content.includes(needle))).toBe(true);
  }, 30000);

  it('config_quality_enabled: Tests quality scoring produces a score value in [0.0, 1.0]', () => {
    const config = { enableQualityProcessing: true } as ExtractionConfig;
    const result = extractFileSync("pdf/fake_memo.pdf", undefined, config);
    expect(result.mimeType.trim()).toBe("application/pdf");
    expect(result.content.length).toBeGreaterThanOrEqual(10);
    // skipped: field 'quality_score' not available on result type
    // skipped: field 'quality_score' not available on result type
    // skipped: field 'quality_score' not available on result type
  }, 30000);

  it('config_security_limits: Tests archive extraction with custom security limits', () => {
    const config = {
      securityLimits: { maxArchiveSize: 104857600, maxCompressionRatio: 50, maxFilesInArchive: 100 },
    } as ExtractionConfig;
    const result = extractFileSync("archives/documents.zip", undefined, config);
    const acceptedMimes = ["application/zip", "application/x-zip-compressed"];
    expect(acceptedMimes.some((mime) => result.mimeType.includes(mime))).toBe(true);
    expect(result.content.length).toBeGreaterThanOrEqual(10);
  }, 30000);

  it('config_tree_sitter: Tests tree-sitter configuration round-trip', () => {
    const config = {
      treeSitter: {
        groups: ["web"],
        languages: ["python", "rust"],
        process: { comments: false, diagnostics: false, docstrings: false, exports: true, imports: true, structure: true, symbols: false },
      },
    } as ExtractionConfig;
    const result = extractFileSync("code/hello.py", undefined, config);
    expect(result.mimeType.trim()).toBe("text/x-source-code");
    expect(result.content.length).toBeGreaterThanOrEqual(5);
  }, 30000);

  it('output_format_bytes_markdown: Tests markdown output format via bytes extraction API', async () => {
    const fs = await import('node:fs/promises');
    const pdfBytes = await fs.readFile('pdf/fake_memo.pdf');
    const config = { outputFormat: "markdown" } as ExtractionConfig;
    const result = extractBytesSync(pdfBytes, "application/pdf", config);
    expect(result.mimeType.trim()).toBe("application/pdf");
    expect(result.content.length).toBeGreaterThanOrEqual(10);
    // skipped: field 'metadata.output_format' not available on result type
  }, 30000);

  it('output_format_markdown: Tests Markdown output format', () => {
    const config = { outputFormat: "markdown" } as ExtractionConfig;
    const result = extractFileSync("pdf/fake_memo.pdf", undefined, config);
    expect(result.mimeType.trim()).toBe("application/pdf");
    expect(result.content.length).toBeGreaterThanOrEqual(10);
    // skipped: field 'metadata.output_format' not available on result type
  }, 30000);
});

48
e2e/node/tests/detection.test.ts generated Normal file
View File

@@ -0,0 +1,48 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
import { describe, expect, it } from 'vitest';import { extractFile, detectMimeTypeFromBytes, getExtensionsForMime } from '@kreuzberg/node';
/**
 * Converts an arbitrary value to text for assertion matching.
 * `null` and `undefined` become the empty string; anything else goes through `String()`.
 */
function _alefE2eText(value: unknown): string {
  if (value === null || value === undefined) {
    return "";
  }
  return String(value);
}
/**
 * Collects the text fragments of an item that assertions may match against.
 *
 * Nullish or non-object items yield a one-element array holding their text form.
 * Objects yield, in order: the stringified item itself, its `kind`, `name`,
 * `source`, `alias`, `text` and `signature` properties, and finally the
 * space-joined text of `items` (empty string when `items` is not an array).
 */
function _alefE2eItemTexts(item: unknown): string[] {
  if (typeof item !== "object" || item === null) {
    return [_alefE2eText(item)];
  }
  const fields = item as Record<string, unknown>;
  const nested = fields.items;
  const joinedItems = Array.isArray(nested) ? nested.map(_alefE2eText).join(" ") : "";
  const texts = [_alefE2eText(item)];
  for (const key of ["kind", "name", "source", "alias", "text", "signature"]) {
    texts.push(_alefE2eText(fields[key]));
  }
  texts.push(joinedItems);
  return texts;
}
/**
 * Produces the display string for a FormatMetadata value.
 *
 * FormatMetadata is a tagged union, e.g.
 * `{ format_type: 'image', image: { format: 'PNG', ... }, ... }`.
 *
 * @param fm - The metadata value; may be nullish, a primitive, or a record.
 * @returns `""` for nullish input, `String(fm)` for primitives, the image
 *   `format` string for the image variant when it is a string, otherwise the
 *   `format_type` tag (or `""` when the tag is not a string).
 */
function _alefE2eFormatMetadataDisplay(fm: unknown): string {
  if (fm == null) return "";
  if (typeof fm !== "object") return String(fm);
  const record = fm as Record<string, unknown>;
  const formatType = record.format_type;
  // `typeof null` is "object", so rule out null before reading `image.format`;
  // otherwise `{ format_type: "image", image: null }` throws a TypeError.
  if (formatType === "image" && typeof record.image === "object" && record.image !== null) {
    const imageData = record.image as Record<string, unknown>;
    if (typeof imageData.format === "string") return imageData.format;
  }
  // Fallback: return the format_type variant name.
  return typeof formatType === "string" ? formatType : "";
}
// MIME detection cases. The three byte-based cases carry no `expect`: they
// pass as long as reading the fixture and calling the detector do not throw.
describe('detection', () => {
  it('detect_mime_bytes_html: Detect HTML MIME from bytes', async () => {
    const fs = await import('node:fs/promises');
    const htmlBytes = await fs.readFile('html/html.html');
    detectMimeTypeFromBytes(htmlBytes);
  }, 30000);

  it('detect_mime_bytes_pdf: Detect PDF MIME type from bytes', async () => {
    const fs = await import('node:fs/promises');
    const pdfBytes = await fs.readFile('pdf/fake_memo.pdf');
    detectMimeTypeFromBytes(pdfBytes);
  }, 30000);

  it('detect_mime_bytes_png: Detect PNG MIME type from bytes', async () => {
    const fs = await import('node:fs/promises');
    const pngBytes = await fs.readFile('images/test_hello_world.png');
    detectMimeTypeFromBytes(pngBytes);
  }, 30000);

  it('get_extensions_unknown_mime: get_extensions unknown MIME', async () => {
    // Wrapped in an async callback so a throw from the call surfaces as a rejection.
    const lookup = async () => {
      await getExtensionsForMime("application/x-totally-unknown");
    };
    await expect(lookup).rejects.toThrow();
  }, 30000);
});

View File

@@ -0,0 +1,44 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
import { describe, expect, it } from 'vitest';import { extractFile, clearDocumentExtractors, listDocumentExtractors } from '@kreuzberg/node';
/**
 * Converts an arbitrary value to text for assertion matching.
 * `null` and `undefined` become the empty string; anything else goes through `String()`.
 */
function _alefE2eText(value: unknown): string {
  if (value === null || value === undefined) {
    return "";
  }
  return String(value);
}
/**
 * Collects the text fragments of an item that assertions may match against.
 *
 * Nullish or non-object items yield a one-element array holding their text form.
 * Objects yield, in order: the stringified item itself, its `kind`, `name`,
 * `source`, `alias`, `text` and `signature` properties, and finally the
 * space-joined text of `items` (empty string when `items` is not an array).
 */
function _alefE2eItemTexts(item: unknown): string[] {
  if (typeof item !== "object" || item === null) {
    return [_alefE2eText(item)];
  }
  const fields = item as Record<string, unknown>;
  const nested = fields.items;
  const joinedItems = Array.isArray(nested) ? nested.map(_alefE2eText).join(" ") : "";
  const texts = [_alefE2eText(item)];
  for (const key of ["kind", "name", "source", "alias", "text", "signature"]) {
    texts.push(_alefE2eText(fields[key]));
  }
  texts.push(joinedItems);
  return texts;
}
/**
 * Produces the display string for a FormatMetadata value.
 *
 * FormatMetadata is a tagged union, e.g.
 * `{ format_type: 'image', image: { format: 'PNG', ... }, ... }`.
 *
 * @param fm - The metadata value; may be nullish, a primitive, or a record.
 * @returns `""` for nullish input, `String(fm)` for primitives, the image
 *   `format` string for the image variant when it is a string, otherwise the
 *   `format_type` tag (or `""` when the tag is not a string).
 */
function _alefE2eFormatMetadataDisplay(fm: unknown): string {
  if (fm == null) return "";
  if (typeof fm !== "object") return String(fm);
  const record = fm as Record<string, unknown>;
  const formatType = record.format_type;
  // `typeof null` is "object", so rule out null before reading `image.format`;
  // otherwise `{ format_type: "image", image: null }` throws a TypeError.
  if (formatType === "image" && typeof record.image === "object" && record.image !== null) {
    const imageData = record.image as Record<string, unknown>;
    if (typeof imageData.format === "string") return imageData.format;
  }
  // Fallback: return the format_type variant name.
  return typeof formatType === "string" ? formatType : "";
}
// Registry management cases for document extractors. Neither case asserts on
// a result; each passes as long as the call does not throw.
describe('document_extractor_management', () => {
  it('document_extractors_clear: Clear all document extractors and verify list is empty', () => {
    clearDocumentExtractors();
  }, 30000);

  it('extractors_list: List all registered document extractors', () => {
    listDocumentExtractors();
  }, 30000);
});

View File

@@ -0,0 +1,48 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
import { describe, expect, it } from 'vitest';import { extractFile, embedTextsAsync, type EmbeddingConfig, type ExtractionConfig } from '@kreuzberg/node';
/**
 * Converts an arbitrary value to text for assertion matching.
 * `null` and `undefined` become the empty string; anything else goes through `String()`.
 */
function _alefE2eText(value: unknown): string {
  if (value === null || value === undefined) {
    return "";
  }
  return String(value);
}
/**
 * Collects the text fragments of an item that assertions may match against.
 *
 * Nullish or non-object items yield a one-element array holding their text form.
 * Objects yield, in order: the stringified item itself, its `kind`, `name`,
 * `source`, `alias`, `text` and `signature` properties, and finally the
 * space-joined text of `items` (empty string when `items` is not an array).
 */
function _alefE2eItemTexts(item: unknown): string[] {
  if (typeof item !== "object" || item === null) {
    return [_alefE2eText(item)];
  }
  const fields = item as Record<string, unknown>;
  const nested = fields.items;
  const joinedItems = Array.isArray(nested) ? nested.map(_alefE2eText).join(" ") : "";
  const texts = [_alefE2eText(item)];
  for (const key of ["kind", "name", "source", "alias", "text", "signature"]) {
    texts.push(_alefE2eText(fields[key]));
  }
  texts.push(joinedItems);
  return texts;
}
/**
 * Produces the display string for a FormatMetadata value.
 *
 * FormatMetadata is a tagged union, e.g.
 * `{ format_type: 'image', image: { format: 'PNG', ... }, ... }`.
 *
 * @param fm - The metadata value; may be nullish, a primitive, or a record.
 * @returns `""` for nullish input, `String(fm)` for primitives, the image
 *   `format` string for the image variant when it is a string, otherwise the
 *   `format_type` tag (or `""` when the tag is not a string).
 */
function _alefE2eFormatMetadataDisplay(fm: unknown): string {
  if (fm == null) return "";
  if (typeof fm !== "object") return String(fm);
  const record = fm as Record<string, unknown>;
  const formatType = record.format_type;
  // `typeof null` is "object", so rule out null before reading `image.format`;
  // otherwise `{ format_type: "image", image: null }` throws a TypeError.
  if (formatType === "image" && typeof record.image === "object" && record.image !== null) {
    const imageData = record.image as Record<string, unknown>;
    if (typeof imageData.format === "string") return imageData.format;
  }
  // Fallback: return the format_type variant name.
  return typeof formatType === "string" ? formatType : "";
}
// Async embedding cases: empty input, a two-text happy path, and a preset override.
describe('embed_async_pending', () => {
  it('embed_texts_async_empty_input: embed_texts_async: empty text list', async () => {
    const result = await embedTextsAsync([], undefined);
    expect(result.length).toBe(0);
  }, 600000);

  it('embed_texts_async_happy: embed_texts_async: basic async embedding', async () => {
    const result = await embedTextsAsync(["First", "Second"], undefined);
    expect(result.length).toBeGreaterThanOrEqual(2);
  }, 600000);

  it('embed_texts_async_preset_switch: embed_texts_async: preset override', async () => {
    // The literal selects an embedding model preset, so it is asserted as
    // EmbeddingConfig (imported by this file) rather than ExtractionConfig.
    // No `expect` here: the case passes as long as the call does not reject.
    const config = { model: { name: "balanced", type: "preset" } } as EmbeddingConfig;
    await embedTextsAsync(["Text"], config);
  }, 600000);
});

42
e2e/node/tests/embed_extra.test.ts generated Normal file
View File

@@ -0,0 +1,42 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
import { describe, expect, it } from 'vitest';import { extractFile, embedTexts, type EmbeddingConfig, type ExtractionConfig } from '@kreuzberg/node';
/**
 * Converts an arbitrary value to text for assertion matching.
 * `null` and `undefined` become the empty string; anything else goes through `String()`.
 */
function _alefE2eText(value: unknown): string {
  if (value === null || value === undefined) {
    return "";
  }
  return String(value);
}
/**
 * Collects the text fragments of an item that assertions may match against.
 *
 * Nullish or non-object items yield a one-element array holding their text form.
 * Objects yield, in order: the stringified item itself, its `kind`, `name`,
 * `source`, `alias`, `text` and `signature` properties, and finally the
 * space-joined text of `items` (empty string when `items` is not an array).
 */
function _alefE2eItemTexts(item: unknown): string[] {
  if (typeof item !== "object" || item === null) {
    return [_alefE2eText(item)];
  }
  const fields = item as Record<string, unknown>;
  const nested = fields.items;
  const joinedItems = Array.isArray(nested) ? nested.map(_alefE2eText).join(" ") : "";
  const texts = [_alefE2eText(item)];
  for (const key of ["kind", "name", "source", "alias", "text", "signature"]) {
    texts.push(_alefE2eText(fields[key]));
  }
  texts.push(joinedItems);
  return texts;
}
/**
 * Produces the display string for a FormatMetadata value.
 *
 * FormatMetadata is a tagged union, e.g.
 * `{ format_type: 'image', image: { format: 'PNG', ... }, ... }`.
 *
 * @param fm - The metadata value; may be nullish, a primitive, or a record.
 * @returns `""` for nullish input, `String(fm)` for primitives, the image
 *   `format` string for the image variant when it is a string, otherwise the
 *   `format_type` tag (or `""` when the tag is not a string).
 */
function _alefE2eFormatMetadataDisplay(fm: unknown): string {
  if (fm == null) return "";
  if (typeof fm !== "object") return String(fm);
  const record = fm as Record<string, unknown>;
  const formatType = record.format_type;
  // `typeof null` is "object", so rule out null before reading `image.format`;
  // otherwise `{ format_type: "image", image: null }` throws a TypeError.
  if (formatType === "image" && typeof record.image === "object" && record.image !== null) {
    const imageData = record.image as Record<string, unknown>;
    if (typeof imageData.format === "string") return imageData.format;
  }
  // Fallback: return the format_type variant name.
  return typeof formatType === "string" ? formatType : "";
}
// Synchronous batch-embedding case.
describe('embed_extra', () => {
  it('embed_texts_batch: Batch embed texts', () => {
    // The literal selects an embedding model preset, so it is asserted as
    // EmbeddingConfig (imported by this file) rather than ExtractionConfig.
    // No `expect` here: the case passes as long as the call does not throw.
    const config = { model: { name: "balanced", type: "preset" } } as EmbeddingConfig;
    embedTexts(["Hello", "World"], config);
  }, 600000);
});

View File

@@ -0,0 +1,44 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
import { describe, expect, it } from 'vitest';import { extractFile, clearEmbeddingBackends, listEmbeddingBackends } from '@kreuzberg/node';
/**
 * Converts an arbitrary value to text for assertion matching.
 * `null` and `undefined` become the empty string; anything else goes through `String()`.
 */
function _alefE2eText(value: unknown): string {
  if (value === null || value === undefined) {
    return "";
  }
  return String(value);
}
/**
 * Collects the text fragments of an item that assertions may match against.
 *
 * Nullish or non-object items yield a one-element array holding their text form.
 * Objects yield, in order: the stringified item itself, its `kind`, `name`,
 * `source`, `alias`, `text` and `signature` properties, and finally the
 * space-joined text of `items` (empty string when `items` is not an array).
 */
function _alefE2eItemTexts(item: unknown): string[] {
  if (typeof item !== "object" || item === null) {
    return [_alefE2eText(item)];
  }
  const fields = item as Record<string, unknown>;
  const nested = fields.items;
  const joinedItems = Array.isArray(nested) ? nested.map(_alefE2eText).join(" ") : "";
  const texts = [_alefE2eText(item)];
  for (const key of ["kind", "name", "source", "alias", "text", "signature"]) {
    texts.push(_alefE2eText(fields[key]));
  }
  texts.push(joinedItems);
  return texts;
}
/**
 * Produces the display string for a FormatMetadata value.
 *
 * FormatMetadata is a tagged union, e.g.
 * `{ format_type: 'image', image: { format: 'PNG', ... }, ... }`.
 *
 * @param fm - The metadata value; may be nullish, a primitive, or a record.
 * @returns `""` for nullish input, `String(fm)` for primitives, the image
 *   `format` string for the image variant when it is a string, otherwise the
 *   `format_type` tag (or `""` when the tag is not a string).
 */
function _alefE2eFormatMetadataDisplay(fm: unknown): string {
  if (fm == null) return "";
  if (typeof fm !== "object") return String(fm);
  const record = fm as Record<string, unknown>;
  const formatType = record.format_type;
  // `typeof null` is "object", so rule out null before reading `image.format`;
  // otherwise `{ format_type: "image", image: null }` throws a TypeError.
  if (formatType === "image" && typeof record.image === "object" && record.image !== null) {
    const imageData = record.image as Record<string, unknown>;
    if (typeof imageData.format === "string") return imageData.format;
  }
  // Fallback: return the format_type variant name.
  return typeof formatType === "string" ? formatType : "";
}
// Registry management cases for embedding backends. Neither case asserts on
// a result; each passes as long as the call does not throw.
describe('embedding_backend_management', () => {
  it('embedding_backends_clear: Clear all embedding backends and verify list is empty', () => {
    clearEmbeddingBackends();
  }, 600000);

  it('embedding_backends_list: List all registered embedding backends', () => {
    listEmbeddingBackends();
  }, 600000);
});

61
e2e/node/tests/embeddings.test.ts generated Normal file
View File

@@ -0,0 +1,61 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
import { describe, expect, it } from 'vitest';import { extractFile, embedTexts, getEmbeddingPreset, listEmbeddingPresets, type EmbeddingConfig, type ExtractionConfig } from '@kreuzberg/node';
/**
 * Converts an arbitrary value to text for assertion matching.
 * `null` and `undefined` become the empty string; anything else goes through `String()`.
 */
function _alefE2eText(value: unknown): string {
  if (value === null || value === undefined) {
    return "";
  }
  return String(value);
}
/**
 * Collects the text fragments of an item that assertions may match against.
 *
 * Nullish or non-object items yield a one-element array holding their text form.
 * Objects yield, in order: the stringified item itself, its `kind`, `name`,
 * `source`, `alias`, `text` and `signature` properties, and finally the
 * space-joined text of `items` (empty string when `items` is not an array).
 */
function _alefE2eItemTexts(item: unknown): string[] {
  if (typeof item !== "object" || item === null) {
    return [_alefE2eText(item)];
  }
  const fields = item as Record<string, unknown>;
  const nested = fields.items;
  const joinedItems = Array.isArray(nested) ? nested.map(_alefE2eText).join(" ") : "";
  const texts = [_alefE2eText(item)];
  for (const key of ["kind", "name", "source", "alias", "text", "signature"]) {
    texts.push(_alefE2eText(fields[key]));
  }
  texts.push(joinedItems);
  return texts;
}
/**
 * Produces the display string for a FormatMetadata value.
 *
 * FormatMetadata is a tagged union, e.g.
 * `{ format_type: 'image', image: { format: 'PNG', ... }, ... }`.
 *
 * @param fm - The metadata value; may be nullish, a primitive, or a record.
 * @returns `""` for nullish input, `String(fm)` for primitives, the image
 *   `format` string for the image variant when it is a string, otherwise the
 *   `format_type` tag (or `""` when the tag is not a string).
 */
function _alefE2eFormatMetadataDisplay(fm: unknown): string {
  if (fm == null) return "";
  if (typeof fm !== "object") return String(fm);
  const record = fm as Record<string, unknown>;
  const formatType = record.format_type;
  // `typeof null` is "object", so rule out null before reading `image.format`;
  // otherwise `{ format_type: "image", image: null }` throws a TypeError.
  if (formatType === "image" && typeof record.image === "object" && record.image !== null) {
    const imageData = record.image as Record<string, unknown>;
    if (typeof imageData.format === "string") return imageData.format;
  }
  // Fallback: return the format_type variant name.
  return typeof formatType === "string" ? formatType : "";
}
// Suite: embedding generation and embedding preset lookup.
describe('embeddings', () => {
  // Per-test timeout in milliseconds.
  const timeoutMs = 600000;

  it('embed_texts_different_preset: embed_texts: multilingual preset', () => {
    // The literal is an embedding config, so it is cast to EmbeddingConfig rather than
    // ExtractionConfig. NOTE(review): confirm against the embedTexts signature.
    const config = { model: { name: "multilingual", type: "preset" } } as EmbeddingConfig;
    const result = embedTexts(["Hello world", "Test"], config);
    expect(result.length).toBeGreaterThanOrEqual(2);
  }, timeoutMs);

  // Passes as long as the lookup does not throw.
  it('get_embedding_preset_known: get_embedding_preset: known preset', () => {
    getEmbeddingPreset("balanced");
  }, timeoutMs);

  // Passes as long as the lookup does not throw.
  it('get_embedding_preset_nominal: get_embedding_preset: nominal case', () => {
    getEmbeddingPreset("balanced");
  }, timeoutMs);

  it('get_embedding_preset_unknown: get_embedding_preset: unknown preset fails', () => {
    const result = getEmbeddingPreset("nonexistent-xyz");
    // A nullish result is treated as the empty string, whose length is 0.
    expect((result ?? "").length).toBe(0);
  }, timeoutMs);

  it('list_embedding_presets_sanity: list_embedding_presets: returns at least one', () => {
    const presets = listEmbeddingPresets();
    if (typeof presets === "string" || Array.isArray(presets)) {
      // Strings and arrays must be non-empty.
      expect(presets.length).toBeGreaterThan(0);
    } else {
      // Any other shape only has to be present.
      expect(presets).toBeDefined();
      expect(presets).not.toBeNull();
    }
  }, timeoutMs);
});

50
e2e/node/tests/error.test.ts generated Normal file
View File

@@ -0,0 +1,50 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
import { describe, expect, it } from 'vitest';import { extractFile, extractBytesSync, type ExtractionConfig } from '@kreuzberg/node';
/**
 * Converts an arbitrary value to text for use in assertions.
 *
 * @param value - Any value.
 * @returns The empty string for `null`/`undefined`, otherwise `String(value)`.
 */
function _alefE2eText(value: unknown): string {
  if (value === null || value === undefined) {
    return "";
  }
  return String(value);
}
/**
 * Lists the text candidates derived from one item.
 *
 * @param item - Any value; objects are probed for a fixed set of properties.
 * @returns For non-objects (and null/undefined), a single-element array holding the
 *   item's text. For objects, eight entries in order: the item itself, its `kind`,
 *   `name`, `source`, `alias`, `text` and `signature` properties, then the space-joined
 *   texts of `items` (empty string when `items` is not an array).
 */
function _alefE2eItemTexts(item: unknown): string[] {
  const isObject = item != null && typeof item === "object";
  if (!isObject) {
    return [_alefE2eText(item)];
  }
  const fields = item as Record<string, unknown>;

  // Nested items are flattened into one space-separated string.
  let joinedItems = "";
  if (Array.isArray(fields.items)) {
    joinedItems = fields.items.map((entry) => _alefE2eText(entry)).join(" ");
  }

  const texts: string[] = [_alefE2eText(item)];
  for (const key of ["kind", "name", "source", "alias", "text", "signature"]) {
    texts.push(_alefE2eText(fields[key]));
  }
  texts.push(joinedItems);
  return texts;
}
/**
 * Produces the display string for a FormatMetadata value.
 *
 * FormatMetadata is a tagged union such as
 * `{ format_type: 'image', image: { format: 'PNG', ... }, ... }`.
 *
 * @param fm - The metadata value; may be nullish, a primitive, or an object.
 * @returns `""` for null/undefined; `String(fm)` for non-objects; the image `format`
 *   for the image variant when it is a string; otherwise the `format_type` tag when
 *   it is a string; otherwise `""`.
 */
function _alefE2eFormatMetadataDisplay(fm: unknown): string {
  if (fm == null) return "";
  if (typeof fm !== "object") return String(fm);
  const record = fm as Record<string, unknown>;
  const formatType = record.format_type;
  // Image variant: prefer the concrete image format (e.g. 'PNG').
  // `typeof null` is "object", so null is excluded explicitly before reading `.format`.
  if (formatType === "image" && typeof record.image === "object" && record.image !== null) {
    const imageData = record.image as Record<string, unknown>;
    if (typeof imageData.format === "string") return imageData.format;
  }
  // Fallback: the variant tag itself.
  return typeof formatType === "string" ? formatType : "";
}
// Suite: error handling of extractBytesSync for empty input and bad MIME types / configs.
//
// In the failure cases the fixture is read BEFORE the `rejects` wrapper, so that a missing
// or unreadable fixture fails the test instead of being mistaken for the expected error.
describe('error', () => {
  // Passes as long as extraction of an empty file does not throw.
  it('error_empty_bytes: Graceful handling of empty bytes (should not error)', async () => {
    const fs = await import('node:fs/promises');
    const bytes = await fs.readFile('text/empty.txt');
    extractBytesSync(bytes, "text/plain", undefined);
  }, 30000);

  it('error_empty_mime: Error when extracting with empty MIME type', async () => {
    const fs = await import('node:fs/promises');
    const bytes = await fs.readFile('text/plain.txt');
    // The async wrapper turns a synchronous throw into a rejection for `rejects`.
    await expect(async () => {
      await extractBytesSync(bytes, "", undefined);
    }).rejects.toThrow();
  }, 30000);

  it('error_extract_bytes_conflicting_ocr: extract_bytes force+disable OCR', async () => {
    const fs = await import('node:fs/promises');
    const bytes = await fs.readFile('text/fake_text.txt');
    await expect(async () => {
      await extractBytesSync(bytes, "text/plain", { disableOcr: true, forceOcr: true } as ExtractionConfig);
    }).rejects.toThrow();
  }, 30000);

  it('error_invalid_mime_format: Error when extracting with invalid MIME type format', async () => {
    const fs = await import('node:fs/promises');
    const bytes = await fs.readFile('text/plain.txt');
    await expect(async () => {
      await extractBytesSync(bytes, "not-a-mime", undefined);
    }).rejects.toThrow();
  }, 30000);

  it('error_unsupported_mime: Error when extracting with unsupported MIME type', async () => {
    const fs = await import('node:fs/promises');
    const bytes = await fs.readFile('text/plain.txt');
    await expect(async () => {
      await extractBytesSync(bytes, "application/x-nonexistent", undefined);
    }).rejects.toThrow();
  }, 30000);
});

55
e2e/node/tests/format_specific.test.ts generated Normal file
View File

@@ -0,0 +1,55 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
import { describe, expect, it } from 'vitest';import { extractFile, extractBytesSync, extractFileSync } from '@kreuzberg/node';
/**
 * Converts an arbitrary value to text for use in assertions.
 *
 * @param value - Any value.
 * @returns The empty string for `null`/`undefined`, otherwise `String(value)`.
 */
function _alefE2eText(value: unknown): string {
  if (value === null || value === undefined) {
    return "";
  }
  return String(value);
}
/**
 * Lists the text candidates derived from one item.
 *
 * @param item - Any value; objects are probed for a fixed set of properties.
 * @returns For non-objects (and null/undefined), a single-element array holding the
 *   item's text. For objects, eight entries in order: the item itself, its `kind`,
 *   `name`, `source`, `alias`, `text` and `signature` properties, then the space-joined
 *   texts of `items` (empty string when `items` is not an array).
 */
function _alefE2eItemTexts(item: unknown): string[] {
  const isObject = item != null && typeof item === "object";
  if (!isObject) {
    return [_alefE2eText(item)];
  }
  const fields = item as Record<string, unknown>;

  // Nested items are flattened into one space-separated string.
  let joinedItems = "";
  if (Array.isArray(fields.items)) {
    joinedItems = fields.items.map((entry) => _alefE2eText(entry)).join(" ");
  }

  const texts: string[] = [_alefE2eText(item)];
  for (const key of ["kind", "name", "source", "alias", "text", "signature"]) {
    texts.push(_alefE2eText(fields[key]));
  }
  texts.push(joinedItems);
  return texts;
}
/**
 * Produces the display string for a FormatMetadata value.
 *
 * FormatMetadata is a tagged union such as
 * `{ format_type: 'image', image: { format: 'PNG', ... }, ... }`.
 *
 * @param fm - The metadata value; may be nullish, a primitive, or an object.
 * @returns `""` for null/undefined; `String(fm)` for non-objects; the image `format`
 *   for the image variant when it is a string; otherwise the `format_type` tag when
 *   it is a string; otherwise `""`.
 */
function _alefE2eFormatMetadataDisplay(fm: unknown): string {
  if (fm == null) return "";
  if (typeof fm !== "object") return String(fm);
  const record = fm as Record<string, unknown>;
  const formatType = record.format_type;
  // Image variant: prefer the concrete image format (e.g. 'PNG').
  // `typeof null` is "object", so null is excluded explicitly before reading `.format`.
  if (formatType === "image" && typeof record.image === "object" && record.image !== null) {
    const imageData = record.image as Record<string, unknown>;
    if (typeof imageData.format === "string") return imageData.format;
  }
  // Fallback: the variant tag itself.
  return typeof formatType === "string" ? formatType : "";
}
// Suite: per-format extraction through the synchronous byte and file entry points.
describe('format_specific', () => {
  // Reads a fixture via a dynamically imported node:fs/promises.
  const loadBytes = async (path: string) => (await import('node:fs/promises')).readFile(path);

  it('format_docx_standalone: Standalone DOCX extraction using extract_bytes_sync', async () => {
    const bytes = await loadBytes('docx/fake.docx');
    const result = extractBytesSync(
      bytes,
      "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
      undefined,
    );
    expect(result.content.length).toBeGreaterThanOrEqual(20);
  }, 30000);

  it('format_hwpx_standalone: Standalone HWPX extraction using extract_bytes_sync', async () => {
    const bytes = await loadBytes('hwpx/simple.hwpx');
    const result = extractBytesSync(bytes, "application/haansofthwpx", undefined);
    expect(result.content.length).toBeGreaterThanOrEqual(20);
    expect(result.content).toContain("Hello from HWPX");
  }, 30000);

  it('format_pdf_text: Standalone PDF text extraction using extract_bytes_sync', async () => {
    const bytes = await loadBytes('pdf/fake_memo.pdf');
    const result = extractBytesSync(bytes, "application/pdf", undefined);
    expect(result.content.length).toBeGreaterThanOrEqual(50);
    // At least one of the expected words must appear in the extracted text.
    const foundAny = ["Mallori", "May"].some((word) => result.content.includes(word));
    expect(foundAny).toBe(true);
  }, 30000);

  // The two file-based cases pass as long as extraction does not throw.
  it('format_pptx: PPTX presentation extraction using extract_file_sync', () => {
    extractFileSync(
      "pptx/simple.pptx",
      "application/vnd.openxmlformats-officedocument.presentationml.presentation",
      undefined,
    );
  }, 30000);

  it('format_xlsx: XLSX spreadsheet extraction using extract_file_sync', () => {
    extractFileSync(
      "xlsx/stanley_cups.xlsx",
      "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
      undefined,
    );
  }, 30000);
});

49
e2e/node/tests/mime_utilities.test.ts generated Normal file
View File

@@ -0,0 +1,49 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
import { describe, expect, it } from 'vitest';import { extractFile, detectMimeTypeFromBytes, getExtensionsForMime } from '@kreuzberg/node';
/**
 * Converts an arbitrary value to text for use in assertions.
 *
 * @param value - Any value.
 * @returns The empty string for `null`/`undefined`, otherwise `String(value)`.
 */
function _alefE2eText(value: unknown): string {
  if (value === null || value === undefined) {
    return "";
  }
  return String(value);
}
/**
 * Lists the text candidates derived from one item.
 *
 * @param item - Any value; objects are probed for a fixed set of properties.
 * @returns For non-objects (and null/undefined), a single-element array holding the
 *   item's text. For objects, eight entries in order: the item itself, its `kind`,
 *   `name`, `source`, `alias`, `text` and `signature` properties, then the space-joined
 *   texts of `items` (empty string when `items` is not an array).
 */
function _alefE2eItemTexts(item: unknown): string[] {
  const isObject = item != null && typeof item === "object";
  if (!isObject) {
    return [_alefE2eText(item)];
  }
  const fields = item as Record<string, unknown>;

  // Nested items are flattened into one space-separated string.
  let joinedItems = "";
  if (Array.isArray(fields.items)) {
    joinedItems = fields.items.map((entry) => _alefE2eText(entry)).join(" ");
  }

  const texts: string[] = [_alefE2eText(item)];
  for (const key of ["kind", "name", "source", "alias", "text", "signature"]) {
    texts.push(_alefE2eText(fields[key]));
  }
  texts.push(joinedItems);
  return texts;
}
/**
 * Produces the display string for a FormatMetadata value.
 *
 * FormatMetadata is a tagged union such as
 * `{ format_type: 'image', image: { format: 'PNG', ... }, ... }`.
 *
 * @param fm - The metadata value; may be nullish, a primitive, or an object.
 * @returns `""` for null/undefined; `String(fm)` for non-objects; the image `format`
 *   for the image variant when it is a string; otherwise the `format_type` tag when
 *   it is a string; otherwise `""`.
 */
function _alefE2eFormatMetadataDisplay(fm: unknown): string {
  if (fm == null) return "";
  if (typeof fm !== "object") return String(fm);
  const record = fm as Record<string, unknown>;
  const formatType = record.format_type;
  // Image variant: prefer the concrete image format (e.g. 'PNG').
  // `typeof null` is "object", so null is excluded explicitly before reading `.format`.
  if (formatType === "image" && typeof record.image === "object" && record.image !== null) {
    const imageData = record.image as Record<string, unknown>;
    if (typeof imageData.format === "string") return imageData.format;
  }
  // Fallback: the variant tag itself.
  return typeof formatType === "string" ? formatType : "";
}
// Suite: MIME detection and extension lookup.
// No assertions are emitted: the generator skipped the 'result' field check as
// "not applicable for simple result type", so each case passes if the call does not throw.
describe('mime_utilities', () => {
  it('mime_detect_bytes: Detect MIME type from file bytes', async () => {
    const fs = await import('node:fs/promises');
    const bytes = await fs.readFile('pdf/fake_memo.pdf');
    const detected = detectMimeTypeFromBytes(bytes);
  }, 30000);

  it('mime_detect_image: Detect MIME type from PNG image bytes', async () => {
    const fs = await import('node:fs/promises');
    const bytes = await fs.readFile('images/test_hello_world.png');
    const detected = detectMimeTypeFromBytes(bytes);
  }, 30000);

  it('mime_get_extensions: Get file extensions for a MIME type', () => {
    const extensions = getExtensionsForMime("application/pdf");
  }, 30000);
});

View File

@@ -0,0 +1,46 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
import { describe, expect, it } from 'vitest';import { extractFile, clearOcrBackends, listOcrBackends, unregisterOcrBackend } from '@kreuzberg/node';
/**
 * Converts an arbitrary value to text for use in assertions.
 *
 * @param value - Any value.
 * @returns The empty string for `null`/`undefined`, otherwise `String(value)`.
 */
function _alefE2eText(value: unknown): string {
  if (value === null || value === undefined) {
    return "";
  }
  return String(value);
}
/**
 * Lists the text candidates derived from one item.
 *
 * @param item - Any value; objects are probed for a fixed set of properties.
 * @returns For non-objects (and null/undefined), a single-element array holding the
 *   item's text. For objects, eight entries in order: the item itself, its `kind`,
 *   `name`, `source`, `alias`, `text` and `signature` properties, then the space-joined
 *   texts of `items` (empty string when `items` is not an array).
 */
function _alefE2eItemTexts(item: unknown): string[] {
  const isObject = item != null && typeof item === "object";
  if (!isObject) {
    return [_alefE2eText(item)];
  }
  const fields = item as Record<string, unknown>;

  // Nested items are flattened into one space-separated string.
  let joinedItems = "";
  if (Array.isArray(fields.items)) {
    joinedItems = fields.items.map((entry) => _alefE2eText(entry)).join(" ");
  }

  const texts: string[] = [_alefE2eText(item)];
  for (const key of ["kind", "name", "source", "alias", "text", "signature"]) {
    texts.push(_alefE2eText(fields[key]));
  }
  texts.push(joinedItems);
  return texts;
}
/**
 * Produces the display string for a FormatMetadata value.
 *
 * FormatMetadata is a tagged union such as
 * `{ format_type: 'image', image: { format: 'PNG', ... }, ... }`.
 *
 * @param fm - The metadata value; may be nullish, a primitive, or an object.
 * @returns `""` for null/undefined; `String(fm)` for non-objects; the image `format`
 *   for the image variant when it is a string; otherwise the `format_type` tag when
 *   it is a string; otherwise `""`.
 */
function _alefE2eFormatMetadataDisplay(fm: unknown): string {
  if (fm == null) return "";
  if (typeof fm !== "object") return String(fm);
  const record = fm as Record<string, unknown>;
  const formatType = record.format_type;
  // Image variant: prefer the concrete image format (e.g. 'PNG').
  // `typeof null` is "object", so null is excluded explicitly before reading `.format`.
  if (formatType === "image" && typeof record.image === "object" && record.image !== null) {
    const imageData = record.image as Record<string, unknown>;
    if (typeof imageData.format === "string") return imageData.format;
  }
  // Fallback: the variant tag itself.
  return typeof formatType === "string" ? formatType : "";
}
// Suite: OCR backend registry management.
// Each case passes as long as the call under test does not throw; no assertions are made.
describe('ocr_backend_management', () => {
  // Per-test timeout in milliseconds.
  const timeoutMs = 30000;

  // NOTE(review): the title mentions verifying that the list is empty, but nothing is asserted.
  it(
    'ocr_backends_clear: Clear all OCR backends and verify list is empty',
    () => {
      clearOcrBackends();
    },
    timeoutMs,
  );

  it(
    'ocr_backends_list: List all registered OCR backends',
    () => {
      listOcrBackends();
    },
    timeoutMs,
  );

  it(
    'ocr_backends_unregister: Unregister nonexistent OCR backend gracefully',
    () => {
      unregisterOcrBackend("nonexistent-backend-xyz");
    },
    timeoutMs,
  );
});

45
e2e/node/tests/pdf.test.ts generated Normal file
View File

@@ -0,0 +1,45 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
import { describe, expect, it } from 'vitest';import { extractFile, renderPdfPageToPng } from '@kreuzberg/node';
/**
 * Converts an arbitrary value to text for use in assertions.
 *
 * @param value - Any value.
 * @returns The empty string for `null`/`undefined`, otherwise `String(value)`.
 */
function _alefE2eText(value: unknown): string {
  if (value === null || value === undefined) {
    return "";
  }
  return String(value);
}
/**
 * Lists the text candidates derived from one item.
 *
 * @param item - Any value; objects are probed for a fixed set of properties.
 * @returns For non-objects (and null/undefined), a single-element array holding the
 *   item's text. For objects, eight entries in order: the item itself, its `kind`,
 *   `name`, `source`, `alias`, `text` and `signature` properties, then the space-joined
 *   texts of `items` (empty string when `items` is not an array).
 */
function _alefE2eItemTexts(item: unknown): string[] {
  const isObject = item != null && typeof item === "object";
  if (!isObject) {
    return [_alefE2eText(item)];
  }
  const fields = item as Record<string, unknown>;

  // Nested items are flattened into one space-separated string.
  let joinedItems = "";
  if (Array.isArray(fields.items)) {
    joinedItems = fields.items.map((entry) => _alefE2eText(entry)).join(" ");
  }

  const texts: string[] = [_alefE2eText(item)];
  for (const key of ["kind", "name", "source", "alias", "text", "signature"]) {
    texts.push(_alefE2eText(fields[key]));
  }
  texts.push(joinedItems);
  return texts;
}
/**
 * Produces the display string for a FormatMetadata value.
 *
 * FormatMetadata is a tagged union such as
 * `{ format_type: 'image', image: { format: 'PNG', ... }, ... }`.
 *
 * @param fm - The metadata value; may be nullish, a primitive, or an object.
 * @returns `""` for null/undefined; `String(fm)` for non-objects; the image `format`
 *   for the image variant when it is a string; otherwise the `format_type` tag when
 *   it is a string; otherwise `""`.
 */
function _alefE2eFormatMetadataDisplay(fm: unknown): string {
  if (fm == null) return "";
  if (typeof fm !== "object") return String(fm);
  const record = fm as Record<string, unknown>;
  const formatType = record.format_type;
  // Image variant: prefer the concrete image format (e.g. 'PNG').
  // `typeof null` is "object", so null is excluded explicitly before reading `.format`.
  if (formatType === "image" && typeof record.image === "object" && record.image !== null) {
    const imageData = record.image as Record<string, unknown>;
    if (typeof imageData.format === "string") return imageData.format;
  }
  // Fallback: the variant tag itself.
  return typeof formatType === "string" ? formatType : "";
}
// Suite: rendering a PDF page to PNG.
describe('pdf', () => {
  it('render_pdf_page_first: render_pdf_page_to_png: first page', async () => {
    const fs = await import('node:fs/promises');
    const pdfBytes = await fs.readFile('pdf/fake_memo.pdf');
    const result = renderPdfPageToPng(pdfBytes, 0);
    expect(result.length).toBeGreaterThanOrEqual(100);
  }, 30000);

  it('render_pdf_page_out_of_range: render_pdf_page_to_png: page out of range', async () => {
    // The fixture is read BEFORE the `rejects` wrapper so that a missing or unreadable
    // fixture fails the test instead of being mistaken for the expected error.
    const fs = await import('node:fs/promises');
    const pdfBytes = await fs.readFile('pdf/fake_memo.pdf');
    // The async wrapper turns a synchronous throw into a rejection for `rejects`.
    await expect(async () => {
      await renderPdfPageToPng(pdfBytes, 999);
    }).rejects.toThrow();
  }, 30000);
});

104
e2e/node/tests/plugin_api.test.ts generated Normal file
View File

@@ -0,0 +1,104 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
import { describe, expect, it } from 'vitest';import { extractFile, registerDocumentExtractor, registerEmbeddingBackend, registerOcrBackend, registerPostProcessor, registerRenderer, registerValidator, unregisterDocumentExtractor, unregisterEmbeddingBackend, unregisterPostProcessor, unregisterRenderer, unregisterValidator } from '@kreuzberg/node';
/**
 * Converts an arbitrary value to text for use in assertions.
 *
 * @param value - Any value.
 * @returns The empty string for `null`/`undefined`, otherwise `String(value)`.
 */
function _alefE2eText(value: unknown): string {
  if (value === null || value === undefined) {
    return "";
  }
  return String(value);
}
/**
 * Lists the text candidates derived from one item.
 *
 * @param item - Any value; objects are probed for a fixed set of properties.
 * @returns For non-objects (and null/undefined), a single-element array holding the
 *   item's text. For objects, eight entries in order: the item itself, its `kind`,
 *   `name`, `source`, `alias`, `text` and `signature` properties, then the space-joined
 *   texts of `items` (empty string when `items` is not an array).
 */
function _alefE2eItemTexts(item: unknown): string[] {
  const isObject = item != null && typeof item === "object";
  if (!isObject) {
    return [_alefE2eText(item)];
  }
  const fields = item as Record<string, unknown>;

  // Nested items are flattened into one space-separated string.
  let joinedItems = "";
  if (Array.isArray(fields.items)) {
    joinedItems = fields.items.map((entry) => _alefE2eText(entry)).join(" ");
  }

  const texts: string[] = [_alefE2eText(item)];
  for (const key of ["kind", "name", "source", "alias", "text", "signature"]) {
    texts.push(_alefE2eText(fields[key]));
  }
  texts.push(joinedItems);
  return texts;
}
/**
 * Produces the display string for a FormatMetadata value.
 *
 * FormatMetadata is a tagged union such as
 * `{ format_type: 'image', image: { format: 'PNG', ... }, ... }`.
 *
 * @param fm - The metadata value; may be nullish, a primitive, or an object.
 * @returns `""` for null/undefined; `String(fm)` for non-objects; the image `format`
 *   for the image variant when it is a string; otherwise the `format_type` tag when
 *   it is a string; otherwise `""`.
 */
function _alefE2eFormatMetadataDisplay(fm: unknown): string {
  if (fm == null) return "";
  if (typeof fm !== "object") return String(fm);
  const record = fm as Record<string, unknown>;
  const formatType = record.format_type;
  // Image variant: prefer the concrete image format (e.g. 'PNG').
  // `typeof null` is "object", so null is excluded explicitly before reading `.format`.
  if (formatType === "image" && typeof record.image === "object" && record.image !== null) {
    const imageData = record.image as Record<string, unknown>;
    if (typeof imageData.format === "string") return imageData.format;
  }
  // Fallback: the variant tag itself.
  return typeof formatType === "string" ? formatType : "";
}
// Suite: plugin registration through stub implementations, followed by unregistration.
//
// Each register case defines a local stub class and hands an instance to the matching
// register function; the case passes as long as that call does not throw. Stub return
// values are placeholders. Methods that return `[]` are typed as arrays (they were
// declared as `string`), and unused parameters are `unknown` rather than `any`.
describe('plugin_api', () => {
  it('register_document_extractor_trait_bridge: register_document_extractor: trait bridge', () => {
    class _TestStub_register_document_extractor_trait_bridge {
      name(): string { return "test-extractor"; }
      async extract_bytes(_p0?: unknown, _p1?: unknown, _p2?: unknown): Promise<string> { return "{}"; }
      async extract_file(_p0?: unknown, _p1?: unknown, _p2?: unknown): Promise<string> { return "{}"; }
      supported_mime_types(): string[] { return []; }
      priority(): number { return 1; }
      can_handle(_p0?: unknown, _p1?: unknown): boolean { return false; }
    }
    registerDocumentExtractor(new _TestStub_register_document_extractor_trait_bridge());
  }, 30000);

  it('register_embedding_backend_trait_bridge: register_embedding_backend: trait bridge', () => {
    class _TestStub_register_embedding_backend_trait_bridge {
      name(): string { return "test-embedding-backend"; }
      dimensions(): number { return 1; }
      // NOTE(review): the element type of the returned array is not visible from here.
      async embed(_p0?: unknown): Promise<unknown[]> { return []; }
    }
    registerEmbeddingBackend(new _TestStub_register_embedding_backend_trait_bridge());
  }, 600000);

  it('register_ocr_backend_trait_bridge: register_ocr_backend: trait bridge', () => {
    class _TestStub_register_ocr_backend_trait_bridge {
      name(): string { return "test-backend"; }
      async process_image(_p0?: unknown, _p1?: unknown): Promise<string> { return "{}"; }
      async process_image_file(_p0?: unknown, _p1?: unknown): Promise<string> { return "{}"; }
      supports_language(_p0?: unknown): boolean { return false; }
      backend_type(): string { return "{}"; }
      supported_languages(): string[] { return []; }
      supports_table_detection(): boolean { return false; }
      supports_document_processing(): boolean { return false; }
      async process_document(_p0?: unknown, _p1?: unknown): Promise<string> { return "{}"; }
    }
    registerOcrBackend(new _TestStub_register_ocr_backend_trait_bridge());
  }, 30000);

  it('register_post_processor_trait_bridge: register_post_processor: trait bridge', () => {
    class _TestStub_register_post_processor_trait_bridge {
      name(): string { return "test-processor"; }
      async process(_p0?: unknown, _p1?: unknown): Promise<void> { return undefined; }
      processing_stage(): string { return "{}"; }
      should_process(_p0?: unknown, _p1?: unknown): boolean { return false; }
      estimated_duration_ms(_p0?: unknown): number { return 1; }
      priority(): number { return 1; }
    }
    registerPostProcessor(new _TestStub_register_post_processor_trait_bridge());
  }, 30000);

  it('register_renderer_trait_bridge: register_renderer: trait bridge', () => {
    class _TestStub_register_renderer_trait_bridge {
      name(): string { return "test-renderer"; }
      render(_p0?: unknown): string { return ""; }
    }
    registerRenderer(new _TestStub_register_renderer_trait_bridge());
  }, 30000);

  it('register_validator_trait_bridge: register_validator: trait bridge', () => {
    class _TestStub_register_validator_trait_bridge {
      name(): string { return "test-validator"; }
      async validate(_p0?: unknown, _p1?: unknown): Promise<void> { return undefined; }
      should_validate(_p0?: unknown, _p1?: unknown): boolean { return false; }
      priority(): number { return 1; }
    }
    registerValidator(new _TestStub_register_validator_trait_bridge());
  }, 30000);

  // The unregister cases use the same names the stubs above report from name().
  it('unregister_document_extractor_after_register: unregister_document_extractor', () => {
    unregisterDocumentExtractor("test-extractor");
  }, 30000);

  it('unregister_embedding_backend_after_register: unregister_embedding_backend', () => {
    unregisterEmbeddingBackend("test-embedding-backend");
  }, 600000);

  it('unregister_post_processor_after_register: unregister_post_processor', () => {
    unregisterPostProcessor("test-processor");
  }, 30000);

  it('unregister_renderer_after_register: unregister_renderer', () => {
    unregisterRenderer("test-renderer");
  }, 30000);

  it('unregister_validator_after_register: unregister_validator', () => {
    unregisterValidator("test-validator");
  }, 30000);
});

View File

@@ -0,0 +1,44 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
import { describe, expect, it } from 'vitest';import { extractFile, clearPostProcessors, listPostProcessors } from '@kreuzberg/node';
/**
 * Converts an arbitrary value to text for use in assertions.
 *
 * @param value - Any value.
 * @returns The empty string for `null`/`undefined`, otherwise `String(value)`.
 */
function _alefE2eText(value: unknown): string {
  if (value === null || value === undefined) {
    return "";
  }
  return String(value);
}
/**
 * Lists the text candidates derived from one item.
 *
 * @param item - Any value; objects are probed for a fixed set of properties.
 * @returns For non-objects (and null/undefined), a single-element array holding the
 *   item's text. For objects, eight entries in order: the item itself, its `kind`,
 *   `name`, `source`, `alias`, `text` and `signature` properties, then the space-joined
 *   texts of `items` (empty string when `items` is not an array).
 */
function _alefE2eItemTexts(item: unknown): string[] {
  const isObject = item != null && typeof item === "object";
  if (!isObject) {
    return [_alefE2eText(item)];
  }
  const fields = item as Record<string, unknown>;

  // Nested items are flattened into one space-separated string.
  let joinedItems = "";
  if (Array.isArray(fields.items)) {
    joinedItems = fields.items.map((entry) => _alefE2eText(entry)).join(" ");
  }

  const texts: string[] = [_alefE2eText(item)];
  for (const key of ["kind", "name", "source", "alias", "text", "signature"]) {
    texts.push(_alefE2eText(fields[key]));
  }
  texts.push(joinedItems);
  return texts;
}
/**
 * Produces the display string for a FormatMetadata value.
 *
 * FormatMetadata is a tagged union such as
 * `{ format_type: 'image', image: { format: 'PNG', ... }, ... }`.
 *
 * @param fm - The metadata value; may be nullish, a primitive, or an object.
 * @returns `""` for null/undefined; `String(fm)` for non-objects; the image `format`
 *   for the image variant when it is a string; otherwise the `format_type` tag when
 *   it is a string; otherwise `""`.
 */
function _alefE2eFormatMetadataDisplay(fm: unknown): string {
  if (fm == null) return "";
  if (typeof fm !== "object") return String(fm);
  const record = fm as Record<string, unknown>;
  const formatType = record.format_type;
  // Image variant: prefer the concrete image format (e.g. 'PNG').
  // `typeof null` is "object", so null is excluded explicitly before reading `.format`.
  if (formatType === "image" && typeof record.image === "object" && record.image !== null) {
    const imageData = record.image as Record<string, unknown>;
    if (typeof imageData.format === "string") return imageData.format;
  }
  // Fallback: the variant tag itself.
  return typeof formatType === "string" ? formatType : "";
}
// Suite: post-processor registry management.
// Each case passes as long as the call under test does not throw; no assertions are made.
describe('post_processor_management', () => {
  // Per-test timeout in milliseconds.
  const timeoutMs = 30000;

  // NOTE(review): the title mentions verifying that the list is empty, but nothing is asserted.
  it(
    'post_processors_clear: Clear all post-processors and verify list is empty',
    () => {
      clearPostProcessors();
    },
    timeoutMs,
  );

  it(
    'post_processors_list: List all registered post-processors',
    () => {
      listPostProcessors();
    },
    timeoutMs,
  );
});

52
e2e/node/tests/registry.test.ts generated Normal file
View File

@@ -0,0 +1,52 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
import { describe, expect, it } from 'vitest';import { extractFile, listDocumentExtractors, listEmbeddingBackends, listOcrBackends, listPostProcessors, listRenderers, listValidators } from '@kreuzberg/node';
/**
 * Converts an arbitrary value to text for use in assertions.
 *
 * @param value - Any value.
 * @returns The empty string for `null`/`undefined`, otherwise `String(value)`.
 */
function _alefE2eText(value: unknown): string {
  if (value === null || value === undefined) {
    return "";
  }
  return String(value);
}
/**
 * Lists the text candidates derived from one item.
 *
 * @param item - Any value; objects are probed for a fixed set of properties.
 * @returns For non-objects (and null/undefined), a single-element array holding the
 *   item's text. For objects, eight entries in order: the item itself, its `kind`,
 *   `name`, `source`, `alias`, `text` and `signature` properties, then the space-joined
 *   texts of `items` (empty string when `items` is not an array).
 */
function _alefE2eItemTexts(item: unknown): string[] {
  const isObject = item != null && typeof item === "object";
  if (!isObject) {
    return [_alefE2eText(item)];
  }
  const fields = item as Record<string, unknown>;

  // Nested items are flattened into one space-separated string.
  let joinedItems = "";
  if (Array.isArray(fields.items)) {
    joinedItems = fields.items.map((entry) => _alefE2eText(entry)).join(" ");
  }

  const texts: string[] = [_alefE2eText(item)];
  for (const key of ["kind", "name", "source", "alias", "text", "signature"]) {
    texts.push(_alefE2eText(fields[key]));
  }
  texts.push(joinedItems);
  return texts;
}
/**
 * Produces the display string for a FormatMetadata value.
 *
 * FormatMetadata is a tagged union such as
 * `{ format_type: 'image', image: { format: 'PNG', ... }, ... }`.
 *
 * @param fm - The metadata value; may be nullish, a primitive, or an object.
 * @returns `""` for null/undefined; `String(fm)` for non-objects; the image `format`
 *   for the image variant when it is a string; otherwise the `format_type` tag when
 *   it is a string; otherwise `""`.
 */
function _alefE2eFormatMetadataDisplay(fm: unknown): string {
  if (fm == null) return "";
  if (typeof fm !== "object") return String(fm);
  const record = fm as Record<string, unknown>;
  const formatType = record.format_type;
  // Image variant: prefer the concrete image format (e.g. 'PNG').
  // `typeof null` is "object", so null is excluded explicitly before reading `.format`.
  if (formatType === "image" && typeof record.image === "object" && record.image !== null) {
    const imageData = record.image as Record<string, unknown>;
    if (typeof imageData.format === "string") return imageData.format;
  }
  // Fallback: the variant tag itself.
  return typeof formatType === "string" ? formatType : "";
}
// Suite: listing every plugin registry.
// Each case passes as long as the list call does not throw; no assertions are made.
describe('registry', () => {
  it(
    'list_document_extractors: List document extractors',
    () => {
      listDocumentExtractors();
    },
    30000,
  );

  // The embedding case is the only one with the longer timeout.
  it(
    'list_embedding_backends: List embedding backends',
    () => {
      listEmbeddingBackends();
    },
    600000,
  );

  it(
    'list_ocr_backends: List OCR backends',
    () => {
      listOcrBackends();
    },
    30000,
  );

  it(
    'list_post_processors: List post-processors',
    () => {
      listPostProcessors();
    },
    30000,
  );

  it(
    'list_renderers: List renderers',
    () => {
      listRenderers();
    },
    30000,
  );

  it(
    'list_validators: List validators',
    () => {
      listValidators();
    },
    30000,
  );
});

View File

@@ -0,0 +1,46 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
import { describe, expect, it } from 'vitest';import { extractFile, getExtensionsForMime } from '@kreuzberg/node';
/**
 * Converts an arbitrary value to text for use in assertions.
 *
 * @param value - Any value.
 * @returns The empty string for `null`/`undefined`, otherwise `String(value)`.
 */
function _alefE2eText(value: unknown): string {
  if (value === null || value === undefined) {
    return "";
  }
  return String(value);
}
/**
 * Lists the text candidates derived from one item.
 *
 * @param item - Any value; objects are probed for a fixed set of properties.
 * @returns For non-objects (and null/undefined), a single-element array holding the
 *   item's text. For objects, eight entries in order: the item itself, its `kind`,
 *   `name`, `source`, `alias`, `text` and `signature` properties, then the space-joined
 *   texts of `items` (empty string when `items` is not an array).
 */
function _alefE2eItemTexts(item: unknown): string[] {
  const isObject = item != null && typeof item === "object";
  if (!isObject) {
    return [_alefE2eText(item)];
  }
  const fields = item as Record<string, unknown>;

  // Nested items are flattened into one space-separated string.
  let joinedItems = "";
  if (Array.isArray(fields.items)) {
    joinedItems = fields.items.map((entry) => _alefE2eText(entry)).join(" ");
  }

  const texts: string[] = [_alefE2eText(item)];
  for (const key of ["kind", "name", "source", "alias", "text", "signature"]) {
    texts.push(_alefE2eText(fields[key]));
  }
  texts.push(joinedItems);
  return texts;
}
/**
 * Produces the display string for a FormatMetadata value.
 *
 * FormatMetadata is a tagged union such as
 * `{ format_type: 'image', image: { format: 'PNG', ... }, ... }`.
 *
 * @param fm - The metadata value; may be nullish, a primitive, or an object.
 * @returns `""` for null/undefined; `String(fm)` for non-objects; the image `format`
 *   for the image variant when it is a string; otherwise the `format_type` tag when
 *   it is a string; otherwise `""`.
 */
function _alefE2eFormatMetadataDisplay(fm: unknown): string {
  if (fm == null) return "";
  if (typeof fm !== "object") return String(fm);
  const record = fm as Record<string, unknown>;
  const formatType = record.format_type;
  // Image variant: prefer the concrete image format (e.g. 'PNG').
  // `typeof null` is "object", so null is excluded explicitly before reading `.format`.
  if (formatType === "image" && typeof record.image === "object" && record.image !== null) {
    const imageData = record.image as Record<string, unknown>;
    if (typeof imageData.format === "string") return imageData.format;
  }
  // Fallback: the variant tag itself.
  return typeof formatType === "string" ? formatType : "";
}
// Suite: extension lookup for a few MIME types.
// Each case passes as long as the lookup does not throw; no assertions are made.
describe('registry_operations', () => {
  // One [test title, MIME type] pair per case, registered in this order.
  const cases: ReadonlyArray<readonly [string, string]> = [
    [
      'extensions_docx: Get file extensions for DOCX MIME type',
      "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    ],
    ['extensions_html: Get file extensions for HTML MIME type', "text/html"],
    ['extensions_pdf: Get file extensions for PDF MIME type', "application/pdf"],
  ];

  for (const [title, mimeType] of cases) {
    it(
      title,
      () => {
        getExtensionsForMime(mimeType);
      },
      30000,
    );
  }
});

View File

@@ -0,0 +1,44 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
import { describe, expect, it } from 'vitest';import { extractFile, clearRenderers, listRenderers } from '@kreuzberg/node';
/**
 * Converts an arbitrary value to text for use in assertions.
 *
 * @param value - Any value.
 * @returns The empty string for `null`/`undefined`, otherwise `String(value)`.
 */
function _alefE2eText(value: unknown): string {
  if (value === null || value === undefined) {
    return "";
  }
  return String(value);
}
/**
 * Lists the text candidates derived from one item.
 *
 * @param item - Any value; objects are probed for a fixed set of properties.
 * @returns For non-objects (and null/undefined), a single-element array holding the
 *   item's text. For objects, eight entries in order: the item itself, its `kind`,
 *   `name`, `source`, `alias`, `text` and `signature` properties, then the space-joined
 *   texts of `items` (empty string when `items` is not an array).
 */
function _alefE2eItemTexts(item: unknown): string[] {
  const isObject = item != null && typeof item === "object";
  if (!isObject) {
    return [_alefE2eText(item)];
  }
  const fields = item as Record<string, unknown>;

  // Nested items are flattened into one space-separated string.
  let joinedItems = "";
  if (Array.isArray(fields.items)) {
    joinedItems = fields.items.map((entry) => _alefE2eText(entry)).join(" ");
  }

  const texts: string[] = [_alefE2eText(item)];
  for (const key of ["kind", "name", "source", "alias", "text", "signature"]) {
    texts.push(_alefE2eText(fields[key]));
  }
  texts.push(joinedItems);
  return texts;
}
/**
 * Produces the display string for a FormatMetadata value.
 *
 * FormatMetadata is a tagged union such as
 * `{ format_type: 'image', image: { format: 'PNG', ... }, ... }`.
 *
 * @param fm - The metadata value; may be nullish, a primitive, or an object.
 * @returns `""` for null/undefined; `String(fm)` for non-objects; the image `format`
 *   for the image variant when it is a string; otherwise the `format_type` tag when
 *   it is a string; otherwise `""`.
 */
function _alefE2eFormatMetadataDisplay(fm: unknown): string {
  if (fm == null) return "";
  if (typeof fm !== "object") return String(fm);
  const record = fm as Record<string, unknown>;
  const formatType = record.format_type;
  // Image variant: prefer the concrete image format (e.g. 'PNG').
  // `typeof null` is "object", so null is excluded explicitly before reading `.format`.
  if (formatType === "image" && typeof record.image === "object" && record.image !== null) {
    const imageData = record.image as Record<string, unknown>;
    if (typeof imageData.format === "string") return imageData.format;
  }
  // Fallback: the variant tag itself.
  return typeof formatType === "string" ? formatType : "";
}
// Suite: renderer registry management.
// Each case passes as long as the call under test does not throw; no assertions are made.
describe('renderer_management', () => {
  // Per-test timeout in milliseconds.
  const timeoutMs = 30000;

  // NOTE(review): the title mentions verifying that the list is empty, but nothing is asserted.
  it(
    'renderers_clear: Clear all renderers and verify list is empty',
    () => {
      clearRenderers();
    },
    timeoutMs,
  );

  it(
    'renderers_list: List all registered renderers',
    () => {
      listRenderers();
    },
    timeoutMs,
  );
});

87
e2e/node/tests/smoke.test.ts generated Normal file
View File

@@ -0,0 +1,87 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
import { describe, expect, it } from 'vitest';import { extractFile, extractBytes, type ExtractionConfig } from '@kreuzberg/node';
/**
 * Converts an arbitrary value to text for use in assertions.
 *
 * @param value - Any value.
 * @returns The empty string for `null`/`undefined`, otherwise `String(value)`.
 */
function _alefE2eText(value: unknown): string {
  if (value === null || value === undefined) {
    return "";
  }
  return String(value);
}
/**
 * Lists the text candidates derived from one item.
 *
 * @param item - Any value; objects are probed for a fixed set of properties.
 * @returns For non-objects (and null/undefined), a single-element array holding the
 *   item's text. For objects, eight entries in order: the item itself, its `kind`,
 *   `name`, `source`, `alias`, `text` and `signature` properties, then the space-joined
 *   texts of `items` (empty string when `items` is not an array).
 */
function _alefE2eItemTexts(item: unknown): string[] {
  const isObject = item != null && typeof item === "object";
  if (!isObject) {
    return [_alefE2eText(item)];
  }
  const fields = item as Record<string, unknown>;

  // Nested items are flattened into one space-separated string.
  let joinedItems = "";
  if (Array.isArray(fields.items)) {
    joinedItems = fields.items.map((entry) => _alefE2eText(entry)).join(" ");
  }

  const texts: string[] = [_alefE2eText(item)];
  for (const key of ["kind", "name", "source", "alias", "text", "signature"]) {
    texts.push(_alefE2eText(fields[key]));
  }
  texts.push(joinedItems);
  return texts;
}
/**
 * Produces the display string for a FormatMetadata value.
 *
 * FormatMetadata is a tagged union such as
 * `{ format_type: 'image', image: { format: 'PNG', ... }, ... }`.
 *
 * @param fm - The metadata value; may be nullish, a primitive, or an object.
 * @returns `""` for null/undefined; `String(fm)` for non-objects; the image `format`
 *   for the image variant when it is a string; otherwise the `format_type` tag when
 *   it is a string; otherwise `""`.
 */
function _alefE2eFormatMetadataDisplay(fm: unknown): string {
  if (fm == null) return "";
  if (typeof fm !== "object") return String(fm);
  const record = fm as Record<string, unknown>;
  const formatType = record.format_type;
  // Image variant: prefer the concrete image format (e.g. 'PNG').
  // `typeof null` is "object", so null is excluded explicitly before reading `.format`.
  if (formatType === "image" && typeof record.image === "object" && record.image !== null) {
    const imageData = record.image as Record<string, unknown>;
    if (typeof imageData.format === "string") return imageData.format;
  }
  // Fallback: the variant tag itself.
  return typeof formatType === "string" ? formatType : "";
}
// End-to-end smoke suite: one fixture per format. Every case checks the
// reported MIME type; most also check a minimum content length and the
// presence of expected text.
describe('smoke', () => {
  const timeoutMs = 30000;

  it('ocr_image_png: OCR: PNG image extraction with OCR enabled. In WASM this exercises the Uint8Array bridge parameter and Promise await in the generated OcrBackend bridge.', async () => {
    const fsPromises = await import('node:fs/promises');
    const imageBytes = await fsPromises.readFile('images/test_hello_world.png');
    const result = await extractBytes(imageBytes, "image/png", undefined);

    expect(result.mimeType.trim()).toBe("image/png");
    expect(result.content.length).toBeGreaterThanOrEqual(1);
    // At least one of the candidate words must appear in the extracted text.
    const candidates = ["Hello", "World", "hello", "world"];
    expect(candidates.some((needle) => result.content.includes(needle))).toBe(true);
  }, timeoutMs);

  it('smoke_docx_basic: Smoke test: DOCX with formatted text', async () => {
    const result = await extractFile(
      "docx/fake.docx",
      "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
      undefined,
    );

    expect(result.mimeType.trim()).toBe("application/vnd.openxmlformats-officedocument.wordprocessingml.document");
    expect(result.content.length).toBeGreaterThanOrEqual(20);
    const candidates = ["Lorem", "ipsum", "document", "text"];
    expect(candidates.some((needle) => result.content.includes(needle))).toBe(true);
  }, timeoutMs);

  it('smoke_html_basic: Smoke test: HTML table extraction', async () => {
    const result = await extractFile("html/simple_table.html", "text/html", undefined);

    expect(result.mimeType.trim()).toBe("text/html");
    expect(result.content.length).toBeGreaterThanOrEqual(10);
    const candidates = ["Sample Data Table", "Laptop", "Electronics", "Product"];
    expect(candidates.some((needle) => result.content.includes(needle))).toBe(true);
  }, timeoutMs);

  it('smoke_image_png: Smoke test: PNG image (without OCR, metadata only)', async () => {
    // No MIME type is passed here; only the reported type is checked.
    const config = { disableOcr: true } as ExtractionConfig;
    const result = await extractFile("images/sample.png", undefined, config);

    expect(result.mimeType.trim()).toBe("image/png");
  }, timeoutMs);

  it('smoke_json_basic: Smoke test: JSON file extraction', async () => {
    const result = await extractFile("json/simple.json", "application/json", undefined);

    expect(result.mimeType.trim()).toBe("application/json");
    expect(result.content.length).toBeGreaterThanOrEqual(5);
  }, timeoutMs);

  it('smoke_pdf_basic: Smoke test: PDF with simple text extraction', async () => {
    const result = await extractFile("pdf/fake_memo.pdf", "application/pdf", undefined);

    expect(result.mimeType.trim()).toBe("application/pdf");
    expect(result.content.length).toBeGreaterThanOrEqual(50);
    const candidates = ["May 5, 2023", "To Whom it May Concern"];
    expect(candidates.some((needle) => result.content.includes(needle))).toBe(true);
  }, timeoutMs);

  it('smoke_txt_basic: Smoke test: Plain text file', async () => {
    const result = await extractFile("text/report.txt", "text/plain", undefined);

    expect(result.mimeType.trim()).toBe("text/plain");
    expect(result.content.length).toBeGreaterThanOrEqual(5);
  }, timeoutMs);

  it('smoke_xlsx_basic: Smoke test: XLSX with basic spreadsheet data including tables', async () => {
    const result = await extractFile(
      "xlsx/stanley_cups.xlsx",
      "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
      undefined,
    );

    expect(result.mimeType.trim()).toBe("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet");
    expect(result.content.length).toBeGreaterThanOrEqual(100);
    // Every one of these strings must be present, checked in this order.
    const required = ["Team", "Location", "Stanley Cups", "Blues", "Flyers", "Maple Leafs", "STL", "PHI", "TOR"];
    for (const expectedText of required) {
      expect(result.content).toContain(expectedText);
    }
    // skipped: field 'tables' not available on result type
    // skipped: field 'metadata.format.excel.sheet_count' not available on result type
    // skipped: field 'metadata.format.excel.sheet_names' not available on result type
  }, timeoutMs);
});

173
e2e/node/tests/test_plugins.test.ts generated Normal file
View File

@@ -0,0 +1,173 @@
/**
* Bridge registry error-path tests for document extractor and renderer plugins.
*
* These tests cover the observable behaviour of register/unregister/clear at
* the TypeScript/Node layer. register_document_extractor and register_renderer
* are generated by the alef trait-bridge codegen and present on the native
* kreuzberg module at runtime; they do not appear in the curated index.d.ts
* re-export because the TypeScript wrapper only surfaces a subset of the API.
*
* A DocumentExtractor bridge object must expose:
* name(): string
* extract_bytes(content, mimeType, configJson): string (JSON InternalDocument)
* supported_mime_types(): string[]
*
* A Renderer bridge object must expose:
* name(): string
* render(docJson): string
*/
import { describe, it, expect } from "vitest";
import { extractBytesSync, listDocumentExtractors, listRenderers } from "kreuzberg";
// The register/unregister/clear functions are exported by the native module but
// not re-typed in the public TypeScript wrapper. Import the native binding
// directly so we can reach the full API surface without 'any' sprawl.
import kreuzberg from "kreuzberg";
// View the default export as a loosely typed name -> function map so exports
// can be looked up by string key. The double cast through `unknown` bypasses
// the compiler's structural compatibility check, so nothing here verifies
// that a looked-up name actually exists at runtime.
const native = kreuzberg as unknown as Record<string, (...args: unknown[]) => unknown>;
/**
 * Calls the `registerDocumentExtractor` entry of `native` with `obj`.
 *
 * @param obj - The extractor bridge object to pass through unchanged.
 */
function registerDocumentExtractor(obj: unknown): void {
  type RegisterFn = (extractor: unknown) => void;
  const exportName = "registerDocumentExtractor";
  // Invoked as a member call so `this` stays bound to `native`.
  (native[exportName] as RegisterFn)(obj);
}
/**
 * Calls the `unregisterDocumentExtractor` entry of `native` with `name`.
 *
 * @param name - The extractor name to pass through unchanged.
 */
function unregisterDocumentExtractor(name: string): void {
  type UnregisterFn = (extractorName: string) => void;
  const exportName = "unregisterDocumentExtractor";
  // Invoked as a member call so `this` stays bound to `native`.
  (native[exportName] as UnregisterFn)(name);
}
/** Calls the `clearDocumentExtractors` entry of `native` with no arguments. */
function clearDocumentExtractors(): void {
  type ClearFn = () => void;
  const exportName = "clearDocumentExtractors";
  // Invoked as a member call so `this` stays bound to `native`.
  (native[exportName] as ClearFn)();
}
/**
 * Calls the `registerRenderer` entry of `native` with `obj`.
 *
 * @param obj - The renderer bridge object to pass through unchanged.
 */
function registerRenderer(obj: unknown): void {
  type RegisterFn = (renderer: unknown) => void;
  const exportName = "registerRenderer";
  // Invoked as a member call so `this` stays bound to `native`.
  (native[exportName] as RegisterFn)(obj);
}
/**
 * Calls the `unregisterRenderer` entry of `native` with `name`.
 *
 * @param name - The renderer name to pass through unchanged.
 */
function unregisterRenderer(name: string): void {
  type UnregisterFn = (rendererName: string) => void;
  const exportName = "unregisterRenderer";
  // Invoked as a member call so `this` stays bound to `native`.
  (native[exportName] as UnregisterFn)(name);
}
/** Calls the `clearRenderers` entry of `native` with no arguments. */
function clearRenderers(): void {
  type ClearFn = () => void;
  const exportName = "clearRenderers";
  // Invoked as a member call so `this` stays bound to `native`.
  (native[exportName] as ClearFn)();
}
// ---------------------------------------------------------------------------
// Minimal stub factory helpers
// ---------------------------------------------------------------------------
/**
 * Builds a stub extractor object for the registry tests.
 *
 * @param name - Value returned by the stub's `name()`.
 * @param mimeType - Sole entry returned by `supported_mime_types()`;
 *   defaults to `"application/x-test"`.
 * @returns An object exposing `name`, `version`, `initialize`, `shutdown`,
 *   `supported_mime_types` and `extract_bytes`. `extract_bytes` ignores its
 *   arguments and returns a fixed JSON string describing an empty document.
 */
function makeExtractor(name: string, mimeType = "application/x-test"): object {
  const extractorName = name;

  // Fixed payload; serialized afresh on every extract_bytes call.
  const emptyDocument = {
    source_format: "plain",
    mime_type: "text/plain",
    elements: [],
    relationships: [],
    images: [],
    tables: [],
  };

  return {
    name(): string {
      return extractorName;
    },
    version(): string {
      return "0.0.1";
    },
    initialize(): void {
      /* no-op */
    },
    shutdown(): void {
      /* no-op */
    },
    supported_mime_types(): string[] {
      return [mimeType];
    },
    extract_bytes(_content: Uint8Array, _mimeType: string, _configJson: string): string {
      return JSON.stringify(emptyDocument);
    },
  };
}
/**
 * Builds a stub renderer object for the registry tests.
 *
 * @param name - Value returned by the stub's `name()`.
 * @returns An object exposing `name`, `version`, `initialize`, `shutdown`
 *   and `render`. `render` ignores its argument and returns `"rendered"`.
 */
function makeRenderer(name: string): object {
  const rendererName = name;

  return {
    name(): string {
      return rendererName;
    },
    version(): string {
      return "0.0.1";
    },
    initialize(): void {
      /* no-op */
    },
    shutdown(): void {
      /* no-op */
    },
    render(_docJson: string): string {
      return "rendered";
    },
  };
}
// ---------------------------------------------------------------------------
// DocumentExtractor tests
// ---------------------------------------------------------------------------
// Registry behaviour for document extractors: duplicate registration,
// unregistering an unknown name, clearing, and extraction after a custom
// extractor has been removed. Tests that register stubs clean them up in a
// `finally` so a failing step does not leave stubs in the registry.
describe("plugins: document extractor registry", () => {
  it("register_duplicate_extractor_replaces: second registration silently replaces first", () => {
    const name = "_test_ts_dup_extractor";
    try {
      registerDocumentExtractor(makeExtractor(name, "application/x-ts-dup1"));
      registerDocumentExtractor(makeExtractor(name, "application/x-ts-dup2"));
      const listed = listDocumentExtractors();
      // The name must appear exactly once even though it was registered twice.
      const count = listed.filter((n) => n === name).length;
      expect(count).toBe(1);
    } finally {
      unregisterDocumentExtractor(name);
    }
  });

  it("unregister_unknown_extractor_returns_ok: unregistering unknown name is a no-op", () => {
    // Must not throw
    expect(() => {
      unregisterDocumentExtractor("_test_ts_never_registered_extractor_xyz");
    }).not.toThrow();
  });

  it("clear_then_list_extractor_empty: list is empty after clear", () => {
    const firstName = "_test_ts_clear_a";
    const secondName = "_test_ts_clear_b";
    try {
      registerDocumentExtractor(makeExtractor(firstName, "application/x-ts-clear-a"));
      registerDocumentExtractor(makeExtractor(secondName, "application/x-ts-clear-b"));
      // NOTE(review): this empties the whole registry, not only the two
      // stubs — confirm no later test relies on entries registered earlier.
      clearDocumentExtractors();
      const listed = listDocumentExtractors();
      expect(listed).toEqual([]);
    } finally {
      // If a step before the clear threw, remove whatever was registered.
      // On the success path both names are already gone; the test above
      // expects unregistering an unknown name not to throw.
      unregisterDocumentExtractor(firstName);
      unregisterDocumentExtractor(secondName);
    }
  });

  it("extract_after_unregister_uses_builtin: built-in extractor is used after custom removed", () => {
    const name = "_test_ts_unreg_plain";
    registerDocumentExtractor(makeExtractor(name, "text/plain"));
    unregisterDocumentExtractor(name);
    // Must not throw; falls back to the built-in plain-text extractor.
    const encoded = new TextEncoder().encode("hello world");
    const result = extractBytesSync(encoded, "text/plain", undefined);
    expect(result).toBeDefined();
  });
});
// ---------------------------------------------------------------------------
// Renderer tests
// ---------------------------------------------------------------------------
// Registry behaviour for renderers: duplicate registration, unregistering an
// unknown name, clearing, and list contents after unregistering. Tests that
// register stubs clean them up in a `finally` so a failing step does not
// leave stubs in the registry.
describe("plugins: renderer registry", () => {
  it("register_duplicate_renderer_replaces: second registration silently replaces first", () => {
    const name = "_test_ts_dup_renderer";
    try {
      registerRenderer(makeRenderer(name));
      registerRenderer(makeRenderer(name));
      const listed = listRenderers();
      // The name must appear exactly once even though it was registered twice.
      const count = listed.filter((n) => n === name).length;
      expect(count).toBe(1);
    } finally {
      unregisterRenderer(name);
    }
  });

  it("unregister_unknown_renderer_returns_ok: unregistering unknown name is a no-op", () => {
    expect(() => {
      unregisterRenderer("_test_ts_never_registered_renderer_xyz");
    }).not.toThrow();
  });

  it("clear_then_list_renderer_empty: list is empty after clear", () => {
    const firstName = "_test_ts_renderer_clear_a";
    const secondName = "_test_ts_renderer_clear_b";
    try {
      registerRenderer(makeRenderer(firstName));
      registerRenderer(makeRenderer(secondName));
      // NOTE(review): this empties the whole registry, not only the two
      // stubs — confirm no later test relies on entries registered earlier.
      clearRenderers();
      const listed = listRenderers();
      expect(listed).toEqual([]);
    } finally {
      // If a step before the clear threw, remove whatever was registered.
      // On the success path both names are already gone; the test above
      // expects unregistering an unknown name not to throw.
      unregisterRenderer(firstName);
      unregisterRenderer(secondName);
    }
  });

  it("list_renderers_after_unregister_does_not_include_removed: name absent after unregister", () => {
    const name = "_test_ts_unregister_renderer_check";
    try {
      registerRenderer(makeRenderer(name));
      expect(listRenderers()).toContain(name);
      unregisterRenderer(name);
      expect(listRenderers()).not.toContain(name);
    } finally {
      // Ensures the stub is removed when the first assertion fails. On the
      // success path this is a repeat unregister of an already-removed name.
      unregisterRenderer(name);
    }
  });
});

View File

@@ -0,0 +1,44 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
import { describe, expect, it } from 'vitest';import { extractFile, clearValidators, listValidators } from '@kreuzberg/node';
/**
 * Converts an arbitrary value to text.
 *
 * @param value - Any value.
 * @returns An empty string for `null` or `undefined`, otherwise `String(value)`.
 */
function _alefE2eText(value: unknown): string {
  if (value === null || value === undefined) {
    return "";
  }
  return String(value);
}
/**
 * Collects the text forms of an item and a fixed set of its fields.
 *
 * @param item - Any value.
 * @returns For `null`, `undefined`, or non-object input, a one-element array
 *   holding the text of `item`. For objects, eight entries in this order: the
 *   text of `item` itself, then its `kind`, `name`, `source`, `alias`, `text`
 *   and `signature` fields, then the space-joined text of `items` (an empty
 *   string when `items` is not an array).
 */
function _alefE2eItemTexts(item: unknown): string[] {
  const isObjectLike = typeof item === "object" && item !== null;
  if (!isObjectLike) {
    return [_alefE2eText(item)];
  }

  const fields = item as Record<string, unknown>;

  let joinedItems = "";
  if (Array.isArray(fields.items)) {
    joinedItems = fields.items.map(_alefE2eText).join(" ");
  }

  const texts: string[] = [_alefE2eText(item)];
  for (const key of ["kind", "name", "source", "alias", "text", "signature"]) {
    texts.push(_alefE2eText(fields[key]));
  }
  texts.push(joinedItems);
  return texts;
}
/**
 * Produces a display string for a format-metadata value.
 *
 * FormatMetadata is a tagged union:
 * `{ format_type: 'image', image: { format: 'PNG', ... }, ... }`.
 *
 * @param fm - The metadata value; may be nullish, a primitive, or an object.
 * @returns `""` for `null`/`undefined`; `String(fm)` for primitives; the
 *   string `image.format` for the `"image"` variant when present; otherwise
 *   the `format_type` string, or `""` when `format_type` is not a string.
 */
function _alefE2eFormatMetadataDisplay(fm: unknown): string {
  if (fm == null) return "";
  if (typeof fm !== "object") return String(fm);
  const record = fm as Record<string, unknown>;
  const formatType = record.format_type;
  // Extract the display string based on the variant type.
  // `typeof null` is "object", so null must be excluded explicitly; otherwise
  // reading `.format` from a null `image` payload throws a TypeError.
  if (formatType === "image" && typeof record.image === "object" && record.image !== null) {
    const imageData = record.image as Record<string, unknown>;
    if (typeof imageData.format === "string") return imageData.format;
  }
  // Fallback: return format_type variant name
  return typeof formatType === "string" ? formatType : "";
}
// Validator registry smoke checks: each case passes as long as the call
// returns without throwing; return values are not inspected.
// NOTE(review): the first title says "verify list is empty" but no assertion
// is made on the list — confirm whether that is intentional.
describe('validator_management', () => {
  const timeoutMs = 30000;

  it('validators_clear: Clear all validators and verify list is empty', () => {
    clearValidators();
  }, timeoutMs);

  it('validators_list: List all registered validators', () => {
    listValidators();
  }, timeoutMs);
});