Nomad changes
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s

This commit is contained in:
Henrik Jess Nielsen
2026-06-01 23:40:55 +02:00
parent 72b1a0a6ed
commit b4c07d3693
5723 changed files with 1130655 additions and 0 deletions

58
e2e/go/async_test.go generated Normal file
View File

@@ -0,0 +1,58 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// E2e tests for category: async
package e2e_test
import (
"os"
"strings"
"testing"
"github.com/stretchr/testify/assert"
kreuzberg "github.com/kreuzberg-dev/kreuzberg/v5"
)
// Test_AsyncExtractBytes feeds the bytes of the pdf/fake_memo.pdf fixture to
// kreuzberg.ExtractBytes with the application/pdf MIME type. It then checks
// the MIME type reported on the result and that the extracted content has a
// length of at least 50.
func Test_AsyncExtractBytes(t *testing.T) {
	const wantMime = "application/pdf"

	data, readErr := os.ReadFile("pdf/fake_memo.pdf")
	if readErr != nil {
		t.Fatalf("read fixture pdf/fake_memo.pdf: %v", readErr)
	}

	result, err := kreuzberg.ExtractBytes(data, wantMime, kreuzberg.ExtractionConfig{})
	if err != nil {
		t.Fatalf("call failed: %v", err)
	}

	// The comparison ignores surrounding whitespace; the message prints the raw value.
	if got := strings.TrimSpace(string(result.MimeType)); got != wantMime {
		t.Errorf("equals mismatch: got %v", result.MimeType)
	}
	assert.GreaterOrEqual(t, len(result.Content), 50, "expected length >= 50")
}
// Test_AsyncExtractBytesEmptyMime passes the text/plain.txt fixture to
// kreuzberg.ExtractBytes with an empty MIME string and requires the call to
// return an error.
func Test_AsyncExtractBytesEmptyMime(t *testing.T) {
	data, readErr := os.ReadFile("text/plain.txt")
	if readErr != nil {
		t.Fatalf("read fixture text/plain.txt: %v", readErr)
	}

	if _, err := kreuzberg.ExtractBytes(data, "", kreuzberg.ExtractionConfig{}); err == nil {
		t.Errorf("expected an error, but call succeeded")
	}
}
// Test_AsyncExtractBytesInvalidMime passes the text/plain.txt fixture to
// kreuzberg.ExtractBytes with the MIME type application/x-nonexistent and
// requires the call to return an error.
func Test_AsyncExtractBytesInvalidMime(t *testing.T) {
	const bogusMime = "application/x-nonexistent"

	data, readErr := os.ReadFile("text/plain.txt")
	if readErr != nil {
		t.Fatalf("read fixture text/plain.txt: %v", readErr)
	}

	if _, err := kreuzberg.ExtractBytes(data, bogusMime, kreuzberg.ExtractionConfig{}); err == nil {
		t.Errorf("expected an error, but call succeeded")
	}
}

139
e2e/go/batch_test.go generated Normal file
View File

@@ -0,0 +1,139 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// E2e tests for category: batch
package e2e_test
import (
"encoding/json"
"testing"
"github.com/stretchr/testify/assert"
kreuzberg "github.com/kreuzberg-dev/kreuzberg/v5"
)
// Test_BatchBytesInvalidMime decodes a one-item batch whose MIME type is
// application/x-nonexistent and submits it to kreuzberg.BatchExtractBytesSync.
// Only the call-level error is checked: the test fails if the call returns an
// error, and the returned results are not inspected.
func Test_BatchBytesInvalidMime(t *testing.T) {
	const payload = `[{"content":"SGVsbG8=","mime_type":"application/x-nonexistent"}]`

	var batch []kreuzberg.BatchBytesItem
	decodeErr := json.Unmarshal([]byte(payload), &batch)
	if decodeErr != nil {
		t.Fatalf("config parse failed: %v", decodeErr)
	}

	if _, err := kreuzberg.BatchExtractBytesSync(batch, kreuzberg.ExtractionConfig{}); err != nil {
		t.Fatalf("call failed: %v", err)
	}
}
// Test_BatchExtractBytesHappy decodes a two-item batch (one text/plain item,
// one text/html item) and submits it to kreuzberg.BatchExtractBytes. The call
// must succeed and return at least one result.
func Test_BatchExtractBytesHappy(t *testing.T) {
	const payload = `[{"content":"SGVsbG8sIHdvcmxkIQ==","mime_type":"text/plain"},{"content":"PGh0bWw+PGJvZHk+VGVzdDwvYm9keT48L2h0bWw+","mime_type":"text/html"}]`

	var batch []kreuzberg.BatchBytesItem
	decodeErr := json.Unmarshal([]byte(payload), &batch)
	if decodeErr != nil {
		t.Fatalf("config parse failed: %v", decodeErr)
	}

	extracted, err := kreuzberg.BatchExtractBytes(batch, kreuzberg.ExtractionConfig{})
	if err != nil {
		t.Fatalf("call failed: %v", err)
	}
	assert.GreaterOrEqual(t, len(extracted), 1, "expected at least 1 elements")
}
// Test_BatchExtractBytesMixedFormat submits a single item with the MIME type
// application/x-unknown to kreuzberg.BatchExtractBytes. The test fails only if
// the call itself returns an error; the results are discarded.
func Test_BatchExtractBytesMixedFormat(t *testing.T) {
	const payload = `[{"content":"UERGIHBsYWNlaG9sZGVy","mime_type":"application/x-unknown"}]`

	var batch []kreuzberg.BatchBytesItem
	decodeErr := json.Unmarshal([]byte(payload), &batch)
	if decodeErr != nil {
		t.Fatalf("config parse failed: %v", decodeErr)
	}

	if _, err := kreuzberg.BatchExtractBytes(batch, kreuzberg.ExtractionConfig{}); err != nil {
		t.Fatalf("call failed: %v", err)
	}
}
// Test_BatchExtractBytesSyncEmptyList submits an empty batch (decoded from the
// JSON literal "[]") to kreuzberg.BatchExtractBytesSync. The call must succeed
// and the returned collection must contain no elements.
func Test_BatchExtractBytesSyncEmptyList(t *testing.T) {
	var items []kreuzberg.BatchBytesItem
	if err := json.Unmarshal([]byte(`[]`), &items); err != nil {
		t.Fatalf("config parse failed: %v", err)
	}

	result, err := kreuzberg.BatchExtractBytesSync(items, kreuzberg.ExtractionConfig{})
	if err != nil {
		t.Fatalf("call failed: %v", err)
	}

	// assert.Empty avoids the expected/actual argument-order mix-up that
	// assert.Equal(t, len(result), 0) had, so failure output is labeled correctly.
	assert.Empty(t, result, "expected exactly 0 elements")
}
// Test_BatchExtractBytesSyncInvalidMime submits a single item with the MIME
// type application/x-unknown to kreuzberg.BatchExtractBytesSync. The test
// fails only if the call itself returns an error; the results are discarded.
func Test_BatchExtractBytesSyncInvalidMime(t *testing.T) {
	const payload = `[{"content":"ZGF0YQ==","mime_type":"application/x-unknown"}]`

	var batch []kreuzberg.BatchBytesItem
	decodeErr := json.Unmarshal([]byte(payload), &batch)
	if decodeErr != nil {
		t.Fatalf("config parse failed: %v", decodeErr)
	}

	if _, err := kreuzberg.BatchExtractBytesSync(batch, kreuzberg.ExtractionConfig{}); err != nil {
		t.Fatalf("call failed: %v", err)
	}
}
// Test_BatchFileAsyncBasic submits the pdf/fake_memo.pdf and
// text/fake_text.txt fixtures to kreuzberg.BatchExtractFiles and requires the
// call to return without error. The results are not inspected.
func Test_BatchFileAsyncBasic(t *testing.T) {
	const payload = `[{"path":"pdf/fake_memo.pdf"},{"path":"text/fake_text.txt"}]`

	var files []kreuzberg.BatchFileItem
	decodeErr := json.Unmarshal([]byte(payload), &files)
	if decodeErr != nil {
		t.Fatalf("config parse failed: %v", decodeErr)
	}

	if _, err := kreuzberg.BatchExtractFiles(files, kreuzberg.ExtractionConfig{}); err != nil {
		t.Fatalf("call failed: %v", err)
	}
}
// Test_BatchFileAsyncNotFound submits the path /nonexistent/a.pdf to
// kreuzberg.BatchExtractFiles. The test fails only if the call itself returns
// an error; the results are discarded.
func Test_BatchFileAsyncNotFound(t *testing.T) {
	const payload = `[{"path":"/nonexistent/a.pdf"}]`

	var files []kreuzberg.BatchFileItem
	decodeErr := json.Unmarshal([]byte(payload), &files)
	if decodeErr != nil {
		t.Fatalf("config parse failed: %v", decodeErr)
	}

	if _, err := kreuzberg.BatchExtractFiles(files, kreuzberg.ExtractionConfig{}); err != nil {
		t.Fatalf("call failed: %v", err)
	}
}
// Test_BatchFileNotFound submits two paths under /nonexistent to
// kreuzberg.BatchExtractFilesSync. The test fails only if the call itself
// returns an error; the results are discarded.
func Test_BatchFileNotFound(t *testing.T) {
	const payload = `[{"path":"/nonexistent/a.pdf"},{"path":"/nonexistent/b.txt"}]`

	var files []kreuzberg.BatchFileItem
	decodeErr := json.Unmarshal([]byte(payload), &files)
	if decodeErr != nil {
		t.Fatalf("config parse failed: %v", decodeErr)
	}

	if _, err := kreuzberg.BatchExtractFilesSync(files, kreuzberg.ExtractionConfig{}); err != nil {
		t.Fatalf("call failed: %v", err)
	}
}
// Test_BatchFilePartial submits one fixture path (text/plain.txt) together
// with /nonexistent/missing.pdf to kreuzberg.BatchExtractFilesSync. The test
// fails only if the call itself returns an error; the results are discarded.
func Test_BatchFilePartial(t *testing.T) {
	const payload = `[{"path":"text/plain.txt"},{"path":"/nonexistent/missing.pdf"}]`

	var files []kreuzberg.BatchFileItem
	decodeErr := json.Unmarshal([]byte(payload), &files)
	if decodeErr != nil {
		t.Fatalf("config parse failed: %v", decodeErr)
	}

	if _, err := kreuzberg.BatchExtractFilesSync(files, kreuzberg.ExtractionConfig{}); err != nil {
		t.Fatalf("call failed: %v", err)
	}
}
// Test_BatchFileSyncBasic submits the pdf/fake_memo.pdf and text/fake_text.txt
// fixtures to kreuzberg.BatchExtractFilesSync and requires the call to return
// without error. The results are not inspected.
func Test_BatchFileSyncBasic(t *testing.T) {
	const payload = `[{"path":"pdf/fake_memo.pdf"},{"path":"text/fake_text.txt"}]`

	var files []kreuzberg.BatchFileItem
	decodeErr := json.Unmarshal([]byte(payload), &files)
	if decodeErr != nil {
		t.Fatalf("config parse failed: %v", decodeErr)
	}

	if _, err := kreuzberg.BatchExtractFilesSync(files, kreuzberg.ExtractionConfig{}); err != nil {
		t.Fatalf("call failed: %v", err)
	}
}

36
e2e/go/code_test.go generated Normal file
View File

@@ -0,0 +1,36 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// E2e tests for category: code
package e2e_test
import (
"strings"
"testing"
"github.com/stretchr/testify/assert"
kreuzberg "github.com/kreuzberg-dev/kreuzberg/v5"
)
// Test_CodeShebangDetection extracts the code/script.sh fixture through
// kreuzberg.ExtractFileSync with an explicit text/x-source-code MIME hint.
// It checks the reported MIME type, that the content length is at least 10,
// and that the content mentions both "build" and "clean".
//
// NOTE(review): the fixture description mentions "bytes input", but this body
// calls the file-path API — confirm which one was intended.
func Test_CodeShebangDetection(t *testing.T) {
	const wantMime = "text/x-source-code"

	// The API takes the MIME hint by pointer, so it needs an addressable copy.
	mimeType := wantMime
	result, err := kreuzberg.ExtractFileSync(`code/script.sh`, &mimeType, kreuzberg.ExtractionConfig{})
	if err != nil {
		t.Fatalf("call failed: %v", err)
	}

	if strings.TrimSpace(string(result.MimeType)) != wantMime {
		t.Errorf("equals mismatch: got %v", result.MimeType)
	}
	assert.GreaterOrEqual(t, len(result.Content), 10, "expected length >= 10")

	content := string(result.Content)
	for _, want := range []string{`build`, `clean`} {
		if !strings.Contains(content, want) {
			t.Errorf("expected to contain %s", want)
		}
	}
}

338
e2e/go/contract_test.go generated Normal file
View File

@@ -0,0 +1,338 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// E2e tests for category: contract
package e2e_test
import (
"encoding/json"
"os"
"strings"
"testing"
"github.com/stretchr/testify/assert"
kreuzberg "github.com/kreuzberg-dev/kreuzberg/v5"
)
// Test_ApiBatchBytesAsync extracts pdf/fake_memo.pdf via kreuzberg.ExtractFile
// with a default config. It checks the MIME type, a minimum content length of
// 10, and that the content contains "May 5, 2023" or "Mallori".
//
// NOTE(review): the fixture is described as covering batch_extract_bytes, yet
// the body calls the single-file ExtractFile API — confirm against the generator.
func Test_ApiBatchBytesAsync(t *testing.T) {
	result, err := kreuzberg.ExtractFile("pdf/fake_memo.pdf", nil, kreuzberg.ExtractionConfig{})
	if err != nil {
		t.Fatalf("call failed: %v", err)
	}

	if mime := strings.TrimSpace(string(result.MimeType)); mime != "application/pdf" {
		t.Errorf("equals mismatch: got %v", result.MimeType)
	}
	assert.GreaterOrEqual(t, len(result.Content), 10, "expected length >= 10")

	text := string(result.Content)
	if !strings.Contains(text, "May 5, 2023") && !strings.Contains(text, "Mallori") {
		t.Errorf("expected to contain at least one of the specified values")
	}
}
// Test_ApiBatchBytesWithConfigsAsync extracts pdf/fake_memo.pdf via
// kreuzberg.ExtractFile using a config decoded from {"output_format":"markdown"}.
// It checks the MIME type and a minimum content length of 10.
//
// NOTE(review): the fixture is described as covering batch_extract_bytes with
// per-file configs, yet the body calls ExtractFile — confirm against the generator.
func Test_ApiBatchBytesWithConfigsAsync(t *testing.T) {
	var cfg kreuzberg.ExtractionConfig
	parseErr := json.Unmarshal([]byte(`{"output_format":"markdown"}`), &cfg)
	if parseErr != nil {
		t.Fatalf("config parse failed: %v", parseErr)
	}

	result, err := kreuzberg.ExtractFile("pdf/fake_memo.pdf", nil, cfg)
	if err != nil {
		t.Fatalf("call failed: %v", err)
	}

	if mime := strings.TrimSpace(string(result.MimeType)); mime != "application/pdf" {
		t.Errorf("equals mismatch: got %v", result.MimeType)
	}
	assert.GreaterOrEqual(t, len(result.Content), 10, "expected length >= 10")
	// The generator skipped the 'metadata.output_format' assertion: the field
	// is reported as not available on the result type.
}
// Test_ApiBatchFileAsync extracts pdf/fake_memo.pdf via kreuzberg.ExtractFile
// with a default config. It checks the MIME type, a minimum content length of
// 10, and that the content contains "May 5, 2023" or "Mallori".
//
// NOTE(review): the fixture is described as covering batch_extract_file, yet
// the body calls the single-file ExtractFile API — confirm against the generator.
func Test_ApiBatchFileAsync(t *testing.T) {
	result, err := kreuzberg.ExtractFile("pdf/fake_memo.pdf", nil, kreuzberg.ExtractionConfig{})
	if err != nil {
		t.Fatalf("call failed: %v", err)
	}

	if mime := strings.TrimSpace(string(result.MimeType)); mime != "application/pdf" {
		t.Errorf("equals mismatch: got %v", result.MimeType)
	}
	assert.GreaterOrEqual(t, len(result.Content), 10, "expected length >= 10")

	// Any one of the candidate substrings is enough.
	matched := false
	for _, candidate := range []string{"May 5, 2023", "Mallori"} {
		if strings.Contains(string(result.Content), candidate) {
			matched = true
		}
	}
	if !matched {
		t.Errorf("expected to contain at least one of the specified values")
	}
}
// Test_ApiBatchFileWithConfigsAsync extracts pdf/fake_memo.pdf via
// kreuzberg.ExtractFile using a config decoded from {"output_format":"markdown"}.
// It checks the MIME type and a minimum content length of 10.
//
// NOTE(review): the fixture is described as covering batch_extract_files with
// per-file configs, yet the body calls ExtractFile — confirm against the generator.
func Test_ApiBatchFileWithConfigsAsync(t *testing.T) {
	const rawConfig = `{"output_format":"markdown"}`

	var cfg kreuzberg.ExtractionConfig
	if parseErr := json.Unmarshal([]byte(rawConfig), &cfg); parseErr != nil {
		t.Fatalf("config parse failed: %v", parseErr)
	}

	result, err := kreuzberg.ExtractFile("pdf/fake_memo.pdf", nil, cfg)
	if err != nil {
		t.Fatalf("call failed: %v", err)
	}

	mime := strings.TrimSpace(string(result.MimeType))
	if mime != "application/pdf" {
		t.Errorf("equals mismatch: got %v", result.MimeType)
	}
	assert.GreaterOrEqual(t, len(result.Content), 10, "expected length >= 10")
	// The generator skipped the 'metadata.output_format' assertion: the field
	// is reported as not available on the result type.
}
// Test_ApiExtractBytesAsync extracts pdf/fake_memo.pdf via
// kreuzberg.ExtractFile with a default config. It checks the MIME type, a
// minimum content length of 10, and that the content contains "May 5, 2023"
// or "Mallori".
//
// NOTE(review): the fixture is described as covering extract_bytes, yet the
// body calls the file-path ExtractFile API — confirm against the generator.
func Test_ApiExtractBytesAsync(t *testing.T) {
	const wantMime = "application/pdf"

	result, err := kreuzberg.ExtractFile("pdf/fake_memo.pdf", nil, kreuzberg.ExtractionConfig{})
	if err != nil {
		t.Fatalf("call failed: %v", err)
	}

	if strings.TrimSpace(string(result.MimeType)) != wantMime {
		t.Errorf("equals mismatch: got %v", result.MimeType)
	}
	assert.GreaterOrEqual(t, len(result.Content), 10, "expected length >= 10")

	body := string(result.Content)
	hasDate := strings.Contains(body, "May 5, 2023")
	hasName := strings.Contains(body, "Mallori")
	if !(hasDate || hasName) {
		t.Errorf("expected to contain at least one of the specified values")
	}
}
// Test_ApiExtractFileAsync extracts pdf/fake_memo.pdf via
// kreuzberg.ExtractFile with a default config. It checks the MIME type, a
// minimum content length of 10, and that the content contains "May 5, 2023"
// or "Mallori".
func Test_ApiExtractFileAsync(t *testing.T) {
	result, err := kreuzberg.ExtractFile("pdf/fake_memo.pdf", nil, kreuzberg.ExtractionConfig{})
	if err != nil {
		t.Fatalf("call failed: %v", err)
	}

	if mime := strings.TrimSpace(string(result.MimeType)); mime != "application/pdf" {
		t.Errorf("equals mismatch: got %v", result.MimeType)
	}
	assert.GreaterOrEqual(t, len(result.Content), 10, "expected length >= 10")

	text := string(result.Content)
	switch {
	case strings.Contains(text, "May 5, 2023"):
		// first candidate present
	case strings.Contains(text, "Mallori"):
		// second candidate present
	default:
		t.Errorf("expected to contain at least one of the specified values")
	}
}
// Test_ConfigChunkingPrependHeadingContext extracts markdown/extraction_test.md
// with the markdown chunker (max_chars 300, max_overlap 50,
// prepend_heading_context enabled) and checks the chunks on the result:
//
//   - result.Chunks is non-nil and every chunk has non-empty Content;
//   - result.Chunks is non-nil and every chunk has a non-nil Metadata.HeadingContext;
//   - at least one chunk exists and the first has a non-nil Metadata.HeadingContext.
//
// The content length must also be at least 10.
func Test_ConfigChunkingPrependHeadingContext(t *testing.T) {
	var config kreuzberg.ExtractionConfig
	if err := json.Unmarshal([]byte(`{"chunking":{"chunker_type":"markdown","max_chars":300,"max_overlap":50,"prepend_heading_context":true}}`), &config); err != nil {
		t.Fatalf("config parse failed: %v", err)
	}

	result, err := kreuzberg.ExtractFileSync(`markdown/extraction_test.md`, nil, config)
	if err != nil {
		t.Fatalf("call failed: %v", err)
	}
	assert.GreaterOrEqual(t, len(result.Content), 10, "expected length >= 10")

	chunks := result.Chunks

	// A nil chunk list fails both "every chunk" checks; a non-nil empty list
	// passes them vacuously, as in the generated closures this replaces.
	everyHasContent := chunks != nil
	everyHasHeading := chunks != nil
	for i := range chunks {
		if chunks[i].Content == "" {
			everyHasContent = false
		}
		if chunks[i].Metadata.HeadingContext == nil {
			everyHasHeading = false
		}
	}

	// len() of a nil slice is 0, so no separate nil check is needed here.
	firstHasHeading := len(chunks) > 0 && chunks[0].Metadata.HeadingContext != nil

	assert.True(t, everyHasContent, "expected true")
	assert.True(t, everyHasHeading, "expected true")
	assert.True(t, firstHasHeading, "expected true")
}
// Test_ConfigDocumentStructureWithHeadings extracts docx/fake.docx via
// kreuzberg.ExtractFileSync with include_document_structure enabled and
// checks only the MIME type reported on the result.
func Test_ConfigDocumentStructureWithHeadings(t *testing.T) {
	const wantMime = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"

	var cfg kreuzberg.ExtractionConfig
	parseErr := json.Unmarshal([]byte(`{"include_document_structure":true}`), &cfg)
	if parseErr != nil {
		t.Fatalf("config parse failed: %v", parseErr)
	}

	result, err := kreuzberg.ExtractFileSync("docx/fake.docx", nil, cfg)
	if err != nil {
		t.Fatalf("call failed: %v", err)
	}

	if mime := strings.TrimSpace(string(result.MimeType)); mime != wantMime {
		t.Errorf("equals mismatch: got %v", result.MimeType)
	}
	// The generator skipped the 'document' and 'document.nodes' assertions:
	// both fields are reported as not available on the result type.
}
// Test_ConfigElementTypes extracts docx/unit_test_headers.docx via
// kreuzberg.ExtractFileSync with result_format set to element_based and
// checks that the reported MIME type contains the DOCX MIME string.
func Test_ConfigElementTypes(t *testing.T) {
	const docxMime = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"

	var cfg kreuzberg.ExtractionConfig
	parseErr := json.Unmarshal([]byte(`{"result_format":"element_based"}`), &cfg)
	if parseErr != nil {
		t.Fatalf("config parse failed: %v", parseErr)
	}

	result, err := kreuzberg.ExtractFileSync("docx/unit_test_headers.docx", nil, cfg)
	if err != nil {
		t.Fatalf("call failed: %v", err)
	}

	// Substring match rather than equality, with no whitespace trimming.
	if !strings.Contains(string(result.MimeType), docxMime) {
		t.Errorf("expected to contain at least one of the specified values")
	}
	// The generator skipped the 'elements' assertion: the field is reported
	// as not available on the result type.
}
// Test_ConfigExtractionTimeout extracts pdf/fake_memo.pdf via
// kreuzberg.ExtractFileSync with extraction_timeout_secs set to 300 and
// checks the MIME type and a minimum content length of 10.
func Test_ConfigExtractionTimeout(t *testing.T) {
	var cfg kreuzberg.ExtractionConfig
	parseErr := json.Unmarshal([]byte(`{"extraction_timeout_secs":300}`), &cfg)
	if parseErr != nil {
		t.Fatalf("config parse failed: %v", parseErr)
	}

	result, err := kreuzberg.ExtractFileSync("pdf/fake_memo.pdf", nil, cfg)
	if err != nil {
		t.Fatalf("call failed: %v", err)
	}

	if mime := strings.TrimSpace(string(result.MimeType)); mime != "application/pdf" {
		t.Errorf("equals mismatch: got %v", result.MimeType)
	}
	assert.GreaterOrEqual(t, len(result.Content), 10, "expected length >= 10")
}
// Test_ConfigKeywords extracts pdf/fake_memo.pdf via kreuzberg.ExtractFileSync
// with a keywords config (algorithm "yake", max_keywords 10) and checks the
// MIME type and a minimum content length of 10.
func Test_ConfigKeywords(t *testing.T) {
	const rawConfig = `{"keywords":{"algorithm":"yake","max_keywords":10}}`

	var cfg kreuzberg.ExtractionConfig
	if parseErr := json.Unmarshal([]byte(rawConfig), &cfg); parseErr != nil {
		t.Fatalf("config parse failed: %v", parseErr)
	}

	result, err := kreuzberg.ExtractFileSync("pdf/fake_memo.pdf", nil, cfg)
	if err != nil {
		t.Fatalf("call failed: %v", err)
	}

	if mime := strings.TrimSpace(string(result.MimeType)); mime != "application/pdf" {
		t.Errorf("equals mismatch: got %v", result.MimeType)
	}
	assert.GreaterOrEqual(t, len(result.Content), 10, "expected length >= 10")
	// The generator skipped two 'keywords' assertions: the field is reported
	// as not available on the Go ExtractionResult.
}
// Test_ConfigPages extracts pdf/fake_memo.pdf via kreuzberg.ExtractFileSync
// with extract_pages and insert_page_markers enabled. It checks the MIME
// type, a minimum content length of 10, and that the content contains the
// substring "PAGE".
func Test_ConfigPages(t *testing.T) {
	const rawConfig = `{"pages":{"extract_pages":true,"insert_page_markers":true}}`

	var cfg kreuzberg.ExtractionConfig
	if parseErr := json.Unmarshal([]byte(rawConfig), &cfg); parseErr != nil {
		t.Fatalf("config parse failed: %v", parseErr)
	}

	result, err := kreuzberg.ExtractFileSync("pdf/fake_memo.pdf", nil, cfg)
	if err != nil {
		t.Fatalf("call failed: %v", err)
	}

	if mime := strings.TrimSpace(string(result.MimeType)); mime != "application/pdf" {
		t.Errorf("equals mismatch: got %v", result.MimeType)
	}
	assert.GreaterOrEqual(t, len(result.Content), 10, "expected length >= 10")

	if !strings.Contains(string(result.Content), "PAGE") {
		t.Errorf("expected to contain at least one of the specified values")
	}
}
// Test_ConfigQualityEnabled extracts pdf/fake_memo.pdf via
// kreuzberg.ExtractFileSync with enable_quality_processing turned on and
// checks the MIME type and a minimum content length of 10.
func Test_ConfigQualityEnabled(t *testing.T) {
	var cfg kreuzberg.ExtractionConfig
	parseErr := json.Unmarshal([]byte(`{"enable_quality_processing":true}`), &cfg)
	if parseErr != nil {
		t.Fatalf("config parse failed: %v", parseErr)
	}

	result, err := kreuzberg.ExtractFileSync("pdf/fake_memo.pdf", nil, cfg)
	if err != nil {
		t.Fatalf("call failed: %v", err)
	}

	if mime := strings.TrimSpace(string(result.MimeType)); mime != "application/pdf" {
		t.Errorf("equals mismatch: got %v", result.MimeType)
	}
	assert.GreaterOrEqual(t, len(result.Content), 10, "expected length >= 10")
	// The generator skipped three 'quality_score' assertions: the field is
	// reported as not available on the result type.
}
// Test_ConfigSecurityLimits extracts archives/documents.zip via
// kreuzberg.ExtractFileSync with explicit security_limits (max_archive_size
// 104857600, max_compression_ratio 50, max_files_in_archive 100). The MIME
// type must contain application/zip or application/x-zip-compressed, and the
// content length must be at least 10.
func Test_ConfigSecurityLimits(t *testing.T) {
	const rawConfig = `{"security_limits":{"max_archive_size":104857600,"max_compression_ratio":50,"max_files_in_archive":100}}`

	var cfg kreuzberg.ExtractionConfig
	if parseErr := json.Unmarshal([]byte(rawConfig), &cfg); parseErr != nil {
		t.Fatalf("config parse failed: %v", parseErr)
	}

	result, err := kreuzberg.ExtractFileSync("archives/documents.zip", nil, cfg)
	if err != nil {
		t.Fatalf("call failed: %v", err)
	}

	mime := string(result.MimeType)
	if !strings.Contains(mime, "application/zip") && !strings.Contains(mime, "application/x-zip-compressed") {
		t.Errorf("expected to contain at least one of the specified values")
	}
	assert.GreaterOrEqual(t, len(result.Content), 10, "expected length >= 10")
}
// Test_ConfigTreeSitter extracts code/hello.py via kreuzberg.ExtractFileSync
// with a tree_sitter config (groups, languages and per-feature process
// flags). It checks that the MIME type is text/x-source-code and that the
// content length is at least 5.
func Test_ConfigTreeSitter(t *testing.T) {
	const rawConfig = `{"tree_sitter":{"groups":["web"],"languages":["python","rust"],"process":{"comments":false,"diagnostics":false,"docstrings":false,"exports":true,"imports":true,"structure":true,"symbols":false}}}`

	var cfg kreuzberg.ExtractionConfig
	if parseErr := json.Unmarshal([]byte(rawConfig), &cfg); parseErr != nil {
		t.Fatalf("config parse failed: %v", parseErr)
	}

	result, err := kreuzberg.ExtractFileSync("code/hello.py", nil, cfg)
	if err != nil {
		t.Fatalf("call failed: %v", err)
	}

	if mime := strings.TrimSpace(string(result.MimeType)); mime != "text/x-source-code" {
		t.Errorf("equals mismatch: got %v", result.MimeType)
	}
	assert.GreaterOrEqual(t, len(result.Content), 5, "expected length >= 5")
}
// Test_OutputFormatBytesMarkdown reads pdf/fake_memo.pdf and passes its bytes
// to kreuzberg.ExtractBytesSync with output_format set to markdown. It checks
// the MIME type and a minimum content length of 10.
func Test_OutputFormatBytesMarkdown(t *testing.T) {
	const wantMime = "application/pdf"

	data, readErr := os.ReadFile("pdf/fake_memo.pdf")
	if readErr != nil {
		t.Fatalf("read fixture pdf/fake_memo.pdf: %v", readErr)
	}

	var cfg kreuzberg.ExtractionConfig
	parseErr := json.Unmarshal([]byte(`{"output_format":"markdown"}`), &cfg)
	if parseErr != nil {
		t.Fatalf("config parse failed: %v", parseErr)
	}

	result, err := kreuzberg.ExtractBytesSync(data, wantMime, cfg)
	if err != nil {
		t.Fatalf("call failed: %v", err)
	}

	if got := strings.TrimSpace(string(result.MimeType)); got != wantMime {
		t.Errorf("equals mismatch: got %v", result.MimeType)
	}
	assert.GreaterOrEqual(t, len(result.Content), 10, "expected length >= 10")
	// The generator skipped the 'metadata.output_format' assertion: the field
	// is reported as not available on the result type.
}
// Test_OutputFormatMarkdown extracts pdf/fake_memo.pdf via
// kreuzberg.ExtractFileSync with output_format set to markdown and checks the
// MIME type and a minimum content length of 10.
func Test_OutputFormatMarkdown(t *testing.T) {
	var cfg kreuzberg.ExtractionConfig
	parseErr := json.Unmarshal([]byte(`{"output_format":"markdown"}`), &cfg)
	if parseErr != nil {
		t.Fatalf("config parse failed: %v", parseErr)
	}

	result, err := kreuzberg.ExtractFileSync("pdf/fake_memo.pdf", nil, cfg)
	if err != nil {
		t.Fatalf("call failed: %v", err)
	}

	if got := strings.TrimSpace(string(result.MimeType)); got != "application/pdf" {
		t.Errorf("equals mismatch: got %v", result.MimeType)
	}
	assert.GreaterOrEqual(t, len(result.Content), 10, "expected length >= 10")
	// The generator skipped the 'metadata.output_format' assertion: the field
	// is reported as not available on the result type.
}

59
e2e/go/detection_test.go generated Normal file
View File

@@ -0,0 +1,59 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// E2e tests for category: detection
package e2e_test
import (
"os"
"testing"
kreuzberg "github.com/kreuzberg-dev/kreuzberg/v5"
)
// Test_DetectMimeBytesHtml reads the html/html.html fixture and requires
// kreuzberg.DetectMimeTypeFromBytes to return without error. The detected
// value itself is not inspected.
func Test_DetectMimeBytesHtml(t *testing.T) {
	sample, readErr := os.ReadFile("html/html.html")
	if readErr != nil {
		t.Fatalf("read fixture html/html.html: %v", readErr)
	}

	if _, err := kreuzberg.DetectMimeTypeFromBytes(sample); err != nil {
		t.Fatalf("call failed: %v", err)
	}
}
// Test_DetectMimeBytesPdf reads the pdf/fake_memo.pdf fixture and requires
// kreuzberg.DetectMimeTypeFromBytes to return without error. The detected
// value itself is not inspected.
func Test_DetectMimeBytesPdf(t *testing.T) {
	sample, readErr := os.ReadFile("pdf/fake_memo.pdf")
	if readErr != nil {
		t.Fatalf("read fixture pdf/fake_memo.pdf: %v", readErr)
	}

	if _, err := kreuzberg.DetectMimeTypeFromBytes(sample); err != nil {
		t.Fatalf("call failed: %v", err)
	}
}
// Test_DetectMimeBytesPng reads the images/test_hello_world.png fixture and
// requires kreuzberg.DetectMimeTypeFromBytes to return without error. The
// detected value itself is not inspected.
func Test_DetectMimeBytesPng(t *testing.T) {
	sample, readErr := os.ReadFile("images/test_hello_world.png")
	if readErr != nil {
		t.Fatalf("read fixture images/test_hello_world.png: %v", readErr)
	}

	if _, err := kreuzberg.DetectMimeTypeFromBytes(sample); err != nil {
		t.Fatalf("call failed: %v", err)
	}
}
// Test_GetExtensionsUnknownMime requires kreuzberg.GetExtensionsForMime to
// return an error for the MIME type application/x-totally-unknown.
func Test_GetExtensionsUnknownMime(t *testing.T) {
	const unknownMime = "application/x-totally-unknown"

	if _, err := kreuzberg.GetExtensionsForMime(unknownMime); err == nil {
		t.Errorf("expected an error, but call succeeded")
	}
}

View File

@@ -0,0 +1,27 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// E2e tests for category: document_extractor_management
package e2e_test
import (
"testing"
kreuzberg "github.com/kreuzberg-dev/kreuzberg/v5"
)
// Test_DocumentExtractorsClear invokes kreuzberg.ClearDocumentExtractors.
// NOTE(review): the return value is discarded and nothing is asserted, so the
// "verify list is empty" step from the fixture description is not exercised
// by this body — confirm whether that is intentional.
func Test_DocumentExtractorsClear(t *testing.T) {
// Clear all document extractors and verify list is empty
// The call's result is assigned to the blank identifier, i.e. ignored.
_ = kreuzberg.ClearDocumentExtractors()
}
// Test_ExtractorsList requires kreuzberg.ListDocumentExtractors to return
// without error. The returned list is not inspected.
func Test_ExtractorsList(t *testing.T) {
	if _, err := kreuzberg.ListDocumentExtractors(); err != nil {
		t.Fatalf("call failed: %v", err)
	}
}

61
e2e/go/embed_async_pending_test.go generated Normal file
View File

@@ -0,0 +1,61 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// E2e tests for category: embed_async_pending
package e2e_test
import (
"encoding/json"
"testing"
"github.com/stretchr/testify/assert"
kreuzberg "github.com/kreuzberg-dev/kreuzberg/v5"
)
// Test_EmbedTextsAsyncEmptyInput passes an empty text list (decoded from the
// JSON literal "[]") to kreuzberg.EmbedTextsAsync with a default
// EmbeddingConfig. The call must succeed and return no elements.
func Test_EmbedTextsAsyncEmptyInput(t *testing.T) {
	var texts []string
	if err := json.Unmarshal([]byte(`[]`), &texts); err != nil {
		t.Fatalf("config parse failed: %v", err)
	}

	result, err := kreuzberg.EmbedTextsAsync(texts, kreuzberg.EmbeddingConfig{})
	if err != nil {
		t.Fatalf("call failed: %v", err)
	}

	// assert.Empty avoids the expected/actual argument-order mix-up that
	// assert.Equal(t, len(value), 0) had, so failure output is labeled correctly.
	assert.Empty(t, result, "expected exactly 0 elements")
}
// Test_EmbedTextsAsyncHappy passes the texts "first" and "second" to
// kreuzberg.EmbedTextsAsync with a default EmbeddingConfig. The call must
// succeed and return at least two elements.
func Test_EmbedTextsAsyncHappy(t *testing.T) {
	var inputs []string
	decodeErr := json.Unmarshal([]byte(`["first","second"]`), &inputs)
	if decodeErr != nil {
		t.Fatalf("config parse failed: %v", decodeErr)
	}

	embeddings, err := kreuzberg.EmbedTextsAsync(inputs, kreuzberg.EmbeddingConfig{})
	if err != nil {
		t.Fatalf("call failed: %v", err)
	}
	assert.GreaterOrEqual(t, len(embeddings), 2, "expected at least 2 elements")
}
// Test_EmbedTextsAsyncPresetSwitch passes a single text to
// kreuzberg.EmbedTextsAsync with a config selecting the "balanced" preset
// model and requires the call to return without error. The result is discarded.
func Test_EmbedTextsAsyncPresetSwitch(t *testing.T) {
	const rawConfig = `{"model":{"name":"balanced","type":"preset"}}`

	var inputs []string
	if decodeErr := json.Unmarshal([]byte(`["text"]`), &inputs); decodeErr != nil {
		t.Fatalf("config parse failed: %v", decodeErr)
	}

	var cfg kreuzberg.EmbeddingConfig
	if parseErr := json.Unmarshal([]byte(rawConfig), &cfg); parseErr != nil {
		t.Fatalf("config parse failed: %v", parseErr)
	}

	if _, err := kreuzberg.EmbedTextsAsync(inputs, cfg); err != nil {
		t.Fatalf("call failed: %v", err)
	}
}

31
e2e/go/embed_extra_test.go generated Normal file
View File

@@ -0,0 +1,31 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// E2e tests for category: embed_extra
package e2e_test
import (
"encoding/json"
"testing"
kreuzberg "github.com/kreuzberg-dev/kreuzberg/v5"
)
// Test_EmbedTextsBatch passes the texts "hello" and "world" to
// kreuzberg.EmbedTexts with a config selecting the "balanced" preset model
// and requires the call to return without error. The result is discarded.
func Test_EmbedTextsBatch(t *testing.T) {
	const rawConfig = `{"model":{"name":"balanced","type":"preset"}}`

	var inputs []string
	if decodeErr := json.Unmarshal([]byte(`["hello","world"]`), &inputs); decodeErr != nil {
		t.Fatalf("config parse failed: %v", decodeErr)
	}

	var cfg kreuzberg.EmbeddingConfig
	if parseErr := json.Unmarshal([]byte(rawConfig), &cfg); parseErr != nil {
		t.Fatalf("config parse failed: %v", parseErr)
	}

	if _, err := kreuzberg.EmbedTexts(inputs, cfg); err != nil {
		t.Fatalf("call failed: %v", err)
	}
}

View File

@@ -0,0 +1,27 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// E2e tests for category: embedding_backend_management
package e2e_test
import (
"testing"
kreuzberg "github.com/kreuzberg-dev/kreuzberg/v5"
)
// Test_EmbeddingBackendsClear invokes kreuzberg.ClearEmbeddingBackends.
// NOTE(review): the return value is discarded and nothing is asserted, so the
// "verify list is empty" step from the fixture description is not exercised
// by this body — confirm whether that is intentional.
func Test_EmbeddingBackendsClear(t *testing.T) {
// Clear all embedding backends and verify list is empty
// The call's result is assigned to the blank identifier, i.e. ignored.
_ = kreuzberg.ClearEmbeddingBackends()
}
// Test_EmbeddingBackendsList requires kreuzberg.ListEmbeddingBackends to
// return without error. The returned list is not inspected.
func Test_EmbeddingBackendsList(t *testing.T) {
	if _, err := kreuzberg.ListEmbeddingBackends(); err != nil {
		t.Fatalf("call failed: %v", err)
	}
}

62
e2e/go/embeddings_test.go generated Normal file
View File

@@ -0,0 +1,62 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// E2e tests for category: embeddings
package e2e_test
import (
"encoding/json"
"testing"
"github.com/stretchr/testify/assert"
kreuzberg "github.com/kreuzberg-dev/kreuzberg/v5"
)
// Test_EmbedTextsDifferentPreset passes two texts to kreuzberg.EmbedTexts
// with a config selecting the "multilingual" preset model. The call must
// succeed and return at least two elements.
func Test_EmbedTextsDifferentPreset(t *testing.T) {
	const rawConfig = `{"model":{"name":"multilingual","type":"preset"}}`

	var inputs []string
	if decodeErr := json.Unmarshal([]byte(`["Hello world","test"]`), &inputs); decodeErr != nil {
		t.Fatalf("config parse failed: %v", decodeErr)
	}

	var cfg kreuzberg.EmbeddingConfig
	if parseErr := json.Unmarshal([]byte(rawConfig), &cfg); parseErr != nil {
		t.Fatalf("config parse failed: %v", parseErr)
	}

	embeddings, err := kreuzberg.EmbedTexts(inputs, cfg)
	if err != nil {
		t.Fatalf("call failed: %v", err)
	}
	assert.GreaterOrEqual(t, len(embeddings), 2, "expected at least 2 elements")
}
// Test_GetEmbeddingPresetKnown calls kreuzberg.GetEmbeddingPreset with the
// preset name "balanced". The returned value is discarded, so the test only
// shows that the call completes.
func Test_GetEmbeddingPresetKnown(t *testing.T) {
	const presetName = "balanced"

	_ = kreuzberg.GetEmbeddingPreset(presetName)
}
// Test_GetEmbeddingPresetNominal calls kreuzberg.GetEmbeddingPreset with the
// preset name "balanced". The returned value is discarded, so the test only
// shows that the call completes.
func Test_GetEmbeddingPresetNominal(t *testing.T) {
	const presetName = "balanced"

	_ = kreuzberg.GetEmbeddingPreset(presetName)
}
// Test_GetEmbeddingPresetUnknown requires kreuzberg.GetEmbeddingPreset to
// return nil for the preset name "nonexistent-xyz".
func Test_GetEmbeddingPresetUnknown(t *testing.T) {
	if preset := kreuzberg.GetEmbeddingPreset("nonexistent-xyz"); preset != nil {
		t.Errorf("expected empty value, got %v", preset)
	}
}
// Test_ListEmbeddingPresetsSanity requires kreuzberg.ListEmbeddingPresets to
// return a non-empty collection.
func Test_ListEmbeddingPresetsSanity(t *testing.T) {
	presets := kreuzberg.ListEmbeddingPresets()
	if len(presets) > 0 {
		return
	}
	t.Errorf("expected non-empty value")
}

80
e2e/go/error_test.go generated Normal file
View File

@@ -0,0 +1,80 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// E2e tests for category: error
package e2e_test
import (
"encoding/json"
"os"
"testing"
kreuzberg "github.com/kreuzberg-dev/kreuzberg/v5"
)
// Test_ErrorEmptyBytes passes the contents of the text/empty.txt fixture to
// kreuzberg.ExtractBytesSync as text/plain and requires the call to return
// without error.
func Test_ErrorEmptyBytes(t *testing.T) {
	data, readErr := os.ReadFile("text/empty.txt")
	if readErr != nil {
		t.Fatalf("read fixture text/empty.txt: %v", readErr)
	}

	if _, err := kreuzberg.ExtractBytesSync(data, "text/plain", kreuzberg.ExtractionConfig{}); err != nil {
		t.Fatalf("call failed: %v", err)
	}
}
// Test_ErrorEmptyMime passes the text/plain.txt fixture to
// kreuzberg.ExtractBytesSync with an empty MIME string and requires the call
// to return an error.
func Test_ErrorEmptyMime(t *testing.T) {
	data, readErr := os.ReadFile("text/plain.txt")
	if readErr != nil {
		t.Fatalf("read fixture text/plain.txt: %v", readErr)
	}

	if _, err := kreuzberg.ExtractBytesSync(data, "", kreuzberg.ExtractionConfig{}); err == nil {
		t.Errorf("expected an error, but call succeeded")
	}
}
// Test_ErrorExtractBytesConflictingOcr passes the text/fake_text.txt fixture
// to kreuzberg.ExtractBytesSync with a config that sets both disable_ocr and
// force_ocr to true, and requires the call to return an error.
func Test_ErrorExtractBytesConflictingOcr(t *testing.T) {
	const rawConfig = `{"disable_ocr":true,"force_ocr":true}`

	data, readErr := os.ReadFile("text/fake_text.txt")
	if readErr != nil {
		t.Fatalf("read fixture text/fake_text.txt: %v", readErr)
	}

	var cfg kreuzberg.ExtractionConfig
	if parseErr := json.Unmarshal([]byte(rawConfig), &cfg); parseErr != nil {
		t.Fatalf("config parse failed: %v", parseErr)
	}

	if _, err := kreuzberg.ExtractBytesSync(data, "text/plain", cfg); err == nil {
		t.Errorf("expected an error, but call succeeded")
	}
}
// Test_ErrorInvalidMimeFormat passes the text/plain.txt fixture to
// kreuzberg.ExtractBytesSync with the string "not-a-mime" as MIME type and
// requires the call to return an error.
func Test_ErrorInvalidMimeFormat(t *testing.T) {
	const malformedMime = "not-a-mime"

	data, readErr := os.ReadFile("text/plain.txt")
	if readErr != nil {
		t.Fatalf("read fixture text/plain.txt: %v", readErr)
	}

	if _, err := kreuzberg.ExtractBytesSync(data, malformedMime, kreuzberg.ExtractionConfig{}); err == nil {
		t.Errorf("expected an error, but call succeeded")
	}
}
// Test_ErrorUnsupportedMime expects ExtractBytesSync to return an error for
// the MIME type "application/x-nonexistent".
func Test_ErrorUnsupportedMime(t *testing.T) {
	payload, readErr := os.ReadFile(`text/plain.txt`)
	if readErr != nil {
		t.Fatalf("read fixture text/plain.txt: %v", readErr)
	}

	_, extractErr := kreuzberg.ExtractBytesSync(payload, `application/x-nonexistent`, kreuzberg.ExtractionConfig{})
	if extractErr != nil {
		return // the expected outcome
	}
	t.Errorf("expected an error, but call succeeded")
}

86
e2e/go/format_specific_test.go generated Normal file
View File

@@ -0,0 +1,86 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// E2e tests for category: format_specific
package e2e_test
import (
"os"
"strings"
"testing"
"github.com/stretchr/testify/assert"
kreuzberg "github.com/kreuzberg-dev/kreuzberg/v5"
)
// Test_FormatDocxStandalone extracts docx/fake.docx through ExtractBytesSync
// and requires at least 20 units of content (as measured by len).
func Test_FormatDocxStandalone(t *testing.T) {
	docx, readErr := os.ReadFile(`docx/fake.docx`)
	if readErr != nil {
		t.Fatalf("read fixture docx/fake.docx: %v", readErr)
	}

	extracted, extractErr := kreuzberg.ExtractBytesSync(
		docx,
		`application/vnd.openxmlformats-officedocument.wordprocessingml.document`,
		kreuzberg.ExtractionConfig{},
	)
	if extractErr != nil {
		t.Fatalf("call failed: %v", extractErr)
	}

	assert.GreaterOrEqual(t, len(extracted.Content), 20, "expected length >= 20")
}
// Test_FormatHwpxStandalone extracts hwpx/simple.hwpx through
// ExtractBytesSync and checks both the minimum content length and the
// presence of the marker text "Hello from HWPX".
func Test_FormatHwpxStandalone(t *testing.T) {
	hwpx, readErr := os.ReadFile(`hwpx/simple.hwpx`)
	if readErr != nil {
		t.Fatalf("read fixture hwpx/simple.hwpx: %v", readErr)
	}

	extracted, extractErr := kreuzberg.ExtractBytesSync(hwpx, `application/haansofthwpx`, kreuzberg.ExtractionConfig{})
	if extractErr != nil {
		t.Fatalf("call failed: %v", extractErr)
	}

	assert.GreaterOrEqual(t, len(extracted.Content), 20, "expected length >= 20")

	hasMarker := strings.Contains(string(extracted.Content), `Hello from HWPX`)
	if !hasMarker {
		t.Errorf("expected to contain %s, got %v", `Hello from HWPX`, extracted.Content)
	}
}
// Test_FormatPdfText extracts pdf/fake_memo.pdf through ExtractBytesSync,
// requires at least 50 units of content, and accepts the result if it
// contains either "Mallori" or "May".
func Test_FormatPdfText(t *testing.T) {
	pdf, readErr := os.ReadFile(`pdf/fake_memo.pdf`)
	if readErr != nil {
		t.Fatalf("read fixture pdf/fake_memo.pdf: %v", readErr)
	}

	extracted, extractErr := kreuzberg.ExtractBytesSync(pdf, `application/pdf`, kreuzberg.ExtractionConfig{})
	if extractErr != nil {
		t.Fatalf("call failed: %v", extractErr)
	}

	assert.GreaterOrEqual(t, len(extracted.Content), 50, "expected length >= 50")

	// Any single candidate is enough to pass.
	text := string(extracted.Content)
	matched := strings.Contains(text, `Mallori`) || strings.Contains(text, `May`)
	if !matched {
		t.Errorf("expected to contain at least one of the specified values")
	}
}
// Test_FormatPptx checks that ExtractFileSync processes pptx/simple.pptx with
// an explicit PPTX MIME type without returning an error. The extraction
// result itself is not inspected.
func Test_FormatPptx(t *testing.T) {
	mimeType := `application/vnd.openxmlformats-officedocument.presentationml.presentation`

	if _, extractErr := kreuzberg.ExtractFileSync(`pptx/simple.pptx`, &mimeType, kreuzberg.ExtractionConfig{}); extractErr != nil {
		t.Fatalf("call failed: %v", extractErr)
	}
}
// Test_FormatXlsx checks that ExtractFileSync processes
// xlsx/stanley_cups.xlsx with an explicit XLSX MIME type without returning an
// error. The extraction result itself is not inspected.
func Test_FormatXlsx(t *testing.T) {
	mimeType := `application/vnd.openxmlformats-officedocument.spreadsheetml.sheet`

	if _, extractErr := kreuzberg.ExtractFileSync(`xlsx/stanley_cups.xlsx`, &mimeType, kreuzberg.ExtractionConfig{}); extractErr != nil {
		t.Fatalf("call failed: %v", extractErr)
	}
}

10
e2e/go/go.mod generated Normal file
View File

@@ -0,0 +1,10 @@
module e2e_go
go 1.26
require (
github.com/kreuzberg-dev/kreuzberg/v5 v5.0.0-rc.3
github.com/stretchr/testify v1.11.1
)
replace github.com/kreuzberg-dev/kreuzberg/v5 => ../../packages/go/v5

13
e2e/go/helpers_test.go generated Normal file
View File

@@ -0,0 +1,13 @@
package e2e_test
import "encoding/json"
// jsonString renders value as compact JSON text via json.Marshal.
//
// Tests use it for slice-valued results so that substring assertions run
// against a structured encoding rather than fmt.Sprint output. If the value
// cannot be marshalled, the empty string is returned and the error is dropped.
func jsonString(value any) string {
	if raw, marshalErr := json.Marshal(value); marshalErr == nil {
		return string(raw)
	}
	return ""
}

87
e2e/go/main_test.go generated Normal file
View File

@@ -0,0 +1,87 @@
package e2e_test
import (
"bufio"
"encoding/json"
"io"
"os"
"os/exec"
"path/filepath"
"runtime"
"strings"
"testing"
)
// TestMain prepares the process-wide environment for the e2e suite, runs it,
// and exits with the suite's status code.
//
// It works in three steps:
//  1. If a test_documents directory exists two levels above this file, chdir
//     into it so relative fixture paths such as "pdf/fake_memo.pdf" resolve.
//  2. If MOCK_SERVER_URL is already set, run the tests as-is.
//  3. Otherwise, if the mock-server binary exists, start it, export the
//     MOCK_SERVER_URL / MOCK_SERVERS values it prints on stdout (plus one
//     MOCK_SERVER_<FIXTURE_ID> variable per MOCK_SERVERS entry), run the
//     tests, then shut the server down. Without the binary the tests run
//     with no mock server.
//
// Failures to set up the child process panic. Problems reading or parsing the
// server's startup output are reported on stderr but do not abort the run.
func TestMain(m *testing.M) {
	_, filename, _, _ := runtime.Caller(0)
	dir := filepath.Dir(filename)

	// Change to the configured test-documents directory (if it exists) so that fixture
	// file paths like "pdf/fake_memo.pdf" resolve correctly when running go test
	// from e2e/go/. Repos without document fixtures (web crawler, network clients) do
	// not ship this directory — skip chdir and run from e2e/go/.
	testDocumentsDir := filepath.Join(dir, "..", "..", "test_documents")
	if info, err := os.Stat(testDocumentsDir); err == nil && info.IsDir() {
		if err := os.Chdir(testDocumentsDir); err != nil {
			panic(err)
		}
	}

	// If MOCK_SERVER_URL is already set, a parent process (e.g. `alef test-apps run`)
	// started a shared mock-server and exported its URL (plus any MOCK_SERVERS /
	// MOCK_SERVER_<FIXTURE_ID> vars). Use it as-is and do NOT spawn our own server.
	if os.Getenv("MOCK_SERVER_URL") != "" {
		os.Exit(m.Run())
	}

	// Start the mock HTTP server if it exists; otherwise run without one.
	mockServerBin := filepath.Join(dir, "..", "rust", "target", "release", "mock-server")
	if _, err := os.Stat(mockServerBin); err != nil {
		os.Exit(m.Run())
	}

	fixturesDir := filepath.Join(dir, "..", "..", "fixtures")
	cmd := exec.Command(mockServerBin, fixturesDir)
	cmd.Stderr = os.Stderr
	stdout, err := cmd.StdoutPipe()
	if err != nil {
		panic(err)
	}
	// Keep a writable pipe to the mock-server's stdin so the
	// server does not see EOF and exit immediately. The mock-server
	// blocks reading stdin until the parent closes the pipe.
	stdin, err := cmd.StdinPipe()
	if err != nil {
		panic(err)
	}
	if err := cmd.Start(); err != nil {
		panic(err)
	}

	scanner := bufio.NewScanner(stdout)
	// The MOCK_SERVERS line carries a JSON map with one entry per fixture and
	// can outgrow the default 64 KiB token limit, so allow lines up to 1 MiB.
	scanner.Buffer(make([]byte, 0, bufio.MaxScanTokenSize), 1<<20)
	for scanner.Scan() {
		line := scanner.Text()
		if strings.HasPrefix(line, "MOCK_SERVER_URL=") {
			_ = os.Setenv("MOCK_SERVER_URL", strings.TrimPrefix(line, "MOCK_SERVER_URL="))
		} else if strings.HasPrefix(line, "MOCK_SERVERS=") {
			serversJSON := strings.TrimPrefix(line, "MOCK_SERVERS=")
			_ = os.Setenv("MOCK_SERVERS", serversJSON)
			// Parse the JSON map and set per-fixture env vars (MOCK_SERVER_<FIXTURE_ID>).
			var perFixture map[string]string
			if err := json.Unmarshal([]byte(serversJSON), &perFixture); err != nil {
				// Best effort: MOCK_SERVERS stays exported verbatim, only the
				// per-fixture variables are missing. Say so instead of hiding it.
				_, _ = io.WriteString(os.Stderr, "mock-server: cannot parse MOCK_SERVERS value: "+err.Error()+"\n")
			} else {
				for fixtureID, fixtureURL := range perFixture {
					_ = os.Setenv("MOCK_SERVER_"+strings.ToUpper(fixtureID), fixtureURL)
				}
			}
			break
		} else if os.Getenv("MOCK_SERVER_URL") != "" {
			// The URL was exported by an earlier line and this line is not
			// MOCK_SERVERS, so stop reading startup output.
			break
		}
	}
	// Scan returns false on read errors and over-long lines as well as on EOF;
	// surface those so missing env vars are not a mystery later.
	if err := scanner.Err(); err != nil {
		_, _ = io.WriteString(os.Stderr, "mock-server: reading startup output: "+err.Error()+"\n")
	}

	// Keep draining stdout so the server never blocks on a full pipe.
	go func() { _, _ = io.Copy(io.Discard, stdout) }()

	code := m.Run()

	// Shutdown: closing stdin lets the server see EOF, the interrupt is a
	// second nudge, and Wait reaps the process. Errors here are ignored
	// because the exit code is already decided.
	_ = stdin.Close()
	_ = cmd.Process.Signal(os.Interrupt)
	_ = cmd.Wait()
	os.Exit(code)
}

58
e2e/go/mime_utilities_test.go generated Normal file
View File

@@ -0,0 +1,58 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// E2e tests for category: mime_utilities
package e2e_test
import (
"os"
"strings"
"testing"
kreuzberg "github.com/kreuzberg-dev/kreuzberg/v5"
)
// Test_MimeDetectBytes feeds the bytes of pdf/fake_memo.pdf to
// DetectMimeTypeFromBytes and expects the detected type to contain "pdf".
func Test_MimeDetectBytes(t *testing.T) {
	payload, readErr := os.ReadFile(`pdf/fake_memo.pdf`)
	if readErr != nil {
		t.Fatalf("read fixture pdf/fake_memo.pdf: %v", readErr)
	}

	detected, detectErr := kreuzberg.DetectMimeTypeFromBytes(payload)
	if detectErr != nil {
		t.Fatalf("call failed: %v", detectErr)
	}

	if mentionsPdf := strings.Contains(string(detected), `pdf`); !mentionsPdf {
		t.Errorf("expected to contain %s, got %v", `pdf`, detected)
	}
}
// Test_MimeDetectImage feeds the bytes of images/test_hello_world.png to
// DetectMimeTypeFromBytes and expects the detected type to contain "png".
func Test_MimeDetectImage(t *testing.T) {
	payload, readErr := os.ReadFile(`images/test_hello_world.png`)
	if readErr != nil {
		t.Fatalf("read fixture images/test_hello_world.png: %v", readErr)
	}

	detected, detectErr := kreuzberg.DetectMimeTypeFromBytes(payload)
	if detectErr != nil {
		t.Fatalf("call failed: %v", detectErr)
	}

	if mentionsPng := strings.Contains(string(detected), `png`); !mentionsPng {
		t.Errorf("expected to contain %s, got %v", `png`, detected)
	}
}
// Test_MimeGetExtensions asks GetExtensionsForMime for the extensions of
// application/pdf and expects the JSON encoding of the result to contain
// "pdf".
func Test_MimeGetExtensions(t *testing.T) {
	extensions, lookupErr := kreuzberg.GetExtensionsForMime(`application/pdf`)
	if lookupErr != nil {
		t.Fatalf("call failed: %v", lookupErr)
	}

	// The substring check runs against the JSON form of the result.
	encoded := jsonString(extensions)
	if !strings.Contains(encoded, `pdf`) {
		t.Errorf("expected to contain %s, got %v", `pdf`, extensions)
	}
}

32
e2e/go/ocr_backend_management_test.go generated Normal file
View File

@@ -0,0 +1,32 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// E2e tests for category: ocr_backend_management
package e2e_test
import (
"testing"
kreuzberg "github.com/kreuzberg-dev/kreuzberg/v5"
)
// Test_OcrBackendsClear calls ClearOcrBackends and deliberately discards
// whatever it returns. The fixture is described as "clear all OCR backends
// and verify list is empty", but no assertion on the backend list is
// generated here; the test only fails if the call panics.
func Test_OcrBackendsClear(t *testing.T) {
	_ = kreuzberg.ClearOcrBackends()
}
// Test_OcrBackendsList checks that ListOcrBackends returns without an error.
// The returned list is not inspected.
func Test_OcrBackendsList(t *testing.T) {
	if _, listErr := kreuzberg.ListOcrBackends(); listErr != nil {
		t.Fatalf("call failed: %v", listErr)
	}
}
// Test_OcrBackendsUnregister calls UnregisterOcrBackend with a name that is
// not expected to be registered and discards the result, so the test only
// fails if the call panics.
func Test_OcrBackendsUnregister(t *testing.T) {
	_ = kreuzberg.UnregisterOcrBackend(`nonexistent-backend-xyz`)
}

43
e2e/go/pdf_test.go generated Normal file
View File

@@ -0,0 +1,43 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// E2e tests for category: pdf
package e2e_test
import (
"os"
"testing"
"github.com/stretchr/testify/assert"
kreuzberg "github.com/kreuzberg-dev/kreuzberg/v5"
)
// Test_RenderPdfPageFirst renders page index 0 of pdf/fake_memo.pdf with
// RenderPdfPageToPng and requires the output to have length >= 100.
func Test_RenderPdfPageFirst(t *testing.T) {
	pdfData, readErr := os.ReadFile(`pdf/fake_memo.pdf`)
	if readErr != nil {
		t.Fatalf("read fixture pdf/fake_memo.pdf: %v", readErr)
	}

	// The two trailing optional arguments are left nil.
	png, renderErr := kreuzberg.RenderPdfPageToPng(pdfData, 0, nil, nil)
	if renderErr != nil {
		t.Fatalf("call failed: %v", renderErr)
	}

	assert.GreaterOrEqual(t, len(png), 100, "expected length >= 100")
}
// Test_RenderPdfPageOutOfRange expects RenderPdfPageToPng to return an error
// when asked for page index 999 of pdf/fake_memo.pdf.
func Test_RenderPdfPageOutOfRange(t *testing.T) {
	pdfData, readErr := os.ReadFile(`pdf/fake_memo.pdf`)
	if readErr != nil {
		t.Fatalf("read fixture pdf/fake_memo.pdf: %v", readErr)
	}

	_, renderErr := kreuzberg.RenderPdfPageToPng(pdfData, 999, nil, nil)
	if renderErr != nil {
		return // the expected outcome
	}
	t.Errorf("expected an error, but call succeeded")
}

148
e2e/go/plugin_api_test.go generated Normal file
View File

@@ -0,0 +1,148 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// E2e tests for category: plugin_api
package e2e_test
import (
"encoding/json"
"testing"
kreuzberg "github.com/kreuzberg-dev/kreuzberg/v5"
)
// testStub_register_document_extractor_trait_bridge is a do-nothing value
// handed to kreuzberg.RegisterDocumentExtractor. Every method ignores its
// arguments and returns the zero value of its result types.
type testStub_register_document_extractor_trait_bridge struct{}

// ExtractBytes returns no result and no error.
func (testStub_register_document_extractor_trait_bridge) ExtractBytes(_ []byte, _ string, _ kreuzberg.ExtractionConfig) (json.RawMessage, error) {
	return nil, nil
}

// ExtractFile returns no result and no error.
func (testStub_register_document_extractor_trait_bridge) ExtractFile(_, _ string, _ kreuzberg.ExtractionConfig) (json.RawMessage, error) {
	return nil, nil
}

// SupportedMimeTypes reports no MIME types.
func (testStub_register_document_extractor_trait_bridge) SupportedMimeTypes() []string {
	return nil
}

// Priority reports 0.
func (testStub_register_document_extractor_trait_bridge) Priority() int32 {
	return 0
}

// CanHandle always reports false.
func (testStub_register_document_extractor_trait_bridge) CanHandle(_, _ string) bool {
	return false
}

// Name reports an empty name.
func (testStub_register_document_extractor_trait_bridge) Name() string {
	return ""
}

// Version reports an empty version.
func (testStub_register_document_extractor_trait_bridge) Version() string {
	return ""
}

// Initialize does nothing and reports success.
func (testStub_register_document_extractor_trait_bridge) Initialize() error {
	return nil
}

// Shutdown does nothing and reports success.
func (testStub_register_document_extractor_trait_bridge) Shutdown() error {
	return nil
}

// Description reports an empty description.
func (testStub_register_document_extractor_trait_bridge) Description() string {
	return ""
}

// Author reports an empty author.
func (testStub_register_document_extractor_trait_bridge) Author() string {
	return ""
}
// Test_RegisterDocumentExtractorTraitBridge passes a no-op stub to
// RegisterDocumentExtractor. The call's result is deliberately discarded, so
// the test only fails if the call panics (or fails to compile against the
// expected method set).
func Test_RegisterDocumentExtractorTraitBridge(t *testing.T) {
	_ = kreuzberg.RegisterDocumentExtractor(testStub_register_document_extractor_trait_bridge{})
}
// testStub_register_embedding_backend_trait_bridge is a do-nothing value
// handed to kreuzberg.RegisterEmbeddingBackend. Every method ignores its
// arguments and returns the zero value of its result types.
type testStub_register_embedding_backend_trait_bridge struct{}

// Dimensions reports 0.
func (testStub_register_embedding_backend_trait_bridge) Dimensions() uint {
	return 0
}

// Embed returns no vectors and no error.
func (testStub_register_embedding_backend_trait_bridge) Embed(_ []string) ([][]float32, error) {
	return nil, nil
}

// Name reports an empty name.
func (testStub_register_embedding_backend_trait_bridge) Name() string {
	return ""
}

// Version reports an empty version.
func (testStub_register_embedding_backend_trait_bridge) Version() string {
	return ""
}

// Initialize does nothing and reports success.
func (testStub_register_embedding_backend_trait_bridge) Initialize() error {
	return nil
}

// Shutdown does nothing and reports success.
func (testStub_register_embedding_backend_trait_bridge) Shutdown() error {
	return nil
}

// Description reports an empty description.
func (testStub_register_embedding_backend_trait_bridge) Description() string {
	return ""
}

// Author reports an empty author.
func (testStub_register_embedding_backend_trait_bridge) Author() string {
	return ""
}
// Test_RegisterEmbeddingBackendTraitBridge passes a no-op stub to
// RegisterEmbeddingBackend. The call's result is deliberately discarded, so
// the test only fails if the call panics.
func Test_RegisterEmbeddingBackendTraitBridge(t *testing.T) {
	_ = kreuzberg.RegisterEmbeddingBackend(testStub_register_embedding_backend_trait_bridge{})
}
// testStub_register_ocr_backend_trait_bridge is a do-nothing value handed to
// kreuzberg.RegisterOcrBackend. Every method ignores its arguments and
// returns the zero value of its result types.
type testStub_register_ocr_backend_trait_bridge struct{}

// ProcessImage returns an empty ExtractionResult and no error.
func (testStub_register_ocr_backend_trait_bridge) ProcessImage(_ []byte, _ kreuzberg.OcrConfig) (kreuzberg.ExtractionResult, error) {
	return kreuzberg.ExtractionResult{}, nil
}

// ProcessImageFile returns an empty ExtractionResult and no error.
func (testStub_register_ocr_backend_trait_bridge) ProcessImageFile(_ string, _ kreuzberg.OcrConfig) (kreuzberg.ExtractionResult, error) {
	return kreuzberg.ExtractionResult{}, nil
}

// SupportsLanguage always reports false.
func (testStub_register_ocr_backend_trait_bridge) SupportsLanguage(_ string) bool {
	return false
}

// BackendType reports the empty backend type.
func (testStub_register_ocr_backend_trait_bridge) BackendType() kreuzberg.OcrBackendType {
	return ""
}

// SupportedLanguages reports no languages.
func (testStub_register_ocr_backend_trait_bridge) SupportedLanguages() []string {
	return nil
}

// SupportsTableDetection always reports false.
func (testStub_register_ocr_backend_trait_bridge) SupportsTableDetection() bool {
	return false
}

// SupportsDocumentProcessing always reports false.
func (testStub_register_ocr_backend_trait_bridge) SupportsDocumentProcessing() bool {
	return false
}

// ProcessDocument returns an empty ExtractionResult and no error.
func (testStub_register_ocr_backend_trait_bridge) ProcessDocument(_ string, _ kreuzberg.OcrConfig) (kreuzberg.ExtractionResult, error) {
	return kreuzberg.ExtractionResult{}, nil
}

// Name reports an empty name.
func (testStub_register_ocr_backend_trait_bridge) Name() string {
	return ""
}

// Version reports an empty version.
func (testStub_register_ocr_backend_trait_bridge) Version() string {
	return ""
}

// Initialize does nothing and reports success.
func (testStub_register_ocr_backend_trait_bridge) Initialize() error {
	return nil
}

// Shutdown does nothing and reports success.
func (testStub_register_ocr_backend_trait_bridge) Shutdown() error {
	return nil
}

// Description reports an empty description.
func (testStub_register_ocr_backend_trait_bridge) Description() string {
	return ""
}

// Author reports an empty author.
func (testStub_register_ocr_backend_trait_bridge) Author() string {
	return ""
}
// Test_RegisterOcrBackendTraitBridge passes a no-op stub to
// RegisterOcrBackend. The call's result is deliberately discarded, so the
// test only fails if the call panics.
func Test_RegisterOcrBackendTraitBridge(t *testing.T) {
	_ = kreuzberg.RegisterOcrBackend(testStub_register_ocr_backend_trait_bridge{})
}
// testStub_register_post_processor_trait_bridge is a do-nothing value handed
// to kreuzberg.RegisterPostProcessor. Every method ignores its arguments and
// returns the zero value of its result types.
type testStub_register_post_processor_trait_bridge struct{}

// Process does nothing and reports success.
func (testStub_register_post_processor_trait_bridge) Process(_ kreuzberg.ExtractionResult, _ kreuzberg.ExtractionConfig) error {
	return nil
}

// ProcessingStage reports the empty stage.
func (testStub_register_post_processor_trait_bridge) ProcessingStage() kreuzberg.ProcessingStage {
	return ""
}

// ShouldProcess always reports false.
func (testStub_register_post_processor_trait_bridge) ShouldProcess(_ kreuzberg.ExtractionResult, _ kreuzberg.ExtractionConfig) bool {
	return false
}

// EstimatedDurationMs reports 0.
func (testStub_register_post_processor_trait_bridge) EstimatedDurationMs(_ kreuzberg.ExtractionResult) uint64 {
	return 0
}

// Priority reports 0.
func (testStub_register_post_processor_trait_bridge) Priority() int32 {
	return 0
}

// Name reports an empty name.
func (testStub_register_post_processor_trait_bridge) Name() string {
	return ""
}

// Version reports an empty version.
func (testStub_register_post_processor_trait_bridge) Version() string {
	return ""
}

// Initialize does nothing and reports success.
func (testStub_register_post_processor_trait_bridge) Initialize() error {
	return nil
}

// Shutdown does nothing and reports success.
func (testStub_register_post_processor_trait_bridge) Shutdown() error {
	return nil
}

// Description reports an empty description.
func (testStub_register_post_processor_trait_bridge) Description() string {
	return ""
}

// Author reports an empty author.
func (testStub_register_post_processor_trait_bridge) Author() string {
	return ""
}
// Test_RegisterPostProcessorTraitBridge passes a no-op stub to
// RegisterPostProcessor. The call's result is deliberately discarded, so the
// test only fails if the call panics.
func Test_RegisterPostProcessorTraitBridge(t *testing.T) {
	_ = kreuzberg.RegisterPostProcessor(testStub_register_post_processor_trait_bridge{})
}
// testStub_register_renderer_trait_bridge is a do-nothing value handed to
// kreuzberg.RegisterRenderer. Every method ignores its arguments and returns
// the zero value of its result types.
type testStub_register_renderer_trait_bridge struct{}

// Render returns an empty string and no error.
func (testStub_register_renderer_trait_bridge) Render(_ json.RawMessage) (string, error) {
	return "", nil
}

// Name reports an empty name.
func (testStub_register_renderer_trait_bridge) Name() string {
	return ""
}

// Version reports an empty version.
func (testStub_register_renderer_trait_bridge) Version() string {
	return ""
}

// Initialize does nothing and reports success.
func (testStub_register_renderer_trait_bridge) Initialize() error {
	return nil
}

// Shutdown does nothing and reports success.
func (testStub_register_renderer_trait_bridge) Shutdown() error {
	return nil
}

// Description reports an empty description.
func (testStub_register_renderer_trait_bridge) Description() string {
	return ""
}

// Author reports an empty author.
func (testStub_register_renderer_trait_bridge) Author() string {
	return ""
}
// Test_RegisterRendererTraitBridge passes a no-op stub to RegisterRenderer.
// The call's result is deliberately discarded, so the test only fails if the
// call panics.
func Test_RegisterRendererTraitBridge(t *testing.T) {
	_ = kreuzberg.RegisterRenderer(testStub_register_renderer_trait_bridge{})
}
// testStub_register_validator_trait_bridge is a do-nothing value handed to
// kreuzberg.RegisterValidator. Every method ignores its arguments and returns
// the zero value of its result types.
type testStub_register_validator_trait_bridge struct{}

// Validate does nothing and reports success.
func (testStub_register_validator_trait_bridge) Validate(_ kreuzberg.ExtractionResult, _ kreuzberg.ExtractionConfig) error {
	return nil
}

// ShouldValidate always reports false.
func (testStub_register_validator_trait_bridge) ShouldValidate(_ kreuzberg.ExtractionResult, _ kreuzberg.ExtractionConfig) bool {
	return false
}

// Priority reports 0.
func (testStub_register_validator_trait_bridge) Priority() int32 {
	return 0
}

// Name reports an empty name.
func (testStub_register_validator_trait_bridge) Name() string {
	return ""
}

// Version reports an empty version.
func (testStub_register_validator_trait_bridge) Version() string {
	return ""
}

// Initialize does nothing and reports success.
func (testStub_register_validator_trait_bridge) Initialize() error {
	return nil
}

// Shutdown does nothing and reports success.
func (testStub_register_validator_trait_bridge) Shutdown() error {
	return nil
}

// Description reports an empty description.
func (testStub_register_validator_trait_bridge) Description() string {
	return ""
}

// Author reports an empty author.
func (testStub_register_validator_trait_bridge) Author() string {
	return ""
}
// Test_RegisterValidatorTraitBridge passes a no-op stub to RegisterValidator.
// The call's result is deliberately discarded, so the test only fails if the
// call panics.
func Test_RegisterValidatorTraitBridge(t *testing.T) {
	_ = kreuzberg.RegisterValidator(testStub_register_validator_trait_bridge{})
}
// Test_UnregisterDocumentExtractorAfterRegister calls
// UnregisterDocumentExtractor for the name "test-extractor" and discards the
// result. Nothing in this function registers that name first, and the outcome
// of the call is not asserted.
func Test_UnregisterDocumentExtractorAfterRegister(t *testing.T) {
	_ = kreuzberg.UnregisterDocumentExtractor(`test-extractor`)
}
// Test_UnregisterEmbeddingBackendAfterRegister calls
// UnregisterEmbeddingBackend for the name "test-embedding-backend" and
// discards the result. Nothing in this function registers that name first,
// and the outcome of the call is not asserted.
func Test_UnregisterEmbeddingBackendAfterRegister(t *testing.T) {
	_ = kreuzberg.UnregisterEmbeddingBackend(`test-embedding-backend`)
}
// Test_UnregisterPostProcessorAfterRegister calls UnregisterPostProcessor for
// the name "test-processor" and discards the result. Nothing in this function
// registers that name first, and the outcome of the call is not asserted.
func Test_UnregisterPostProcessorAfterRegister(t *testing.T) {
	_ = kreuzberg.UnregisterPostProcessor(`test-processor`)
}
// Test_UnregisterRendererAfterRegister calls UnregisterRenderer for the name
// "test-renderer" and discards the result. Nothing in this function registers
// that name first, and the outcome of the call is not asserted.
func Test_UnregisterRendererAfterRegister(t *testing.T) {
	_ = kreuzberg.UnregisterRenderer(`test-renderer`)
}
// Test_UnregisterValidatorAfterRegister calls UnregisterValidator for the
// name "test-validator" and discards the result. Nothing in this function
// registers that name first, and the outcome of the call is not asserted.
func Test_UnregisterValidatorAfterRegister(t *testing.T) {
	_ = kreuzberg.UnregisterValidator(`test-validator`)
}

27
e2e/go/post_processor_management_test.go generated Normal file
View File

@@ -0,0 +1,27 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// E2e tests for category: post_processor_management
package e2e_test
import (
"testing"
kreuzberg "github.com/kreuzberg-dev/kreuzberg/v5"
)
// Test_PostProcessorsClear calls ClearPostProcessors and deliberately
// discards whatever it returns. The fixture is described as "clear all
// post-processors and verify list is empty", but no assertion on the list is
// generated here; the test only fails if the call panics.
func Test_PostProcessorsClear(t *testing.T) {
	_ = kreuzberg.ClearPostProcessors()
}
// Test_PostProcessorsList checks that ListPostProcessors returns without an
// error. The returned list is not inspected.
func Test_PostProcessorsList(t *testing.T) {
	if _, listErr := kreuzberg.ListPostProcessors(); listErr != nil {
		t.Fatalf("call failed: %v", listErr)
	}
}

38
e2e/go/registry_operations_test.go generated Normal file
View File

@@ -0,0 +1,38 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// E2e tests for category: registry_operations
package e2e_test
import (
"testing"
kreuzberg "github.com/kreuzberg-dev/kreuzberg/v5"
)
// Test_ExtensionsDocx checks that GetExtensionsForMime succeeds for the DOCX
// MIME type. The returned extensions are not inspected.
func Test_ExtensionsDocx(t *testing.T) {
	const docxMime = `application/vnd.openxmlformats-officedocument.wordprocessingml.document`

	if _, lookupErr := kreuzberg.GetExtensionsForMime(docxMime); lookupErr != nil {
		t.Fatalf("call failed: %v", lookupErr)
	}
}
// Test_ExtensionsHtml checks that GetExtensionsForMime succeeds for
// text/html. The returned extensions are not inspected.
func Test_ExtensionsHtml(t *testing.T) {
	if _, lookupErr := kreuzberg.GetExtensionsForMime(`text/html`); lookupErr != nil {
		t.Fatalf("call failed: %v", lookupErr)
	}
}
// Test_ExtensionsPdf checks that GetExtensionsForMime succeeds for
// application/pdf. The returned extensions are not inspected.
func Test_ExtensionsPdf(t *testing.T) {
	if _, lookupErr := kreuzberg.GetExtensionsForMime(`application/pdf`); lookupErr != nil {
		t.Fatalf("call failed: %v", lookupErr)
	}
}

62
e2e/go/registry_test.go generated Normal file
View File

@@ -0,0 +1,62 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// E2e tests for category: registry
package e2e_test
import (
"testing"
kreuzberg "github.com/kreuzberg-dev/kreuzberg/v5"
)
// Test_ListDocumentExtractors checks that ListDocumentExtractors returns
// without an error. The returned list is not inspected.
func Test_ListDocumentExtractors(t *testing.T) {
	if _, listErr := kreuzberg.ListDocumentExtractors(); listErr != nil {
		t.Fatalf("call failed: %v", listErr)
	}
}
// Test_ListEmbeddingBackends checks that ListEmbeddingBackends returns
// without an error. The returned list is not inspected.
func Test_ListEmbeddingBackends(t *testing.T) {
	if _, listErr := kreuzberg.ListEmbeddingBackends(); listErr != nil {
		t.Fatalf("call failed: %v", listErr)
	}
}
// Test_ListOcrBackends checks that ListOcrBackends returns without an error.
// The returned list is not inspected.
func Test_ListOcrBackends(t *testing.T) {
	if _, listErr := kreuzberg.ListOcrBackends(); listErr != nil {
		t.Fatalf("call failed: %v", listErr)
	}
}
// Test_ListPostProcessors checks that ListPostProcessors returns without an
// error. The returned list is not inspected.
func Test_ListPostProcessors(t *testing.T) {
	if _, listErr := kreuzberg.ListPostProcessors(); listErr != nil {
		t.Fatalf("call failed: %v", listErr)
	}
}
// Test_ListRenderers checks that ListRenderers returns without an error. The
// returned list is not inspected.
func Test_ListRenderers(t *testing.T) {
	if _, listErr := kreuzberg.ListRenderers(); listErr != nil {
		t.Fatalf("call failed: %v", listErr)
	}
}
// Test_ListValidators checks that ListValidators returns without an error.
// The returned list is not inspected.
func Test_ListValidators(t *testing.T) {
	if _, listErr := kreuzberg.ListValidators(); listErr != nil {
		t.Fatalf("call failed: %v", listErr)
	}
}

27
e2e/go/renderer_management_test.go generated Normal file
View File

@@ -0,0 +1,27 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// E2e tests for category: renderer_management
package e2e_test
import (
"testing"
kreuzberg "github.com/kreuzberg-dev/kreuzberg/v5"
)
// Test_RenderersClear calls ClearRenderers and deliberately discards whatever
// it returns. The fixture is described as "clear all renderers and verify
// list is empty", but no assertion on the list is generated here; the test
// only fails if the call panics.
func Test_RenderersClear(t *testing.T) {
	_ = kreuzberg.ClearRenderers()
}
// Test_RenderersList checks that ListRenderers returns without an error. The
// returned list is not inspected.
func Test_RenderersList(t *testing.T) {
	if _, listErr := kreuzberg.ListRenderers(); listErr != nil {
		t.Fatalf("call failed: %v", listErr)
	}
}

196
e2e/go/smoke_test.go generated Normal file
View File

@@ -0,0 +1,196 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// E2e tests for category: smoke
package e2e_test
import (
"encoding/json"
"os"
"strings"
"testing"
"github.com/stretchr/testify/assert"
kreuzberg "github.com/kreuzberg-dev/kreuzberg/v5"
)
// Test_OcrImagePng extracts images/test_hello_world.png through ExtractBytes
// with a default ExtractionConfig. It expects the image/png MIME type,
// non-empty content, and at least one of "Hello", "World", "hello", "world"
// in the extracted text.
//
// The fixture description adds that in WASM this case exercises the
// Uint8Array bridge parameter and Promise await in the generated OcrBackend
// bridge.
func Test_OcrImagePng(t *testing.T) {
	image, readErr := os.ReadFile(`images/test_hello_world.png`)
	if readErr != nil {
		t.Fatalf("read fixture images/test_hello_world.png: %v", readErr)
	}

	extracted, extractErr := kreuzberg.ExtractBytes(image, `image/png`, kreuzberg.ExtractionConfig{})
	if extractErr != nil {
		t.Fatalf("call failed: %v", extractErr)
	}

	if gotMime := strings.TrimSpace(string(extracted.MimeType)); gotMime != `image/png` {
		t.Errorf("equals mismatch: got %v", extracted.MimeType)
	}
	assert.GreaterOrEqual(t, len(extracted.Content), 1, "expected length >= 1")

	// Any single candidate is enough to pass.
	text := string(extracted.Content)
	matched := false
	for _, candidate := range []string{`Hello`, `World`, `hello`, `world`} {
		if strings.Contains(text, candidate) {
			matched = true
			break
		}
	}
	if !matched {
		t.Errorf("expected to contain at least one of the specified values")
	}
}
// Test_SmokeDocxBasic extracts docx/fake.docx through ExtractFile with an
// explicit DOCX MIME type. It expects that MIME type back, content of length
// >= 20, and at least one of "Lorem", "ipsum", "document", "text" in the
// extracted text.
func Test_SmokeDocxBasic(t *testing.T) {
	mimeType := `application/vnd.openxmlformats-officedocument.wordprocessingml.document`

	extracted, extractErr := kreuzberg.ExtractFile(`docx/fake.docx`, &mimeType, kreuzberg.ExtractionConfig{})
	if extractErr != nil {
		t.Fatalf("call failed: %v", extractErr)
	}

	if gotMime := strings.TrimSpace(string(extracted.MimeType)); gotMime != `application/vnd.openxmlformats-officedocument.wordprocessingml.document` {
		t.Errorf("equals mismatch: got %v", extracted.MimeType)
	}
	assert.GreaterOrEqual(t, len(extracted.Content), 20, "expected length >= 20")

	// Any single candidate is enough to pass.
	text := string(extracted.Content)
	matched := false
	for _, candidate := range []string{`Lorem`, `ipsum`, `document`, `text`} {
		if strings.Contains(text, candidate) {
			matched = true
			break
		}
	}
	if !matched {
		t.Errorf("expected to contain at least one of the specified values")
	}
}
// Test_SmokeHtmlBasic extracts html/simple_table.html through ExtractFile
// with an explicit text/html MIME type. It expects that MIME type back,
// content of length >= 10, and at least one of "Sample Data Table",
// "Laptop", "Electronics", "Product" in the extracted text.
func Test_SmokeHtmlBasic(t *testing.T) {
	mimeType := `text/html`

	extracted, extractErr := kreuzberg.ExtractFile(`html/simple_table.html`, &mimeType, kreuzberg.ExtractionConfig{})
	if extractErr != nil {
		t.Fatalf("call failed: %v", extractErr)
	}

	if gotMime := strings.TrimSpace(string(extracted.MimeType)); gotMime != `text/html` {
		t.Errorf("equals mismatch: got %v", extracted.MimeType)
	}
	assert.GreaterOrEqual(t, len(extracted.Content), 10, "expected length >= 10")

	// Any single candidate is enough to pass.
	text := string(extracted.Content)
	matched := false
	for _, candidate := range []string{`Sample Data Table`, `Laptop`, `Electronics`, `Product`} {
		if strings.Contains(text, candidate) {
			matched = true
			break
		}
	}
	if !matched {
		t.Errorf("expected to contain at least one of the specified values")
	}
}
// Test_SmokeImagePng extracts images/sample.png through ExtractFile with OCR
// disabled in the config and no explicit MIME type (nil), then expects the
// result's MIME type to be image/png.
func Test_SmokeImagePng(t *testing.T) {
	var cfg kreuzberg.ExtractionConfig
	if parseErr := json.Unmarshal([]byte(`{"disable_ocr":true}`), &cfg); parseErr != nil {
		t.Fatalf("config parse failed: %v", parseErr)
	}

	extracted, extractErr := kreuzberg.ExtractFile(`images/sample.png`, nil, cfg)
	if extractErr != nil {
		t.Fatalf("call failed: %v", extractErr)
	}

	if gotMime := strings.TrimSpace(string(extracted.MimeType)); gotMime != `image/png` {
		t.Errorf("equals mismatch: got %v", extracted.MimeType)
	}
}
// Test_SmokeJsonBasic extracts json/simple.json through ExtractFile with an
// explicit application/json MIME type, and expects that MIME type back plus
// content of length >= 5.
func Test_SmokeJsonBasic(t *testing.T) {
	mimeType := `application/json`

	extracted, extractErr := kreuzberg.ExtractFile(`json/simple.json`, &mimeType, kreuzberg.ExtractionConfig{})
	if extractErr != nil {
		t.Fatalf("call failed: %v", extractErr)
	}

	if gotMime := strings.TrimSpace(string(extracted.MimeType)); gotMime != `application/json` {
		t.Errorf("equals mismatch: got %v", extracted.MimeType)
	}
	assert.GreaterOrEqual(t, len(extracted.Content), 5, "expected length >= 5")
}
// Test_SmokePdfBasic extracts pdf/fake_memo.pdf through ExtractFile with an
// explicit application/pdf MIME type. It expects that MIME type back, content
// of length >= 50, and either "May 5, 2023" or "To Whom it May Concern" in
// the extracted text.
func Test_SmokePdfBasic(t *testing.T) {
	mimeType := `application/pdf`

	extracted, extractErr := kreuzberg.ExtractFile(`pdf/fake_memo.pdf`, &mimeType, kreuzberg.ExtractionConfig{})
	if extractErr != nil {
		t.Fatalf("call failed: %v", extractErr)
	}

	if gotMime := strings.TrimSpace(string(extracted.MimeType)); gotMime != `application/pdf` {
		t.Errorf("equals mismatch: got %v", extracted.MimeType)
	}
	assert.GreaterOrEqual(t, len(extracted.Content), 50, "expected length >= 50")

	// Either phrase is enough to pass.
	text := string(extracted.Content)
	matched := strings.Contains(text, `May 5, 2023`) || strings.Contains(text, `To Whom it May Concern`)
	if !matched {
		t.Errorf("expected to contain at least one of the specified values")
	}
}
// Test_SmokeTxtBasic extracts text/report.txt through ExtractFile with an
// explicit text/plain MIME type, and expects that MIME type back plus content
// of length >= 5.
func Test_SmokeTxtBasic(t *testing.T) {
	mimeType := `text/plain`

	extracted, extractErr := kreuzberg.ExtractFile(`text/report.txt`, &mimeType, kreuzberg.ExtractionConfig{})
	if extractErr != nil {
		t.Fatalf("call failed: %v", extractErr)
	}

	if gotMime := strings.TrimSpace(string(extracted.MimeType)); gotMime != `text/plain` {
		t.Errorf("equals mismatch: got %v", extracted.MimeType)
	}
	assert.GreaterOrEqual(t, len(extracted.Content), 5, "expected length >= 5")
}
// Test_SmokeXlsxBasic extracts xlsx/stanley_cups.xlsx through ExtractFile
// with an explicit XLSX MIME type. It expects that MIME type back, content of
// length >= 100, and every one of a fixed list of header and cell strings in
// the extracted text. Each missing string is reported separately.
func Test_SmokeXlsxBasic(t *testing.T) {
	mimeType := `application/vnd.openxmlformats-officedocument.spreadsheetml.sheet`

	extracted, extractErr := kreuzberg.ExtractFile(`xlsx/stanley_cups.xlsx`, &mimeType, kreuzberg.ExtractionConfig{})
	if extractErr != nil {
		t.Fatalf("call failed: %v", extractErr)
	}

	if gotMime := strings.TrimSpace(string(extracted.MimeType)); gotMime != `application/vnd.openxmlformats-officedocument.spreadsheetml.sheet` {
		t.Errorf("equals mismatch: got %v", extracted.MimeType)
	}
	assert.GreaterOrEqual(t, len(extracted.Content), 100, "expected length >= 100")

	// All of these must be present; order matches the fixture's assertion list.
	text := string(extracted.Content)
	required := []string{
		`Team`,
		`Location`,
		`Stanley Cups`,
		`Blues`,
		`Flyers`,
		`Maple Leafs`,
		`STL`,
		`PHI`,
		`TOR`,
	}
	for _, want := range required {
		if !strings.Contains(text, want) {
			t.Errorf("expected to contain %s", want)
		}
	}

	// skipped: field 'tables' not available on result type
	// skipped: field 'metadata.format.excel.sheet_count' not available on result type
	// skipped: field 'metadata.format.excel.sheet_names' not available on result type
}

27
e2e/go/validator_management_test.go generated Normal file
View File

@@ -0,0 +1,27 @@
// This file is auto-generated by alef — DO NOT EDIT.
// alef:hash:4e15143f4af1ae8bafbdb1506ef057da924484c66a19483966333558ad437e75
// To regenerate: alef generate
// To verify freshness: alef verify --exit-code
// Issues & docs: https://github.com/kreuzberg-dev/alef
// E2e tests for category: validator_management
package e2e_test
import (
"testing"
kreuzberg "github.com/kreuzberg-dev/kreuzberg/v5"
)
// Test_ValidatorsClear calls ClearValidators and deliberately discards
// whatever it returns. The fixture is described as "clear all validators and
// verify list is empty", but no assertion on the list is generated here; the
// test only fails if the call panics.
func Test_ValidatorsClear(t *testing.T) {
	_ = kreuzberg.ClearValidators()
}
// Test_ValidatorsList checks that ListValidators returns without an error.
// The returned list is not inspected.
func Test_ValidatorsList(t *testing.T) {
	if _, listErr := kreuzberg.ListValidators(); listErr != nil {
		t.Fatalf("call failed: %v", listErr)
	}
}