Files
fil/docs/snippets/go/plugins/min_length_validator.md
Henrik Jess Nielsen b4c07d3693
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s
Nomad changes
2026-06-01 23:40:55 +02:00

1.8 KiB

package main

import (
	"encoding/json"
	"fmt"
	"log"
	"unsafe"

	"github.com/kreuzberg-dev/kreuzberg/packages/go/v5"
)

/*
#cgo CFLAGS: -I${SRCDIR}/../../../crates/kreuzberg-ffi
#cgo LDFLAGS: -L${SRCDIR}/../../../target/release -L${SRCDIR}/../../../target/debug -lkreuzberg_ffi
#include "../../../crates/kreuzberg-ffi/kreuzberg.h"
#include <stdlib.h>
*/
import "C"

// minLengthConfig is the package-level setting consulted by
// minLengthValidator. minLength is the smallest acceptable content length;
// the validator compares it against len(content), i.e. a byte count.
var minLengthConfig = struct{ minLength int }{minLength: 100}

// minLengthValidator is the callback exported to C that rejects extraction
// results whose content is shorter than minLengthConfig.minLength.
//
// resultJSON is decoded as a JSON object and its "content" entry must be a
// string. The return value is nil when validation passes; otherwise it is a
// C string (allocated with C.CString) describing the failure.
// NOTE(review): the returned string is presumably freed by the native caller;
// confirm against the FFI contract.
//
//export minLengthValidator
func minLengthValidator(resultJSON *C.char) *C.char {
	var payload map[string]interface{}
	if err := json.Unmarshal([]byte(C.GoString(resultJSON)), &payload); err != nil {
		return C.CString("Failed to parse result JSON")
	}

	text, isString := payload["content"].(string)

	// len counts bytes, so the threshold is compared against the byte length.
	switch got, required := len(text), minLengthConfig.minLength; {
	case !isString:
		// Key absent, or present with a non-string value.
		return C.CString("Missing content field in result")
	case got < required:
		return C.CString(fmt.Sprintf("Content too short: %d < %d", got, required))
	}

	// A nil pointer signals that validation passed.
	return nil
}

// run registers the minimum length validator, extracts "document.pdf", and
// logs the length of the extracted content.
//
// It returns an error rather than terminating the process: log.Fatalf exits
// via os.Exit, which skips deferred calls, so exiting from here would leave
// the validator registered whenever extraction fails.
func run() error {
	// Register the validator with priority 100 (runs early)
	if err := kreuzberg.RegisterValidator("min_length_validator", 100,
		(C.ValidatorCallback)(C.minLengthValidator)); err != nil {
		return fmt.Errorf("failed to register validator: %w", err)
	}
	// Unregistration failures are logged only; they must not mask the
	// result of the extraction itself.
	defer func() {
		if err := kreuzberg.UnregisterValidator("min_length_validator"); err != nil {
			log.Printf("warning: failed to unregister validator: %v", err)
		}
	}()

	// Extract and validate
	result, err := kreuzberg.ExtractFileSync("document.pdf", nil)
	if err != nil {
		return fmt.Errorf("extraction failed: %w", err)
	}

	log.Printf("Validation passed. Content length: %d", len(result.Content))
	return nil
}

// main runs the example and exits with a non-zero status if any step fails.
// The fatal log happens only after run has returned, so run's deferred
// cleanup has already executed.
func main() {
	if err := run(); err != nil {
		log.Fatal(err)
	}
}