Files
fil/docs/snippets/rust/plugins/embedding_backend.md
Henrik Jess Nielsen b4c07d3693
All checks were successful
Deploy fil (kreuzberg) / deploy (push) Successful in 49s
Nomad changes
2026-06-01 23:40:55 +02:00

1.6 KiB

use kreuzberg::plugins::{EmbeddingBackend, Plugin, register_embedding_backend};
use kreuzberg::{EmbeddingConfig, EmbeddingModelType, Result, embed_texts};
use async_trait::async_trait;
use std::sync::Arc;

// Adapter around an embedder the host has already loaded (for example a tuned
// ONNX session, or any embedder living in the host language). Registering it
// lets kreuzberg call back into that embedder during chunking and for
// standalone embed requests.
struct MyEmbedder {
    // Placeholder: store whatever model handles the host already owns here.
    // The example keeps the struct empty because `embed` below returns dummy vectors.
}

// Plugin metadata and lifecycle hooks for `MyEmbedder`.
impl Plugin for MyEmbedder {
    // Name of this plugin; `main` selects the backend with this same string.
    fn name(&self) -> &str {
        "my-embedder"
    }

    // Version string reported for this plugin.
    fn version(&self) -> String {
        String::from("1.0.0")
    }

    // No setup work in this example; always succeeds.
    fn initialize(&self) -> Result<()> {
        Ok(())
    }

    // No teardown work in this example; always succeeds.
    fn shutdown(&self) -> Result<()> {
        Ok(())
    }
}

#[async_trait]
impl EmbeddingBackend for MyEmbedder {
    // Width of every vector this backend returns. The value is captured once at
    // registration and used for shape validation on every dispatch.
    fn dimensions(&self) -> usize {
        768
    }

    // Produces one embedding per entry in `texts`, in input order.
    //
    // A real backend would forward `texts` to the already-loaded host model; this
    // example returns an all-zero vector of `dimensions()` floats for each input.
    async fn embed(&self, texts: Vec<String>) -> Result<Vec<Vec<f32>>> {
        // Read the width from `dimensions()` so both methods always agree.
        let width = self.dimensions();
        let zero_row = vec![0.0; width];
        Ok(vec![zero_row; texts.len()])
    }
}

// Registers the custom backend, then embeds two sample texts through it.
fn main() -> Result<()> {
    // Registration is a one-time step performed at startup.
    register_embedding_backend(Arc::new(MyEmbedder {}))?;

    // Select the registered backend by its plugin name.
    let model = EmbeddingModelType::Plugin {
        name: "my-embedder".to_owned(),
    };
    let embedding_config = EmbeddingConfig {
        model,
        // Optional: cap how long to wait on a hung backend
        // (default 60s; `None` disables the limit).
        max_embed_duration_secs: Some(30),
        ..Default::default()
    };

    let inputs = ["Hello, world!", "Second text"];
    let embeddings = embed_texts(&inputs, &embedding_config)?;
    // Expect exactly one vector per input text.
    assert_eq!(embeddings.len(), 2);
    Ok(())
}