Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 0 additions & 17 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 1 addition & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[workspace]
exclude = ["crates/micro-hnsw-wasm", "crates/ruvector-hyperbolic-hnsw", "crates/ruvector-hyperbolic-hnsw-wasm", "examples/ruvLLM/esp32", "examples/ruvLLM/esp32-flash", "examples/edge-net", "examples/data", "examples/ruvLLM", "examples/delta-behavior", "crates/rvf", "examples/rvf-desktop", "crates/mcp-brain-server"]
exclude = ["crates/micro-hnsw-wasm", "crates/ruvector-hyperbolic-hnsw", "crates/ruvector-hyperbolic-hnsw-wasm", "examples/ruvLLM/esp32", "examples/ruvLLM/esp32-flash", "examples/edge-net", "examples/data", "examples/ruvLLM", "examples/delta-behavior", "crates/rvf", "crates/rvf/*", "crates/rvf/*/*", "examples/rvf-desktop", "crates/mcp-brain-server"]
members = [
"crates/ruvector-core",
"crates/ruvector-node",
Expand Down Expand Up @@ -54,7 +54,6 @@ members = [
"crates/ruvector-fpga-transformer",
"crates/ruvector-fpga-transformer-wasm",
"crates/ruvector-sparse-inference",
"crates/ruvector-sparse-inference-wasm",
"crates/ruvector-math",
"crates/ruvector-math-wasm",
"examples/benchmarks",
Expand Down
99 changes: 4 additions & 95 deletions crates/ruvector-sparse-inference-wasm/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
use ruvector_sparse_inference::{
model::{GenerationConfig, GgufParser, KVCache, ModelMetadata, ModelRunner},
predictor::LowRankPredictor,
InferenceConfig, SparseModel, SparsityConfig,
model::{GgufParser, ModelMetadata, ModelRunner, SparseModel},
InferenceConfig, LowRankPredictor, SparsityConfig,
};
use wasm_bindgen::prelude::*;

Expand All @@ -17,7 +16,6 @@ pub fn init() {
pub struct SparseInferenceEngine {
model: SparseModel,
config: InferenceConfig,
predictors: Vec<LowRankPredictor>,
}

#[wasm_bindgen]
Expand All @@ -31,13 +29,7 @@ impl SparseInferenceEngine {
let model = GgufParser::parse(model_bytes)
.map_err(|e| JsError::new(&format!("Failed to parse model: {}", e)))?;

let predictors = Self::init_predictors(&model, &config);

Ok(Self {
model,
config,
predictors,
})
Ok(Self { model, config })
}

/// Load model with streaming (for large models)
Expand All @@ -46,7 +38,6 @@ impl SparseInferenceEngine {
url: &str,
config_json: &str,
) -> Result<SparseInferenceEngine, JsError> {
// Fetch model in chunks
let bytes = fetch_model_bytes(url).await?;
Self::new(&bytes, config_json)
}
Expand All @@ -59,21 +50,6 @@ impl SparseInferenceEngine {
.map_err(|e| JsError::new(&format!("Inference failed: {}", e)))
}

/// Run text generation (for LLM models)
#[wasm_bindgen]
pub fn generate(&mut self, input_ids: &[u32], max_tokens: u32) -> Result<Vec<u32>, JsError> {
let config = GenerationConfig {
max_new_tokens: max_tokens as usize,
temperature: self.config.temperature,
top_k: self.config.top_k,
..Default::default()
};

self.model
.generate(input_ids, &config)
.map_err(|e| JsError::new(&format!("Generation failed: {}", e)))
}

/// Get model metadata as JSON
#[wasm_bindgen]
pub fn metadata(&self) -> String {
Expand All @@ -87,34 +63,14 @@ impl SparseInferenceEngine {
serde_json::to_string(&stats).unwrap_or_default()
}

/// Update sparsity threshold
#[wasm_bindgen]
pub fn set_sparsity(&mut self, threshold: f32) {
self.config.sparsity.threshold = threshold;
for predictor in &mut self.predictors {
predictor.set_threshold(threshold);
}
}

/// Calibrate predictors with sample inputs
/// Calibrate with sample inputs
#[wasm_bindgen]
pub fn calibrate(&mut self, samples: &[f32], sample_dim: usize) -> Result<(), JsError> {
let samples: Vec<Vec<f32>> = samples.chunks(sample_dim).map(|c| c.to_vec()).collect();

self.model
.calibrate(&samples)
.map_err(|e| JsError::new(&format!("Calibration failed: {}", e)))
}

/// Initialize predictors for each layer
fn init_predictors(model: &SparseModel, config: &InferenceConfig) -> Vec<LowRankPredictor> {
let num_layers = model.metadata().num_layers;
let hidden_size = model.metadata().hidden_size;

(0..num_layers)
.map(|_| LowRankPredictor::new(hidden_size, config.sparsity.threshold))
.collect()
}
}

/// Embedding model wrapper for sentence transformers
Expand Down Expand Up @@ -147,7 +103,6 @@ impl EmbeddingModel {
pub fn encode_batch(&self, input_ids: &[u32], lengths: &[u32]) -> Result<Vec<f32>, JsError> {
let mut results = Vec::new();
let mut offset = 0usize;

for &len in lengths {
let len = len as usize;
if offset + len > input_ids.len() {
Expand All @@ -162,7 +117,6 @@ impl EmbeddingModel {
results.extend(embedding);
offset += len;
}

Ok(results)
}

Expand All @@ -173,51 +127,6 @@ impl EmbeddingModel {
}
}

/// LLM model wrapper for text generation
#[wasm_bindgen]
pub struct LLMModel {
engine: SparseInferenceEngine,
kv_cache: KVCache,
}

#[wasm_bindgen]
impl LLMModel {
#[wasm_bindgen(constructor)]
pub fn new(model_bytes: &[u8], config_json: &str) -> Result<LLMModel, JsError> {
let engine = SparseInferenceEngine::new(model_bytes, config_json)?;
let cache_size = engine.model.metadata().max_position_embeddings;
let kv_cache = KVCache::new(cache_size);
Ok(Self { engine, kv_cache })
}

/// Generate next token
#[wasm_bindgen]
pub fn next_token(&mut self, input_ids: &[u32]) -> Result<u32, JsError> {
self.engine
.model
.next_token(input_ids, &mut self.kv_cache)
.map_err(|e| JsError::new(&format!("Generation failed: {}", e)))
}

/// Generate multiple tokens
#[wasm_bindgen]
pub fn generate(&mut self, input_ids: &[u32], max_tokens: u32) -> Result<Vec<u32>, JsError> {
self.engine.generate(input_ids, max_tokens)
}

/// Reset KV cache (for new conversation)
#[wasm_bindgen]
pub fn reset_cache(&mut self) {
self.kv_cache.clear();
}

/// Get generation statistics
#[wasm_bindgen]
pub fn stats(&self) -> String {
serde_json::to_string(&self.engine.model.generation_stats()).unwrap_or_default()
}
}

/// Performance measurement utilities
#[wasm_bindgen]
pub fn measure_inference_time(
Expand Down
20 changes: 20 additions & 0 deletions crates/rvf/rvf-node/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ pub struct RvfOptions {
pub metric: Option<String>,
/// Hardware profile: 0=Generic, 1=Core, 2=Hot, 3=Full. Defaults to 0.
pub profile: Option<u32>,
/// Compression profile: "None" | "Scalar" | "Product". Defaults to "None".
pub compression: Option<String>,
/// Whether segment signing is enabled. Defaults to false.
pub signing: Option<bool>,
/// HNSW M parameter. Defaults to 16.
Expand Down Expand Up @@ -166,16 +168,34 @@ fn parse_metric(s: &str) -> Result<DistanceMetric> {
}
}

/// Parse a compression profile name into the runtime enum.
///
/// Accepts "None", "Scalar", or "Product", matched case-insensitively
/// (so "NONE", "scalar", "Product" all work). This replaces the previous
/// per-arm `"None" | "none"` duplication, which silently rejected
/// mixed-case spellings such as "NONE".
///
/// # Errors
/// Returns a `napi` error naming the invalid value and listing the
/// accepted spellings when `s` matches no known profile.
fn parse_compression(s: &str) -> Result<rvf_runtime::options::CompressionProfile> {
    use rvf_runtime::options::CompressionProfile;
    // Normalize once so each variant needs only a single arm.
    match s.to_ascii_lowercase().as_str() {
        "none" => Ok(CompressionProfile::None),
        "scalar" => Ok(CompressionProfile::Scalar),
        "product" => Ok(CompressionProfile::Product),
        _ => Err(napi::Error::from_reason(format!(
            "Invalid compression '{s}'. Expected 'None', 'Scalar', or 'Product'."
        ))),
    }
}

fn js_options_to_rust(opts: &RvfOptions) -> Result<RustRvfOptions> {
let metric = match &opts.metric {
Some(m) => parse_metric(m)?,
None => DistanceMetric::L2,
};

let compression = match &opts.compression {
Some(c) => parse_compression(c)?,
None => rvf_runtime::options::CompressionProfile::None,
};

Ok(RustRvfOptions {
dimension: opts.dimension as u16,
metric,
profile: opts.profile.unwrap_or(0) as u8,
compression,
signing: opts.signing.unwrap_or(false),
m: opts.m.unwrap_or(16) as u16,
ef_construction: opts.ef_construction.unwrap_or(200) as u16,
Expand Down
3 changes: 2 additions & 1 deletion examples/google-cloud/src/self_learning.rs
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,8 @@ pub struct AutonomousModel {

impl AutonomousModel {
pub fn new(input_dim: usize, hidden_dim: usize, _output_dim: usize) -> Self {
let gnn_layer = RuvectorLayer::new(input_dim, hidden_dim, 8, 0.1);
let gnn_layer = RuvectorLayer::new(input_dim, hidden_dim, 8, 0.1)
.expect("Failed to create GNN layer");

let optimizer = Optimizer::new(OptimizerType::Adam {
learning_rate: 0.001,
Expand Down
75 changes: 63 additions & 12 deletions examples/onnx-embeddings/src/model.rs
Original file line number Diff line number Diff line change
Expand Up @@ -185,9 +185,11 @@ impl OnnxModel {
let inputs: Vec<String> = session.inputs.iter().map(|i| i.name.clone()).collect();
let outputs: Vec<String> = session.outputs.iter().map(|o| o.name.clone()).collect();

// Default embedding dimension (will be determined at runtime from actual output)
// Most sentence-transformers models output 384 dimensions
let dimension = 384;
// Try to detect embedding dimension from the ONNX output tensor metadata.
// The last dimension of the first output is typically the hidden/embedding size.
let dimension = Self::detect_dimension_from_outputs(session)
.or_else(|| Self::detect_dimension_from_path(path))
.unwrap_or(384);

let name = path
.file_stem()
Expand All @@ -204,6 +206,51 @@ impl OnnxModel {
})
}

/// Try to read the embedding dimension from the ONNX output tensor shape metadata.
///
/// In ort 2.0, `tensor_shape()` returns the declared dims as `i64`, where
/// dynamic axes are encoded as -1. Scanning the shape from the back, the
/// first positive entry is typically the hidden/embedding size.
/// Returns `None` if the shape is unavailable or entirely dynamic.
fn detect_dimension_from_outputs(session: &Session) -> Option<usize> {
    let first_output = session.outputs.first()?;
    let shape = first_output.output_type.tensor_shape()?;
    // Last fixed (positive) dimension wins; dynamic (-1) entries are skipped.
    let dim = shape.iter().rev().copied().find(|&d| d > 0)?;
    debug!("Detected embedding dimension from ONNX output metadata: {}", dim);
    Some(dim as usize)
}

/// Heuristic: infer dimension from known model names in the file path.
///
/// Lowercases the full path and checks it against a fixed table of
/// well-known sentence-transformer family substrings (mpnet, bge, e5,
/// gte, minilm). Table order matters: more specific patterns (e.g.
/// "bge-large") precede broader ones so the first hit is the best match.
/// Returns `None` when no pattern matches.
fn detect_dimension_from_path(path: &Path) -> Option<usize> {
    // Pattern → embedding dimension, ordered most-specific first.
    const DIM_BY_PATTERN: [(&str, usize); 12] = [
        ("mpnet-base", 768),
        ("all-mpnet", 768),
        ("bge-large", 1024),
        ("bge-base", 768),
        ("bge-small", 384),
        ("e5-large", 1024),
        ("e5-base", 768),
        ("e5-small", 384),
        ("gte-large", 1024),
        ("gte-base", 768),
        ("gte-small", 384),
        ("minilm", 384),
    ];

    let haystack = path.to_string_lossy().to_lowercase();
    DIM_BY_PATTERN.iter().find_map(|&(pattern, dim)| {
        if haystack.contains(pattern) {
            debug!("Inferred embedding dimension {} from path pattern '{}'", dim, pattern);
            Some(dim)
        } else {
            None
        }
    })
}

/// Run inference on encoded inputs
#[instrument(skip_all, fields(batch_size, seq_length))]
pub fn run(
Expand Down Expand Up @@ -236,19 +283,23 @@ impl OnnxModel {
))
.map_err(|e| EmbeddingError::invalid_model(e.to_string()))?;

let token_type_ids_tensor = Tensor::from_array((
vec![batch_size, seq_length],
token_type_ids.to_vec().into_boxed_slice(),
))
.map_err(|e| EmbeddingError::invalid_model(e.to_string()))?;

// Build inputs vector
let inputs = vec![
// Build inputs vector — only include token_type_ids if the model expects it.
// Models like DistilBERT and many newer transformers do not use token_type_ids
// and will crash if it is provided.
let mut inputs = vec![
("input_ids", input_ids_tensor.into_dyn()),
("attention_mask", attention_mask_tensor.into_dyn()),
("token_type_ids", token_type_ids_tensor.into_dyn()),
];

if self.info.input_names.iter().any(|n| n == "token_type_ids") {
let token_type_ids_tensor = Tensor::from_array((
vec![batch_size, seq_length],
token_type_ids.to_vec().into_boxed_slice(),
))
.map_err(|e| EmbeddingError::invalid_model(e.to_string()))?;
inputs.push(("token_type_ids", token_type_ids_tensor.into_dyn()));
}

// Run inference
let outputs = self.session.run(inputs)
.map_err(EmbeddingError::OnnxRuntime)?;
Expand Down
Loading
Loading