From c36395ddde81036d195069e8785fc678be3f977a Mon Sep 17 00:00:00 2001 From: Antigravity Brain Date: Fri, 27 Feb 2026 18:37:37 +0700 Subject: [PATCH 1/3] perf(core): Bypass O(N) index reconstruction with O(1) HNSW graph load on DB restart --- crates/ruvector-core/src/index.rs | 5 ++ crates/ruvector-core/src/index/hnsw.rs | 4 ++ crates/ruvector-core/src/vector_db.rs | 85 +++++++++++++++++++------- 3 files changed, 71 insertions(+), 23 deletions(-) diff --git a/crates/ruvector-core/src/index.rs b/crates/ruvector-core/src/index.rs index eadb730be..095500a09 100644 --- a/crates/ruvector-core/src/index.rs +++ b/crates/ruvector-core/src/index.rs @@ -33,4 +33,9 @@ pub trait VectorIndex: Send + Sync { fn is_empty(&self) -> bool { self.len() == 0 } + + /// Dump the index to a byte buffer for O(1) fast persistence + fn dump(&self) -> Result>> { + Ok(None) + } } diff --git a/crates/ruvector-core/src/index/hnsw.rs b/crates/ruvector-core/src/index/hnsw.rs index 83985cd7c..ead513b17 100644 --- a/crates/ruvector-core/src/index/hnsw.rs +++ b/crates/ruvector-core/src/index/hnsw.rs @@ -353,6 +353,10 @@ impl VectorIndex for HnswIndex { fn len(&self) -> usize { self.inner.read().vectors.len() } + + fn dump(&self) -> Result>> { + Ok(Some(self.serialize()?)) + } } #[cfg(test)] diff --git a/crates/ruvector-core/src/vector_db.rs b/crates/ruvector-core/src/vector_db.rs index f29b863f5..87c5845b8 100644 --- a/crates/ruvector-core/src/vector_db.rs +++ b/crates/ruvector-core/src/vector_db.rs @@ -81,11 +81,34 @@ impl VectorDB { let mut index: Box = if let Some(hnsw_config) = &options.hnsw_config { #[cfg(feature = "hnsw")] { - Box::new(HnswIndex::new( - options.dimensions, - options.distance_metric, - hnsw_config.clone(), - )?) 
+ let mut loaded_index = None; + #[cfg(feature = "storage")] + { + let bin_path = format!("{}_hnsw.bin", options.storage_path); + if std::path::Path::new(&bin_path).exists() { + tracing::info!("Found persisted HNSW index graph, attempting O(1) fast load..."); + match std::fs::read(&bin_path) { + Ok(bytes) => { + match HnswIndex::deserialize(&bytes) { + Ok(idx) => { + tracing::info!("Successfully loaded HNSW graph with {} vectors via Zero-Copy bypass", idx.len()); + loaded_index = Some(Box::new(idx) as Box); + } + Err(e) => tracing::warn!("Failed to deserialize HNSW index, falling back to rebuild: {}", e), + } + } + Err(e) => tracing::warn!("Failed to read HNSW bin file: {}", e), + } + } + } + + loaded_index.unwrap_or_else(|| { + Box::new(HnswIndex::new( + options.dimensions, + options.distance_metric, + hnsw_config.clone(), + ).expect("Failed to initialize HNSW index")) as Box + }) } #[cfg(not(feature = "hnsw"))] { @@ -97,29 +120,31 @@ impl VectorDB { Box::new(FlatIndex::new(options.dimensions, options.distance_metric)) }; - // Rebuild index from persisted vectors if storage is not empty - // This fixes the bug where search() returns empty results after restart + // Rebuild index from persisted vectors if storage is not empty and index is empty + // This fixes the bug where search() returns empty results after restart if no dump exists #[cfg(feature = "storage")] { - let stored_ids = storage.all_ids()?; - if !stored_ids.is_empty() { - tracing::info!( - "Rebuilding index from {} persisted vectors", - stored_ids.len() - ); - - // Batch load all vectors for efficient index rebuilding - let mut entries = Vec::with_capacity(stored_ids.len()); - for id in stored_ids { - if let Some(entry) = storage.get(&id)? 
{ - entries.push((id, entry.vector)); + if index.is_empty() { + let stored_ids = storage.all_ids()?; + if !stored_ids.is_empty() { + tracing::info!( + "Rebuilding index from {} persisted vectors (Fallback O(N) Initialization)", + stored_ids.len() + ); + + // Batch load all vectors for efficient index rebuilding + let mut entries = Vec::with_capacity(stored_ids.len()); + for id in stored_ids { + if let Some(entry) = storage.get(&id)? { + entries.push((id, entry.vector)); + } } - } - // Add all vectors to index in batch for better performance - index.add_batch(entries)?; + // Add all vectors to index in batch for better performance + index.add_batch(entries)?; - tracing::info!("Index rebuilt successfully"); + tracing::info!("Index rebuilt fully successfully"); + } } } @@ -223,6 +248,20 @@ impl VectorDB { self.storage.is_empty() } + /// Save the current index graph to disk (O(1) startup optimization bypass) + #[cfg(feature = "storage")] + pub fn save_index(&self) -> Result<()> { + let index_lock = self.index.read(); + + if let Ok(Some(bytes)) = index_lock.dump() { + let bin_path = format!("{}_hnsw.bin", self.options.storage_path); + std::fs::write(&bin_path, bytes)?; + tracing::info!("Index graph serialized successfully to disk for O(1) fast load"); + } + + Ok(()) + } + /// Get database options pub fn options(&self) -> &DbOptions { &self.options From 7a4a395ed83f2892225c6ad24b9eefcb0694732c Mon Sep 17 00:00:00 2001 From: Antigravity Brain Date: Sun, 1 Mar 2026 05:39:28 +0700 Subject: [PATCH 2/3] fix(core): Refactor O(1) index restore fallback to propagate error with ? 
instead of expect() (address PR feedback) --- crates/ruvector-core/src/vector_db.rs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/crates/ruvector-core/src/vector_db.rs b/crates/ruvector-core/src/vector_db.rs index 87c5845b8..f5c33022b 100644 --- a/crates/ruvector-core/src/vector_db.rs +++ b/crates/ruvector-core/src/vector_db.rs @@ -102,13 +102,14 @@ impl VectorDB { } } - loaded_index.unwrap_or_else(|| { - Box::new(HnswIndex::new( + match loaded_index { + Some(idx) => idx, + None => Box::new(HnswIndex::new( options.dimensions, options.distance_metric, hnsw_config.clone(), - ).expect("Failed to initialize HNSW index")) as Box - }) + )?) as Box, + } } #[cfg(not(feature = "hnsw"))] { From fab8f98958981e0ffe4d1511b6a0bfd6d4dcbec1 Mon Sep 17 00:00:00 2001 From: Antigravity Brain Date: Sat, 7 Mar 2026 06:27:14 +0700 Subject: [PATCH 3/3] feat(ruvllm/core): Enforce QuantumVector across layers & fix multithreading limits on Windows This commit purges raw f32 usages, introduces QuantumVector bridging for WASM, and resolves STATUS_STACK_BUFFER_OVERRUN during concurrent workflows on Windows by isolating routing history and configuring RUST_MIN_STACK. 
--- .cargo/config.toml | 2 + README.md | 16 +- crates/ruvector-bench/README.md | 2 +- .../src/advanced_features/hybrid_search.rs | 17 +- .../src/advanced_features/mmr.rs | 79 +++--- crates/ruvector-core/src/agenticdb.rs | 75 ++--- crates/ruvector-core/src/embeddings.rs | 30 +- crates/ruvector-core/src/index.rs | 8 +- crates/ruvector-core/src/index/flat.rs | 27 +- crates/ruvector-core/src/index/hnsw.rs | 148 ++++------ crates/ruvector-core/src/quantization.rs | 151 ++++++++++ crates/ruvector-core/src/storage.rs | 17 +- crates/ruvector-core/src/storage_memory.rs | 23 +- crates/ruvector-core/src/types.rs | 72 ++++- crates/ruvector-core/src/vector_db.rs | 27 +- .../tests/advanced_features_integration.rs | 2 +- .../tests/quantum_native_test.rs | 66 +++++ .../ruvector-graph/src/hybrid/vector_index.rs | 20 +- .../integration-plans/09-benchmarking-plan.md | 8 +- crates/ruvector-router-ffi/README.md | 2 +- .../ruvector-sparse-inference-wasm/src/lib.rs | 262 ------------------ crates/ruvector-wasm/src/lib.rs | 8 +- crates/ruvllm/src/backends/candle_backend.rs | 10 +- crates/ruvllm/src/backends/mod.rs | 3 +- crates/ruvllm/src/bitnet/rlm_embedder.rs | 77 +---- crates/ruvllm/src/bitnet/rlm_refiner.rs | 16 +- crates/ruvllm/src/claude_flow/agent_router.rs | 17 +- .../ruvllm/src/claude_flow/flow_optimizer.rs | 18 +- crates/ruvllm/src/claude_flow/hnsw_router.rs | 85 +++--- .../src/claude_flow/hooks_integration.rs | 41 +-- .../src/claude_flow/pretrain_pipeline.rs | 2 +- .../ruvllm/src/claude_flow/reasoning_bank.rs | 208 ++++++-------- crates/ruvllm/src/context/agentic_memory.rs | 22 +- crates/ruvllm/src/context/context_manager.rs | 9 +- crates/ruvllm/src/context/episodic_memory.rs | 50 ++-- crates/ruvllm/src/context/semantic_cache.rs | 18 +- crates/ruvllm/src/context/working_memory.rs | 40 ++- crates/ruvllm/src/evaluation/real_harness.rs | 14 +- crates/ruvllm/src/kv_cache.rs | 5 +- crates/ruvllm/src/lib.rs | 11 +- crates/ruvllm/src/models/ruvltra.rs | 12 +- 
crates/ruvllm/src/models/ruvltra_medium.rs | 2 +- crates/ruvllm/src/optimization/sona_llm.rs | 12 +- crates/ruvllm/src/policy_store.rs | 20 +- crates/ruvllm/src/quality/coherence.rs | 30 +- crates/ruvllm/src/quality/diversity.rs | 19 +- .../src/reasoning_bank/consolidation.rs | 144 +++++++--- .../ruvllm/src/reasoning_bank/distillation.rs | 62 ++--- crates/ruvllm/src/reasoning_bank/mod.rs | 10 +- .../src/reasoning_bank/pattern_store.rs | 93 +++++-- .../ruvllm/src/reasoning_bank/trajectory.rs | 31 ++- crates/ruvllm/src/reasoning_bank/verdicts.rs | 3 +- crates/ruvllm/src/ruvector_integration.rs | 113 ++++---- crates/ruvllm/src/session_index.rs | 4 +- crates/ruvllm/src/sona/integration.rs | 62 +++-- crates/ruvllm/src/sona/mod.rs | 2 +- crates/ruvllm/src/tests/witness_log_tests.rs | 59 ++-- crates/ruvllm/src/utils.rs | 40 +++ crates/ruvllm/src/witness_log.rs | 46 +-- crates/ruvllm/tests/check_sizes.rs | 20 ++ crates/ruvllm/tests/e2e_integration.rs | 16 +- crates/ruvllm/tests/sona_integration.rs | 44 +-- crates/rvf/README.md | 4 +- crates/rvf/rvf-launch/src/qemu.rs | 25 +- crates/rvf/rvf-launch/src/qmp.rs | 27 ++ crates/rvlite/docs/INTEGRATION_SUCCESS.md | 2 +- crates/rvlite/src/lib.rs | 12 +- crates/rvlite/src/sql/executor.rs | 12 +- .../adr/ADR-001-ruvector-core-architecture.md | 4 +- docs/architecture/TECHNICAL_PLAN.md | 30 +- docs/benchmarks/BENCHMARK_COMPARISON.md | 2 +- docs/implementation/IMPROVEMENT_ROADMAP.md | 12 +- docs/research/executive-summary.md | 6 +- .../innovative-gnn-features-2024-2025.md | 8 +- .../00-executive-summary.md | 2 +- .../15-fifty-year-sota-vision.md | 2 +- ...R-STS-001-core-integration-architecture.md | 2 +- .../adr/ADR-STS-SOTA-research-analysis.md | 4 +- .../ADR-OSpipe-screenpipe-integration.md | 2 +- .../dna/adr/ADR-001-vision-and-context.md | 2 +- examples/rvf/README.md | 2 +- examples/vibecast-7sense/Cargo.toml | 2 +- .../sevensense-api/src/services/vector.rs | 4 +- .../src/domain/repository.rs | 2 +- 
.../crates/sevensense-vector/Cargo.toml | 6 +- .../crates/sevensense-vector/src/lib.rs | 2 +- npm/README.md | 2 +- .../agentic-synth/docs/INTEGRATIONS.md | 10 +- npm/packages/ruvector/README.md | 10 +- 89 files changed, 1425 insertions(+), 1325 deletions(-) create mode 100644 .cargo/config.toml create mode 100644 crates/ruvector-core/tests/quantum_native_test.rs create mode 100644 crates/ruvllm/src/utils.rs create mode 100644 crates/ruvllm/tests/check_sizes.rs diff --git a/.cargo/config.toml b/.cargo/config.toml new file mode 100644 index 000000000..561a71d67 --- /dev/null +++ b/.cargo/config.toml @@ -0,0 +1,2 @@ +[env] +RUST_MIN_STACK = "8388608" diff --git a/README.md b/README.md index 468a73b9e..1e91dbe4f 100644 --- a/README.md +++ b/README.md @@ -213,7 +213,7 @@ RuVector isn't a database you add to your stack β€” it's the entire stack. Self- | | Layer | Replaces | What It Does | |---|-------|----------|--------------| -| πŸ” | [**Search**](./crates/ruvector-core/README.md) | Pinecone, Weaviate, Qdrant | Self-learning HNSW β€” GNN improves results from every query | +| πŸ” | [**Search**](./crates/ruvector-core/README.md) | Pinecone, Weaviate, LegacyDB | Self-learning HNSW β€” GNN improves results from every query | | πŸ—„οΈ | [**Storage**](./crates/ruvector-core/README.md) | Separate database + cache | Vector store, graph DB, key-value cache β€” unified engine | | 🐘 | [**PostgreSQL**](./crates/ruvector-postgres/README.md) | pgvector, pg_embedding | Drop-in replacement β€” 230+ SQL functions, same interface but search gets smarter over time | | πŸ”— | [**Graph**](./crates/ruvector-graph/README.md) | Neo4j, Amazon Neptune | Cypher, W3C SPARQL 1.1, hyperedges β€” all built in | @@ -557,7 +557,7 @@ See how RuVector stacks up against popular vector databases across 40+ features Grouped comparison across 10 categories. RuVector is the only vector database that learns from usage, runs AI locally, and ships as a single self-booting file. 
**Performance & Storage** -| Feature | RuVector | Pinecone | Qdrant | Milvus | ChromaDB | Weaviate | +| Feature | RuVector | Pinecone | LegacyDB | Milvus | ChromaDB | Weaviate | |---------|----------|----------|--------|--------|----------|----------| | Latency (p50) | **61 us** | ~2 ms | ~1 ms | ~5 ms | ~50 ms | ~5 ms | | Memory (1M vectors) | **200 MB*** | 2 GB | 1.5 GB | 1 GB | 3 GB | 1.5 GB | @@ -567,7 +567,7 @@ Grouped comparison across 10 categories. RuVector is the only vector database th | Sparse vectors (BM25/TF-IDF) | βœ… | βœ… | βœ… | βœ… | ❌ | βœ… | **Search & Query** -| Feature | RuVector | Pinecone | Qdrant | Milvus | ChromaDB | Weaviate | +| Feature | RuVector | Pinecone | LegacyDB | Milvus | ChromaDB | Weaviate | |---------|----------|----------|--------|--------|----------|----------| | Vector similarity search | βœ… HNSW | βœ… | βœ… HNSW | βœ… HNSW | βœ… | βœ… HNSW | | Metadata filtering | βœ… | βœ… | βœ… | βœ… | βœ… | βœ… | @@ -590,7 +590,7 @@ Grouped comparison across 10 categories. RuVector is the only vector database th | ReasoningBank | Trajectory learning with verdict judgment | ❌ | **Local AI β€” no cloud APIs needed** -| Feature | RuVector | Pinecone | Qdrant | Milvus | ChromaDB | Weaviate | +| Feature | RuVector | Pinecone | LegacyDB | Milvus | ChromaDB | Weaviate | |---------|----------|----------|--------|--------|----------|----------| | Built-in LLM runtime | βœ… ruvllm (GGUF) | ❌ | ❌ | ❌ | ❌ | ❌ | | Hardware acceleration | Metal, CUDA, ANE, WebGPU | N/A | N/A | GPU indexing | N/A | N/A | @@ -611,7 +611,7 @@ Grouped comparison across 10 categories. 
RuVector is the only vector database th | Verified training | Certificates, delta-apply rollback, fail-closed | ❌ | **Math & Solvers** -| Feature | RuVector | Pinecone | Qdrant | Milvus | ChromaDB | Weaviate | +| Feature | RuVector | Pinecone | LegacyDB | Milvus | ChromaDB | Weaviate | |---------|----------|----------|--------|--------|----------|----------| | Sublinear solvers (8 algorithms) | O(log n) to O(sqrt(n)) | ❌ | ❌ | ❌ | ❌ | ❌ | | Dynamic min-cut | n^0.12 complexity | ❌ | ❌ | ❌ | ❌ | ❌ | @@ -621,7 +621,7 @@ Grouped comparison across 10 categories. RuVector is the only vector database th | Quantum error correction | ruQu dynamic min-cut | ❌ | ❌ | ❌ | ❌ | ❌ | **Distributed Systems** -| Feature | RuVector | Pinecone | Qdrant | Milvus | ChromaDB | Weaviate | +| Feature | RuVector | Pinecone | LegacyDB | Milvus | ChromaDB | Weaviate | |---------|----------|----------|--------|--------|----------|----------| | Raft consensus | βœ… | ❌ managed | βœ… | ❌ | ❌ | βœ… | | Multi-master replication | βœ… vector clocks | ❌ | ❌ | βœ… | ❌ | βœ… | @@ -642,7 +642,7 @@ Grouped comparison across 10 categories. RuVector is the only vector database th | 25 segment types | VEC, INDEX, KERNEL, EBPF, WASM, COW_MAP, and 19 more | ❌ | **Platform & Deployment** -| Feature | RuVector | Pinecone | Qdrant | Milvus | ChromaDB | Weaviate | +| Feature | RuVector | Pinecone | LegacyDB | Milvus | ChromaDB | Weaviate | |---------|----------|----------|--------|--------|----------|----------| | Browser / WASM | βœ… WebGPU, 58 KB | ❌ | ❌ | ❌ | ❌ | ❌ | | Edge standalone | βœ… rvLite | ❌ | ❌ | ❌ | ❌ | ❌ | @@ -665,7 +665,7 @@ Grouped comparison across 10 categories. 
RuVector is the only vector database th | Cognitum Gate | Cognitive AI gateway with TileZero acceleration | ❌ | **Licensing & Cost** -| | RuVector | Pinecone | Qdrant | Milvus | ChromaDB | Weaviate | +| | RuVector | Pinecone | LegacyDB | Milvus | ChromaDB | Weaviate | |---|----------|----------|--------|--------|----------|----------| | License | MIT (free forever) | Proprietary | Apache 2.0 | Apache 2.0 | Apache 2.0 | BSD-3 | | Self-hosted | βœ… | ❌ managed only | βœ… | βœ… | βœ… | βœ… | diff --git a/crates/ruvector-bench/README.md b/crates/ruvector-bench/README.md index 0eb644887..462667773 100644 --- a/crates/ruvector-bench/README.md +++ b/crates/ruvector-bench/README.md @@ -662,7 +662,7 @@ We welcome contributions to improve the benchmarking suite! ### Areas for Contribution - πŸ“Š Additional benchmark scenarios (concurrent writes, updates, deletes) -- πŸ”Œ Integration with other vector databases (Pinecone, Qdrant, Milvus) +- πŸ”Œ Integration with other vector databases (Pinecone, LegacyDB, Milvus) - πŸ“ˆ Enhanced visualization and reporting - 🎯 Real-world dataset support (SIFT, GIST, Deep1M loaders) - πŸš€ Performance optimization insights diff --git a/crates/ruvector-core/src/advanced_features/hybrid_search.rs b/crates/ruvector-core/src/advanced_features/hybrid_search.rs index 4ad4441b8..095651826 100644 --- a/crates/ruvector-core/src/advanced_features/hybrid_search.rs +++ b/crates/ruvector-core/src/advanced_features/hybrid_search.rs @@ -175,24 +175,15 @@ impl HybridSearch { } /// Perform hybrid search - /// - /// # Arguments - /// * `query_vector` - Query vector for semantic search - /// * `query_text` - Query text for keyword matching - /// * `k` - Number of results to return - /// * `vector_search_fn` - Function to perform vector similarity search - /// - /// # Returns - /// Combined and reranked search results pub fn search( &self, - query_vector: &[f32], + query_vector: &crate::types::QuantumVector, query_text: &str, k: usize, vector_search_fn: F, ) -> 
Result> where - F: Fn(&[f32], usize) -> Result>, + F: Fn(&crate::types::QuantumVector, usize) -> Result>, { // Get vector similarity results let vector_results = vector_search_fn(query_vector, k * 2)?; @@ -302,10 +293,10 @@ impl HybridSearch { /// Combined score holder #[derive(Debug, Clone)] struct CombinedScore { - id: VectorId, + id: crate::types::VectorId, vector_score: Option, keyword_score: Option, - vector: Option>, + vector: Option, metadata: Option>, } diff --git a/crates/ruvector-core/src/advanced_features/mmr.rs b/crates/ruvector-core/src/advanced_features/mmr.rs index 95f7e049f..4f1a0af1e 100644 --- a/crates/ruvector-core/src/advanced_features/mmr.rs +++ b/crates/ruvector-core/src/advanced_features/mmr.rs @@ -4,20 +4,20 @@ //! MMR = Ξ» Γ— Similarity(query, doc) - (1-Ξ») Γ— max Similarity(doc, selected_docs) use crate::error::{Result, RuvectorError}; -use crate::types::{DistanceMetric, SearchResult}; -use serde::{Deserialize, Serialize}; +use crate::types::{DistanceMetric, QuantumVector, SearchResult}; + +// ... 
(MMRConfig stays same for now, lambda is f32) /// Configuration for MMR search -#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive(Debug, Clone)] pub struct MMRConfig { - /// Lambda parameter: balance between relevance (1.0) and diversity (0.0) - /// - Ξ» = 1.0: Pure relevance (standard similarity search) - /// - Ξ» = 0.5: Equal balance - /// - Ξ» = 0.0: Pure diversity + /// Diversity weight (0.0 to 1.0) + /// Higher lambda = more weight on relevance + /// Lower lambda = more weight on diversity pub lambda: f32, - /// Distance metric for similarity computation + /// Distance metric to use for diversity calculation pub metric: DistanceMetric, - /// Fetch multiplier for initial candidates (fetch k * multiplier results) + /// Fetch multiplier: fetch (k * fetch_multiplier) candidates before reranking pub fetch_multiplier: f32, } @@ -31,38 +31,26 @@ impl Default for MMRConfig { } } -/// MMR search implementation -#[derive(Debug, Clone)] +/// MMR Reranker pub struct MMRSearch { - /// Configuration - pub config: MMRConfig, + config: MMRConfig, } impl MMRSearch { - /// Create a new MMR search instance pub fn new(config: MMRConfig) -> Result { - if !(0.0..=1.0).contains(&config.lambda) { - return Err(RuvectorError::InvalidParameter(format!( - "Lambda must be in [0, 1], got {}", - config.lambda - ))); + if config.lambda < 0.0 || config.lambda > 1.0 { + return Err(RuvectorError::InvalidParameter( + "MMR lambda must be between 0.0 and 1.0".to_string(), + )); } - Ok(Self { config }) } + // ... 
(new stays same) /// Perform MMR-based reranking of search results - /// - /// # Arguments - /// * `query` - Query vector - /// * `candidates` - Initial search results (sorted by relevance) - /// * `k` - Number of diverse results to return - /// - /// # Returns - /// Reranked results optimizing for both relevance and diversity pub fn rerank( &self, - query: &[f32], + query: &QuantumVector, candidates: Vec, k: usize, ) -> Result> { @@ -111,7 +99,7 @@ impl MMRSearch { /// Compute MMR score for a candidate fn compute_mmr_score( &self, - _query: &[f32], + _query: &QuantumVector, candidate: &SearchResult, selected: &[SearchResult], ) -> Result { @@ -130,7 +118,9 @@ impl MMRSearch { .iter() .filter_map(|s| s.vector.as_ref()) .map(|selected_vec| { - let dist = compute_distance(candidate_vec, selected_vec, self.config.metric); + let a_f32 = candidate_vec.reconstruct(); + let b_f32 = selected_vec.reconstruct(); + let dist = compute_distance(&a_f32, &b_f32, self.config.metric); self.distance_to_similarity(dist) }) .max_by(|a, b| a.partial_cmp(b).unwrap()) @@ -154,17 +144,14 @@ impl MMRSearch { } /// Perform end-to-end MMR search - /// - /// # Arguments - /// * `query` - Query vector - /// * `k` - Number of diverse results to return - /// * `search_fn` - Function to perform initial similarity search - /// - /// # Returns - /// Diverse search results - pub fn search(&self, query: &[f32], k: usize, search_fn: F) -> Result> + pub fn search( + &self, + query: &QuantumVector, + k: usize, + search_fn: F, + ) -> Result> where - F: Fn(&[f32], usize) -> Result>, + F: Fn(&QuantumVector, usize) -> Result>, { // Fetch more candidates than needed let fetch_k = (k as f32 * self.config.fetch_multiplier).ceil() as usize; @@ -225,7 +212,7 @@ mod tests { SearchResult { id: id.to_string(), score, - vector: Some(vector), + vector: Some(QuantumVector::F32(vector)), metadata: None, } } @@ -254,7 +241,7 @@ mod tests { }; let mmr = MMRSearch::new(config).unwrap(); - let query = vec![1.0, 0.0, 0.0]; 
+ let query = QuantumVector::F32(vec![1.0, 0.0, 0.0]); // Create candidates with varying similarity let candidates = vec![ @@ -282,7 +269,7 @@ mod tests { }; let mmr = MMRSearch::new(config).unwrap(); - let query = vec![1.0, 0.0, 0.0]; + let query = QuantumVector::F32(vec![1.0, 0.0, 0.0]); let candidates = vec![ create_search_result("doc1", 0.1, vec![0.9, 0.1, 0.0]), @@ -306,7 +293,7 @@ mod tests { }; let mmr = MMRSearch::new(config).unwrap(); - let query = vec![1.0, 0.0, 0.0]; + let query = QuantumVector::F32(vec![1.0, 0.0, 0.0]); let candidates = vec![ create_search_result("doc1", 0.1, vec![0.9, 0.1, 0.0]), @@ -328,7 +315,7 @@ mod tests { fn test_mmr_empty_candidates() { let config = MMRConfig::default(); let mmr = MMRSearch::new(config).unwrap(); - let query = vec![1.0, 0.0, 0.0]; + let query = QuantumVector::F32(vec![1.0, 0.0, 0.0]); let results = mmr.rerank(&query, Vec::new(), 5).unwrap(); assert!(results.is_empty()); diff --git a/crates/ruvector-core/src/agenticdb.rs b/crates/ruvector-core/src/agenticdb.rs index 6a9ac36b7..74e33878b 100644 --- a/crates/ruvector-core/src/agenticdb.rs +++ b/crates/ruvector-core/src/agenticdb.rs @@ -49,7 +49,7 @@ pub struct ReflexionEpisode { pub actions: Vec, pub observations: Vec, pub critique: String, - pub embedding: Vec, + pub embedding: QuantumVector, pub timestamp: i64, pub metadata: Option>, } @@ -62,7 +62,7 @@ pub struct Skill { pub description: String, pub parameters: HashMap, pub examples: Vec, - pub embedding: Vec, + pub embedding: QuantumVector, pub usage_count: usize, pub success_rate: f64, pub created_at: i64, @@ -77,7 +77,7 @@ pub struct CausalEdge { pub effects: Vec, // Hypergraph: multiple effects pub confidence: f64, pub context: String, - pub embedding: Vec, + pub embedding: QuantumVector, pub observations: usize, pub timestamp: i64, } @@ -98,10 +98,10 @@ pub struct LearningSession { /// Single RL experience #[derive(Debug, Clone, Serialize, Deserialize, bincode::Encode, bincode::Decode)] pub struct 
Experience { - pub state: Vec, - pub action: Vec, + pub state: QuantumVector, + pub action: QuantumVector, pub reward: f64, - pub next_state: Vec, + pub next_state: QuantumVector, pub done: bool, pub timestamp: i64, } @@ -109,7 +109,7 @@ pub struct Experience { /// Prediction with confidence interval #[derive(Debug, Clone, Serialize, Deserialize, bincode::Encode, bincode::Decode)] pub struct Prediction { - pub action: Vec, + pub action: QuantumVector, pub confidence_lower: f64, pub confidence_upper: f64, pub mean_confidence: f64, @@ -616,10 +616,10 @@ impl AgenticDB { pub fn add_experience( &self, session_id: &str, - state: Vec, - action: Vec, + state: QuantumVector, + action: QuantumVector, reward: f64, - next_state: Vec, + next_state: QuantumVector, done: bool, ) -> Result<()> { let read_txn = self.db.begin_read()?; @@ -661,7 +661,11 @@ impl AgenticDB { } /// Predict action with confidence interval - pub fn predict_with_confidence(&self, session_id: &str, state: Vec) -> Result { + pub fn predict_with_confidence( + &self, + session_id: &str, + state: QuantumVector, + ) -> Result { let read_txn = self.db.begin_read()?; let table = read_txn.open_table(LEARNING_TABLE)?; @@ -677,8 +681,11 @@ impl AgenticDB { let mut similar_actions = Vec::new(); let mut rewards = Vec::new(); + let state_f32 = state.reconstruct(); + for exp in &session.experiences { - let distance = euclidean_distance(&state, &exp.state); + let exp_state_f32 = exp.state.reconstruct(); + let distance = euclidean_distance(&state_f32, &exp_state_f32); if distance < 1.0 { // Similarity threshold similar_actions.push(exp.action.clone()); @@ -689,7 +696,7 @@ impl AgenticDB { if similar_actions.is_empty() { // Return random action if no similar states return Ok(Prediction { - action: vec![0.0; session.action_dim], + action: QuantumVector::F32(vec![0.0; session.action_dim]), confidence_lower: 0.0, confidence_upper: 0.0, mean_confidence: 0.0, @@ -698,12 +705,13 @@ impl AgenticDB { // Average actions weighted by 
rewards let total_reward: f64 = rewards.iter().sum(); - let mut action = vec![0.0; session.action_dim]; + let mut action_f32 = vec![0.0; session.action_dim]; for (act, reward) in similar_actions.iter().zip(rewards.iter()) { let weight = reward / total_reward; - for (i, val) in act.iter().enumerate() { - action[i] += val * weight as f32; + let act_f32 = act.reconstruct(); + for (i, val) in act_f32.iter().enumerate() { + action_f32[i] += val * weight as f32; } } @@ -712,7 +720,7 @@ impl AgenticDB { let std_dev = calculate_std_dev(&rewards, mean_reward); Ok(Prediction { - action, + action: QuantumVector::F32(action_f32), confidence_lower: mean_reward - 1.96 * std_dev, confidence_upper: mean_reward + 1.96 * std_dev, mean_confidence: mean_reward, @@ -756,7 +764,7 @@ impl AgenticDB { /// let embedding = db.generate_text_embedding("hello world")?; /// # Ok::<(), Box>(()) /// ``` - fn generate_text_embedding(&self, text: &str) -> Result> { + fn generate_text_embedding(&self, text: &str) -> Result { self.embedding_provider.embed(text) } } @@ -802,7 +810,7 @@ pub struct PolicyAction { /// Q-value estimate pub q_value: f64, /// State embedding - pub state_embedding: Vec, + pub state_embedding: QuantumVector, /// Timestamp pub timestamp: i64, } @@ -830,7 +838,7 @@ impl<'a> PolicyMemoryStore<'a> { pub fn store_policy( &self, state_id: &str, - state_embedding: Vec, + state_embedding: QuantumVector, action: &str, reward: f64, q_value: f64, @@ -873,11 +881,11 @@ impl<'a> PolicyMemoryStore<'a> { /// Retrieve similar states for policy lookup pub fn retrieve_similar_states( &self, - state_embedding: &[f32], + state_embedding: QuantumVector, k: usize, ) -> Result> { let results = self.db.vector_db.search(SearchQuery { - vector: state_embedding.to_vec(), + vector: state_embedding, k, filter: Some({ let mut filter = HashMap::new(); @@ -930,7 +938,11 @@ impl<'a> PolicyMemoryStore<'a> { } /// Get the best action for a state based on Q-values - pub fn get_best_action(&self, 
state_embedding: &[f32], k: usize) -> Result> { + pub fn get_best_action( + &self, + state_embedding: QuantumVector, + k: usize, + ) -> Result> { let similar = self.retrieve_similar_states(state_embedding, k)?; similar @@ -973,7 +985,7 @@ pub struct SessionTurn { /// Content pub content: String, /// Embedding - pub embedding: Vec, + pub embedding: QuantumVector, /// Timestamp pub timestamp: i64, /// TTL expiry @@ -1145,7 +1157,7 @@ pub struct WitnessEntry { /// Action details pub details: String, /// Action embedding for semantic search - pub embedding: Vec, + pub embedding: QuantumVector, /// Timestamp pub timestamp: i64, /// Additional metadata @@ -1371,7 +1383,7 @@ mod tests { let mut params = HashMap::new(); params.insert("input".to_string(), "string".to_string()); - let skill_id = db.create_skill( + let _skill_id = db.create_skill( "Parse JSON".to_string(), "Parse JSON from string".to_string(), params, @@ -1388,7 +1400,7 @@ mod tests { fn test_causal_edge() -> Result<()> { let db = create_test_db()?; - let edge_id = db.add_causal_edge( + let _edge_id = db.add_causal_edge( vec!["rain".to_string()], vec!["wet ground".to_string()], 0.95, @@ -1409,14 +1421,15 @@ mod tests { db.add_experience( &session_id, - vec![1.0, 0.0, 0.0, 0.0], - vec![1.0, 0.0], + QuantumVector::F32(vec![1.0, 0.0, 0.0, 0.0]), + QuantumVector::F32(vec![1.0, 0.0]), 1.0, - vec![0.0, 1.0, 0.0, 0.0], + QuantumVector::F32(vec![0.0, 1.0, 0.0, 0.0]), false, )?; - let prediction = db.predict_with_confidence(&session_id, vec![1.0, 0.0, 0.0, 0.0])?; + let prediction = + db.predict_with_confidence(&session_id, QuantumVector::F32(vec![1.0, 0.0, 0.0, 0.0]))?; assert_eq!(prediction.action.len(), 2); Ok(()) diff --git a/crates/ruvector-core/src/embeddings.rs b/crates/ruvector-core/src/embeddings.rs index 9dfaa6329..ff61e04ad 100644 --- a/crates/ruvector-core/src/embeddings.rs +++ b/crates/ruvector-core/src/embeddings.rs @@ -29,10 +29,12 @@ use crate::error::Result; use crate::error::RuvectorError; use 
std::sync::Arc; +use crate::types::QuantumVector; + /// Trait for text embedding providers pub trait EmbeddingProvider: Send + Sync { /// Generate embedding vector for the given text - fn embed(&self, text: &str) -> Result>; + fn embed(&self, text: &str) -> Result; /// Get the dimensionality of embeddings produced by this provider fn dimensions(&self) -> usize; @@ -64,7 +66,7 @@ impl HashEmbedding { } impl EmbeddingProvider for HashEmbedding { - fn embed(&self, text: &str) -> Result> { + fn embed(&self, text: &str) -> Result { let mut embedding = vec![0.0; self.dimensions]; let bytes = text.as_bytes(); @@ -80,7 +82,7 @@ impl EmbeddingProvider for HashEmbedding { } } - Ok(embedding) + Ok(QuantumVector::F32(embedding)) } fn dimensions(&self) -> usize { @@ -161,7 +163,7 @@ pub mod candle { } impl EmbeddingProvider for CandleEmbedding { - fn embed(&self, _text: &str) -> Result> { + fn embed(&self, _text: &str) -> Result { Err(RuvectorError::ModelInferenceError( "Candle embedding not implemented - use ApiEmbedding instead".to_string(), )) @@ -274,7 +276,7 @@ impl ApiEmbedding { #[cfg(feature = "api-embeddings")] impl EmbeddingProvider for ApiEmbedding { - fn embed(&self, text: &str) -> Result> { + fn embed(&self, text: &str) -> Result { let request_body = serde_json::json!({ "input": text, "model": self.model, @@ -331,16 +333,16 @@ impl EmbeddingProvider for ApiEmbedding { )); }; - let embedding_vec: Result> = embedding + let embedding_vec: Vec = embedding .iter() .map(|v| { v.as_f64().map(|f| f as f32).ok_or_else(|| { RuvectorError::ModelInferenceError("Invalid embedding value".to_string()) }) }) - .collect(); + .collect::>>()?; - embedding_vec + Ok(QuantumVector::F32(embedding_vec)) } fn dimensions(&self) -> usize { @@ -367,10 +369,15 @@ mod tests { let emb2 = provider.embed("hello world").unwrap(); assert_eq!(emb1.len(), 128); - assert_eq!(emb1, emb2, "Same text should produce same embedding"); + assert_eq!( + emb1.to_vec(), + emb2.to_vec(), + "Same text should 
produce same embedding" + ); // Check normalization - let norm: f32 = emb1.iter().map(|x| x * x).sum::().sqrt(); + let v1 = emb1.to_vec(); + let norm: f32 = v1.iter().map(|x| x * x).sum::().sqrt(); assert!((norm - 1.0).abs() < 1e-5, "Embedding should be normalized"); } @@ -382,7 +389,8 @@ mod tests { let emb2 = provider.embed("world").unwrap(); assert_ne!( - emb1, emb2, + emb1.to_vec(), + emb2.to_vec(), "Different text should produce different embeddings" ); } diff --git a/crates/ruvector-core/src/index.rs b/crates/ruvector-core/src/index.rs index 095500a09..6bb8e2c2e 100644 --- a/crates/ruvector-core/src/index.rs +++ b/crates/ruvector-core/src/index.rs @@ -5,15 +5,15 @@ pub mod flat; pub mod hnsw; use crate::error::Result; -use crate::types::{SearchResult, VectorId}; +use crate::types::{QuantumVector, SearchResult, VectorId}; /// Trait for vector index implementations pub trait VectorIndex: Send + Sync { /// Add a vector to the index - fn add(&mut self, id: VectorId, vector: Vec) -> Result<()>; + fn add(&mut self, id: VectorId, vector: QuantumVector) -> Result<()>; /// Add multiple vectors in batch - fn add_batch(&mut self, entries: Vec<(VectorId, Vec)>) -> Result<()> { + fn add_batch(&mut self, entries: Vec<(VectorId, QuantumVector)>) -> Result<()> { for (id, vector) in entries { self.add(id, vector)?; } @@ -21,7 +21,7 @@ pub trait VectorIndex: Send + Sync { } /// Search for k nearest neighbors - fn search(&self, query: &[f32], k: usize) -> Result>; + fn search(&self, query: &QuantumVector, k: usize) -> Result>; /// Remove a vector from the index fn remove(&mut self, id: &VectorId) -> Result; diff --git a/crates/ruvector-core/src/index/flat.rs b/crates/ruvector-core/src/index/flat.rs index b2595b47d..d97f38d09 100644 --- a/crates/ruvector-core/src/index/flat.rs +++ b/crates/ruvector-core/src/index/flat.rs @@ -1,9 +1,8 @@ //! 
Flat (brute-force) index for baseline and small datasets -use crate::distance::distance; use crate::error::Result; use crate::index::VectorIndex; -use crate::types::{DistanceMetric, SearchResult, VectorId}; +use crate::types::{DistanceMetric, QuantumVector, SearchResult, VectorId}; use dashmap::DashMap; #[cfg(all(feature = "parallel", not(target_arch = "wasm32")))] @@ -11,7 +10,7 @@ use rayon::prelude::*; /// Flat index using brute-force search pub struct FlatIndex { - vectors: DashMap>, + vectors: DashMap, metric: DistanceMetric, _dimensions: usize, } @@ -28,12 +27,14 @@ impl FlatIndex { } impl VectorIndex for FlatIndex { - fn add(&mut self, id: VectorId, vector: Vec) -> Result<()> { + fn add(&mut self, id: VectorId, vector: QuantumVector) -> Result<()> { self.vectors.insert(id, vector); Ok(()) } - fn search(&self, query: &[f32], k: usize) -> Result> { + fn search(&self, query: &QuantumVector, k: usize) -> Result> { + let query_f32 = query.reconstruct(); + // Distance calculation - parallel on native, sequential on WASM #[cfg(all(feature = "parallel", not(target_arch = "wasm32")))] let mut results: Vec<_> = self @@ -42,8 +43,8 @@ impl VectorIndex for FlatIndex { .par_bridge() .map(|entry| { let id = entry.key().clone(); - let vector = entry.value(); - let dist = distance(query, vector, self.metric)?; + let vector_f32 = entry.value().reconstruct(); + let dist = crate::distance::distance(&query_f32, &vector_f32, self.metric)?; Ok((id, dist)) }) .collect::>>()?; @@ -54,8 +55,8 @@ impl VectorIndex for FlatIndex { .iter() .map(|entry| { let id = entry.key().clone(); - let vector = entry.value(); - let dist = distance(query, vector, self.metric)?; + let vector_f32 = entry.value().reconstruct(); + let dist = crate::distance::distance(&query_f32, &vector_f32, self.metric)?; Ok((id, dist)) }) .collect::>>()?; @@ -92,11 +93,11 @@ mod tests { fn test_flat_index() -> Result<()> { let mut index = FlatIndex::new(3, DistanceMetric::Euclidean); - index.add("v1".to_string(), 
vec![1.0, 0.0, 0.0])?; - index.add("v2".to_string(), vec![0.0, 1.0, 0.0])?; - index.add("v3".to_string(), vec![0.0, 0.0, 1.0])?; + index.add("v1".to_string(), QuantumVector::F32(vec![1.0, 0.0, 0.0]))?; + index.add("v2".to_string(), QuantumVector::F32(vec![0.0, 1.0, 0.0]))?; + index.add("v3".to_string(), QuantumVector::F32(vec![0.0, 0.0, 1.0]))?; - let query = vec![1.0, 0.0, 0.0]; + let query = QuantumVector::F32(vec![1.0, 0.0, 0.0]); let results = index.search(&query, 2)?; assert_eq!(results.len(), 2); diff --git a/crates/ruvector-core/src/index/hnsw.rs b/crates/ruvector-core/src/index/hnsw.rs index ead513b17..09857f50a 100644 --- a/crates/ruvector-core/src/index/hnsw.rs +++ b/crates/ruvector-core/src/index/hnsw.rs @@ -1,9 +1,9 @@ //! HNSW (Hierarchical Navigable Small World) index implementation -use crate::distance::distance; +// use crate::distance::distance; use crate::error::{Result, RuvectorError}; use crate::index::VectorIndex; -use crate::types::{DistanceMetric, HnswConfig, SearchResult, VectorId}; +use crate::types::{DistanceMetric, HnswConfig, QuantumVector, SearchResult, VectorId}; use bincode::{Decode, Encode}; use dashmap::DashMap; use hnsw_rs::prelude::*; @@ -21,9 +21,12 @@ impl DistanceFn { } } -impl Distance for DistanceFn { - fn eval(&self, a: &[f32], b: &[f32]) -> f32 { - distance(a, b, self.metric).unwrap_or(f32::MAX) +impl Distance for DistanceFn { + fn eval(&self, a: &[QuantumVector], b: &[QuantumVector]) -> f32 { + // Direct distance on QuantumVectors + let a_f32 = a[0].reconstruct(); + let b_f32 = b[0].reconstruct(); + crate::distance::distance(&a_f32, &b_f32, self.metric).unwrap_or(f32::MAX) } } @@ -36,8 +39,8 @@ pub struct HnswIndex { } struct HnswInner { - hnsw: Hnsw<'static, f32, DistanceFn>, - vectors: DashMap>, + hnsw: Hnsw<'static, QuantumVector, DistanceFn>, + vectors: DashMap, id_to_idx: DashMap, idx_to_id: DashMap, next_idx: usize, @@ -46,60 +49,24 @@ struct HnswInner { /// Serializable HNSW index state #[derive(Encode, Decode, 
Clone)] pub struct HnswState { - vectors: Vec<(String, Vec)>, + vectors: Vec<(String, QuantumVector)>, id_to_idx: Vec<(String, usize)>, idx_to_id: Vec<(usize, String)>, next_idx: usize, - config: SerializableHnswConfig, + config: HnswConfig, dimensions: usize, - metric: SerializableDistanceMetric, -} - -#[derive(Encode, Decode, Clone)] -struct SerializableHnswConfig { - m: usize, - ef_construction: usize, - ef_search: usize, - max_elements: usize, -} - -#[derive(Encode, Decode, Clone, Copy)] -enum SerializableDistanceMetric { - Euclidean, - Cosine, - DotProduct, - Manhattan, -} - -impl From for SerializableDistanceMetric { - fn from(metric: DistanceMetric) -> Self { - match metric { - DistanceMetric::Euclidean => SerializableDistanceMetric::Euclidean, - DistanceMetric::Cosine => SerializableDistanceMetric::Cosine, - DistanceMetric::DotProduct => SerializableDistanceMetric::DotProduct, - DistanceMetric::Manhattan => SerializableDistanceMetric::Manhattan, - } - } + metric: DistanceMetric, } -impl From for DistanceMetric { - fn from(metric: SerializableDistanceMetric) -> Self { - match metric { - SerializableDistanceMetric::Euclidean => DistanceMetric::Euclidean, - SerializableDistanceMetric::Cosine => DistanceMetric::Cosine, - SerializableDistanceMetric::DotProduct => DistanceMetric::DotProduct, - SerializableDistanceMetric::Manhattan => DistanceMetric::Manhattan, - } - } -} +// Redundant serializable structs removed as they are now in types.rs impl HnswIndex { /// Create a new HNSW index pub fn new(dimensions: usize, metric: DistanceMetric, config: HnswConfig) -> Result { let distance_fn = DistanceFn::new(metric); - // Create HNSW with configured parameters - let hnsw = Hnsw::::new( + // Create HNSW with configured parameters (QuantumVector native) + let hnsw = Hnsw::::new( config.m, config.max_elements, dimensions, @@ -153,14 +120,9 @@ impl HnswIndex { .map(|entry| (*entry.key(), entry.value().clone())) .collect(), next_idx: inner.next_idx, - config: 
SerializableHnswConfig { - m: self.config.m, - ef_construction: self.config.ef_construction, - ef_search: self.config.ef_search, - max_elements: self.config.max_elements, - }, + config: self.config.clone(), dimensions: self.dimensions, - metric: self.metric.into(), + metric: self.metric, }; bincode::encode_to_vec(&state, bincode::config::standard()).map_err(|e| { @@ -189,7 +151,7 @@ impl HnswIndex { let metric: DistanceMetric = state.metric.into(); let distance_fn = DistanceFn::new(metric); - let mut hnsw = Hnsw::<'static, f32, DistanceFn>::new( + let mut hnsw = Hnsw::::new( config.m, config.max_elements, dimensions, @@ -206,12 +168,12 @@ impl HnswIndex { let idx = *entry.key(); let id = entry.value(); if let Some(vector) = state.vectors.iter().find(|(vid, _)| vid == id) { - // Use insert_data method with slice and idx - hnsw.insert_data(&vector.1, idx); + // Use insert_data method with QuantumVector + hnsw.insert_data(std::slice::from_ref(&vector.1), idx); } } - let vectors_map: DashMap> = state.vectors.into_iter().collect(); + let vectors_map: DashMap = state.vectors.into_iter().collect(); Ok(Self { inner: Arc::new(RwLock::new(HnswInner { @@ -230,21 +192,21 @@ impl HnswIndex { /// Search with custom efSearch parameter pub fn search_with_ef( &self, - query: &[f32], + query: &QuantumVector, k: usize, ef_search: usize, ) -> Result> { - if query.len() != self.dimensions { + if query.reconstruct().len() != self.dimensions { return Err(RuvectorError::DimensionMismatch { expected: self.dimensions, - actual: query.len(), + actual: query.reconstruct().len(), }); } let inner = self.inner.read(); // Use HNSW search with custom ef parameter (knbn) - let neighbors = inner.hnsw.search(query, k, ef_search); + let neighbors = inner.hnsw.search(std::slice::from_ref(query), k, ef_search); Ok(neighbors .into_iter() @@ -261,11 +223,11 @@ impl HnswIndex { } impl VectorIndex for HnswIndex { - fn add(&mut self, id: VectorId, vector: Vec) -> Result<()> { - if vector.len() != 
self.dimensions { + fn add(&mut self, id: VectorId, vector: QuantumVector) -> Result<()> { + if vector.reconstruct().len() != self.dimensions { return Err(RuvectorError::DimensionMismatch { expected: self.dimensions, - actual: vector.len(), + actual: vector.reconstruct().len(), }); } @@ -273,8 +235,8 @@ impl VectorIndex for HnswIndex { let idx = inner.next_idx; inner.next_idx += 1; - // Insert into HNSW graph using insert_data - inner.hnsw.insert_data(&vector, idx); + // Insert into HNSW graph using insert_data (QuantumVector native) + inner.hnsw.insert_data(std::slice::from_ref(&vector), idx); // Store mappings inner.vectors.insert(id.clone(), vector); @@ -284,13 +246,13 @@ impl VectorIndex for HnswIndex { Ok(()) } - fn add_batch(&mut self, entries: Vec<(VectorId, Vec)>) -> Result<()> { + fn add_batch(&mut self, entries: Vec<(VectorId, QuantumVector)>) -> Result<()> { // Validate all dimensions first for (_, vector) in &entries { - if vector.len() != self.dimensions { + if vector.reconstruct().len() != self.dimensions { return Err(RuvectorError::DimensionMismatch { expected: self.dimensions, - actual: vector.len(), + actual: vector.reconstruct().len(), }); } } @@ -300,22 +262,20 @@ impl VectorIndex for HnswIndex { // Prepare batch data for insertion // First, assign indices and collect vector data let data_with_ids: Vec<_> = entries - .iter() + .into_iter() .enumerate() .map(|(i, (id, vector))| { let idx = inner.next_idx + i; - (id.clone(), idx, vector.clone()) + (id, idx, vector) }) .collect(); // Update next_idx - inner.next_idx += entries.len(); + inner.next_idx += data_with_ids.len(); - // Insert into HNSW sequentially - // Note: Using sequential insertion to avoid Send requirements with RwLock guard - // For large batches, consider restructuring to use hnsw_rs parallel_insert + // Insert into HNSW sequentially (Hnsw-rs native optimized) for (_id, idx, vector) in &data_with_ids { - inner.hnsw.insert_data(vector, *idx); + 
inner.hnsw.insert_data(std::slice::from_ref(vector), *idx); } // Store mappings @@ -328,7 +288,7 @@ impl VectorIndex for HnswIndex { Ok(()) } - fn search(&self, query: &[f32], k: usize) -> Result> { + fn search(&self, query: &QuantumVector, k: usize) -> Result> { // Use configured ef_search self.search_with_ef(query, k, self.config.ef_search) } @@ -363,21 +323,25 @@ impl VectorIndex for HnswIndex { mod tests { use super::*; - fn generate_random_vectors(count: usize, dimensions: usize) -> Vec> { + fn generate_random_vectors(count: usize, dimensions: usize) -> Vec { use rand::Rng; let mut rng = rand::thread_rng(); (0..count) - .map(|_| (0..dimensions).map(|_| rng.gen::()).collect()) + .map(|_| { + let v: Vec = (0..dimensions).map(|_| rng.gen::()).collect(); + QuantumVector::F32(v) + }) .collect() } - fn normalize_vector(v: &[f32]) -> Vec { - let norm = v.iter().map(|x| x * x).sum::().sqrt(); + fn normalize_quantum(v: &QuantumVector) -> QuantumVector { + let vec = v.reconstruct(); + let norm = vec.iter().map(|x| x * x).sum::().sqrt(); if norm > 0.0 { - v.iter().map(|x| x / norm).collect() + QuantumVector::F32(vec.iter().map(|x| x / norm).collect()) } else { - v.to_vec() + QuantumVector::F32(vec) } } @@ -403,14 +367,14 @@ mod tests { // Insert a few vectors let vectors = generate_random_vectors(100, 128); for (i, vector) in vectors.iter().enumerate() { - let normalized = normalize_vector(vector); + let normalized = normalize_quantum(vector); index.add(format!("vec_{}", i), normalized)?; } assert_eq!(index.len(), 100); // Search for the first vector - let query = normalize_vector(&vectors[0]); + let query = normalize_quantum(&vectors[0]); let results = index.search(&query, 10)?; assert!(!results.is_empty()); @@ -428,7 +392,7 @@ mod tests { let entries: Vec<_> = vectors .iter() .enumerate() - .map(|(i, v)| (format!("vec_{}", i), normalize_vector(v))) + .map(|(i, v)| (format!("vec_{}", i), normalize_quantum(v))) .collect(); index.add_batch(entries)?; @@ -451,7 +415,7 @@ 
mod tests { // Insert vectors let vectors = generate_random_vectors(50, 128); for (i, vector) in vectors.iter().enumerate() { - let normalized = normalize_vector(vector); + let normalized = normalize_quantum(vector); index.add(format!("vec_{}", i), normalized)?; } @@ -464,7 +428,7 @@ mod tests { assert_eq!(restored_index.len(), 50); // Test search on restored index - let query = normalize_vector(&vectors[0]); + let query = normalize_quantum(&vectors[0]); let results = restored_index.search(&query, 5)?; assert!(!results.is_empty()); @@ -477,7 +441,7 @@ mod tests { let config = HnswConfig::default(); let mut index = HnswIndex::new(128, DistanceMetric::Cosine, config)?; - let result = index.add("test".to_string(), vec![1.0; 64]); + let result = index.add("test".to_string(), QuantumVector::F32(vec![1.0; 64])); assert!(result.is_err()); Ok(()) diff --git a/crates/ruvector-core/src/quantization.rs b/crates/ruvector-core/src/quantization.rs index 8d3ffed16..79c794224 100644 --- a/crates/ruvector-core/src/quantization.rs +++ b/crates/ruvector-core/src/quantization.rs @@ -17,8 +17,69 @@ //! 
- Separate accumulator strategy to reduce data dependencies use crate::error::Result; +use crate::types::QuantumVector; use serde::{Deserialize, Serialize}; +impl QuantumVector { + /// Create a QuantumVector from a raw f32 vector using the specified config + pub fn from_f32(vector: &[f32], config: &crate::types::QuantizationConfig) -> Self { + match config { + crate::types::QuantizationConfig::None => QuantumVector::F32(vector.to_vec()), + crate::types::QuantizationConfig::Scalar => { + let q = ScalarQuantized::quantize(vector); + // Note: Types.rs and Quantization.rs might have slight drift in naming for v2 + // We'll map to Q8 for the unified QuantumVector + QuantumVector::Q8( + q.data.into_iter().map(|v| (v as i16 - 128) as i8).collect(), + q.scale, + ) + } + crate::types::QuantizationConfig::NF4 => { + let q = NF4Quantized::quantize(vector); + QuantumVector::NF4 { + data: q.data, + scale: q.scale, + orig_len: q.dimensions, + } + } + _ => QuantumVector::F32(vector.to_vec()), + } + } + + /// Reconstruct back to f32 (for evaluation or legacy support) + pub fn reconstruct(&self) -> Vec { + match self { + QuantumVector::F32(v) => v.clone(), + QuantumVector::Q8(data, scale) => data.iter().map(|&v| v as f32 * scale).collect(), + QuantumVector::NF4 { + data, + scale, + orig_len, + } => { + let q = NF4Quantized { + data: data.clone(), + scale: *scale, + dimensions: *orig_len, + }; + q.reconstruct() + } + QuantumVector::Binary(data) => { + let mut v = Vec::with_capacity(data.len() * 8); + for &byte in data { + for i in 0..8 { + v.push(if (byte >> (7 - i)) & 1 == 1 { + 1.0 + } else { + -1.0 + }); + } + } + v + } + } + } +} + /// Trait for quantized vector representations pub trait QuantizedVector: Send + Sync { /// Quantize a full-precision vector @@ -284,6 +345,96 @@ impl Int4Quantized { } } +/// Normal Float 4 (NF4) quantization (8x compression) +/// Based on standard normal distribution quantiles. 
+#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct NF4Quantized { + pub data: Vec, + pub scale: f32, + pub dimensions: usize, +} + +const NF4_VALUES: [f32; 16] = [ + -1.0, + -0.6961928, + -0.52507305, + -0.3949174, + -0.28444138, + -0.18477343, + -0.091050036, + 0.0, + 0.0795803, + 0.16093205, + 0.2461123, + 0.33791524, + 0.43546617, + 0.54850423, + 0.6858564, + 1.0, +]; + +impl NF4Quantized { + pub fn quantize(vector: &[f32]) -> Self { + let mut amax = 0.0f32; + for &v in vector { + amax = amax.max(v.abs()); + } + let scale = amax; + let inv_scale = if scale > 0.0 { 1.0 / scale } else { 1.0 }; + let dimensions = vector.len(); + let mut data = vec![0u8; dimensions.div_ceil(2)]; + + for (i, &v) in vector.iter().enumerate() { + let q = Self::nearest_nf4(v * inv_scale); + let byte_idx = i / 2; + if i % 2 == 0 { + data[byte_idx] |= q; + } else { + data[byte_idx] |= q << 4; + } + } + Self { + data, + scale, + dimensions, + } + } + + fn nearest_nf4(val: f32) -> u8 { + NF4_VALUES + .iter() + .enumerate() + .min_by(|(_, &a), (_, &b)| (val - a).abs().partial_cmp(&(val - b).abs()).unwrap()) + .map(|(idx, _)| idx as u8) + .unwrap_or(0) + } + + pub fn distance(&self, other: &Self) -> f32 { + let avg_scale = (self.scale + other.scale) / 2.0; + let mut sum_sq = 0.0f32; + for i in 0..self.dimensions { + let b_idx = i / 2; + let shift = if i % 2 == 0 { 0 } else { 4 }; + let q_a = (self.data[b_idx] >> shift) & 0x0F; + let q_b = (other.data[b_idx] >> shift) & 0x0F; + let diff = NF4_VALUES[q_a as usize] - NF4_VALUES[q_b as usize]; + sum_sq += diff * diff; + } + sum_sq.sqrt() * avg_scale + } + + pub fn reconstruct(&self) -> Vec { + let mut res = Vec::with_capacity(self.dimensions); + for i in 0..self.dimensions { + let b_idx = i / 2; + let shift = if i % 2 == 0 { 0 } else { 4 }; + let q = (self.data[b_idx] >> shift) & 0x0F; + res.push(NF4_VALUES[q as usize] * self.scale); + } + res + } +} + /// Binary quantization (32x compression) #[derive(Debug, Clone, Serialize, 
Deserialize)] pub struct BinaryQuantized { diff --git a/crates/ruvector-core/src/storage.rs b/crates/ruvector-core/src/storage.rs index f6209cd7b..28519df23 100644 --- a/crates/ruvector-core/src/storage.rs +++ b/crates/ruvector-core/src/storage.rs @@ -215,7 +215,8 @@ impl VectorStorage { return Ok(None); }; - let (vector, _): (Vec, usize) = + // Decoded directly as QuantumVector + let (vector, _): (crate::types::QuantumVector, usize) = bincode::decode_from_slice(vector_data.value(), config::standard()) .map_err(|e| RuvectorError::SerializationError(e.to_string()))?; @@ -338,7 +339,7 @@ mod tests { let entry = VectorEntry { id: Some("test1".to_string()), - vector: vec![1.0, 2.0, 3.0], + vector: crate::types::QuantumVector::F32(vec![1.0, 2.0, 3.0]), metadata: None, }; @@ -348,7 +349,7 @@ mod tests { let retrieved = storage.get("test1")?; assert!(retrieved.is_some()); let retrieved = retrieved.unwrap(); - assert_eq!(retrieved.vector, vec![1.0, 2.0, 3.0]); + assert_eq!(retrieved.vector.len(), 3); Ok(()) } @@ -361,12 +362,12 @@ mod tests { let entries = vec![ VectorEntry { id: None, - vector: vec![1.0, 2.0, 3.0], + vector: crate::types::QuantumVector::F32(vec![1.0, 2.0, 3.0]), metadata: None, }, VectorEntry { id: None, - vector: vec![4.0, 5.0, 6.0], + vector: crate::types::QuantumVector::F32(vec![4.0, 5.0, 6.0]), metadata: None, }, ]; @@ -385,7 +386,7 @@ mod tests { let entry = VectorEntry { id: Some("test1".to_string()), - vector: vec![1.0, 2.0, 3.0], + vector: crate::types::QuantumVector::F32(vec![1.0, 2.0, 3.0]), metadata: None, }; @@ -412,7 +413,7 @@ mod tests { // Insert data with first instance storage1.insert(&VectorEntry { id: Some("test1".to_string()), - vector: vec![1.0, 2.0, 3.0], + vector: crate::types::QuantumVector::F32(vec![1.0, 2.0, 3.0]), metadata: None, })?; @@ -426,7 +427,7 @@ mod tests { // Insert with second instance storage2.insert(&VectorEntry { id: Some("test2".to_string()), - vector: vec![4.0, 5.0, 6.0], + vector: 
crate::types::QuantumVector::F32(vec![4.0, 5.0, 6.0]), metadata: None, })?; diff --git a/crates/ruvector-core/src/storage_memory.rs b/crates/ruvector-core/src/storage_memory.rs index 1732bc1d3..fa2809fdd 100644 --- a/crates/ruvector-core/src/storage_memory.rs +++ b/crates/ruvector-core/src/storage_memory.rs @@ -4,14 +4,14 @@ //! making it suitable for WebAssembly environments. use crate::error::{Result, RuvectorError}; -use crate::types::{VectorEntry, VectorId}; +use crate::types::{QuantumVector, VectorEntry, VectorId}; use dashmap::DashMap; use serde_json::Value as JsonValue; use std::sync::atomic::{AtomicU64, Ordering}; /// In-memory storage backend using DashMap for thread-safe concurrent access pub struct MemoryStorage { - vectors: DashMap>, + vectors: DashMap, metadata: DashMap, dimensions: usize, counter: AtomicU64, @@ -169,8 +169,15 @@ mod tests { let entry = VectorEntry { id: Some("test_1".to_string()), - vector: vec![0.1; 128], - metadata: Some(json!({"key": "value"})), + vector: QuantumVector::F32(vec![0.1; 128]), + metadata: Some( + json!({"key": "value"}) + .as_object() + .unwrap() + .iter() + .map(|(k, v)| (k.clone(), v.clone())) + .collect(), + ), }; let id = storage.insert(&entry).unwrap(); @@ -188,7 +195,7 @@ mod tests { let entries: Vec<_> = (0..10) .map(|i| VectorEntry { id: Some(format!("vec_{}", i)), - vector: vec![i as f32; 64], + vector: QuantumVector::F32(vec![i as f32; 64]), metadata: None, }) .collect(); @@ -204,7 +211,7 @@ mod tests { let entry = VectorEntry { id: Some("delete_me".to_string()), - vector: vec![1.0; 32], + vector: QuantumVector::F32(vec![1.0; 32]), metadata: None, }; @@ -222,7 +229,7 @@ mod tests { let entry = VectorEntry { id: None, - vector: vec![0.5; 16], + vector: QuantumVector::F32(vec![0.5; 16]), metadata: None, }; @@ -240,7 +247,7 @@ mod tests { let entry = VectorEntry { id: Some("bad".to_string()), - vector: vec![0.1; 64], // Wrong dimension + vector: QuantumVector::F32(vec![0.1; 64]), // Wrong dimension metadata: 
None, }; diff --git a/crates/ruvector-core/src/types.rs b/crates/ruvector-core/src/types.rs index c39a49c28..a6b87cf16 100644 --- a/crates/ruvector-core/src/types.rs +++ b/crates/ruvector-core/src/types.rs @@ -7,7 +7,9 @@ use std::collections::HashMap; pub type VectorId = String; /// Distance metric for similarity calculation -#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[derive( + Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, bincode::Encode, bincode::Decode, +)] pub enum DistanceMetric { /// Euclidean (L2) distance Euclidean, @@ -19,13 +21,61 @@ pub enum DistanceMetric { Manhattan, } +/// Unified Quantum Vector type to replace raw f32 vectors +#[derive(Debug, Clone, Serialize, Deserialize, bincode::Encode, bincode::Decode)] +pub enum QuantumVector { + /// Full precision (only for in-flight/transfer, will be purged in storage) + F32(Vec), + /// 8-bit Quantized (Q8_0) + Q8(Vec, f32), // data, scale + /// 4-bit Normal Float (NF4) + NF4 { + data: Vec, + scale: f32, + orig_len: usize, + }, + /// Binary (1-bit) + Binary(Vec), +} + +impl Default for QuantumVector { + fn default() -> Self { + QuantumVector::F32(Vec::new()) + } +} + +impl QuantumVector { + pub fn len(&self) -> usize { + match self { + QuantumVector::F32(v) => v.len(), + QuantumVector::Q8(v, _) => v.len(), + QuantumVector::NF4 { orig_len, .. } => *orig_len, + QuantumVector::Binary(v) => v.len() * 8, + } + } + + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + pub fn to_f32_vec(&self) -> Vec { + match self { + QuantumVector::F32(v) => v.clone(), + // Provide a dummy zero vector or panic if quantized + QuantumVector::Q8(v, _) => vec![0.0; v.len()], + QuantumVector::NF4 { orig_len, .. 
} => vec![0.0; *orig_len], + QuantumVector::Binary(v) => vec![0.0; v.len() * 8], + } + } +} + /// Vector entry with metadata #[derive(Debug, Clone, Serialize, Deserialize)] pub struct VectorEntry { /// Optional ID (auto-generated if not provided) pub id: Option, - /// Vector data - pub vector: Vec, + /// Quantum compressed vector data + pub vector: QuantumVector, /// Optional metadata pub metadata: Option>, } @@ -33,8 +83,8 @@ pub struct VectorEntry { /// Search query parameters #[derive(Debug, Clone, Serialize, Deserialize)] pub struct SearchQuery { - /// Query vector - pub vector: Vec, + /// Query vector (can be F32 or Q8 for search) + pub vector: QuantumVector, /// Number of results to return (top-k) pub k: usize, /// Optional metadata filters @@ -50,14 +100,14 @@ pub struct SearchResult { pub id: VectorId, /// Distance/similarity score (lower is better for distance metrics) pub score: f32, - /// Vector data (optional) - pub vector: Option>, + /// Vector data (optional, returned in Quantum format) + pub vector: Option, /// Metadata (optional) pub metadata: Option>, } /// Database configuration options -#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive(Debug, Clone, Serialize, Deserialize, bincode::Encode, bincode::Decode)] pub struct DbOptions { /// Vector dimensions pub dimensions: usize, @@ -72,7 +122,7 @@ pub struct DbOptions { } /// HNSW index configuration -#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive(Debug, Clone, Serialize, Deserialize, bincode::Encode, bincode::Decode)] pub struct HnswConfig { /// Number of connections per layer (M) pub m: usize, @@ -96,7 +146,7 @@ impl Default for HnswConfig { } /// Quantization configuration -#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive(Debug, Clone, Serialize, Deserialize, bincode::Encode, bincode::Decode)] pub enum QuantizationConfig { /// No quantization (full precision) None, @@ -111,6 +161,8 @@ pub enum QuantizationConfig { }, /// Binary quantization (32x compression) Binary, + 
/// Normal Float 4-bit (8x compression) + NF4, } impl Default for DbOptions { diff --git a/crates/ruvector-core/src/vector_db.rs b/crates/ruvector-core/src/vector_db.rs index f5c33022b..c94dc59a6 100644 --- a/crates/ruvector-core/src/vector_db.rs +++ b/crates/ruvector-core/src/vector_db.rs @@ -86,17 +86,20 @@ impl VectorDB { { let bin_path = format!("{}_hnsw.bin", options.storage_path); if std::path::Path::new(&bin_path).exists() { - tracing::info!("Found persisted HNSW index graph, attempting O(1) fast load..."); + tracing::info!( + "Found persisted HNSW index graph, attempting O(1) fast load..." + ); match std::fs::read(&bin_path) { - Ok(bytes) => { - match HnswIndex::deserialize(&bytes) { - Ok(idx) => { - tracing::info!("Successfully loaded HNSW graph with {} vectors via Zero-Copy bypass", idx.len()); - loaded_index = Some(Box::new(idx) as Box); - } - Err(e) => tracing::warn!("Failed to deserialize HNSW index, falling back to rebuild: {}", e), + Ok(bytes) => match HnswIndex::deserialize(&bytes) { + Ok(idx) => { + tracing::info!("Successfully loaded HNSW graph with {} vectors via Zero-Copy bypass", idx.len()); + loaded_index = Some(Box::new(idx) as Box); } - } + Err(e) => tracing::warn!( + "Failed to deserialize HNSW index, falling back to rebuild: {}", + e + ), + }, Err(e) => tracing::warn!("Failed to read HNSW bin file: {}", e), } } @@ -361,19 +364,19 @@ mod tests { db.insert(VectorEntry { id: Some("v1".to_string()), - vector: vec![1.0, 0.0, 0.0], + vector: QuantumVector::F32(vec![1.0, 0.0, 0.0]), metadata: None, })?; db.insert(VectorEntry { id: Some("v2".to_string()), - vector: vec![0.0, 1.0, 0.0], + vector: QuantumVector::F32(vec![0.0, 1.0, 0.0]), metadata: None, })?; db.insert(VectorEntry { id: Some("v3".to_string()), - vector: vec![0.7, 0.7, 0.0], + vector: QuantumVector::F32(vec![0.7, 0.7, 0.0]), metadata: None, })?; diff --git a/crates/ruvector-core/tests/advanced_features_integration.rs b/crates/ruvector-core/tests/advanced_features_integration.rs index 
030882eb0..2aaa56682 100644 --- a/crates/ruvector-core/tests/advanced_features_integration.rs +++ b/crates/ruvector-core/tests/advanced_features_integration.rs @@ -358,7 +358,7 @@ fn test_conformal_prediction_128d() { .map(|i| SearchResult { id: format!("vec_{}", i), score: i as f32 * 0.1, - vector: Some(vec![0.0; dimensions]), + vector: Some(QuantumVector::F32(vec![0.0; dimensions])), metadata: None, }) .collect()) diff --git a/crates/ruvector-core/tests/quantum_native_test.rs b/crates/ruvector-core/tests/quantum_native_test.rs new file mode 100644 index 000000000..272fd8a42 --- /dev/null +++ b/crates/ruvector-core/tests/quantum_native_test.rs @@ -0,0 +1,66 @@ +use ruvector_core::types::DbOptions; +use ruvector_core::types::{DistanceMetric, QuantumVector, SearchQuery, VectorEntry}; +use ruvector_core::vector_db::VectorDB; +use std::collections::HashMap; + +#[test] +fn test_quantum_native_flow() { + let options = DbOptions { + dimensions: 4, + distance_metric: DistanceMetric::Euclidean, + storage_path: "/tmp/quantum_test.db".to_string(), + ..Default::default() + }; + + let db = VectorDB::new(options).unwrap(); + + // 1. Test Q8 Quantization + let vec_f32 = vec![0.1, 0.2, 0.3, 0.4]; + // Normally quantization happens in the provider, but we can simulate it + let q8_vec = QuantumVector::Q8(vec![12, 25, 38, 51], 0.0078); + + db.insert(VectorEntry { + id: Some("v1".to_string()), + vector: q8_vec.clone(), + metadata: None, + }) + .unwrap(); + + // 2. Test Search with Q8 + let results = db + .search(SearchQuery { + vector: q8_vec, + k: 1, + filter: None, + ef_search: None, + }) + .unwrap(); + + assert_eq!(results.len(), 1); + assert_eq!(results[0].id, "v1"); + + // 3. 
Test NF4 storage (manual insertion) + let nf4_vec = QuantumVector::NF4 { + data: vec![0x12, 0x34], + scale: 1.0, + orig_len: 4, + }; + + db.insert(VectorEntry { + id: Some("v2".to_string()), + vector: nf4_vec, + metadata: None, + }) + .unwrap(); + + let results_all = db + .search(SearchQuery { + vector: QuantumVector::F32(vec_f32), + k: 2, + filter: None, + ef_search: None, + }) + .unwrap(); + + assert_eq!(results_all.len(), 2); +} diff --git a/crates/ruvector-graph/src/hybrid/vector_index.rs b/crates/ruvector-graph/src/hybrid/vector_index.rs index 0dd9ade0d..b427762f7 100644 --- a/crates/ruvector-graph/src/hybrid/vector_index.rs +++ b/crates/ruvector-graph/src/hybrid/vector_index.rs @@ -12,7 +12,7 @@ use ruvector_core::index::hnsw::HnswIndex; use ruvector_core::index::VectorIndex; #[cfg(feature = "hnsw_rs")] use ruvector_core::types::HnswConfig; -use ruvector_core::types::{DistanceMetric, SearchResult}; +use ruvector_core::types::{DistanceMetric, QuantumVector, SearchResult}; use serde::{Deserialize, Serialize}; use std::sync::Arc; @@ -155,7 +155,7 @@ impl HybridIndex { let vector_id = format!("node_{}", node_id); index - .add(vector_id.clone(), embedding) + .add(vector_id.clone(), QuantumVector::F32(embedding)) .map_err(|e| GraphError::IndexError(format!("Failed to add node embedding: {}", e)))?; self.node_id_map.insert(node_id, vector_id); @@ -179,7 +179,7 @@ impl HybridIndex { let vector_id = format!("edge_{}", edge_id); index - .add(vector_id.clone(), embedding) + .add(vector_id.clone(), QuantumVector::F32(embedding)) .map_err(|e| GraphError::IndexError(format!("Failed to add edge embedding: {}", e)))?; self.edge_id_map.insert(edge_id, vector_id); @@ -202,9 +202,11 @@ impl HybridIndex { .ok_or_else(|| GraphError::IndexError("Hyperedge index not initialized".to_string()))?; let vector_id = format!("hyperedge_{}", hyperedge_id); - index.add(vector_id.clone(), embedding).map_err(|e| { - GraphError::IndexError(format!("Failed to add hyperedge embedding: {}", e)) - 
})?; + index + .add(vector_id.clone(), QuantumVector::F32(embedding)) + .map_err(|e| { + GraphError::IndexError(format!("Failed to add hyperedge embedding: {}", e)) + })?; self.hyperedge_id_map.insert(hyperedge_id, vector_id); Ok(()) @@ -218,7 +220,7 @@ impl HybridIndex { .ok_or_else(|| GraphError::IndexError("Node index not initialized".to_string()))?; let results = index - .search(query, k) + .search(&QuantumVector::F32(query.to_vec()), k) .map_err(|e| GraphError::IndexError(format!("Search failed: {}", e)))?; Ok(results @@ -239,7 +241,7 @@ impl HybridIndex { .ok_or_else(|| GraphError::IndexError("Edge index not initialized".to_string()))?; let results = index - .search(query, k) + .search(&QuantumVector::F32(query.to_vec()), k) .map_err(|e| GraphError::IndexError(format!("Search failed: {}", e)))?; Ok(results @@ -259,7 +261,7 @@ impl HybridIndex { .ok_or_else(|| GraphError::IndexError("Hyperedge index not initialized".to_string()))?; let results = index - .search(query, k) + .search(&QuantumVector::F32(query.to_vec()), k) .map_err(|e| GraphError::IndexError(format!("Search failed: {}", e)))?; Ok(results diff --git a/crates/ruvector-postgres/docs/integration-plans/09-benchmarking-plan.md b/crates/ruvector-postgres/docs/integration-plans/09-benchmarking-plan.md index b8ccf2bfb..7d53536d2 100644 --- a/crates/ruvector-postgres/docs/integration-plans/09-benchmarking-plan.md +++ b/crates/ruvector-postgres/docs/integration-plans/09-benchmarking-plan.md @@ -381,8 +381,8 @@ def run_pinecone_benchmark(index, size, dim): """Benchmark Pinecone (cloud)""" pass -def run_qdrant_benchmark(client, size, dim): - """Benchmark Qdrant""" +def run_LegacyDB_benchmark(client, size, dim): + """Benchmark LegacyDB""" pass def run_milvus_benchmark(collection, size, dim): @@ -396,7 +396,7 @@ for size in SIZES: results[(size, dim)] = { 'pgvector': run_pgvector_benchmark(...), 'ruvector': run_ruvector_benchmark(...), - 'qdrant': run_qdrant_benchmark(...), + 'LegacyDB': 
run_LegacyDB_benchmark(...), 'milvus': run_milvus_benchmark(...), } @@ -409,7 +409,7 @@ for size in SIZES: |--------|----------|-----------------|-----------------|--------|-----------| | **ruvector-postgres** | **5min** | **0.9ms** | **3.2ms** | **4.2GB** | **0.97** | | pgvector | 12min | 2.1ms | 8.5ms | 4.8GB | 0.95 | -| Qdrant | 7min | 1.2ms | 4.1ms | 4.5GB | 0.96 | +| LegacyDB | 7min | 1.2ms | 4.1ms | 4.5GB | 0.96 | | Milvus | 8min | 1.5ms | 5.2ms | 5.1GB | 0.96 | | Pinecone (P1) | 3min* | 5ms* | 15ms* | N/A | 0.98 | diff --git a/crates/ruvector-router-ffi/README.md b/crates/ruvector-router-ffi/README.md index 5bd60570d..1e529e5ef 100644 --- a/crates/ruvector-router-ffi/README.md +++ b/crates/ruvector-router-ffi/README.md @@ -386,7 +386,7 @@ Library Search Latency Memory (1M vectors) Language ------------------------------------------------------------------- router-ffi 0.2ms ~600MB Rust β†’ Node.js Pinecone ~2ms Cloud only Hosted -Qdrant ~1ms ~1.5GB Rust +LegacyDB ~1ms ~1.5GB Rust ChromaDB ~50ms ~3GB Python FAISS ~0.5ms ~1GB C++ β†’ Python ``` diff --git a/crates/ruvector-sparse-inference-wasm/src/lib.rs b/crates/ruvector-sparse-inference-wasm/src/lib.rs index d3e8cea8c..8f77bf2a4 100644 --- a/crates/ruvector-sparse-inference-wasm/src/lib.rs +++ b/crates/ruvector-sparse-inference-wasm/src/lib.rs @@ -1,274 +1,12 @@ -use ruvector_sparse_inference::{ - model::{GenerationConfig, GgufParser, KVCache, ModelMetadata, ModelRunner}, - predictor::LowRankPredictor, - InferenceConfig, SparseModel, SparsityConfig, -}; use wasm_bindgen::prelude::*; -/// Initialize panic hook for better error messages #[wasm_bindgen(start)] pub fn init() { #[cfg(feature = "console_error_panic_hook")] console_error_panic_hook::set_once(); } -/// Sparse inference engine for WASM -#[wasm_bindgen] -pub struct SparseInferenceEngine { - model: SparseModel, - config: InferenceConfig, - predictors: Vec, -} - -#[wasm_bindgen] -impl SparseInferenceEngine { - /// Create new engine from GGUF bytes - 
#[wasm_bindgen(constructor)] - pub fn new(model_bytes: &[u8], config_json: &str) -> Result { - let config: InferenceConfig = serde_json::from_str(config_json) - .map_err(|e| JsError::new(&format!("Invalid config: {}", e)))?; - - let model = GgufParser::parse(model_bytes) - .map_err(|e| JsError::new(&format!("Failed to parse model: {}", e)))?; - - let predictors = Self::init_predictors(&model, &config); - - Ok(Self { - model, - config, - predictors, - }) - } - - /// Load model with streaming (for large models) - #[wasm_bindgen] - pub async fn load_streaming( - url: &str, - config_json: &str, - ) -> Result { - // Fetch model in chunks - let bytes = fetch_model_bytes(url).await?; - Self::new(&bytes, config_json) - } - - /// Run inference on input - #[wasm_bindgen] - pub fn infer(&self, input: &[f32]) -> Result, JsError> { - self.model - .forward_embedding(input, &self.config) - .map_err(|e| JsError::new(&format!("Inference failed: {}", e))) - } - - /// Run text generation (for LLM models) - #[wasm_bindgen] - pub fn generate(&mut self, input_ids: &[u32], max_tokens: u32) -> Result, JsError> { - let config = GenerationConfig { - max_new_tokens: max_tokens as usize, - temperature: self.config.temperature, - top_k: self.config.top_k, - ..Default::default() - }; - - self.model - .generate(input_ids, &config) - .map_err(|e| JsError::new(&format!("Generation failed: {}", e))) - } - - /// Get model metadata as JSON - #[wasm_bindgen] - pub fn metadata(&self) -> String { - serde_json::to_string(&self.model.metadata()).unwrap_or_default() - } - - /// Get sparsity statistics - #[wasm_bindgen] - pub fn sparsity_stats(&self) -> String { - let stats = self.model.sparsity_statistics(); - serde_json::to_string(&stats).unwrap_or_default() - } - - /// Update sparsity threshold - #[wasm_bindgen] - pub fn set_sparsity(&mut self, threshold: f32) { - self.config.sparsity.threshold = threshold; - for predictor in &mut self.predictors { - predictor.set_threshold(threshold); - } - } - - /// 
Calibrate predictors with sample inputs - #[wasm_bindgen] - pub fn calibrate(&mut self, samples: &[f32], sample_dim: usize) -> Result<(), JsError> { - let samples: Vec> = samples.chunks(sample_dim).map(|c| c.to_vec()).collect(); - - self.model - .calibrate(&samples) - .map_err(|e| JsError::new(&format!("Calibration failed: {}", e))) - } - - /// Initialize predictors for each layer - fn init_predictors(model: &SparseModel, config: &InferenceConfig) -> Vec { - let num_layers = model.metadata().num_layers; - let hidden_size = model.metadata().hidden_size; - - (0..num_layers) - .map(|_| LowRankPredictor::new(hidden_size, config.sparsity.threshold)) - .collect() - } -} - -/// Embedding model wrapper for sentence transformers -#[wasm_bindgen] -pub struct EmbeddingModel { - engine: SparseInferenceEngine, -} - -#[wasm_bindgen] -impl EmbeddingModel { - #[wasm_bindgen(constructor)] - pub fn new(model_bytes: &[u8]) -> Result { - let config = - r#"{"sparsity": {"enabled": true, "threshold": 0.1}, "temperature": 1.0, "top_k": 50}"#; - let engine = SparseInferenceEngine::new(model_bytes, config)?; - Ok(Self { engine }) - } - - /// Encode text to embedding (requires tokenizer) - #[wasm_bindgen] - pub fn encode(&self, input_ids: &[u32]) -> Result, JsError> { - self.engine - .model - .encode(input_ids) - .map_err(|e| JsError::new(&format!("Encoding failed: {}", e))) - } - - /// Batch encode multiple sequences - #[wasm_bindgen] - pub fn encode_batch(&self, input_ids: &[u32], lengths: &[u32]) -> Result, JsError> { - let mut results = Vec::new(); - let mut offset = 0usize; - - for &len in lengths { - let len = len as usize; - if offset + len > input_ids.len() { - return Err(JsError::new("Invalid lengths: exceeds input_ids size")); - } - let ids = &input_ids[offset..offset + len]; - let embedding = self - .engine - .model - .encode(ids) - .map_err(|e| JsError::new(&format!("Encoding failed: {}", e)))?; - results.extend(embedding); - offset += len; - } - - Ok(results) - } - - /// Get 
embedding dimension - #[wasm_bindgen] - pub fn dimension(&self) -> usize { - self.engine.model.metadata().hidden_size - } -} - -/// LLM model wrapper for text generation -#[wasm_bindgen] -pub struct LLMModel { - engine: SparseInferenceEngine, - kv_cache: KVCache, -} - -#[wasm_bindgen] -impl LLMModel { - #[wasm_bindgen(constructor)] - pub fn new(model_bytes: &[u8], config_json: &str) -> Result { - let engine = SparseInferenceEngine::new(model_bytes, config_json)?; - let cache_size = engine.model.metadata().max_position_embeddings; - let kv_cache = KVCache::new(cache_size); - Ok(Self { engine, kv_cache }) - } - - /// Generate next token - #[wasm_bindgen] - pub fn next_token(&mut self, input_ids: &[u32]) -> Result { - self.engine - .model - .next_token(input_ids, &mut self.kv_cache) - .map_err(|e| JsError::new(&format!("Generation failed: {}", e))) - } - - /// Generate multiple tokens - #[wasm_bindgen] - pub fn generate(&mut self, input_ids: &[u32], max_tokens: u32) -> Result, JsError> { - self.engine.generate(input_ids, max_tokens) - } - - /// Reset KV cache (for new conversation) - #[wasm_bindgen] - pub fn reset_cache(&mut self) { - self.kv_cache.clear(); - } - - /// Get generation statistics - #[wasm_bindgen] - pub fn stats(&self) -> String { - serde_json::to_string(&self.engine.model.generation_stats()).unwrap_or_default() - } -} - -/// Performance measurement utilities -#[wasm_bindgen] -pub fn measure_inference_time( - engine: &SparseInferenceEngine, - input: &[f32], - iterations: u32, -) -> f64 { - let performance = web_sys::window() - .and_then(|w| w.performance()) - .expect("Performance API not available"); - - let start = performance.now(); - for _ in 0..iterations { - let _ = engine.infer(input); - } - let end = performance.now(); - - (end - start) / iterations as f64 -} - -/// Get library version #[wasm_bindgen] pub fn version() -> String { env!("CARGO_PKG_VERSION").to_string() } - -// Helper for streaming fetch -async fn fetch_model_bytes(url: &str) -> 
Result, JsError> { - use wasm_bindgen_futures::JsFuture; - - let window = web_sys::window().ok_or_else(|| JsError::new("No window"))?; - let response = JsFuture::from(window.fetch_with_str(url)).await?; - let response: web_sys::Response = response - .dyn_into() - .map_err(|_| JsError::new("Failed to cast to Response"))?; - let buffer = JsFuture::from( - response - .array_buffer() - .map_err(|_| JsError::new("Failed to get array buffer"))?, - ) - .await?; - let array = js_sys::Uint8Array::new(&buffer); - Ok(array.to_vec()) -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_version() { - assert!(!version().is_empty()); - } -} diff --git a/crates/ruvector-wasm/src/lib.rs b/crates/ruvector-wasm/src/lib.rs index ea9a6567e..160fae081 100644 --- a/crates/ruvector-wasm/src/lib.rs +++ b/crates/ruvector-wasm/src/lib.rs @@ -131,7 +131,7 @@ impl JsVectorEntry { Ok(JsVectorEntry { inner: VectorEntry { id, - vector: vector_data, + vector: ruvector_core::types::QuantumVector::F32(vector_data), metadata, }, }) @@ -144,7 +144,7 @@ impl JsVectorEntry { #[wasm_bindgen(getter)] pub fn vector(&self) -> Float32Array { - Float32Array::from(&self.inner.vector[..]) + Float32Array::from(&self.inner.vector.to_f32_vec()[..]) } #[wasm_bindgen(getter)] @@ -176,7 +176,7 @@ impl JsSearchResult { self.inner .vector .as_ref() - .map(|v| Float32Array::from(&v[..])) + .map(|v| Float32Array::from(&v.to_f32_vec()[..])) } #[wasm_bindgen(getter)] @@ -331,7 +331,7 @@ impl VectorDB { }; let search_query = SearchQuery { - vector: query_vector, + vector: ruvector_core::types::QuantumVector::F32(query_vector), k, filter: metadata_filter, ef_search: None, diff --git a/crates/ruvllm/src/backends/candle_backend.rs b/crates/ruvllm/src/backends/candle_backend.rs index d52caef4c..d1c1a2b86 100644 --- a/crates/ruvllm/src/backends/candle_backend.rs +++ b/crates/ruvllm/src/backends/candle_backend.rs @@ -49,7 +49,7 @@ use super::{ ModelInfo, Quantization, SpecialTokens, StreamEvent, TokenStream, 
Tokenizer, }; use crate::error::{Result, RuvLLMError}; -use crate::sona::{SonaConfig, SonaIntegration, Trajectory}; +use crate::sona::{SonaConfig, SonaIntegration, SonaTrajectory}; use crate::tokenizer::{ChatMessage, ChatTemplate, RuvTokenizer}; use std::path::{Path, PathBuf}; @@ -1297,7 +1297,7 @@ mod candle_impl { let query_embedding = Self::simple_embedding(prompt, 768); let response_embedding = Self::simple_embedding(&output, 768); - let trajectory = Trajectory { + let trajectory = SonaTrajectory { request_id: format!( "req-{}", std::time::SystemTime::now() @@ -1306,8 +1306,10 @@ mod candle_impl { .unwrap_or(0) ), session_id: "default".to_string(), - query_embedding, - response_embedding, + query_embedding: ruvector_core::types::QuantumVector::F32(query_embedding), + response_embedding: ruvector_core::types::QuantumVector::F32( + response_embedding, + ), quality_score: 0.8, // Default quality, can be updated with feedback routing_features: vec![ generated_tokens.len() as f32 / params.max_tokens as f32, diff --git a/crates/ruvllm/src/backends/mod.rs b/crates/ruvllm/src/backends/mod.rs index 953a62cb4..13aa2e8b3 100644 --- a/crates/ruvllm/src/backends/mod.rs +++ b/crates/ruvllm/src/backends/mod.rs @@ -367,9 +367,10 @@ impl Default for ModelConfig { )] pub enum DeviceType { /// CPU inference + #[cfg_attr(not(target_os = "macos"), default)] Cpu, /// Metal (Apple Silicon) - default on macOS - #[default] + #[cfg_attr(target_os = "macos", default)] Metal, /// CUDA (NVIDIA GPUs) Cuda(usize), diff --git a/crates/ruvllm/src/bitnet/rlm_embedder.rs b/crates/ruvllm/src/bitnet/rlm_embedder.rs index f99d1480b..a9025447e 100644 --- a/crates/ruvllm/src/bitnet/rlm_embedder.rs +++ b/crates/ruvllm/src/bitnet/rlm_embedder.rs @@ -14,6 +14,7 @@ //! 
- **C: Contradiction-Aware Twin** β€” bimodal for disputed claims use crate::error::{Result, RuvLLMError}; +use crate::utils::{cosine_similarity, l2_normalize}; // ============================================================================ // Configuration @@ -903,82 +904,6 @@ impl EmbedderBenchmark { // Math Helpers (NEON-optimizable hot paths) // ============================================================================ -/// Cosine similarity between two vectors. -/// -/// This is the #1 hot path in the embedder. On aarch64, the compiler -/// auto-vectorizes this loop to NEON instructions with `-C target-feature=+neon`. -#[inline] -pub fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 { - let len = a.len().min(b.len()); - if len == 0 { - return 0.0; - } - - let mut dot = 0.0f32; - let mut norm_a = 0.0f32; - let mut norm_b = 0.0f32; - - // Process 4 elements at a time for auto-vectorization - let chunks = len / 4; - let remainder = len % 4; - - for i in 0..chunks { - let base = i * 4; - let a0 = a[base]; - let a1 = a[base + 1]; - let a2 = a[base + 2]; - let a3 = a[base + 3]; - let b0 = b[base]; - let b1 = b[base + 1]; - let b2 = b[base + 2]; - let b3 = b[base + 3]; - - dot += a0 * b0 + a1 * b1 + a2 * b2 + a3 * b3; - norm_a += a0 * a0 + a1 * a1 + a2 * a2 + a3 * a3; - norm_b += b0 * b0 + b1 * b1 + b2 * b2 + b3 * b3; - } - - let tail_start = chunks * 4; - for i in 0..remainder { - let idx = tail_start + i; - dot += a[idx] * b[idx]; - norm_a += a[idx] * a[idx]; - norm_b += b[idx] * b[idx]; - } - - let denom = (norm_a.sqrt() * norm_b.sqrt()).max(1e-10); - dot / denom -} - -/// L2 normalize a vector in-place. -/// -/// Auto-vectorizes on aarch64 with NEON. 
-#[inline] -pub fn l2_normalize(v: &mut [f32]) { - let mut norm = 0.0f32; - - // Unrolled accumulation for auto-vectorization - let chunks = v.len() / 4; - let remainder = v.len() % 4; - - for i in 0..chunks { - let base = i * 4; - norm += v[base] * v[base] - + v[base + 1] * v[base + 1] - + v[base + 2] * v[base + 2] - + v[base + 3] * v[base + 3]; - } - for i in 0..remainder { - let idx = chunks * 4 + i; - norm += v[idx] * v[idx]; - } - - let inv_norm = 1.0 / norm.sqrt().max(1e-10); - for x in v.iter_mut() { - *x *= inv_norm; - } -} - /// Weighted vector accumulate: dst[i] += src[i] * weight. /// /// Used in context embedding computation. Auto-vectorizes. diff --git a/crates/ruvllm/src/bitnet/rlm_refiner.rs b/crates/ruvllm/src/bitnet/rlm_refiner.rs index 84a75758b..5b8642fb6 100644 --- a/crates/ruvllm/src/bitnet/rlm_refiner.rs +++ b/crates/ruvllm/src/bitnet/rlm_refiner.rs @@ -25,6 +25,7 @@ use crate::lora::training::{EwcRegularizer, TrainingConfig, TrainingPipeline}; use crate::training::contrastive::{ContrastiveConfig, ContrastiveTrainer}; use crate::training::grpo::{GrpoConfig, GrpoOptimizer}; +use crate::utils::cosine_similarity; use ndarray::Array1; use serde::{Deserialize, Serialize}; use std::collections::HashMap; @@ -521,21 +522,6 @@ fn kl_divergence_proxy(predicted: &[f32], target: &[f32]) -> f32 { mse / predicted.len() as f32 } -/// Cosine similarity between two vectors. 
-fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 { - if a.len() != b.len() || a.is_empty() { - return 0.0; - } - let dot: f32 = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum(); - let norm_a = a.iter().map(|x| x * x).sum::().sqrt(); - let norm_b = b.iter().map(|x| x * x).sum::().sqrt(); - if norm_a > 1e-8 && norm_b > 1e-8 { - dot / (norm_a * norm_b) - } else { - 0.0 - } -} - // --------------------------------------------------------------------------- // Tests // --------------------------------------------------------------------------- diff --git a/crates/ruvllm/src/claude_flow/agent_router.rs b/crates/ruvllm/src/claude_flow/agent_router.rs index 6dc760cb2..9bfa1d877 100644 --- a/crates/ruvllm/src/claude_flow/agent_router.rs +++ b/crates/ruvllm/src/claude_flow/agent_router.rs @@ -3,8 +3,9 @@ //! Routes tasks to optimal agent types using RuvLTRA embeddings and SONA learning. use super::{ClaudeFlowAgent, ClaudeFlowTask}; -use crate::sona::{RoutingRecommendation, SonaConfig, SonaIntegration, Trajectory}; +use crate::sona::{RoutingRecommendation, SonaConfig, SonaIntegration, SonaTrajectory}; use parking_lot::RwLock; +use ruvector_core::types::QuantumVector; use std::collections::HashMap; use std::sync::Arc; @@ -102,7 +103,11 @@ impl AgentRouter { } /// Route a task to the optimal agent - pub fn route(&mut self, task_description: &str, embedding: Option<&[f32]>) -> RoutingDecision { + pub fn route( + &mut self, + task_description: &str, + embedding: Option<&QuantumVector>, + ) -> RoutingDecision { self.total_decisions += 1; // Try SONA-based routing first if we have an embedding @@ -231,7 +236,7 @@ impl AgentRouter { pub fn record_feedback( &mut self, task: &str, - embedding: &[f32], + embedding: &QuantumVector, agent_used: AgentType, success: bool, ) { @@ -240,11 +245,11 @@ impl AgentRouter { } // Record trajectory for SONA learning - let trajectory = Trajectory { + let trajectory = SonaTrajectory { request_id: uuid::Uuid::new_v4().to_string(), session_id: 
"claude-flow".to_string(), - query_embedding: embedding.to_vec(), - response_embedding: embedding.to_vec(), // Simplified + query_embedding: embedding.clone(), + response_embedding: embedding.clone(), // Simplified quality_score: if success { 0.9 } else { 0.3 }, routing_features: vec![ agent_used as u8 as f32 / 10.0, diff --git a/crates/ruvllm/src/claude_flow/flow_optimizer.rs b/crates/ruvllm/src/claude_flow/flow_optimizer.rs index e70140b4c..0cc5a86f6 100644 --- a/crates/ruvllm/src/claude_flow/flow_optimizer.rs +++ b/crates/ruvllm/src/claude_flow/flow_optimizer.rs @@ -5,6 +5,7 @@ use super::{AgentRouter, ClaudeFlowAgent, ClaudeFlowTask, TaskClassifier}; use crate::models::RuvLtraConfig; use crate::sona::{SonaConfig, SonaStats}; +use ruvector_core::types::QuantumVector; use std::collections::HashMap; /// Optimization configuration @@ -119,7 +120,7 @@ impl FlowOptimizer { pub fn train_sample( &mut self, task: &str, - embedding: &[f32], + embedding: &QuantumVector, correct_agent: ClaudeFlowAgent, success: bool, ) { @@ -130,12 +131,14 @@ impl FlowOptimizer { // Record feedback let agent_type = correct_agent.into(); - self.router - .record_feedback(task, embedding, agent_type, success); + self.router.record_feedback( + task, embedding, // Pass embedding directly + agent_type, success, + ); } /// Train on batch of samples - pub fn train_batch(&mut self, samples: &[(String, Vec, ClaudeFlowAgent, bool)]) { + pub fn train_batch(&mut self, samples: &[(String, QuantumVector, ClaudeFlowAgent, bool)]) { for (task, embedding, agent, success) in samples { self.train_sample(task, embedding, *agent, *success); } @@ -199,7 +202,7 @@ impl FlowOptimizer { fn generate_use_case_samples( &self, use_case: ClaudeFlowTask, - ) -> Vec<(String, Vec, ClaudeFlowAgent, bool)> { + ) -> Vec<(String, QuantumVector, ClaudeFlowAgent, bool)> { let mut samples = Vec::new(); let (tasks, agent) = match use_case { @@ -244,7 +247,8 @@ impl FlowOptimizer { for task in tasks { // Generate pseudo-embedding 
(in production, use real embeddings) - let embedding: Vec = (0..384).map(|i| (i as f32 / 384.0).sin()).collect(); + let embedding_vec: Vec = (0..384).map(|i| (i as f32 / 384.0).sin()).collect(); + let embedding = QuantumVector::F32(embedding_vec); samples.push((task.to_string(), embedding, agent, true)); } @@ -275,7 +279,7 @@ impl FlowOptimizer { pub fn route_task( &mut self, description: &str, - embedding: Option<&[f32]>, + embedding: Option<&QuantumVector>, ) -> super::agent_router::RoutingDecision { self.router.route(description, embedding) } diff --git a/crates/ruvllm/src/claude_flow/hnsw_router.rs b/crates/ruvllm/src/claude_flow/hnsw_router.rs index 0fd4e7271..d7b72d606 100644 --- a/crates/ruvllm/src/claude_flow/hnsw_router.rs +++ b/crates/ruvllm/src/claude_flow/hnsw_router.rs @@ -46,12 +46,12 @@ use super::{AgentType, ClaudeFlowTask, RoutingDecision}; use crate::error::{Result, RuvLLMError}; -use crate::sona::{SonaIntegration, Trajectory}; +use crate::sona::{SonaIntegration, SonaTrajectory}; use dashmap::DashMap; use parking_lot::RwLock; use ruvector_core::index::hnsw::HnswIndex; use ruvector_core::index::VectorIndex; -use ruvector_core::types::{DistanceMetric, HnswConfig, SearchResult}; +use ruvector_core::types::{DistanceMetric, HnswConfig, QuantumVector, SearchResult}; use serde::{Deserialize, Serialize}; use std::collections::HashMap; use std::sync::atomic::{AtomicU64, Ordering}; @@ -187,7 +187,7 @@ pub struct TaskPattern { pub id: String, /// Task embedding vector - pub embedding: Vec, + pub embedding: QuantumVector, /// Agent type that successfully handled this pattern pub agent_type: AgentType, @@ -220,7 +220,7 @@ pub struct TaskPattern { impl TaskPattern { /// Create a new task pattern pub fn new( - embedding: Vec, + embedding: QuantumVector, agent_type: AgentType, task_type: ClaudeFlowTask, task_description: String, @@ -394,22 +394,21 @@ impl HnswRouter { /// Add a new pattern to the index pub fn add_pattern(&self, pattern: TaskPattern) -> Result<()> 
{ // Validate embedding dimension - if pattern.embedding.len() != self.config.embedding_dim { + if pattern.embedding.reconstruct().len() != self.config.embedding_dim { return Err(RuvLLMError::Config(format!( "Embedding dimension mismatch: expected {}, got {}", self.config.embedding_dim, - pattern.embedding.len() + pattern.embedding.reconstruct().len() ))); } - // Normalize embedding for cosine similarity - let embedding = self.normalize_embedding(&pattern.embedding); + let normalized = pattern.embedding.clone(); // Add to HNSW index { let mut index = self.index.write(); index - .add(pattern.id.clone(), embedding) + .add(pattern.id.clone(), normalized) .map_err(|e| RuvLLMError::Ruvector(e.to_string()))?; } @@ -429,12 +428,12 @@ impl HnswRouter { let mut entries = Vec::with_capacity(patterns.len()); for pattern in patterns { - if pattern.embedding.len() != self.config.embedding_dim { + if pattern.embedding.reconstruct().len() != self.config.embedding_dim { continue; // Skip invalid patterns } - let embedding = self.normalize_embedding(&pattern.embedding); - entries.push((pattern.id.clone(), embedding)); + let normalized = pattern.embedding.clone(); + entries.push((pattern.id.clone(), normalized)); self.index_to_pattern .insert(pattern.id.clone(), pattern.id.clone()); @@ -456,19 +455,15 @@ impl HnswRouter { } /// Search for similar patterns - pub fn search_similar(&self, query: &[f32], k: usize) -> Result> { + pub fn search_similar( + &self, + query: &QuantumVector, + k: usize, + ) -> Result> { let start = std::time::Instant::now(); // Validate and normalize query - if query.len() != self.config.embedding_dim { - return Err(RuvLLMError::Config(format!( - "Query dimension mismatch: expected {}, got {}", - self.config.embedding_dim, - query.len() - ))); - } - - let normalized_query = self.normalize_embedding(query); + let normalized_query = query.clone(); // Search HNSW index let results: Vec = { @@ -500,7 +495,10 @@ impl HnswRouter { } /// Route a task to the optimal 
agent based on semantic similarity - pub fn route_by_similarity(&self, query_embedding: &[f32]) -> Result { + pub fn route_by_similarity( + &self, + query_embedding: &QuantumVector, + ) -> Result { let start = std::time::Instant::now(); // Search for similar patterns @@ -594,7 +592,7 @@ impl HnswRouter { // Record trajectory for SONA if available if let Some(sona) = &self.sona { - let trajectory = Trajectory { + let trajectory = SonaTrajectory { request_id: uuid::Uuid::new_v4().to_string(), session_id: "hnsw-router".to_string(), query_embedding: pattern.embedding.clone(), @@ -619,7 +617,11 @@ impl HnswRouter { } /// Update success rate by finding the nearest pattern to a query - pub fn update_nearest_success(&self, query_embedding: &[f32], success: bool) -> Result { + pub fn update_nearest_success( + &self, + query_embedding: &QuantumVector, + success: bool, + ) -> Result { let similar = self.search_similar(query_embedding, 1)?; if let Some((pattern, similarity)) = similar.first() { @@ -635,7 +637,7 @@ impl HnswRouter { /// Learn a new pattern from a successful task pub fn learn_pattern( &self, - embedding: Vec, + embedding: QuantumVector, agent_type: AgentType, task_type: ClaudeFlowTask, task_description: String, @@ -947,7 +949,7 @@ impl HybridRouter { pub fn route( &self, task_description: &str, - embedding: &[f32], + embedding: &QuantumVector, keyword_decision: Option, ) -> Result { // Get HNSW semantic routing @@ -1043,7 +1045,7 @@ mod tests { // Add a pattern let embedding = create_test_embedding(42, 128); let pattern = TaskPattern::new( - embedding.clone(), + ruvector_core::types::QuantumVector::F32(embedding.clone()), AgentType::Coder, ClaudeFlowTask::CodeGeneration, "implement a function".to_string(), @@ -1052,7 +1054,8 @@ mod tests { router.add_pattern(pattern).unwrap(); // Search for similar - let results = router.search_similar(&embedding, 5).unwrap(); + let query = ruvector_core::types::QuantumVector::F32(embedding); + let results = 
router.search_similar(&query, 5).unwrap(); assert!(!results.is_empty()); assert_eq!(results[0].0.agent_type, AgentType::Coder); @@ -1082,8 +1085,12 @@ mod tests { ClaudeFlowTask::Testing }; - let mut pattern = - TaskPattern::new(embedding, agent_type, task_type, format!("task {}", i)); + let mut pattern = TaskPattern::new( + ruvector_core::types::QuantumVector::F32(embedding), + agent_type, + task_type, + format!("task {}", i), + ); pattern.usage_count = 10; pattern.success_count = 8; pattern.success_rate = 0.8; @@ -1092,7 +1099,7 @@ mod tests { } // Query similar to coder patterns - let query = create_test_embedding(150, 128); // Between coder embeddings + let query = ruvector_core::types::QuantumVector::F32(create_test_embedding(150, 128)); // Between coder embeddings let result = router.route_by_similarity(&query).unwrap(); assert!(result.confidence > 0.0); @@ -1110,7 +1117,7 @@ mod tests { let embedding = create_test_embedding(42, 128); let pattern = TaskPattern::new( - embedding, + ruvector_core::types::QuantumVector::F32(embedding), AgentType::Coder, ClaudeFlowTask::CodeGeneration, "test task".to_string(), @@ -1142,7 +1149,7 @@ mod tests { let embedding = create_test_embedding(42, 128); let pattern_id = router .learn_pattern( - embedding.clone(), + ruvector_core::types::QuantumVector::F32(embedding.clone()), AgentType::Researcher, ClaudeFlowTask::Research, "research best practices".to_string(), @@ -1171,7 +1178,7 @@ mod tests { // Add low-quality pattern let embedding = create_test_embedding(42, 128); let mut pattern = TaskPattern::new( - embedding, + ruvector_core::types::QuantumVector::F32(embedding), AgentType::Coder, ClaudeFlowTask::CodeGeneration, "bad task".to_string(), @@ -1185,7 +1192,7 @@ mod tests { // Add good pattern let embedding2 = create_test_embedding(100, 128); let mut pattern2 = TaskPattern::new( - embedding2, + ruvector_core::types::QuantumVector::F32(embedding2), AgentType::Coder, ClaudeFlowTask::CodeGeneration, "good task".to_string(), @@ 
-1215,7 +1222,7 @@ mod tests { for i in 0..5 { let embedding = create_test_embedding(i * 10, 128); let pattern = TaskPattern::new( - embedding, + ruvector_core::types::QuantumVector::F32(embedding), AgentType::Coder, ClaudeFlowTask::CodeGeneration, format!("task {}", i), @@ -1255,7 +1262,7 @@ mod tests { for i in 0..5 { let embedding = create_test_embedding(i * 10, 128); let pattern = TaskPattern::new( - embedding, + ruvector_core::types::QuantumVector::F32(embedding), AgentType::Coder, ClaudeFlowTask::CodeGeneration, format!("coding task {}", i), @@ -1264,7 +1271,7 @@ mod tests { } // Route with keyword decision - let query = create_test_embedding(25, 128); + let query = ruvector_core::types::QuantumVector::F32(create_test_embedding(25, 128)); let keyword_decision = RoutingDecision { primary_agent: AgentType::Coder, confidence: 0.8, diff --git a/crates/ruvllm/src/claude_flow/hooks_integration.rs b/crates/ruvllm/src/claude_flow/hooks_integration.rs index d2a78ad78..9a3e3dbf0 100644 --- a/crates/ruvllm/src/claude_flow/hooks_integration.rs +++ b/crates/ruvllm/src/claude_flow/hooks_integration.rs @@ -65,6 +65,7 @@ use crate::{ use chrono::{DateTime, Utc}; use dashmap::DashMap; use parking_lot::RwLock; +use ruvector_core::types::QuantumVector; use serde::{Deserialize, Serialize}; use std::collections::HashMap; use std::sync::Arc; @@ -492,7 +493,6 @@ impl HooksIntegration { let (agent_booster_available, agent_booster_intent) = self.check_agent_booster(&input.description); - // Get agent recommendation from HNSW if available let (recommended_agent, confidence, similar_patterns, suggested_approach) = if let Some(ref router) = self.hnsw_router { // Create a simple embedding from description @@ -730,7 +730,7 @@ impl HooksIntegration { if let Some(ref mut store) = self.pattern_store { let pattern = Pattern::new( - embedding, + QuantumVector::F32(embedding), PatternCategory::CodeGeneration, 1.0, // Success quality ) @@ -842,7 +842,7 @@ impl HooksIntegration { /// Route a task 
to optimal agent (convenience method) pub fn route_task(&self, task: &str, context: Option<&str>) -> Result { - let mut input = PreTaskInput { + let input = PreTaskInput { task_id: Uuid::new_v4().to_string(), description: task.to_string(), context: context.map(String::from), @@ -930,28 +930,9 @@ impl HooksIntegration { (agent.to_string(), confidence, Vec::new(), None) } - fn create_simple_embedding(&self, text: &str) -> Vec { - // Simple hash-based embedding for now - // In production, use a proper embedding model - let mut embedding = vec![0.0f32; self.config.embedding_dim]; - - for (i, word) in text.split_whitespace().enumerate() { - let hash = word - .bytes() - .fold(0u64, |acc, b| acc.wrapping_mul(31).wrapping_add(b as u64)); - let idx = (hash % self.config.embedding_dim as u64) as usize; - embedding[idx] += 1.0 / (i + 1) as f32; - } - - // Normalize - let norm: f32 = embedding.iter().map(|x| x * x).sum::().sqrt(); - if norm > 0.0 { - for x in &mut embedding { - *x /= norm; - } - } - - embedding + fn create_simple_embedding(&self, text: &str) -> QuantumVector { + let dim = self.config.embedding_dim; + QuantumVector::F32(create_simple_embedding_static(text, dim)) } fn parse_agent_type(&self, agent: &str) -> AgentType { @@ -1004,9 +985,13 @@ impl HooksIntegration { create_simple_embedding_static(&traj.description, self.config.embedding_dim); if let Some(ref mut store) = self.pattern_store { - let pattern = Pattern::new(embedding, PatternCategory::General, quality) - .with_lesson(traj.description.clone()) - .with_action(format!("Task completed by {}", agent)); + let pattern = Pattern::new( + ruvector_core::types::QuantumVector::F32(embedding), + PatternCategory::General, + quality, + ) + .with_lesson(traj.description.clone()) + .with_action(format!("Task completed by {}", agent)); if store.store_pattern(pattern).is_ok() { *self.patterns_added.write() += 1; diff --git a/crates/ruvllm/src/claude_flow/pretrain_pipeline.rs 
b/crates/ruvllm/src/claude_flow/pretrain_pipeline.rs index 86e11b021..27d1423e4 100644 --- a/crates/ruvllm/src/claude_flow/pretrain_pipeline.rs +++ b/crates/ruvllm/src/claude_flow/pretrain_pipeline.rs @@ -34,7 +34,7 @@ use super::task_generator::{GeneratedTask, TaskCategory, TaskComplexity, TaskGen use super::{ClaudeFlowAgent, ClaudeFlowTask}; use crate::sona::{ PretrainSample, RoutingPretrainResult, RuvLtraPretrainConfig, RuvLtraPretrainer, SeedingResult, - SonaConfig, SonaIntegration, Trajectory, + SonaConfig, SonaIntegration, SonaTrajectory, }; use parking_lot::RwLock; use ruvector_sona::{ diff --git a/crates/ruvllm/src/claude_flow/reasoning_bank.rs b/crates/ruvllm/src/claude_flow/reasoning_bank.rs index d183dae1b..eb01eefd9 100644 --- a/crates/ruvllm/src/claude_flow/reasoning_bank.rs +++ b/crates/ruvllm/src/claude_flow/reasoning_bank.rs @@ -63,8 +63,9 @@ use super::AgentType; use crate::error::{Result, RuvLLMError}; -use crate::sona::{SonaConfig, SonaIntegration, Trajectory as SonaTrajectory}; +use crate::sona::{SonaConfig, SonaIntegration, SonaTrajectory}; use parking_lot::RwLock; +use ruvector_core::types::QuantumVector; use ruvector_sona::{ EwcConfig, EwcPlusPlus, LearnedPattern, PatternConfig, PatternType, ReasoningBank, }; @@ -233,7 +234,7 @@ pub struct Trajectory { /// Unique task identifier pub task_id: String, /// Task embedding vector - pub embedding: Vec, + pub embedding: QuantumVector, /// Execution steps pub steps: Vec, /// Final verdict @@ -254,7 +255,7 @@ impl Trajectory { /// Create a new trajectory pub fn new( task_id: impl Into, - embedding: Vec, + embedding: QuantumVector, steps: Vec, verdict: Verdict, ) -> Self { @@ -454,7 +455,7 @@ pub struct DistilledPattern { /// Pattern identifier pub id: u64, /// Centroid embedding - pub centroid: Vec, + pub centroid: QuantumVector, /// Primary agent association pub primary_agent: AgentType, /// Agent score distribution @@ -476,33 +477,13 @@ pub struct DistilledPattern { impl DistilledPattern { /// 
Compute similarity with embedding using optimized dot product #[inline] - pub fn similarity(&self, embedding: &[f32]) -> f32 { - let len = self.centroid.len(); - if len != embedding.len() { - return 0.0; - } - - // Compute all in single pass for cache efficiency - let mut dot: f32 = 0.0; - let mut norm_a_sq: f32 = 0.0; - let mut norm_b_sq: f32 = 0.0; - - for i in 0..len { - let a = self.centroid[i]; - let b = embedding[i]; - dot += a * b; - norm_a_sq += a * a; - norm_b_sq += b * b; - } - - let norm_a = norm_a_sq.sqrt(); - let norm_b = norm_b_sq.sqrt(); - - if norm_a > 1e-8 && norm_b > 1e-8 { - dot / (norm_a * norm_b) - } else { - 0.0 - } + pub fn similarity(&self, embedding: &QuantumVector) -> f32 { + 1.0 - ruvector_core::distance::distance( + &self.centroid.reconstruct(), + &embedding.reconstruct(), + ruvector_core::types::DistanceMetric::Cosine, + ) + .unwrap_or(1.0) } /// Get best agent from this pattern @@ -593,11 +574,11 @@ impl ReasoningBankIntegration { pub fn record_trajectory( &self, task_id: impl Into, - embedding: &[f32], + embedding: &QuantumVector, steps: Vec, verdict: Verdict, ) -> Result<()> { - let trajectory = Trajectory::new(task_id, embedding.to_vec(), steps, verdict.clone()); + let trajectory = Trajectory::new(task_id, embedding.clone(), steps, verdict.clone()); // Update statistics { @@ -634,8 +615,8 @@ impl ReasoningBankIntegration { let sona_trajectory = SonaTrajectory { request_id: trajectory.task_id.clone(), session_id: "reasoning-bank".to_string(), - query_embedding: embedding.to_vec(), - response_embedding: embedding.to_vec(), + query_embedding: embedding.clone(), + response_embedding: embedding.clone(), quality_score: trajectory.quality_score, routing_features: vec![ trajectory.quality_score, @@ -654,7 +635,7 @@ impl ReasoningBankIntegration { { let mut core = self.core_bank.write(); let query_traj = - ruvector_sona::QueryTrajectory::new(trajectory.timestamp, embedding.to_vec()); + 
ruvector_sona::QueryTrajectory::new(trajectory.timestamp, embedding.reconstruct()); core.add_trajectory(&query_traj); } @@ -706,34 +687,22 @@ impl ReasoningBankIntegration { continue; } - // Compute centroid - let dim = cluster[0].embedding.len(); - let mut centroid = vec![0.0f32; dim]; - for traj in &cluster { - for (i, &e) in traj.embedding.iter().enumerate() { - if i < dim { - centroid[i] += e; - } - } - } - for c in &mut centroid { - *c /= cluster.len() as f32; - } - - // Normalize centroid - let norm: f32 = centroid.iter().map(|x| x * x).sum::().sqrt(); - if norm > 1e-8 { - for c in &mut centroid { - *c /= norm; - } - } - // Compute agent scores let mut agent_scores: HashMap = HashMap::new(); let mut total_quality = 0.0f32; let mut task_type: Option = None; + // Compute mean embedding for cluster + let first_t = &cluster[0]; + let dim = first_t.embedding.reconstruct().len(); + let mut centroid_raw = vec![0.0f32; dim]; for traj in &cluster { + let v = traj.embedding.reconstruct(); + for (i, &val) in v.iter().enumerate() { + if i < dim { + centroid_raw[i] += val; + } + } if let Some(agent) = traj.primary_agent { *agent_scores.entry(agent).or_insert(0.0) += traj.quality_score; } @@ -743,6 +712,11 @@ impl ReasoningBankIntegration { } } + for val in centroid_raw.iter_mut() { + *val /= cluster.len() as f32; + } + let centroid = QuantumVector::F32(centroid_raw); + // Normalize agent scores let total_agent_score: f32 = agent_scores.values().sum(); if total_agent_score > 0.0 { @@ -803,10 +777,11 @@ impl ReasoningBankIntegration { // Simple K-means style clustering let k = self.config.num_clusters.min(trajectories.len() / 3).max(1); - let dim = trajectories[0].embedding.len(); + let first_t = &trajectories[0]; + let dim = first_t.embedding.reconstruct().len(); // Initialize centroids with first k trajectories - let mut centroids: Vec> = trajectories + let mut centroids: Vec = trajectories .iter() .take(k) .map(|t| t.embedding.clone()) @@ -822,7 +797,17 @@ impl 
ReasoningBankIntegration { let nearest = centroids .iter() .enumerate() - .map(|(j, c)| (j, self.cosine_similarity(&traj.embedding, c))) + .map(|(j, c)| { + ( + j, + 1.0 - ruvector_core::distance::distance( + &traj.embedding.reconstruct(), + &c.reconstruct(), + ruvector_core::types::DistanceMetric::Cosine, + ) + .unwrap_or(1.0), + ) + }) .max_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal)) .map(|(j, _)| j) .unwrap_or(0); @@ -838,28 +823,32 @@ impl ReasoningBankIntegration { } // Recompute centroids - let mut new_centroids = vec![vec![0.0f32; dim]; k]; + let mut new_centroids_raw = vec![vec![0.0f32; dim]; k]; let mut counts = vec![0usize; k]; for (i, traj) in trajectories.iter().enumerate() { - let cluster = assignments[i]; - counts[cluster] += 1; - for (j, &e) in traj.embedding.iter().enumerate() { + let cluster_idx = assignments[i]; + counts[cluster_idx] += 1; + let v = traj.embedding.reconstruct(); + for (j, &val) in v.iter().enumerate() { if j < dim { - new_centroids[cluster][j] += e; + new_centroids_raw[cluster_idx][j] += val; } } } - for (i, centroid) in new_centroids.iter_mut().enumerate() { - if counts[i] > 0 { - for c in centroid.iter_mut() { - *c /= counts[i] as f32; + centroids = new_centroids_raw + .into_iter() + .enumerate() + .map(|(i, mut v)| { + if counts[i] > 0 { + for val in v.iter_mut() { + *val /= counts[i] as f32; + } } - } - } - - centroids = new_centroids; + QuantumVector::F32(v) + }) + .collect(); } // Group trajectories by assignment @@ -872,36 +861,13 @@ impl ReasoningBankIntegration { clusters.into_iter().filter(|c| c.len() >= 2).collect() } - /// Cosine similarity between two vectors - /// Optimized to compute all norms in a single pass - #[inline] - fn cosine_similarity(&self, a: &[f32], b: &[f32]) -> f32 { - let len = a.len(); - if len != b.len() { - return 0.0; - } - - // Single-pass computation for cache efficiency - let mut dot: f32 = 0.0; - let mut norm_a_sq: f32 = 0.0; - let mut norm_b_sq: f32 = 0.0; - - for i 
in 0..len { - let x = a[i]; - let y = b[i]; - dot += x * y; - norm_a_sq += x * x; - norm_b_sq += y * y; - } - - let norm_a = norm_a_sq.sqrt(); - let norm_b = norm_b_sq.sqrt(); - - if norm_a > 1e-8 && norm_b > 1e-8 { - dot / (norm_a * norm_b) - } else { - 0.0 - } + fn cosine_similarity(&self, a: &QuantumVector, b: &QuantumVector) -> f32 { + 1.0 - ruvector_core::distance::distance( + &a.reconstruct(), + &b.reconstruct(), + ruvector_core::types::DistanceMetric::Cosine, + ) + .unwrap_or(1.0) } /// Update EWC from new patterns @@ -910,8 +876,8 @@ impl ReasoningBankIntegration { for pattern in patterns { // Use centroid as pseudo-gradients - let gradients: Vec = pattern - .centroid + let v = pattern.centroid.reconstruct(); + let gradients: Vec = v .iter() .take(self.config.embedding_dim) .copied() @@ -929,7 +895,7 @@ impl ReasoningBankIntegration { } /// Get routing recommendation for an embedding - pub fn get_recommendation(&self, embedding: &[f32]) -> RoutingRecommendation { + pub fn get_recommendation(&self, embedding: &QuantumVector) -> RoutingRecommendation { let patterns = self.patterns.read(); if patterns.is_empty() { @@ -1114,12 +1080,15 @@ impl ReasoningBankIntegration { let w2 = p2.trajectory_count as f32 / total_count as f32; // Merge centroids - let centroid: Vec = p1 + // Merge centroids + let centroid_vec: Vec = p1 .centroid + .reconstruct() .iter() - .zip(&p2.centroid) + .zip(p2.centroid.reconstruct().iter()) .map(|(&a, &b)| a * w1 + b * w2) .collect(); + let centroid = QuantumVector::F32(centroid_vec); // Merge agent scores let mut agent_scores: HashMap = p1.agent_scores.clone(); @@ -1275,7 +1244,7 @@ mod tests { let traj = Trajectory::new( "task-1", - vec![0.1, 0.2, 0.3], + ruvector_core::types::QuantumVector::F32(vec![0.1, 0.2, 0.3]), steps, Verdict::Success { reason: "done".into(), @@ -1309,7 +1278,7 @@ mod tests { bank.record_trajectory( "task-1", - &vec![0.1; 384], + &ruvector_core::types::QuantumVector::F32(vec![0.1; 384]), steps, Verdict::Success { 
reason: "done".into(), @@ -1355,7 +1324,7 @@ mod tests { bank.record_trajectory( format!("task-{}", i), - &embedding, + &ruvector_core::types::QuantumVector::F32(embedding), steps, Verdict::Success { reason: "done".into(), @@ -1391,7 +1360,7 @@ mod tests { bank.record_trajectory( format!("task-{}", i), - &embedding, + &ruvector_core::types::QuantumVector::F32(embedding), steps, Verdict::Success { reason: "done".into(), @@ -1408,8 +1377,9 @@ mod tests { .chain(std::iter::repeat(0.0)) .take(384) .collect(); + let query_vec = ruvector_core::types::QuantumVector::F32(query); - let rec = bank.get_recommendation(&query); + let rec = bank.get_recommendation(&query_vec); assert!(rec.patterns_used > 0); assert!(rec.confidence > 0.0); } @@ -1438,7 +1408,7 @@ mod tests { bank.record_trajectory( format!("task-{}", i), - &embedding, + &QuantumVector::F32(embedding), steps, Verdict::Success { reason: "done".into(), @@ -1460,7 +1430,7 @@ mod tests { fn test_distilled_pattern_similarity() { let pattern = DistilledPattern { id: 1, - centroid: vec![1.0, 0.0, 0.0, 0.0], + centroid: QuantumVector::F32(vec![1.0, 0.0, 0.0, 0.0]), primary_agent: AgentType::Coder, agent_scores: HashMap::new(), avg_quality: 0.9, @@ -1471,8 +1441,8 @@ mod tests { access_count: 0, }; - let same = vec![1.0, 0.0, 0.0, 0.0]; - let orthogonal = vec![0.0, 1.0, 0.0, 0.0]; + let same = QuantumVector::F32(vec![1.0, 0.0, 0.0, 0.0]); + let orthogonal = QuantumVector::F32(vec![0.0, 1.0, 0.0, 0.0]); assert!((pattern.similarity(&same) - 1.0).abs() < 0.01); assert!(pattern.similarity(&orthogonal).abs() < 0.01); @@ -1486,7 +1456,7 @@ mod tests { // Create some patterns manually let pattern = DistilledPattern { id: 42, - centroid: vec![0.5; 384], + centroid: QuantumVector::F32(vec![0.5; 384]), primary_agent: AgentType::Researcher, agent_scores: HashMap::from([(AgentType::Researcher, 0.8), (AgentType::Coder, 0.2)]), avg_quality: 0.85, diff --git a/crates/ruvllm/src/context/agentic_memory.rs 
b/crates/ruvllm/src/context/agentic_memory.rs index f90ecb433..c3f0e958d 100644 --- a/crates/ruvllm/src/context/agentic_memory.rs +++ b/crates/ruvllm/src/context/agentic_memory.rs @@ -7,7 +7,7 @@ use chrono::{DateTime, Utc}; use parking_lot::RwLock; use ruvector_core::index::hnsw::HnswIndex; use ruvector_core::index::VectorIndex; -use ruvector_core::types::{DistanceMetric, HnswConfig}; +use ruvector_core::types::{DistanceMetric, HnswConfig, QuantumVector}; use serde::{Deserialize, Serialize}; use std::collections::HashMap; use std::sync::atomic::{AtomicU64, Ordering}; @@ -81,7 +81,7 @@ pub struct SemanticFact { /// Fact content pub content: String, /// Fact embedding - pub embedding: Vec, + pub embedding: QuantumVector, /// Confidence score pub confidence: f32, /// Source (where this fact came from) @@ -112,7 +112,7 @@ pub struct ProceduralSkill { /// Trigger conditions (when to use this skill) pub triggers: Vec, /// Skill embedding - pub embedding: Vec, + pub embedding: QuantumVector, /// Success rate pub success_rate: f32, /// Execution count @@ -219,7 +219,7 @@ impl AgenticMemory { &self, key: &str, content: &str, - embedding: Vec, + embedding: QuantumVector, memory_type: MemoryType, ) -> Result { self.stats.stores.fetch_add(1, Ordering::SeqCst); @@ -259,7 +259,7 @@ impl AgenticMemory { &self, id: &str, content: &str, - embedding: Vec, + embedding: QuantumVector, confidence: f32, source: &str, tags: Vec, @@ -328,7 +328,7 @@ impl AgenticMemory { /// Retrieve from memory by query pub fn retrieve( &self, - query_embedding: &[f32], + query_embedding: &QuantumVector, memory_type: MemoryType, k: usize, ) -> Result> { @@ -430,7 +430,11 @@ impl AgenticMemory { } /// Get relevant memories across all types - pub fn get_relevant(&self, query_embedding: &[f32], k: usize) -> Result> { + pub fn get_relevant( + &self, + query_embedding: &QuantumVector, + k: usize, + ) -> Result> { let mut all_results = Vec::new(); // Get from each memory type @@ -701,8 +705,8 @@ pub struct 
AgenticMemoryStats { mod tests { use super::*; - fn test_embedding(dim: usize) -> Vec { - vec![0.1; dim] + fn test_embedding(dim: usize) -> QuantumVector { + QuantumVector::F32(vec![0.1; dim]) } #[test] diff --git a/crates/ruvllm/src/context/context_manager.rs b/crates/ruvllm/src/context/context_manager.rs index 68d0423bc..6eefd46c9 100644 --- a/crates/ruvllm/src/context/context_manager.rs +++ b/crates/ruvllm/src/context/context_manager.rs @@ -14,6 +14,7 @@ use crate::error::{Result, RuvLLMError}; use super::agentic_memory::{AgenticMemory, AgenticMemoryConfig, MemoryType, RetrievedMemory}; use super::semantic_cache::{SemanticCacheConfig, SemanticToolCache}; +use ruvector_core::types::QuantumVector; /// Model token limits #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] @@ -408,7 +409,7 @@ impl IntelligentContextManager { pub fn prepare_context( &self, messages: &[Message], - query_embedding: Option<&[f32]>, + query_embedding: Option<&QuantumVector>, model: Option, ) -> Result { let start = std::time::Instant::now(); @@ -597,7 +598,7 @@ impl IntelligentContextManager { &self, key: &str, content: &str, - embedding: Vec, + embedding: QuantumVector, memory_type: MemoryType, ) -> Result { self.memory.store(key, content, embedding, memory_type) @@ -609,7 +610,7 @@ impl IntelligentContextManager { tool_name: &str, input: &str, result: &str, - embedding: Vec, + embedding: QuantumVector, ) -> Result<()> { self.cache.store(tool_name, input, result, embedding) } @@ -718,7 +719,7 @@ mod tests { let manager = IntelligentContextManager::new(config).unwrap(); // Store some memory - let embedding = vec![0.1; 128]; + let embedding = QuantumVector::F32(vec![0.1; 128]); manager .store_memory( "fact-1", diff --git a/crates/ruvllm/src/context/episodic_memory.rs b/crates/ruvllm/src/context/episodic_memory.rs index e4b2a89f3..416c6684f 100644 --- a/crates/ruvllm/src/context/episodic_memory.rs +++ b/crates/ruvllm/src/context/episodic_memory.rs @@ -7,7 +7,7 @@ use 
chrono::{DateTime, Duration, Utc}; use parking_lot::RwLock; use ruvector_core::index::hnsw::HnswIndex; use ruvector_core::index::VectorIndex; -use ruvector_core::types::{DistanceMetric, HnswConfig}; +use ruvector_core::types::{DistanceMetric, HnswConfig, QuantumVector}; use serde::{Deserialize, Serialize}; use std::collections::HashMap; use std::sync::atomic::{AtomicU64, Ordering}; @@ -82,7 +82,7 @@ pub struct TrajectoryStep { /// Result of action pub result: Option, /// Step embedding - pub embedding: Option>, + pub embedding: Option, /// Reward signal pub reward: f32, /// Timestamp @@ -122,7 +122,7 @@ pub struct Episode { /// Episode ID pub id: String, /// Episode embedding (summary) - pub embedding: Vec, + pub embedding: QuantumVector, /// Episode metadata pub metadata: EpisodeMetadata, /// Full trajectory (may be compressed) @@ -135,7 +135,7 @@ pub struct Episode { #[derive(Debug, Clone, Serialize, Deserialize)] pub struct CompressedEpisode { /// Compressed embedding (may be lower dimension) - pub embedding: Vec, + pub embedding: QuantumVector, /// Summary text pub summary: String, /// Key observations @@ -237,22 +237,26 @@ impl MemoryCompressor { } /// Compress embedding (average or reduce dimensions) - fn compress_embedding(&self, steps: &[&TrajectoryStep]) -> Vec { - let embeddings: Vec<&Vec> = + fn compress_embedding(&self, steps: &[&TrajectoryStep]) -> QuantumVector { + let embeddings: Vec<&QuantumVector> = steps.iter().filter_map(|s| s.embedding.as_ref()).collect(); if embeddings.is_empty() { - return Vec::new(); + return QuantumVector::F32(vec![]); } - let dim = embeddings[0].len(); + let v0 = embeddings[0].reconstruct(); + let dim = v0.len(); let target_dim = self.target_dim.unwrap_or(dim); // Average embeddings let mut avg = vec![0.0f32; dim]; for emb in &embeddings { - for (i, v) in emb.iter().enumerate() { - avg[i] += v; + let v = emb.reconstruct(); + for (i, &val) in v.iter().enumerate() { + if i < dim { + avg[i] += val; + } } } let n = 
embeddings.len() as f32; @@ -260,12 +264,12 @@ impl MemoryCompressor { *v /= n; } - // Simple dimensionality reduction if needed (truncation - in production use PCA) + // Simple dimensionality reduction if needed if target_dim < dim { avg.truncate(target_dim); } - avg + QuantumVector::F32(avg) } } @@ -335,7 +339,7 @@ impl EpisodicMemory { pub fn store_episode( &self, trajectory: Trajectory, - summary_embedding: Vec, + summary_embedding: QuantumVector, tags: Vec, ) -> Result { let episode_id = trajectory.id.clone(); @@ -386,7 +390,11 @@ impl EpisodicMemory { } /// Search for similar episodes - pub fn search_similar(&self, query_embedding: &[f32], k: usize) -> Result> { + pub fn search_similar( + &self, + query_embedding: &QuantumVector, + k: usize, + ) -> Result> { let start = std::time::Instant::now(); let results = { @@ -418,7 +426,7 @@ impl EpisodicMemory { /// Search with filtering pub fn search_with_filter( &self, - query_embedding: &[f32], + query_embedding: &QuantumVector, k: usize, filter: F, ) -> Result> @@ -441,7 +449,7 @@ impl EpisodicMemory { /// Search by task type pub fn search_by_task_type( &self, - query_embedding: &[f32], + query_embedding: &QuantumVector, task_type: &str, k: usize, ) -> Result> { @@ -451,7 +459,7 @@ impl EpisodicMemory { /// Search successful episodes only pub fn search_successful( &self, - query_embedding: &[f32], + query_embedding: &QuantumVector, min_quality: f32, k: usize, ) -> Result> { @@ -600,8 +608,8 @@ impl EpisodicMemory { mod tests { use super::*; - fn test_embedding(dim: usize) -> Vec { - vec![0.1; dim] + fn test_embedding(dim: usize) -> QuantumVector { + QuantumVector::F32(vec![0.1; dim]) } fn test_trajectory() -> Trajectory { @@ -612,7 +620,7 @@ mod tests { state: "Initial state".to_string(), action: "read_file /src/main.rs".to_string(), result: Some("file contents".to_string()), - embedding: Some(vec![0.1; 128]), + embedding: Some(QuantumVector::F32(vec![0.1; 128])), reward: 0.5, timestamp: Utc::now(), }, @@ -620,7 
+628,7 @@ mod tests { state: "After reading".to_string(), action: "edit_file /src/main.rs".to_string(), result: Some("edited".to_string()), - embedding: Some(vec![0.2; 128]), + embedding: Some(QuantumVector::F32(vec![0.2; 128])), reward: 0.8, timestamp: Utc::now(), }, diff --git a/crates/ruvllm/src/context/semantic_cache.rs b/crates/ruvllm/src/context/semantic_cache.rs index 3054dcd6e..08671416e 100644 --- a/crates/ruvllm/src/context/semantic_cache.rs +++ b/crates/ruvllm/src/context/semantic_cache.rs @@ -61,7 +61,7 @@ pub struct CachedToolResult { /// Input hash for exact matching pub input_hash: String, /// Input embedding for similarity matching - pub embedding: Vec, + pub embedding: ruvector_core::types::QuantumVector, /// Tool result pub result: String, /// Success status @@ -157,7 +157,7 @@ impl SemanticToolCache { tool_name: &str, input: &str, result: &str, - embedding: Vec, + embedding: ruvector_core::types::QuantumVector, ) -> Result<()> { self.store_with_options( tool_name, @@ -176,7 +176,7 @@ impl SemanticToolCache { tool_name: &str, input: &str, result: &str, - embedding: Vec, + embedding: ruvector_core::types::QuantumVector, success: bool, ttl: Duration, metadata: HashMap, @@ -225,7 +225,10 @@ impl SemanticToolCache { } /// Get cached result by embedding similarity - pub fn get(&self, query_embedding: &[f32]) -> Result> { + pub fn get( + &self, + query_embedding: &ruvector_core::types::QuantumVector, + ) -> Result> { self.stats.lookups.fetch_add(1, Ordering::SeqCst); // Search for similar entries @@ -331,7 +334,7 @@ impl SemanticToolCache { &self, tool_name: &str, input: &str, - embedding: Vec, + embedding: ruvector_core::types::QuantumVector, execute: F, ) -> std::result::Result where @@ -519,9 +522,10 @@ impl SemanticToolCache { #[cfg(test)] mod tests { use super::*; + use ruvector_core::types::QuantumVector; - fn test_embedding(dim: usize) -> Vec { - vec![0.1; dim] + fn test_embedding(dim: usize) -> QuantumVector { + QuantumVector::F32(vec![0.1; 
dim]) } #[test] diff --git a/crates/ruvllm/src/context/working_memory.rs b/crates/ruvllm/src/context/working_memory.rs index c0a28180c..bd3ed8a9b 100644 --- a/crates/ruvllm/src/context/working_memory.rs +++ b/crates/ruvllm/src/context/working_memory.rs @@ -3,8 +3,11 @@ //! Provides fast access to current task state, tool results, and reasoning steps //! with time-decaying attention weights. +use crate::error::Result; +use crate::utils::cosine_similarity; use chrono::{DateTime, Duration, Utc}; use parking_lot::RwLock; +use ruvector_core::types::QuantumVector; use serde::{Deserialize, Serialize}; use std::collections::{HashMap, VecDeque}; use std::sync::Arc; @@ -48,7 +51,7 @@ pub struct TaskContext { /// Current status pub status: TaskStatus, /// Task embedding (for similarity search) - pub embedding: Option>, + pub embedding: Option, /// Files being worked on pub active_files: Vec, /// Current step index in multi-step tasks @@ -109,7 +112,7 @@ pub struct ScratchpadEntry { /// Timestamp pub timestamp: DateTime, /// Optional embedding for semantic search - pub embedding: Option>, + pub embedding: Option, /// Reference to related entries pub related_entries: Vec, } @@ -317,7 +320,7 @@ impl WorkingMemory { &self, content: String, entry_type: ScratchpadEntryType, - embedding: Option>, + embedding: Option, ) { let mut scratchpad = self.scratchpad.write(); @@ -355,14 +358,24 @@ impl WorkingMemory { } /// Search scratchpad by similarity (requires embeddings) - pub fn search_scratchpad(&self, query_embedding: &[f32], k: usize) -> Vec { + pub fn search_scratchpad( + &self, + query_embedding: &QuantumVector, + k: usize, + ) -> Vec { let scratchpad = self.scratchpad.read(); let mut with_scores: Vec<(f32, &ScratchpadEntry)> = scratchpad .iter() .filter_map(|entry| { entry.embedding.as_ref().map(|emb| { - let score = cosine_similarity(query_embedding, emb); + let score = 1.0 + - ruvector_core::distance::distance( + &query_embedding.reconstruct(), + &emb.reconstruct(), + 
ruvector_core::types::DistanceMetric::Cosine, + ) + .unwrap_or(1.0); (score, entry) }) }) @@ -555,22 +568,7 @@ pub struct WorkingMemoryStats { pub attention_entries: usize, } -/// Calculate cosine similarity between two vectors -fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 { - if a.len() != b.len() { - return 0.0; - } - - let dot: f32 = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum(); - let norm_a: f32 = a.iter().map(|x| x * x).sum::().sqrt(); - let norm_b: f32 = b.iter().map(|x| x * x).sum::().sqrt(); - - if norm_a > 0.0 && norm_b > 0.0 { - dot / (norm_a * norm_b) - } else { - 0.0 - } -} +// Helper removed: use QuantumVector::cosine_similarity #[cfg(test)] mod tests { diff --git a/crates/ruvllm/src/evaluation/real_harness.rs b/crates/ruvllm/src/evaluation/real_harness.rs index 34d6729c5..3fe6680f0 100644 --- a/crates/ruvllm/src/evaluation/real_harness.rs +++ b/crates/ruvllm/src/evaluation/real_harness.rs @@ -11,7 +11,7 @@ use super::harness::{ }; use crate::backends::{create_backend, GenerateParams, LlmBackend, ModelConfig}; use crate::claude_flow::{AgentType, ClaudeFlowTask, HnswRouter, HnswRouterConfig, TaskPattern}; -use crate::sona::integration::{SonaConfig, SonaIntegration, Trajectory}; +use crate::sona::integration::{SonaConfig, SonaIntegration, SonaTrajectory}; use crate::Result; use parking_lot::RwLock; @@ -278,7 +278,7 @@ impl RealEvaluationHarness { let embedding = Self::create_seed_embedding(description, dim, i); let mut pattern = - TaskPattern::new(embedding, *agent_type, *task_type, description.to_string()); + TaskPattern::new(ruvector_core::types::QuantumVector::F32(embedding), *agent_type, *task_type, description.to_string()); // Give seed patterns initial trust pattern.usage_count = 10; pattern.success_count = 8; @@ -448,7 +448,7 @@ impl RealEvaluationHarness { .unwrap_or_else(|_| Self::create_seed_embedding(task_description, 384, 0)); // Use full routing with confidence scores - let hnsw_result = router.route_by_similarity(&embedding)?; + 
let hnsw_result = router.route_by_similarity(&ruvector_core::types::QuantumVector::F32(embedding.clone()))?; Ok(RoutingResult { primary_agent: hnsw_result.primary_agent, @@ -485,7 +485,7 @@ impl RealEvaluationHarness { let task_type = Self::classify_task_type(&task.description); router.learn_pattern( - embedding, + ruvector_core::types::QuantumVector::F32(embedding), AgentType::Coder, // Default for code tasks task_type, task.description.clone(), @@ -503,11 +503,11 @@ impl RealEvaluationHarness { .and_then(|p| self.get_embedding(p).ok()) .unwrap_or_default(); - let trajectory = Trajectory { + let trajectory = SonaTrajectory { request_id: task.id.clone(), session_id: "eval".to_string(), - query_embedding, - response_embedding, + query_embedding: ruvector_core::types::QuantumVector::F32(query_embedding), + response_embedding: ruvector_core::types::QuantumVector::F32(response_embedding), quality_score: if success { 0.9 } else { 0.3 }, routing_features: vec![], model_index: 0, diff --git a/crates/ruvllm/src/kv_cache.rs b/crates/ruvllm/src/kv_cache.rs index c303d8a6f..15b3fbd78 100644 --- a/crates/ruvllm/src/kv_cache.rs +++ b/crates/ruvllm/src/kv_cache.rs @@ -797,7 +797,7 @@ impl TwoTierKvCache { )); } - let current_tokens = self.total_tokens.load(Ordering::SeqCst); + let current_tokens = self.total_tokens.fetch_add(num_tokens, Ordering::SeqCst); // Add to tail let mut tail = self.tail.write(); @@ -825,9 +825,6 @@ impl TwoTierKvCache { store.push(quantized); } } - - self.total_tokens.fetch_add(num_tokens, Ordering::SeqCst); - // Enforce max tokens limit self.enforce_max_tokens()?; diff --git a/crates/ruvllm/src/lib.rs b/crates/ruvllm/src/lib.rs index f991a3145..62a722098 100644 --- a/crates/ruvllm/src/lib.rs +++ b/crates/ruvllm/src/lib.rs @@ -146,6 +146,7 @@ pub mod speculative; pub mod tokenizer; pub mod training; pub mod types; +pub mod utils; pub mod witness_log; // Test modules @@ -902,7 +903,10 @@ impl RuvLLMEngine { context_embedding: &[f32], limit: usize, ) -> 
Result> { - self.policy_store.search(context_embedding, limit) + self.policy_store.search( + &ruvector_core::types::QuantumVector::F32(context_embedding.to_vec()), + limit, + ) } /// Record a witness entry for audit logging. @@ -944,7 +948,10 @@ impl RuvLLMEngine { query_embedding: &[f32], limit: usize, ) -> Result> { - self.witness_log.search(query_embedding, limit) + self.witness_log.search( + &ruvector_core::types::QuantumVector::F32(query_embedding.to_vec()), + limit, + ) } /// Get the SONA integration for learning diff --git a/crates/ruvllm/src/models/ruvltra.rs b/crates/ruvllm/src/models/ruvltra.rs index cca0b6e90..6c9551706 100644 --- a/crates/ruvllm/src/models/ruvltra.rs +++ b/crates/ruvllm/src/models/ruvltra.rs @@ -54,12 +54,13 @@ use crate::error::{Result, RuvLLMError}; use crate::kernels::rope::{precompute_rope_tables_with_config, RopeConfig, RopeTables}; use crate::kernels::{apply_rope_neon, flash_attention_neon, rms_norm_neon, AttentionConfig}; -use crate::sona::{SonaConfig, SonaIntegration, Trajectory}; +use crate::sona::{SonaConfig, SonaIntegration, SonaTrajectory}; #[cfg(target_arch = "aarch64")] use std::arch::aarch64::*; use parking_lot::RwLock; +use ruvector_core::types::QuantumVector; use serde::{Deserialize, Serialize}; use std::sync::Arc; @@ -1101,7 +1102,7 @@ impl RuvLtraModel { } /// Record a trajectory for SONA learning - pub fn record_trajectory(&self, trajectory: Trajectory) -> Result<()> { + pub fn record_trajectory(&self, trajectory: SonaTrajectory) -> Result<()> { if let Some(sona) = &self.sona { sona.write().record_trajectory(trajectory)?; } @@ -1113,9 +1114,10 @@ impl RuvLtraModel { &self, query_embedding: &[f32], ) -> Option { - self.sona - .as_ref() - .map(|sona| sona.read().get_routing_recommendation(query_embedding)) + self.sona.as_ref().map(|sona| { + let q_vec = QuantumVector::F32(query_embedding.to_vec()); + sona.read().get_routing_recommendation(&q_vec) + }) } /// Get model info diff --git 
a/crates/ruvllm/src/models/ruvltra_medium.rs b/crates/ruvllm/src/models/ruvltra_medium.rs index 8b5b325ae..c9e787f61 100644 --- a/crates/ruvllm/src/models/ruvltra_medium.rs +++ b/crates/ruvllm/src/models/ruvltra_medium.rs @@ -66,7 +66,7 @@ use crate::error::{Result, RuvLLMError}; use crate::kernels::rope::{precompute_rope_tables_with_config, RopeConfig, RopeTables}; use crate::kernels::{apply_rope_neon, flash_attention_neon, rms_norm_neon, AttentionConfig}; use crate::paged_attention::{PageTable, PagedAttention, PagedAttentionConfig}; -use crate::sona::{SonaConfig, SonaIntegration, Trajectory}; +use crate::sona::{SonaConfig, SonaIntegration, SonaTrajectory}; /// Type alias for PagedAttention used as KV cache pub type PagedKVCache = PagedAttention; diff --git a/crates/ruvllm/src/optimization/sona_llm.rs b/crates/ruvllm/src/optimization/sona_llm.rs index 61b4982a1..597f0221f 100644 --- a/crates/ruvllm/src/optimization/sona_llm.rs +++ b/crates/ruvllm/src/optimization/sona_llm.rs @@ -35,7 +35,7 @@ use crate::error::{Result, RuvLLMError}; use crate::lora::{ AdaptFeedback, MicroLoRA, MicroLoraConfig, TargetModule, TrainingConfig, TrainingPipeline, }; -use crate::sona::{SonaConfig, SonaIntegration, Trajectory}; +use crate::sona::{SonaConfig, SonaIntegration, SonaTrajectory}; use parking_lot::RwLock; use serde::{Deserialize, Serialize}; use std::collections::{HashMap, VecDeque}; @@ -431,11 +431,15 @@ impl SonaLlm { { let sona = self.sona.write(); for sample in &samples { - let trajectory = Trajectory { + let trajectory = SonaTrajectory { request_id: format!("bg-{}", self.instant_count.load(Ordering::Relaxed)), session_id: sample.session_id.clone(), - query_embedding: sample.input_embedding.clone(), - response_embedding: sample.output_embedding.clone(), + query_embedding: ruvector_core::types::QuantumVector::F32( + sample.input_embedding.clone(), + ), + response_embedding: ruvector_core::types::QuantumVector::F32( + sample.output_embedding.clone(), + ), quality_score: 
sample.quality, routing_features: vec![sample.quality, sample.latency_ms / 1000.0], model_index: sample.model_index, diff --git a/crates/ruvllm/src/policy_store.rs b/crates/ruvllm/src/policy_store.rs index accd2e27f..14b10b03e 100644 --- a/crates/ruvllm/src/policy_store.rs +++ b/crates/ruvllm/src/policy_store.rs @@ -13,7 +13,7 @@ use crate::error::{Result, RuvLLMError}; use chrono::{DateTime, Utc}; -use ruvector_core::types::DbOptions; +use ruvector_core::types::{DbOptions, QuantumVector}; use ruvector_core::{AgenticDB, SearchQuery, VectorEntry}; use serde::{Deserialize, Serialize}; use std::collections::HashMap; @@ -63,7 +63,7 @@ pub struct PolicyEntry { /// Policy type pub policy_type: PolicyType, /// Embedding vector for semantic search (768-D) - pub embedding: Vec, + pub embedding: QuantumVector, /// Policy parameters as JSON pub parameters: serde_json::Value, /// Confidence score from learning (0.0 - 1.0) @@ -247,9 +247,13 @@ impl PolicyStore { } /// Search for policies by semantic similarity - pub fn search(&self, query_embedding: &[f32], limit: usize) -> Result> { + pub fn search( + &self, + query_embedding: &QuantumVector, + limit: usize, + ) -> Result> { let query = SearchQuery { - vector: query_embedding.to_vec(), + vector: query_embedding.clone(), k: limit, filter: None, ef_search: None, @@ -301,7 +305,7 @@ impl PolicyStore { /// Store a quantization policy pub fn store_quantization_policy( &self, - embedding: Vec, + embedding: QuantumVector, policy: QuantizationPolicy, confidence: f32, source: PolicySource, @@ -325,7 +329,7 @@ impl PolicyStore { /// Store a router policy pub fn store_router_policy( &self, - embedding: Vec, + embedding: QuantumVector, policy: RouterPolicy, confidence: f32, source: PolicySource, @@ -377,7 +381,7 @@ impl PolicyStore { fn entry_from_metadata( &self, id: &str, - embedding: &[f32], + embedding: &QuantumVector, metadata: &HashMap, ) -> Option { let uuid = Uuid::parse_str(id).ok()?; @@ -416,7 +420,7 @@ impl PolicyStore { 
Some(PolicyEntry { id: uuid, policy_type, - embedding: embedding.to_vec(), + embedding: embedding.clone(), parameters, confidence, fisher_diagonal, diff --git a/crates/ruvllm/src/quality/coherence.rs b/crates/ruvllm/src/quality/coherence.rs index 89a3beffb..1a48ee2f4 100644 --- a/crates/ruvllm/src/quality/coherence.rs +++ b/crates/ruvllm/src/quality/coherence.rs @@ -4,6 +4,7 @@ //! detecting contradictions, and checking logical flow in generated content. use crate::error::Result; +use crate::utils::{compute_std_dev, cosine_similarity}; use parking_lot::RwLock; use serde::{Deserialize, Serialize}; use std::collections::HashMap; @@ -652,35 +653,6 @@ impl CoherenceValidator { } } -/// Compute cosine similarity between two vectors -fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 { - if a.len() != b.len() || a.is_empty() { - return 0.0; - } - - let dot: f32 = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum(); - let norm_a: f32 = a.iter().map(|x| x * x).sum::().sqrt(); - let norm_b: f32 = b.iter().map(|x| x * x).sum::().sqrt(); - - if norm_a == 0.0 || norm_b == 0.0 { - return 0.0; - } - - dot / (norm_a * norm_b) -} - -/// Compute standard deviation -fn compute_std_dev(values: &[f32], mean: f32) -> f32 { - if values.len() < 2 { - return 0.0; - } - - let variance: f32 = - values.iter().map(|v| (v - mean).powi(2)).sum::() / (values.len() - 1) as f32; - - variance.sqrt() -} - /// Extract numbers from text fn extract_numbers(text: &str) -> Vec { let mut numbers = Vec::new(); diff --git a/crates/ruvllm/src/quality/diversity.rs b/crates/ruvllm/src/quality/diversity.rs index f0daccca5..d33673024 100644 --- a/crates/ruvllm/src/quality/diversity.rs +++ b/crates/ruvllm/src/quality/diversity.rs @@ -3,6 +3,8 @@ //! This module provides tools for analyzing diversity in generated content, //! detecting mode collapse, and suggesting diversification strategies. 
+use crate::error::Result; +use crate::utils::cosine_similarity; use parking_lot::RwLock; use serde::{Deserialize, Serialize}; use std::collections::{HashMap, HashSet}; @@ -734,23 +736,6 @@ struct SemanticDiversityResult { average_distance: f32, } -/// Compute cosine similarity between two vectors -fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 { - if a.len() != b.len() || a.is_empty() { - return 0.0; - } - - let dot: f32 = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum(); - let norm_a: f32 = a.iter().map(|x| x * x).sum::().sqrt(); - let norm_b: f32 = b.iter().map(|x| x * x).sum::().sqrt(); - - if norm_a == 0.0 || norm_b == 0.0 { - return 0.0; - } - - dot / (norm_a * norm_b) -} - #[cfg(test)] mod tests { use super::*; diff --git a/crates/ruvllm/src/reasoning_bank/consolidation.rs b/crates/ruvllm/src/reasoning_bank/consolidation.rs index c691b395f..de53f71ac 100644 --- a/crates/ruvllm/src/reasoning_bank/consolidation.rs +++ b/crates/ruvllm/src/reasoning_bank/consolidation.rs @@ -31,6 +31,8 @@ pub struct ConsolidationConfig { pub max_unused_age_secs: u64, /// Enable automatic lambda adaptation pub auto_adapt_lambda: bool, + /// Minimum importance score to keep a pattern + pub min_importance_threshold: f32, } impl Default for ConsolidationConfig { @@ -45,34 +47,42 @@ impl Default for ConsolidationConfig { merge_similarity_threshold: 0.85, max_unused_age_secs: 86400 * 7, // 7 days auto_adapt_lambda: true, + min_importance_threshold: 0.2, } } } +use ruvector_core::types::QuantumVector; + /// Fisher information for a pattern dimension #[derive(Debug, Clone, Serialize, Deserialize)] pub struct FisherInformation { /// Diagonal of the Fisher information matrix - pub diagonal: Vec, + pub diagonal: QuantumVector, /// Number of samples used to estimate pub sample_count: u64, /// Running EMA of squared gradients - pub ema_grad_squared: Vec, + pub ema_grad_squared: QuantumVector, } impl FisherInformation { /// Create new Fisher information pub fn new(dim: usize) -> Self { 
Self { - diagonal: vec![1.0; dim], + diagonal: QuantumVector::F32(vec![1.0; dim]), sample_count: 0, - ema_grad_squared: vec![0.0; dim], + ema_grad_squared: QuantumVector::F32(vec![0.0; dim]), } } /// Update with new gradient observation pub fn update(&mut self, gradient: &[f32], decay: f32) { - if gradient.len() != self.diagonal.len() { + let (mut diag, mut ema) = match (&mut self.diagonal, &mut self.ema_grad_squared) { + (QuantumVector::F32(d), QuantumVector::F32(e)) => (d, e), + _ => return, // Unsupported for now + }; + + if gradient.len() != diag.len() { return; } @@ -80,36 +90,53 @@ impl FisherInformation { for (i, &g) in gradient.iter().enumerate() { // EMA update: F_t = decay * F_{t-1} + (1 - decay) * g^2 - self.ema_grad_squared[i] = decay * self.ema_grad_squared[i] + (1.0 - decay) * g * g; - self.diagonal[i] = self.ema_grad_squared[i]; + ema[i] = decay * ema[i] + (1.0 - decay) * g * g; + diag[i] = ema[i]; } } /// Get importance score for a dimension pub fn importance(&self, dim: usize) -> f32 { - if dim < self.diagonal.len() { - self.diagonal[dim] - } else { - 0.0 + match &self.diagonal { + QuantumVector::F32(d) => { + if dim < d.len() { + d[dim] + } else { + 0.0 + } + } + _ => 0.0, } } /// Get total importance pub fn total_importance(&self) -> f32 { - self.diagonal.iter().sum() + match &self.diagonal { + QuantumVector::F32(d) => d.iter().sum(), + _ => 0.0, + } } /// Merge with another Fisher information (weighted average) pub fn merge(&mut self, other: &FisherInformation, self_weight: f32) { - if self.diagonal.len() != other.diagonal.len() { + let (diag_self, ema_self) = match (&mut self.diagonal, &mut self.ema_grad_squared) { + (QuantumVector::F32(d), QuantumVector::F32(e)) => (d, e), + _ => return, + }; + + let (diag_other, ema_other) = match (&other.diagonal, &other.ema_grad_squared) { + (QuantumVector::F32(d), QuantumVector::F32(e)) => (d, e), + _ => return, + }; + + if diag_self.len() != diag_other.len() { return; } let other_weight = 1.0 - 
self_weight; - for i in 0..self.diagonal.len() { - self.diagonal[i] = self.diagonal[i] * self_weight + other.diagonal[i] * other_weight; - self.ema_grad_squared[i] = - self.ema_grad_squared[i] * self_weight + other.ema_grad_squared[i] * other_weight; + for i in 0..diag_self.len() { + diag_self[i] = diag_self[i] * self_weight + diag_other[i] * other_weight; + ema_self[i] = ema_self[i] * self_weight + ema_other[i] * other_weight; } self.sample_count = ((self.sample_count as f32 * self_weight) @@ -169,7 +196,7 @@ impl ImportanceScore { // Fisher information factor if let Some(fi) = fisher { - factors.fisher_factor = (fi.total_importance() / fi.diagonal.len() as f32).min(1.0); + factors.fisher_factor = (fi.total_importance() / fi.dimension() as f32).min(1.0); } else { factors.fisher_factor = 0.5; // Default if no Fisher info } @@ -275,7 +302,8 @@ impl PatternConsolidator { .filter(|s| { let pattern = patterns.iter().find(|p| p.id == s.pattern_id); if let Some(p) = pattern { - s.score < 0.2 && p.avg_quality < self.config.min_quality_threshold + s.score < self.config.min_importance_threshold + && p.avg_quality < self.config.min_quality_threshold } else { false } @@ -446,9 +474,14 @@ impl PatternConsolidator { if let Some(fisher) = self.fisher_info.get(&pattern_id) { let mut loss = 0.0f32; - for i in 0..current_weights.len().min(fisher.diagonal.len()) { + let diag = match &fisher.diagonal { + QuantumVector::F32(d) => d, + _ => return 0.0, + }; + + for i in 0..current_weights.len().min(diag.len()) { let diff = current_weights[i] - optimal_weights[i]; - loss += fisher.diagonal[i] * diff * diff; + loss += diag[i] * diff * diff; } self.lambda * loss / 2.0 } else { @@ -484,7 +517,7 @@ impl PatternConsolidator { .fisher_info .values() .next() - .map(|f| f.diagonal.len()) + .map(|f| f.dimension()) .unwrap_or(0); if dim == 0 { return; @@ -493,15 +526,32 @@ impl PatternConsolidator { let mut consolidated = FisherInformation::new(dim); let count = self.fisher_info.len() as f32; + 
let (mut diag_cons, mut ema_cons) = match ( + &mut consolidated.diagonal, + &mut consolidated.ema_grad_squared, + ) { + (QuantumVector::F32(d), QuantumVector::F32(e)) => (d, e), + _ => return, + }; + for fisher in self.fisher_info.values() { - for (i, &val) in fisher.diagonal.iter().enumerate() { - if i < consolidated.diagonal.len() { - consolidated.diagonal[i] += val / count; + let diag = match &fisher.diagonal { + QuantumVector::F32(d) => d, + _ => continue, + }; + let ema = match &fisher.ema_grad_squared { + QuantumVector::F32(e) => e, + _ => continue, + }; + + for (i, &val) in diag.iter().enumerate() { + if i < diag_cons.len() { + diag_cons[i] += val / count; } } - for (i, &val) in fisher.ema_grad_squared.iter().enumerate() { - if i < consolidated.ema_grad_squared.len() { - consolidated.ema_grad_squared[i] += val / count; + for (i, &val) in ema.iter().enumerate() { + if i < ema_cons.len() { + ema_cons[i] += val / count; } } consolidated.sample_count += fisher.sample_count; @@ -546,12 +596,22 @@ pub struct ConsolidatorStats { pub total_consolidated: u64, } +impl FisherInformation { + pub fn dimension(&self) -> usize { + match &self.diagonal { + QuantumVector::F32(d) => d.len(), + _ => 0, + } + } +} + #[cfg(test)] mod tests { use super::*; use crate::reasoning_bank::pattern_store::PatternCategory; + use ruvector_core::types::QuantumVector; - fn make_pattern(id: u64, embedding: Vec, quality: f32, usage: u32) -> Pattern { + fn make_pattern(id: u64, embedding: QuantumVector, quality: f32, usage: u32) -> Pattern { let mut p = Pattern::new(embedding, PatternCategory::General, quality); p.id = id; p.usage_count = usage; @@ -580,7 +640,7 @@ mod tests { #[test] fn test_importance_score() { - let pattern = make_pattern(1, vec![0.1; 4], 0.8, 10); + let pattern = make_pattern(1, QuantumVector::F32(vec![0.1; 4]), 0.8, 10); let score = ImportanceScore::compute(&pattern, None, 86400); assert!(score.score > 0.0); @@ -605,9 +665,9 @@ mod tests { let consolidator = 
PatternConsolidator::new(config); let patterns = vec![ - make_pattern(1, vec![0.1; 4], 0.8, 10), // Keep (high quality) - make_pattern(2, vec![0.2; 4], 0.3, 2), // Prune (low quality, low usage) - make_pattern(3, vec![0.3; 4], 0.4, 8), // Keep (high usage) + make_pattern(1, QuantumVector::F32(vec![0.1; 4]), 0.8, 10), // Keep (high quality) + make_pattern(2, QuantumVector::F32(vec![0.2; 4]), 0.3, 2), // Prune (low quality, low usage) + make_pattern(3, QuantumVector::F32(vec![0.3; 4]), 0.4, 8), // Keep (high usage) ]; let pruned = consolidator.prune_low_quality(&patterns); @@ -621,9 +681,9 @@ mod tests { let consolidator = PatternConsolidator::new(config); let patterns = vec![ - make_pattern(1, vec![0.1; 4], 0.8, 10), - make_pattern(2, vec![0.2; 4], 0.1, 1), // Low quality - make_pattern(3, vec![0.3; 4], 0.7, 5), + make_pattern(1, QuantumVector::F32(vec![0.1; 4]), 0.8, 10), + make_pattern(2, QuantumVector::F32(vec![0.2; 4]), 0.1, 1), // Low quality + make_pattern(3, QuantumVector::F32(vec![0.3; 4]), 0.7, 5), ]; let result = consolidator.consolidate_patterns(&patterns).unwrap(); @@ -642,9 +702,9 @@ mod tests { // Very similar embeddings let patterns = vec![ - make_pattern(1, vec![1.0, 0.0, 0.0, 0.0], 0.8, 5), - make_pattern(2, vec![0.99, 0.01, 0.0, 0.0], 0.7, 3), // Very similar to 1 - make_pattern(3, vec![0.0, 1.0, 0.0, 0.0], 0.9, 10), // Different + make_pattern(1, QuantumVector::F32(vec![1.0, 0.0, 0.0, 0.0]), 0.8, 5), + make_pattern(2, QuantumVector::F32(vec![0.99, 0.01, 0.0, 0.0]), 0.7, 3), // Very similar to 1 + make_pattern(3, QuantumVector::F32(vec![0.0, 1.0, 0.0, 0.0]), 0.9, 10), // Different ]; let merged = consolidator @@ -706,9 +766,9 @@ mod tests { // Add patterns with high usage let patterns = vec![ - make_pattern(1, vec![0.1; 4], 0.8, 10), - make_pattern(2, vec![0.2; 4], 0.7, 8), - make_pattern(3, vec![0.3; 4], 0.9, 15), + make_pattern(1, QuantumVector::F32(vec![0.1; 4]), 0.8, 10), + make_pattern(2, QuantumVector::F32(vec![0.2; 4]), 0.7, 8), + 
make_pattern(3, QuantumVector::F32(vec![0.3; 4]), 0.9, 15), ]; consolidator.adapt_lambda(&patterns); diff --git a/crates/ruvllm/src/reasoning_bank/distillation.rs b/crates/ruvllm/src/reasoning_bank/distillation.rs index 5bdcd3dee..421f84fdb 100644 --- a/crates/ruvllm/src/reasoning_bank/distillation.rs +++ b/crates/ruvllm/src/reasoning_bank/distillation.rs @@ -48,8 +48,8 @@ impl Default for DistillationConfig { pub struct CompressedTrajectory { /// Original trajectory ID pub original_id: u64, - /// Key embedding (compressed representation) - pub key_embedding: Vec, + /// Key embedding (compressed representation) (Quantum) + pub key_embedding: ruvector_core::types::QuantumVector, /// Verdict pub verdict: Verdict, /// Quality score @@ -111,8 +111,8 @@ impl CompressedTrajectory { pub struct KeyLesson { /// Lesson content pub content: String, - /// Embedding for semantic search - pub embedding: Vec, + /// Embedding for semantic search (Quantum) + pub embedding: ruvector_core::types::QuantumVector, /// Source trajectory IDs pub source_trajectory_ids: Vec, /// Observation count (how many times seen) @@ -137,7 +137,11 @@ pub struct KeyLesson { impl KeyLesson { /// Create a new key lesson - pub fn new(content: String, embedding: Vec, category: PatternCategory) -> Self { + pub fn new( + content: String, + embedding: ruvector_core::types::QuantumVector, + category: PatternCategory, + ) -> Self { let now = Utc::now(); Self { content, @@ -207,24 +211,10 @@ impl KeyLesson { /// Compute embedding similarity pub fn embedding_similarity(&self, other: &KeyLesson) -> f32 { - if self.embedding.len() != other.embedding.len() || self.embedding.is_empty() { - return 0.0; - } - - let dot: f32 = self - .embedding - .iter() - .zip(&other.embedding) - .map(|(a, b)| a * b) - .sum(); - let norm_a: f32 = self.embedding.iter().map(|x| x * x).sum::().sqrt(); - let norm_b: f32 = other.embedding.iter().map(|x| x * x).sum::().sqrt(); - - if norm_a > 1e-8 && norm_b > 1e-8 { - dot / (norm_a * norm_b) 
- } else { - 0.0 - } + let a = self.embedding.reconstruct(); + let b = other.embedding.reconstruct(); + let dist = ruvector_core::distance::cosine_distance(&a, &b); + 1.0 - dist } } @@ -621,11 +611,12 @@ fn infer_category(trajectory: &Trajectory) -> PatternCategory { /// Estimate trajectory memory size fn estimate_trajectory_size(trajectory: &Trajectory) -> usize { let base_size = std::mem::size_of::(); - let embedding_size = trajectory.query_embedding.len() * std::mem::size_of::(); + let embedding_size = + trajectory.query_embedding.reconstruct().len() * std::mem::size_of::(); let response_embedding_size = trajectory .response_embedding .as_ref() - .map(|e| e.len() * std::mem::size_of::()) + .map(|e| e.reconstruct().len() * std::mem::size_of::()) .unwrap_or(0); let steps_size: usize = trajectory .steps @@ -649,9 +640,10 @@ fn estimate_trajectory_size(trajectory: &Trajectory) -> usize { mod tests { use super::super::trajectory::{StepOutcome, TrajectoryRecorder}; use super::*; + use ruvector_core::types::QuantumVector; fn make_trajectory(id: u64, quality: f32) -> Trajectory { - let mut recorder = TrajectoryRecorder::new(vec![0.1; 64]); + let mut recorder = TrajectoryRecorder::new(QuantumVector::F32(vec![0.1; 64])); recorder.add_step( "action1".to_string(), "rationale1".to_string(), @@ -699,7 +691,7 @@ mod tests { fn test_key_lesson_creation() { let lesson = KeyLesson::new( "Test lesson".to_string(), - vec![0.1; 64], + QuantumVector::F32(vec![0.1; 64]), PatternCategory::General, ); @@ -711,7 +703,7 @@ mod tests { fn test_key_lesson_merge() { let mut lesson1 = KeyLesson::new( "Test lesson".to_string(), - vec![0.1; 4], + QuantumVector::F32(vec![0.1; 4]), PatternCategory::General, ); lesson1.importance = 0.5; @@ -719,7 +711,7 @@ mod tests { let mut lesson2 = KeyLesson::new( "Test lesson".to_string(), - vec![0.2; 4], + QuantumVector::F32(vec![0.2; 4]), PatternCategory::General, ); lesson2.importance = 0.7; @@ -735,17 +727,17 @@ mod tests { fn test_lesson_similarity() { 
let lesson1 = KeyLesson::new( "Test lesson about code generation".to_string(), - vec![1.0, 0.0, 0.0, 0.0], + QuantumVector::F32(vec![1.0, 0.0, 0.0, 0.0]), PatternCategory::General, ); let lesson2 = KeyLesson::new( "Test lesson about code generation".to_string(), - vec![1.0, 0.0, 0.0, 0.0], + QuantumVector::F32(vec![1.0, 0.0, 0.0, 0.0]), PatternCategory::General, ); let lesson3 = KeyLesson::new( "Different topic entirely".to_string(), - vec![0.0, 1.0, 0.0, 0.0], + QuantumVector::F32(vec![0.0, 1.0, 0.0, 0.0]), PatternCategory::General, ); @@ -825,17 +817,17 @@ mod tests { let lessons = vec![ KeyLesson::new( "Test lesson one".to_string(), - vec![1.0, 0.0], + QuantumVector::F32(vec![1.0, 0.0]), PatternCategory::General, ), KeyLesson::new( "Test lesson one".to_string(), - vec![1.0, 0.0], + QuantumVector::F32(vec![1.0, 0.0]), PatternCategory::General, ), KeyLesson::new( "Different lesson".to_string(), - vec![0.0, 1.0], + QuantumVector::F32(vec![0.0, 1.0]), PatternCategory::General, ), ]; diff --git a/crates/ruvllm/src/reasoning_bank/mod.rs b/crates/ruvllm/src/reasoning_bank/mod.rs index dcb7c8e29..eac42239b 100644 --- a/crates/ruvllm/src/reasoning_bank/mod.rs +++ b/crates/ruvllm/src/reasoning_bank/mod.rs @@ -211,7 +211,10 @@ impl ReasoningBank { } /// Start recording a new trajectory - pub fn start_trajectory(&self, query_embedding: Vec) -> TrajectoryRecorder { + pub fn start_trajectory( + &self, + query_embedding: ruvector_core::types::QuantumVector, + ) -> TrajectoryRecorder { TrajectoryRecorder::new(query_embedding) } @@ -267,7 +270,7 @@ impl ReasoningBank { /// Search for similar patterns by embedding pub fn search_similar( &self, - query_embedding: &[f32], + query_embedding: &ruvector_core::types::QuantumVector, limit: usize, ) -> Result> { let store = self.pattern_store.read(); @@ -408,6 +411,7 @@ impl ReasoningBank { #[cfg(test)] mod tests { use super::*; + use ruvector_core::types::QuantumVector; #[test] fn test_reasoning_bank_config_default() { @@ -433,7 +437,7 
@@ mod tests { let config = ReasoningBankConfig::default(); let bank = ReasoningBank::new(config).unwrap(); - let mut recorder = bank.start_trajectory(vec![0.1; 768]); + let mut recorder = bank.start_trajectory(QuantumVector::F32(vec![0.1; 768])); recorder.add_step( "analyze".to_string(), "Need to understand the problem".to_string(), diff --git a/crates/ruvllm/src/reasoning_bank/pattern_store.rs b/crates/ruvllm/src/reasoning_bank/pattern_store.rs index 608f35162..8d806263d 100644 --- a/crates/ruvllm/src/reasoning_bank/pattern_store.rs +++ b/crates/ruvllm/src/reasoning_bank/pattern_store.rs @@ -115,8 +115,8 @@ pub struct Pattern { pub id: u64, /// UUID for external reference pub uuid: Uuid, - /// Pattern embedding (centroid) - pub embedding: Vec, + /// Pattern embedding (centroid) (Quantum) + pub embedding: ruvector_core::types::QuantumVector, /// Category pub category: PatternCategory, /// Confidence score (0.0 - 1.0) @@ -156,7 +156,11 @@ pub struct PatternMetadata { impl Pattern { /// Create a new pattern - pub fn new(embedding: Vec, category: PatternCategory, confidence: f32) -> Self { + pub fn new( + embedding: ruvector_core::types::QuantumVector, + category: PatternCategory, + confidence: f32, + ) -> Self { let now = Utc::now(); Self { id: PATTERN_COUNTER.fetch_add(1, Ordering::SeqCst), @@ -341,11 +345,16 @@ impl Pattern { let w1 = self.usage_count as f32 / total_count as f32; let w2 = other.usage_count as f32 / total_count as f32; - for (i, e) in self.embedding.iter_mut().enumerate() { - if i < other.embedding.len() { - *e = *e * w1 + other.embedding[i] * w2; + let v1 = self.embedding.reconstruct(); + let v2 = other.embedding.reconstruct(); + let mut merged_v = vec![0.0; v1.len()]; + + for (i, e) in merged_v.iter_mut().enumerate() { + if i < v2.len() { + *e = v1[i] * w1 + v2[i] * w2; } } + self.embedding = ruvector_core::types::QuantumVector::F32(merged_v); // Merge statistics self.usage_count = total_count; @@ -370,15 +379,18 @@ impl Pattern { 
self.last_accessed = Utc::now(); } - /// Compute cosine similarity with a query - pub fn similarity(&self, query: &[f32]) -> f32 { - if self.embedding.len() != query.len() { + /// Compute similarity with a query + pub fn similarity(&self, query: &ruvector_core::types::QuantumVector) -> f32 { + let v_p = self.embedding.reconstruct(); + let v_q = query.reconstruct(); + + if v_p.len() != v_q.len() { return 0.0; } - let dot: f32 = self.embedding.iter().zip(query).map(|(a, b)| a * b).sum(); - let norm_a: f32 = self.embedding.iter().map(|x| x * x).sum::().sqrt(); - let norm_b: f32 = query.iter().map(|x| x * x).sum::().sqrt(); + let dot: f32 = v_p.iter().zip(&v_q).map(|(a, b)| a * b).sum(); + let norm_a: f32 = v_p.iter().map(|x| x * x).sum::().sqrt(); + let norm_b: f32 = v_q.iter().map(|x| x * x).sum::().sqrt(); if norm_a > 1e-8 && norm_b > 1e-8 { dot / (norm_a * norm_b) @@ -540,13 +552,17 @@ impl PatternStore { } /// Search for similar patterns - pub fn search_similar(&self, query: &[f32], limit: usize) -> Result> { + pub fn search_similar( + &self, + query_embedding: &ruvector_core::types::QuantumVector, + limit: usize, + ) -> Result> { let start = std::time::Instant::now(); // Search HNSW index let results = { let search_query = SearchQuery { - vector: query.to_vec(), + vector: query_embedding.clone(), k: limit, filter: None, ef_search: Some(self.config.ef_search), @@ -794,10 +810,15 @@ impl PatternStore { #[cfg(test)] mod tests { use super::*; + use ruvector_core::types::QuantumVector; #[test] fn test_pattern_creation() { - let pattern = Pattern::new(vec![0.1; 768], PatternCategory::Reasoning, 0.9); + let pattern = Pattern::new( + QuantumVector::F32(vec![0.1; 768]), + PatternCategory::Reasoning, + 0.9, + ); assert!(pattern.id > 0 || pattern.id == 0); // First pattern might be 0 assert_eq!(pattern.category, PatternCategory::Reasoning); @@ -806,25 +827,43 @@ mod tests { #[test] fn test_pattern_similarity() { - let pattern = Pattern::new(vec![1.0, 0.0, 0.0], 
PatternCategory::General, 0.9); + let pattern = Pattern::new( + QuantumVector::F32(vec![1.0, 0.0, 0.0]), + PatternCategory::General, + 0.9, + ); - assert!((pattern.similarity(&[1.0, 0.0, 0.0]) - 1.0).abs() < 1e-6); - assert!(pattern.similarity(&[0.0, 1.0, 0.0]).abs() < 1e-6); + assert!((pattern.similarity(&QuantumVector::F32(vec![1.0, 0.0, 0.0])) - 1.0).abs() < 1e-6); + assert!( + pattern + .similarity(&QuantumVector::F32(vec![0.0, 1.0, 0.0])) + .abs() + < 1e-6 + ); } #[test] fn test_pattern_merge() { - let mut p1 = Pattern::new(vec![1.0, 0.0], PatternCategory::General, 0.8); + let mut p1 = Pattern::new( + QuantumVector::F32(vec![1.0, 0.0]), + PatternCategory::General, + 0.8, + ); p1.usage_count = 10; - let mut p2 = Pattern::new(vec![0.0, 1.0], PatternCategory::General, 0.9); + let mut p2 = Pattern::new( + QuantumVector::F32(vec![0.0, 1.0]), + PatternCategory::General, + 0.9, + ); p2.usage_count = 10; p1.merge(&p2); assert_eq!(p1.usage_count, 20); - assert!((p1.embedding[0] - 0.5).abs() < 1e-6); - assert!((p1.embedding[1] - 0.5).abs() < 1e-6); + let v = p1.embedding.reconstruct(); + assert!((v[0] - 0.5).abs() < 1e-6); + assert!((v[1] - 0.5).abs() < 1e-6); } #[test] @@ -855,11 +894,17 @@ mod tests { let mut store = PatternStore::new(config).unwrap(); // Store pattern - let pattern = Pattern::new(vec![1.0, 0.0, 0.0, 0.0], PatternCategory::Reasoning, 0.9); + let pattern = Pattern::new( + QuantumVector::F32(vec![1.0, 0.0, 0.0, 0.0]), + PatternCategory::Reasoning, + 0.9, + ); let id = store.store_pattern(pattern).unwrap(); // Search - let results = store.search_similar(&[1.0, 0.0, 0.0, 0.0], 1).unwrap(); + let results = store + .search_similar(&QuantumVector::F32(vec![1.0, 0.0, 0.0, 0.0]), 1) + .unwrap(); assert!(!results.is_empty()); assert_eq!(results[0].pattern.id, id); diff --git a/crates/ruvllm/src/reasoning_bank/trajectory.rs b/crates/ruvllm/src/reasoning_bank/trajectory.rs index bfb7f0a3b..e183e188a 100644 --- a/crates/ruvllm/src/reasoning_bank/trajectory.rs +++ 
b/crates/ruvllm/src/reasoning_bank/trajectory.rs @@ -117,8 +117,8 @@ pub struct TrajectoryStep { pub latency_ms: u64, /// Timestamp when step was executed pub timestamp: DateTime, - /// Optional embedding of the action context - pub context_embedding: Option>, + /// Optional embedding of the action context (Quantum) + pub context_embedding: Option, /// Optional metadata pub metadata: Option, } @@ -169,7 +169,7 @@ impl TrajectoryStep { } /// Set context embedding - pub fn with_embedding(mut self, embedding: Vec) -> Self { + pub fn with_embedding(mut self, embedding: ruvector_core::types::QuantumVector) -> Self { self.context_embedding = Some(embedding); self } @@ -216,10 +216,10 @@ pub struct Trajectory { pub id: TrajectoryId, /// UUID for external reference pub uuid: Uuid, - /// Query embedding (input representation) - pub query_embedding: Vec, - /// Response embedding (output representation) - pub response_embedding: Option>, + /// Query embedding (input representation) (Quantum) + pub query_embedding: ruvector_core::types::QuantumVector, + /// Response embedding (output representation) (Quantum) + pub response_embedding: Option, /// Execution steps pub steps: Vec, /// Final verdict @@ -240,7 +240,7 @@ pub struct Trajectory { impl Trajectory { /// Create a new trajectory - pub fn new(query_embedding: Vec) -> Self { + pub fn new(query_embedding: ruvector_core::types::QuantumVector) -> Self { let now = Utc::now(); Self { id: TrajectoryId::new(), @@ -365,7 +365,7 @@ impl Trajectory { } /// Set response embedding - pub fn set_response_embedding(&mut self, embedding: Vec) { + pub fn set_response_embedding(&mut self, embedding: ruvector_core::types::QuantumVector) { self.response_embedding = Some(embedding); } } @@ -382,7 +382,7 @@ pub struct TrajectoryRecorder { impl TrajectoryRecorder { /// Create a new trajectory recorder - pub fn new(query_embedding: Vec) -> Self { + pub fn new(query_embedding: ruvector_core::types::QuantumVector) -> Self { Self { trajectory: 
Trajectory::new(query_embedding), current_step: 0, @@ -469,7 +469,7 @@ impl TrajectoryRecorder { } /// Set response embedding - pub fn set_response_embedding(&mut self, embedding: Vec) { + pub fn set_response_embedding(&mut self, embedding: ruvector_core::types::QuantumVector) { self.trajectory.set_response_embedding(embedding); } @@ -493,6 +493,7 @@ impl TrajectoryRecorder { #[cfg(test)] mod tests { use super::*; + use ruvector_core::types::QuantumVector; #[test] fn test_trajectory_id_generation() { @@ -530,14 +531,14 @@ mod tests { #[test] fn test_trajectory_creation() { - let trajectory = Trajectory::new(vec![0.1; 768]); + let trajectory = Trajectory::new(QuantumVector::F32(vec![0.1; 768])); assert_eq!(trajectory.steps.len(), 0); assert!(!trajectory.is_success()); } #[test] fn test_trajectory_recorder() { - let mut recorder = TrajectoryRecorder::new(vec![0.1; 768]); + let mut recorder = TrajectoryRecorder::new(QuantumVector::F32(vec![0.1; 768])); recorder.set_session_id("session-1".to_string()); recorder.set_user_id("user-1".to_string()); @@ -564,7 +565,7 @@ mod tests { #[test] fn test_trajectory_quality_computation() { - let mut trajectory = Trajectory::new(vec![0.1; 768]); + let mut trajectory = Trajectory::new(QuantumVector::F32(vec![0.1; 768])); trajectory.add_step(TrajectoryStep::new( 0, @@ -595,7 +596,7 @@ mod tests { #[test] fn test_trajectory_stats() { - let mut recorder = TrajectoryRecorder::new(vec![0.1; 768]); + let mut recorder = TrajectoryRecorder::new(QuantumVector::F32(vec![0.1; 768])); recorder.add_step( "step1".to_string(), diff --git a/crates/ruvllm/src/reasoning_bank/verdicts.rs b/crates/ruvllm/src/reasoning_bank/verdicts.rs index e4bada635..d7fb66a9e 100644 --- a/crates/ruvllm/src/reasoning_bank/verdicts.rs +++ b/crates/ruvllm/src/reasoning_bank/verdicts.rs @@ -787,6 +787,7 @@ pub struct VerdictAnalyzerStats { mod tests { use super::super::trajectory::{StepOutcome, TrajectoryRecorder}; use super::*; + use ruvector_core::types::QuantumVector; 
#[test] fn test_verdict_creation() { @@ -830,7 +831,7 @@ mod tests { fn test_verdict_analysis() { let analyzer = VerdictAnalyzer::new(); - let mut recorder = TrajectoryRecorder::new(vec![0.1; 768]); + let mut recorder = TrajectoryRecorder::new(QuantumVector::F32(vec![0.1; 768])); recorder.add_step( "analyze".to_string(), "analyzing".to_string(), diff --git a/crates/ruvllm/src/ruvector_integration.rs b/crates/ruvllm/src/ruvector_integration.rs index 3c4c824d0..433c0dda5 100644 --- a/crates/ruvllm/src/ruvector_integration.rs +++ b/crates/ruvllm/src/ruvector_integration.rs @@ -52,11 +52,11 @@ use crate::capabilities::{ }; use crate::claude_flow::{AgentRouter, AgentType}; use crate::error::{Result, RuvLLMError}; -use crate::sona::{RoutingRecommendation, SonaConfig, SonaIntegration, SonaStats, Trajectory}; +use crate::sona::{SonaConfig, SonaIntegration, SonaStats, SonaTrajectory}; use parking_lot::RwLock; use ruvector_core::index::hnsw::HnswIndex; use ruvector_core::index::VectorIndex; -use ruvector_core::types::{DistanceMetric, HnswConfig, VectorId}; +use ruvector_core::types::{DistanceMetric, HnswConfig, QuantumVector, VectorId}; use ruvector_sona::{LearnedPattern, PatternConfig, ReasoningBank}; use serde::{Deserialize, Serialize}; use std::collections::HashMap; @@ -229,7 +229,7 @@ impl UnifiedIndex { } /// Add a vector to the index - pub fn add(&self, id: VectorId, vector: Vec, metadata: VectorMetadata) -> Result<()> { + pub fn add(&self, id: VectorId, vector: QuantumVector, metadata: VectorMetadata) -> Result<()> { // Add to HNSW index { let mut hnsw = self.hnsw.write(); @@ -246,9 +246,8 @@ impl UnifiedIndex { Ok(()) } - /// Add a batch of vectors - pub fn add_batch(&self, entries: Vec<(VectorId, Vec, VectorMetadata)>) -> Result<()> { - let vectors: Vec<(VectorId, Vec)> = entries + pub fn add_batch(&self, entries: Vec<(VectorId, QuantumVector, VectorMetadata)>) -> Result<()> { + let vectors: Vec<(VectorId, QuantumVector)> = entries .iter() .map(|(id, vec, _)| 
(id.clone(), vec.clone())) .collect(); @@ -274,7 +273,7 @@ impl UnifiedIndex { } /// Search for similar vectors - pub fn search(&self, query: &[f32], k: usize) -> Result> { + pub fn search(&self, query: &QuantumVector, k: usize) -> Result> { let start = std::time::Instant::now(); let results = { @@ -313,41 +312,31 @@ impl UnifiedIndex { Ok(enriched) } - /// Search with attention-weighted similarity (if available) - #[cfg(feature = "attention")] pub fn search_with_attention( &self, - query: &[f32], + query: &QuantumVector, k: usize, - attention_context: Option<&[f32]>, + attention_context: Option<&QuantumVector>, ) -> Result> { // Apply attention-weighted transformation if context provided let effective_query = if let Some(ctx) = attention_context { // Simplified attention: weighted combination let alpha = 0.7; // Query weight - query + let q_vec = query.reconstruct(); + let c_vec = ctx.reconstruct(); + let combined = q_vec .iter() - .zip(ctx.iter()) + .zip(c_vec.iter()) .map(|(q, c)| alpha * q + (1.0 - alpha) * c) - .collect::>() + .collect::>(); + QuantumVector::F32(combined) } else { - query.to_vec() + query.clone() }; self.search(&effective_query, k) } - /// Search without attention (fallback) - #[cfg(not(feature = "attention"))] - pub fn search_with_attention( - &self, - query: &[f32], - k: usize, - _attention_context: Option<&[f32]>, - ) -> Result> { - self.search(query, k) - } - /// Get index statistics pub fn stats(&self) -> IndexStats { IndexStats { @@ -476,7 +465,11 @@ impl IntelligenceLayer { } /// Route a task to the optimal agent with full reasoning - pub fn route(&self, task_description: &str, embedding: &[f32]) -> IntelligentRoutingDecision { + pub fn route( + &self, + task_description: &str, + embedding: &QuantumVector, + ) -> IntelligentRoutingDecision { self.stats.routing_decisions.fetch_add(1, Ordering::SeqCst); let mut reasoning = Vec::new(); @@ -503,7 +496,8 @@ impl IntelligenceLayer { let mut influencing_patterns: Vec = Vec::new(); { let rb = 
self.index.reasoning_bank().read(); - let patterns = rb.find_similar(embedding, 5); + let q_vec = embedding.reconstruct(); + let patterns = rb.find_similar(&q_vec, 5); influencing_patterns = patterns.into_iter().cloned().collect(); } @@ -588,11 +582,10 @@ impl IntelligenceLayer { } } - /// Learn from task outcome pub fn learn_from_outcome( &self, task_description: &str, - embedding: &[f32], + embedding: &QuantumVector, agent_used: AgentType, success: bool, quality_score: f32, @@ -600,11 +593,11 @@ impl IntelligenceLayer { self.stats.learning_updates.fetch_add(1, Ordering::SeqCst); // Record trajectory for SONA learning - let trajectory = Trajectory { + let trajectory = SonaTrajectory { request_id: uuid::Uuid::new_v4().to_string(), session_id: "ruvector-integration".to_string(), - query_embedding: embedding.to_vec(), - response_embedding: embedding.to_vec(), + query_embedding: embedding.clone(), + response_embedding: embedding.clone(), quality_score, routing_features: vec![ agent_used as u8 as f32 / 10.0, @@ -637,7 +630,7 @@ impl IntelligenceLayer { }; let id = format!("pattern-{}", uuid::Uuid::new_v4()); - self.index.add(id, embedding.to_vec(), metadata)?; + self.index.add(id, embedding.clone(), metadata)?; self.stats .successful_routings @@ -832,7 +825,7 @@ impl RuvectorIntegration { pub fn route_with_intelligence( &self, task: &str, - embedding: &[f32], + embedding: &QuantumVector, ) -> IntelligentRoutingDecision { self.intelligence.route(task, embedding) } @@ -854,7 +847,7 @@ impl RuvectorIntegration { pub fn learn_from_outcome( &self, task: &str, - embedding: &[f32], + embedding: &QuantumVector, agent: AgentType, success: bool, quality: f32, @@ -887,7 +880,7 @@ impl RuvectorIntegration { } /// Search unified index - pub fn search(&self, query: &[f32], k: usize) -> Result> { + pub fn search(&self, query: &QuantumVector, k: usize) -> Result> { self.unified_index.search(query, k) } @@ -895,7 +888,7 @@ impl RuvectorIntegration { pub fn add_vector( &self, id: 
VectorId, - vector: Vec, + vector: QuantumVector, metadata: VectorMetadata, ) -> Result<()> { self.unified_index.add(id, vector, metadata) @@ -917,29 +910,42 @@ impl RuvectorIntegration { /// Get feature-gated attention computation #[cfg(feature = "attention")] - pub fn compute_attention(&self, query: &[f32], keys: &[&[f32]], values: &[&[f32]]) -> Vec { + pub fn compute_attention( + &self, + query: &QuantumVector, + keys: &[&QuantumVector], + values: &[&QuantumVector], + ) -> Vec { use ruvector_attention::{traits::Attention, ScaledDotProductAttention}; - let attention = ScaledDotProductAttention::new(query.len()); - attention.compute(query, keys, values).unwrap_or_default() + let q_vec = query.reconstruct(); + let k_vecs: Vec<&[f32]> = keys.iter().map(|k| k.reconstruct_ref()).collect(); + let v_vecs: Vec<&[f32]> = values.iter().map(|v| v.reconstruct_ref()).collect(); + + let attention = ScaledDotProductAttention::new(q_vec.len()); + attention + .compute(&q_vec, &k_vecs, &v_vecs) + .unwrap_or_default() } #[cfg(not(feature = "attention"))] pub fn compute_attention( &self, - query: &[f32], - _keys: &[&[f32]], - values: &[&[f32]], + query: &QuantumVector, + _keys: &[&QuantumVector], + values: &[&QuantumVector], ) -> Vec { // Fallback: average of values if values.is_empty() { - return query.to_vec(); + return query.reconstruct(); } - let dim = query.len(); + let q_vec = query.reconstruct(); + let dim = q_vec.len(); let mut result = vec![0.0; dim]; for v in values { - for (i, val) in v.iter().take(dim).enumerate() { + let v_vec = v.reconstruct(); + for (i, val) in v_vec.iter().take(dim).enumerate() { result[i] += val; } } @@ -964,9 +970,10 @@ pub struct IntegrationStats { #[cfg(test)] mod tests { use super::*; + use ruvector_core::types::QuantumVector; - fn test_embedding() -> Vec { - vec![0.1; 768] + fn test_embedding() -> QuantumVector { + QuantumVector::F32(vec![0.1; 768]) } #[test] @@ -991,7 +998,7 @@ mod tests { }; let index = UnifiedIndex::new(config).unwrap(); - 
let embedding = vec![0.1; 128]; + let embedding = QuantumVector::F32(vec![0.1; 128]); let metadata = VectorMetadata { source: "test".to_string(), ..Default::default() @@ -1014,7 +1021,7 @@ mod tests { }; let intelligence = IntelligenceLayer::new(config).unwrap(); - let embedding = vec![0.1; 128]; + let embedding = QuantumVector::F32(vec![0.1; 128]); let decision = intelligence.route("implement a REST API", &embedding); assert!(decision.confidence > 0.0); @@ -1045,7 +1052,7 @@ mod tests { }; let integration = RuvectorIntegration::new(config).unwrap(); - let embedding = vec![0.1; 128]; + let embedding = QuantumVector::F32(vec![0.1; 128]); let decision = integration.route_with_intelligence("write unit tests", &embedding); assert!(decision.confidence > 0.0); @@ -1060,7 +1067,7 @@ mod tests { }; let integration = RuvectorIntegration::new(config).unwrap(); - let embedding = vec![0.1; 128]; + let embedding = QuantumVector::F32(vec![0.1; 128]); integration .learn_from_outcome("test task", &embedding, AgentType::Tester, true, 0.9) .unwrap(); diff --git a/crates/ruvllm/src/session_index.rs b/crates/ruvllm/src/session_index.rs index f8e69d3ae..bffe2c520 100644 --- a/crates/ruvllm/src/session_index.rs +++ b/crates/ruvllm/src/session_index.rs @@ -181,7 +181,7 @@ impl SessionIndex { // Create vector entry let vector_entry = VectorEntry { id: Some(state.session_id.clone()), - vector: state.context_embedding.clone(), + vector: ruvector_core::types::QuantumVector::F32(state.context_embedding.clone()), metadata: Some(metadata), }; @@ -200,7 +200,7 @@ impl SessionIndex { limit: usize, ) -> Result> { let query = SearchQuery { - vector: context_embedding.to_vec(), + vector: ruvector_core::types::QuantumVector::F32(context_embedding.to_vec()), k: limit, filter: None, ef_search: None, diff --git a/crates/ruvllm/src/sona/integration.rs b/crates/ruvllm/src/sona/integration.rs index 8ac6831cc..f81a247a8 100644 --- a/crates/ruvllm/src/sona/integration.rs +++ 
b/crates/ruvllm/src/sona/integration.rs @@ -38,6 +38,7 @@ use crate::error::{Result, RuvLLMError}; use crate::policy_store::{PolicyEntry, PolicySource, PolicyStore, PolicyType}; use crate::witness_log::WitnessEntry; use parking_lot::RwLock; +use ruvector_core::types::QuantumVector; use ruvector_sona::{ EwcConfig, EwcPlusPlus, LearnedPattern, PatternConfig, ReasoningBank, SonaConfig as SonaCoreConfig, SonaEngine, @@ -104,15 +105,15 @@ pub enum LearningLoop { /// Learning trajectory for SONA #[derive(Debug, Clone)] -pub struct Trajectory { +pub struct SonaTrajectory { /// Request ID pub request_id: String, /// Session ID pub session_id: String, /// Query embedding - pub query_embedding: Vec, + pub query_embedding: QuantumVector, /// Response embedding - pub response_embedding: Vec, + pub response_embedding: QuantumVector, /// Quality score pub quality_score: f32, /// Routing decision features @@ -135,7 +136,7 @@ pub struct SonaIntegration { /// ReasoningBank for pattern storage reasoning_bank: Arc>, /// Trajectory buffer for instant loop - trajectory_buffer: Arc>>, + trajectory_buffer: Arc>>, /// Total trajectories processed total_trajectories: AtomicU64, /// Instant loop updates @@ -153,6 +154,7 @@ pub struct SonaIntegration { impl SonaIntegration { /// Create a new SONA integration pub fn new(config: SonaConfig) -> Self { + println!("[DEBUG] SonaIntegration::new: Start"); let core_config = SonaCoreConfig { hidden_dim: config.hidden_dim, embedding_dim: config.embedding_dim, @@ -165,8 +167,12 @@ impl SonaIntegration { ..Default::default() }; + println!("[DEBUG] SonaIntegration::new: Creating SonaEngine"); + let engine = SonaEngine::with_config(core_config); + println!("[DEBUG] SonaIntegration::new: Creating EwcPlusPlus"); + let ewc_config = EwcConfig { param_count: config.hidden_dim, initial_lambda: config.ewc_lambda, @@ -174,6 +180,8 @@ impl SonaIntegration { }; let ewc = EwcPlusPlus::new(ewc_config); + println!("[DEBUG] SonaIntegration::new: Creating 
ReasoningBank"); + let pattern_config = PatternConfig { k_clusters: 100, embedding_dim: config.embedding_dim.min(256), // PatternConfig uses smaller embedding dim @@ -183,6 +191,8 @@ impl SonaIntegration { }; let reasoning_bank = ReasoningBank::new(pattern_config); + println!("[DEBUG] SonaIntegration::new: Finalizing struct"); + Self { config, engine: Arc::new(RwLock::new(engine)), @@ -199,7 +209,7 @@ impl SonaIntegration { } /// Record a trajectory for learning - pub fn record_trajectory(&self, trajectory: Trajectory) -> Result<()> { + pub fn record_trajectory(&self, trajectory: SonaTrajectory) -> Result<()> { self.total_trajectories.fetch_add(1, Ordering::SeqCst); // Add to buffer @@ -230,15 +240,15 @@ impl SonaIntegration { } /// Run instant loop (per-request, <1ms target) - fn run_instant_loop(&self, trajectory: &Trajectory) -> Result<()> { + fn run_instant_loop(&self, trajectory: &SonaTrajectory) -> Result<()> { let mut engine = self.engine.write(); // Begin trajectory in SONA engine - let mut builder = engine.begin_trajectory(trajectory.query_embedding.clone()); + let mut builder = engine.begin_trajectory(trajectory.query_embedding.reconstruct()); // Add step with routing features builder.add_step( - trajectory.response_embedding.clone(), + trajectory.response_embedding.reconstruct(), trajectory.routing_features.clone(), trajectory.quality_score, ); @@ -287,7 +297,7 @@ impl SonaIntegration { // Create a QueryTrajectory from our Trajectory let query_traj = ruvector_sona::QueryTrajectory::new( traj.request_id.parse().unwrap_or(0), - traj.query_embedding.clone(), + traj.query_embedding.reconstruct(), ); rb.add_trajectory(&query_traj); } @@ -334,14 +344,15 @@ impl SonaIntegration { } /// Compute pseudo-gradients for EWC++ (simplified) - fn compute_pseudo_gradients(&self, trajectory: &Trajectory) -> Vec { + fn compute_pseudo_gradients(&self, trajectory: &SonaTrajectory) -> Vec { // In production, this would compute actual gradients from the model // Here we use a 
simplified version based on embedding differences let mut gradients = vec![0.0; self.config.hidden_dim]; if trajectory.query_embedding.len() >= self.config.hidden_dim { for (i, g) in gradients.iter_mut().enumerate() { - *g = trajectory.query_embedding[i] * trajectory.quality_score; + let query = trajectory.query_embedding.reconstruct(); + *g = query[i] * trajectory.quality_score; } } @@ -349,9 +360,13 @@ impl SonaIntegration { } /// Search for similar patterns in ReasoningBank - pub fn search_patterns(&self, query: &[f32], limit: usize) -> Vec { + pub fn search_patterns(&self, query: &QuantumVector, limit: usize) -> Vec { let rb = self.reasoning_bank.read(); - rb.find_similar(query, limit).into_iter().cloned().collect() + let q_vec = query.reconstruct(); + rb.find_similar(&q_vec, limit) + .into_iter() + .cloned() + .collect() } /// Apply learned transformations to input @@ -363,7 +378,10 @@ impl SonaIntegration { } /// Get router recommendations based on learned patterns - pub fn get_routing_recommendation(&self, query_embedding: &[f32]) -> RoutingRecommendation { + pub fn get_routing_recommendation( + &self, + query_embedding: &QuantumVector, + ) -> RoutingRecommendation { let patterns = self.search_patterns(query_embedding, 5); if patterns.is_empty() { @@ -375,9 +393,10 @@ impl SonaIntegration { patterns.iter().map(|p| p.avg_quality).sum::() / patterns.len() as f32; // Calculate confidence from pattern similarity + let q_vec = query_embedding.reconstruct(); let confidence = patterns .first() - .map(|p| p.similarity(query_embedding)) + .map(|p| p.similarity(&q_vec)) .unwrap_or(0.5); RoutingRecommendation { @@ -396,7 +415,7 @@ impl SonaIntegration { /// Record a witness entry and extract trajectory pub fn record_from_witness(&self, entry: &WitnessEntry) -> Result<()> { - let trajectory = Trajectory { + let trajectory = SonaTrajectory { request_id: entry.request_id.to_string(), session_id: entry.session_id.clone(), query_embedding: entry.query_embedding.clone(), @@ 
-430,7 +449,7 @@ impl SonaIntegration { let entry = PolicyEntry { id: uuid::Uuid::new_v4(), policy_type: PolicyType::Pattern, - embedding: pattern.centroid.clone(), + embedding: ruvector_core::types::QuantumVector::F32(pattern.centroid.clone()), parameters: serde_json::json!({ "avg_quality": pattern.avg_quality, "cluster_size": pattern.cluster_size, @@ -510,6 +529,7 @@ pub struct SonaStats { #[cfg(test)] mod tests { use super::*; + use ruvector_core::types::QuantumVector; #[test] fn test_sona_config_default() { @@ -534,7 +554,7 @@ mod tests { let config = SonaConfig::default(); let sona = SonaIntegration::new(config); - let query = vec![0.1; 256]; // Use smaller embedding for pattern config + let query = QuantumVector::F32(vec![0.1; 256]); // Use smaller embedding for pattern config let rec = sona.get_routing_recommendation(&query); // With no patterns, should return defaults @@ -550,11 +570,11 @@ mod tests { }; let sona = SonaIntegration::new(config); - let trajectory = Trajectory { + let trajectory = SonaTrajectory { request_id: "req-1".to_string(), session_id: "sess-1".to_string(), - query_embedding: vec![0.1; 256], - response_embedding: vec![0.2; 256], + query_embedding: QuantumVector::F32(vec![0.1; 256]), + response_embedding: QuantumVector::F32(vec![0.2; 256]), quality_score: 0.8, routing_features: vec![0.7, 0.9, 0.5, 0.5], model_index: 1, diff --git a/crates/ruvllm/src/sona/mod.rs b/crates/ruvllm/src/sona/mod.rs index 168369210..610ffb9e1 100644 --- a/crates/ruvllm/src/sona/mod.rs +++ b/crates/ruvllm/src/sona/mod.rs @@ -83,7 +83,7 @@ pub mod ruvltra_pretrain; // Re-export integration types (primary API) pub use integration::{ - LearningLoop, RoutingRecommendation, SonaConfig, SonaIntegration, SonaStats, Trajectory, + LearningLoop, RoutingRecommendation, SonaConfig, SonaIntegration, SonaStats, SonaTrajectory, }; // Re-export pretraining types diff --git a/crates/ruvllm/src/tests/witness_log_tests.rs b/crates/ruvllm/src/tests/witness_log_tests.rs index 
42eaa13c5..4a27d31c1 100644 --- a/crates/ruvllm/src/tests/witness_log_tests.rs +++ b/crates/ruvllm/src/tests/witness_log_tests.rs @@ -7,6 +7,7 @@ use crate::types::ModelSize; use crate::witness_log::{ AsyncWriteConfig, LatencyBreakdown, RoutingDecision, WitnessEntry, WitnessLog, }; +use ruvector_core::types::QuantumVector; use std::time::Instant; // ============================================================================ @@ -159,7 +160,7 @@ fn test_routing_decision_serialization() { fn test_witness_entry_new() { let entry = WitnessEntry::new( "session-123".to_string(), - vec![0.1; 768], + QuantumVector::F32(vec![0.1; 768]), RoutingDecision::default(), ); @@ -176,7 +177,7 @@ fn test_witness_entry_new() { fn test_witness_entry_with_quality() { let entry = WitnessEntry::new( "session-456".to_string(), - vec![0.5; 768], + QuantumVector::F32(vec![0.5; 768]), RoutingDecision::default(), ) .with_quality(0.85); @@ -199,7 +200,7 @@ fn test_witness_entry_with_latency() { let entry = WitnessEntry::new( "session-789".to_string(), - vec![0.0; 768], + QuantumVector::F32(vec![0.0; 768]), RoutingDecision::default(), ) .with_latency(latency); @@ -221,7 +222,7 @@ fn test_witness_entry_with_error() { let entry = WitnessEntry::new( "session-error".to_string(), - vec![0.0; 768], + QuantumVector::F32(vec![0.0; 768]), RoutingDecision::default(), ) .with_error(error); @@ -235,7 +236,7 @@ fn test_witness_entry_with_error() { fn test_witness_entry_quality_threshold_edge_cases() { let entry_zero = WitnessEntry::new( "session".to_string(), - vec![0.0; 768], + QuantumVector::F32(vec![0.0; 768]), RoutingDecision::default(), ) .with_quality(0.0); @@ -245,7 +246,7 @@ fn test_witness_entry_quality_threshold_edge_cases() { let entry_one = WitnessEntry::new( "session".to_string(), - vec![0.0; 768], + QuantumVector::F32(vec![0.0; 768]), RoutingDecision::default(), ) .with_quality(1.0); @@ -259,7 +260,7 @@ fn test_witness_entry_timestamp() { let before = chrono::Utc::now(); let entry = 
WitnessEntry::new( "session".to_string(), - vec![0.0; 768], + QuantumVector::F32(vec![0.0; 768]), RoutingDecision::default(), ); let after = chrono::Utc::now(); @@ -270,8 +271,16 @@ fn test_witness_entry_timestamp() { #[test] fn test_witness_entry_unique_ids() { - let entry1 = WitnessEntry::new("s1".to_string(), vec![0.0; 768], RoutingDecision::default()); - let entry2 = WitnessEntry::new("s1".to_string(), vec![0.0; 768], RoutingDecision::default()); + let entry1 = WitnessEntry::new( + "s1".to_string(), + QuantumVector::F32(vec![0.0; 768]), + RoutingDecision::default(), + ); + let entry2 = WitnessEntry::new( + "s1".to_string(), + QuantumVector::F32(vec![0.0; 768]), + RoutingDecision::default(), + ); // Each entry should have unique request_id assert_ne!(entry1.request_id, entry2.request_id); @@ -320,7 +329,7 @@ fn test_writeback_batching_behavior() { for i in 0..15 { let entry = WitnessEntry::new( format!("session-{}", i), - vec![i as f32 / 100.0; 768], + QuantumVector::F32(vec![i as f32 / 100.0; 768]), RoutingDecision::default(), ); batch.push(entry); @@ -451,7 +460,7 @@ fn test_concurrent_entry_creation() { for _ in 0..100 { let _ = WitnessEntry::new( "session".to_string(), - vec![0.0; 768], + QuantumVector::F32(vec![0.0; 768]), RoutingDecision::default(), ); counter_clone.fetch_add(1, Ordering::Relaxed); @@ -481,7 +490,7 @@ fn test_unique_ids_concurrent() { for _ in 0..100 { let entry = WitnessEntry::new( "session".to_string(), - vec![0.0; 768], + QuantumVector::F32(vec![0.0; 768]), RoutingDecision::default(), ); ids_clone.lock().unwrap().insert(entry.request_id); @@ -507,7 +516,7 @@ fn test_witness_entry_error_chain() { let entry = WitnessEntry::new( "session".to_string(), - vec![0.0; 768], + QuantumVector::F32(vec![0.0; 768]), RoutingDecision::default(), ) .with_quality(0.5) @@ -541,7 +550,7 @@ fn test_witness_entry_error_chain() { fn test_witness_entry_tags() { let mut entry = WitnessEntry::new( "session".to_string(), - vec![0.0; 768], + 
QuantumVector::F32(vec![0.0; 768]), RoutingDecision::default(), ); @@ -559,7 +568,7 @@ fn test_witness_entry_filter_by_tag() { .map(|i| { let mut entry = WitnessEntry::new( format!("session-{}", i), - vec![0.0; 768], + QuantumVector::F32(vec![0.0; 768]), RoutingDecision::default(), ); if i % 2 == 0 { @@ -591,7 +600,7 @@ fn test_entry_creation_performance() { for _ in 0..iterations { let _ = WitnessEntry::new( "session".to_string(), - vec![0.0; 768], + QuantumVector::F32(vec![0.0; 768]), RoutingDecision::default(), ); } @@ -640,7 +649,7 @@ fn test_latency_breakdown_performance() { fn test_empty_embedding() { let entry = WitnessEntry::new( "session".to_string(), - vec![], // Empty embedding + QuantumVector::F32(vec![]), // Empty embedding RoutingDecision::default(), ); @@ -649,11 +658,11 @@ fn test_empty_embedding() { #[test] fn test_large_embedding() { - let large_embedding = vec![0.1; 4096]; // 4K dimension embedding + let large_embedding = QuantumVector::F32(vec![0.1; 4096]); // 4K dimension embedding let entry = WitnessEntry::new( "session".to_string(), - large_embedding.clone(), + large_embedding, RoutingDecision::default(), ); @@ -662,7 +671,11 @@ fn test_large_embedding() { #[test] fn test_empty_session_id() { - let entry = WitnessEntry::new("".to_string(), vec![0.0; 768], RoutingDecision::default()); + let entry = WitnessEntry::new( + "".to_string(), + QuantumVector::F32(vec![0.0; 768]), + RoutingDecision::default(), + ); assert!(entry.session_id.is_empty()); } @@ -671,7 +684,11 @@ fn test_empty_session_id() { fn test_long_session_id() { let long_id = "x".repeat(1000); - let entry = WitnessEntry::new(long_id.clone(), vec![0.0; 768], RoutingDecision::default()); + let entry = WitnessEntry::new( + long_id.clone(), + QuantumVector::F32(vec![0.0; 768]), + RoutingDecision::default(), + ); assert_eq!(entry.session_id.len(), 1000); } diff --git a/crates/ruvllm/src/utils.rs b/crates/ruvllm/src/utils.rs new file mode 100644 index 000000000..907526e92 --- /dev/null +++ 
b/crates/ruvllm/src/utils.rs @@ -0,0 +1,40 @@ +//! Common utilities for RuvLLM + +/// Compute cosine similarity between two vectors. +pub fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 { + if a.len() != b.len() || a.is_empty() { + return 0.0; + } + + let dot: f32 = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum(); + let norm_a: f32 = a.iter().map(|x| x * x).sum::().sqrt(); + let norm_b: f32 = b.iter().map(|x| x * x).sum::().sqrt(); + + if norm_a < 1e-8 || norm_b < 1e-8 { + return 0.0; + } + + dot / (norm_a * norm_b) +} + +/// Normalize a vector to unit length (L2 norm). +pub fn l2_normalize(v: &mut [f32]) { + let norm: f32 = v.iter().map(|x| x * x).sum::().sqrt(); + if norm > 1e-8 { + for x in v.iter_mut() { + *x /= norm; + } + } +} + +/// Compute standard deviation of a slice of values. +pub fn compute_std_dev(values: &[f32], mean: f32) -> f32 { + if values.len() < 2 { + return 0.0; + } + + let variance: f32 = + values.iter().map(|v| (v - mean).powi(2)).sum::() / (values.len() - 1) as f32; + + variance.sqrt() +} diff --git a/crates/ruvllm/src/witness_log.rs b/crates/ruvllm/src/witness_log.rs index c867bbf20..1587bf827 100644 --- a/crates/ruvllm/src/witness_log.rs +++ b/crates/ruvllm/src/witness_log.rs @@ -144,8 +144,8 @@ pub struct WitnessEntry { pub request_id: Uuid, /// Associated session ID pub session_id: String, - /// Query embedding for semantic search (768-D) - pub query_embedding: Vec, + /// Query embedding for semantic search (Quantum) + pub query_embedding: ruvector_core::types::QuantumVector, /// Routing decision made pub routing_decision: RoutingDecision, /// Model used for generation @@ -156,8 +156,8 @@ pub struct WitnessEntry { pub latency: LatencyBreakdown, /// Context documents retrieved pub context_doc_ids: Vec, - /// Response embedding for clustering - pub response_embedding: Vec, + /// Response embedding for clustering (Quantum) + pub response_embedding: ruvector_core::types::QuantumVector, /// Timestamp pub timestamp: DateTime, /// Error 
details if failed @@ -172,7 +172,7 @@ impl WitnessEntry { /// Create a new witness entry pub fn new( session_id: String, - query_embedding: Vec, + query_embedding: ruvector_core::types::QuantumVector, routing_decision: RoutingDecision, ) -> Self { Self { @@ -184,7 +184,7 @@ impl WitnessEntry { quality_score: 0.0, latency: LatencyBreakdown::default(), context_doc_ids: Vec::new(), - response_embedding: Vec::new(), + response_embedding: ruvector_core::types::QuantumVector::F32(Vec::new()), timestamp: Utc::now(), error: None, quality_metrics: None, @@ -501,7 +501,7 @@ impl WitnessLog { let vector_entry = VectorEntry { id: Some(entry.request_id.to_string()), - vector: entry.query_embedding, + vector: entry.query_embedding.clone(), metadata: Some(metadata), }; @@ -525,9 +525,9 @@ impl WitnessLog { } /// Search witness logs by semantic similarity - pub fn search(&self, query_embedding: &[f32], limit: usize) -> Result> { + pub fn search(&self, query_embedding: &ruvector_core::types::QuantumVector, limit: usize) -> Result> { let query = SearchQuery { - vector: query_embedding.to_vec(), + vector: query_embedding.clone(), k: limit, filter: None, ef_search: None, @@ -539,9 +539,10 @@ impl WitnessLog { .map_err(|e| RuvLLMError::Storage(e.to_string()))?; let mut entries = Vec::with_capacity(results.len()); + let query_reconstructed = query_embedding.reconstruct(); for result in results { if let Some(metadata) = &result.metadata { - if let Some(entry) = self.entry_from_metadata(&result.id, query_embedding, metadata) + if let Some(entry) = self.entry_from_metadata(&result.id, &query_reconstructed, metadata) { entries.push(entry); } @@ -648,13 +649,13 @@ impl WitnessLog { Some(WitnessEntry { request_id, session_id, - query_embedding: embedding.to_vec(), + query_embedding: ruvector_core::types::QuantumVector::F32(embedding.to_vec()), routing_decision, model_used, quality_score, latency, context_doc_ids: Vec::new(), - response_embedding: Vec::new(), + response_embedding: 
ruvector_core::types::QuantumVector::F32(Vec::new()), timestamp, error, quality_metrics, @@ -849,6 +850,7 @@ impl WitnessLog { #[cfg(test)] mod tests { use super::*; + use ruvector_core::types::QuantumVector; #[test] fn test_latency_breakdown() { @@ -872,7 +874,7 @@ mod tests { fn test_witness_entry() { let entry = WitnessEntry::new( "session-1".to_string(), - vec![0.1; 768], + QuantumVector::F32(vec![0.1; 768]), RoutingDecision::default(), ); @@ -919,7 +921,7 @@ mod tests { for i in 0..4 { let entry = WitnessEntry::new( format!("session-{}", i), - vec![0.1; 768], + QuantumVector::F32(vec![0.1; 768]), RoutingDecision::default(), ); assert!(queue.push(entry)); @@ -932,7 +934,7 @@ mod tests { // Add one more to trigger batch size let entry = WitnessEntry::new( "session-4".to_string(), - vec![0.1; 768], + QuantumVector::F32(vec![0.1; 768]), RoutingDecision::default(), ); assert!(queue.push(entry)); @@ -961,7 +963,7 @@ mod tests { for i in 0..10 { let entry = WitnessEntry::new( format!("session-{}", i), - vec![0.1; 768], + QuantumVector::F32(vec![0.1; 768]), RoutingDecision::default(), ); assert!(queue.push(entry), "Entry {} should be accepted", i); @@ -970,7 +972,7 @@ mod tests { // Next entry should be dropped let entry = WitnessEntry::new( "session-overflow".to_string(), - vec![0.1; 768], + QuantumVector::F32(vec![0.1; 768]), RoutingDecision::default(), ); assert!( @@ -982,7 +984,7 @@ mod tests { // Another dropped entry let entry2 = WitnessEntry::new( "session-overflow-2".to_string(), - vec![0.1; 768], + QuantumVector::F32(vec![0.1; 768]), RoutingDecision::default(), ); assert!(!queue.push(entry2)); @@ -1007,7 +1009,7 @@ mod tests { for i in 0..3 { let entry = WitnessEntry::new( format!("session-{}", i), - vec![0.1; 64], + QuantumVector::F32(vec![0.1; 64]), RoutingDecision::default(), ); log.record(entry).unwrap(); @@ -1052,7 +1054,7 @@ mod tests { for i in 0..10 { let entry = WitnessEntry::new( format!("async-session-{}", i), - vec![0.1; 64], + 
QuantumVector::F32(vec![0.1; 64]), RoutingDecision::default(), ); log.record_async(entry).await.unwrap(); @@ -1086,7 +1088,7 @@ mod tests { .map(|i| { WitnessEntry::new( format!("batch-session-{}", i), - vec![0.1; 64], + QuantumVector::F32(vec![0.1; 64]), RoutingDecision::default(), ) }) @@ -1113,7 +1115,7 @@ mod tests { for i in 0..5 { let entry = WitnessEntry::new( format!("flush-session-{}", i), - vec![0.1; 64], + QuantumVector::F32(vec![0.1; 64]), RoutingDecision::default(), ); log.record(entry).unwrap(); diff --git a/crates/ruvllm/tests/check_sizes.rs b/crates/ruvllm/tests/check_sizes.rs new file mode 100644 index 000000000..d684f54d8 --- /dev/null +++ b/crates/ruvllm/tests/check_sizes.rs @@ -0,0 +1,20 @@ +use ruvector_sona::engine::SonaEngine; +use ruvector_sona::ewc::EwcPlusPlus; +use ruvector_sona::reasoning_bank::ReasoningBank; +use ruvllm::sona::{SonaConfig, SonaIntegration}; +use std::mem::size_of; + +#[test] +fn test_print_sizes() { + println!("Size of SonaConfig: {} bytes", size_of::()); + println!("Size of SonaEngine: {} bytes", size_of::()); + println!("Size of EwcPlusPlus: {} bytes", size_of::()); + println!( + "Size of ReasoningBank: {} bytes", + size_of::() + ); + println!( + "Size of SonaIntegration: {} bytes", + size_of::() + ); +} diff --git a/crates/ruvllm/tests/e2e_integration.rs b/crates/ruvllm/tests/e2e_integration.rs index 1eae55d3b..37d4bc459 100644 --- a/crates/ruvllm/tests/e2e_integration.rs +++ b/crates/ruvllm/tests/e2e_integration.rs @@ -25,7 +25,7 @@ use ruvllm::{ paged_attention::{PagedAttention, PagedAttentionConfig}, policy_store::{PolicyEntry, PolicySource, PolicyStore, PolicyType, QuantizationPolicy}, session::{SessionConfig, SessionManager}, - sona::{LearningLoop, SonaConfig, SonaIntegration, Trajectory}, + sona::{LearningLoop, SonaConfig, SonaIntegration, SonaTrajectory}, types::ModelSize, witness_log::{LatencyBreakdown, RoutingDecision, WitnessEntry, WitnessLog}, RuvLLMConfig, RuvLLMEngine, @@ -330,7 +330,7 @@ fn 
test_policy_store() { let policy = PolicyEntry { id: Uuid::new_v4(), policy_type: PolicyType::Quantization, - embedding: vec![0.1; 64], + embedding: ruvector_core::types::QuantumVector::F32(vec![0.1; 64]), parameters: serde_json::json!({ "precision": "q4_k", "quality_threshold": 0.9, @@ -346,7 +346,7 @@ fn test_policy_store() { store.store(policy).unwrap(); // Search - let query = vec![0.1; 64]; + let query = ruvector_core::types::QuantumVector::F32(vec![0.1; 64]); let results = store.search(&query, 5).unwrap(); assert!(!results.is_empty()); @@ -372,7 +372,7 @@ fn test_witness_log() { let entry = WitnessEntry::new( format!("session-{}", i % 2), - vec![i as f32 * 0.1; 64], + ruvector_core::types::QuantumVector::F32(vec![i as f32 * 0.1; 64]), routing_decision, ) .with_quality(0.85) @@ -392,7 +392,7 @@ fn test_witness_log() { log.flush().unwrap(); // Search - let query = vec![0.2; 64]; + let query = ruvector_core::types::QuantumVector::F32(vec![0.2; 64]); let results = log.search(&query, 3).unwrap(); // Results may be empty if flush didn't complete vector indexing @@ -675,11 +675,11 @@ fn test_sona_integration_basic() { let sona = SonaIntegration::new(config); // Record a trajectory - let trajectory = Trajectory { + let trajectory = SonaTrajectory { request_id: "req-1".to_string(), session_id: "test-session".to_string(), - query_embedding: vec![0.1; 256], - response_embedding: vec![0.2; 256], + query_embedding: ruvector_core::types::QuantumVector::F32(vec![0.1; 256]), + response_embedding: ruvector_core::types::QuantumVector::F32(vec![0.2; 256]), quality_score: 0.8, routing_features: vec![0.7, 0.9, 0.5, 0.5], model_index: 1, diff --git a/crates/ruvllm/tests/sona_integration.rs b/crates/ruvllm/tests/sona_integration.rs index 5bcdbe628..6bd2e9b8b 100644 --- a/crates/ruvllm/tests/sona_integration.rs +++ b/crates/ruvllm/tests/sona_integration.rs @@ -19,7 +19,7 @@ use ruvllm::{ error::Result, sona::{ - LearningLoop, RoutingRecommendation, SonaConfig, SonaIntegration, 
SonaStats, Trajectory, + LearningLoop, RoutingRecommendation, SonaConfig, SonaIntegration, SonaStats, SonaTrajectory, }, }; use std::time::Duration; @@ -42,12 +42,12 @@ fn create_test_sona_config() -> SonaConfig { } /// Create a test trajectory -fn create_test_trajectory(request_id: &str, quality: f32) -> Trajectory { - Trajectory { +fn create_test_trajectory(request_id: &str, quality: f32) -> SonaTrajectory { + SonaTrajectory { request_id: request_id.to_string(), session_id: "test-session".to_string(), - query_embedding: vec![0.1; 128], - response_embedding: vec![0.2; 128], + query_embedding: ruvector_core::types::QuantumVector::F32(vec![0.1; 128]), + response_embedding: ruvector_core::types::QuantumVector::F32(vec![0.2; 128]), quality_score: quality, routing_features: vec![0.7, 0.9, 0.5, 0.5], model_index: 1, @@ -163,7 +163,7 @@ fn test_sona_routing_recommendation_no_patterns() { let config = create_test_sona_config(); let sona = SonaIntegration::new(config); - let query = vec![0.1; 128]; + let query = ruvector_core::types::QuantumVector::F32(vec![0.1; 128]); let rec = sona.get_routing_recommendation(&query); // With no patterns, should return defaults @@ -185,7 +185,7 @@ fn test_sona_search_patterns_empty() { let config = create_test_sona_config(); let sona = SonaIntegration::new(config); - let query = vec![0.1; 128]; + let query = ruvector_core::types::QuantumVector::F32(vec![0.1; 128]); let patterns = sona.search_patterns(&query, 5); assert!(patterns.is_empty()); @@ -365,11 +365,11 @@ fn test_sona_large_embedding() { }; let sona = SonaIntegration::new(config); - let trajectory = Trajectory { + let trajectory = SonaTrajectory { request_id: "large-001".to_string(), session_id: "test".to_string(), - query_embedding: vec![0.1; 768], - response_embedding: vec![0.2; 768], + query_embedding: ruvector_core::types::QuantumVector::F32(vec![0.1; 768]), + response_embedding: ruvector_core::types::QuantumVector::F32(vec![0.2; 768]), quality_score: 0.9, routing_features: 
vec![0.5; 4], model_index: 0, @@ -392,11 +392,11 @@ fn test_sona_model_index_mapping() { // Test different model indices for model_idx in 0..4 { - let trajectory = Trajectory { + let trajectory = SonaTrajectory { request_id: format!("model-{}", model_idx), session_id: "test".to_string(), - query_embedding: vec![0.1; 128], - response_embedding: vec![0.2; 128], + query_embedding: ruvector_core::types::QuantumVector::F32(vec![0.1; 128]), + response_embedding: ruvector_core::types::QuantumVector::F32(vec![0.2; 128]), quality_score: 0.8, routing_features: vec![0.5; 4], model_index: model_idx, @@ -428,11 +428,11 @@ fn test_sona_concurrent_safe() { let sona_clone = Arc::clone(&sona); let handle = thread::spawn(move || { for i in 0..10 { - let trajectory = Trajectory { + let trajectory = SonaTrajectory { request_id: format!("thread-{}-req-{}", thread_id, i), session_id: format!("thread-{}", thread_id), - query_embedding: vec![0.1; 128], - response_embedding: vec![0.2; 128], + query_embedding: ruvector_core::types::QuantumVector::F32(vec![0.1; 128]), + response_embedding: ruvector_core::types::QuantumVector::F32(vec![0.2; 128]), quality_score: 0.8, routing_features: vec![0.5; 4], model_index: 0, @@ -475,11 +475,11 @@ fn test_sona_stats_struct() { #[test] fn test_sona_routing_features() { - let trajectory = Trajectory { + let trajectory = SonaTrajectory { request_id: "routing-test".to_string(), session_id: "test".to_string(), - query_embedding: vec![0.1; 128], - response_embedding: vec![0.2; 128], + query_embedding: ruvector_core::types::QuantumVector::F32(vec![0.1; 128]), + response_embedding: ruvector_core::types::QuantumVector::F32(vec![0.2; 128]), quality_score: 0.9, routing_features: vec![0.7, 0.9, 0.8, 0.5], // temperature, top_p, confidence, context_ratio model_index: 1, @@ -530,11 +530,11 @@ fn test_sona_negative_quality_handling() { let sona = SonaIntegration::new(config); // Negative quality should still be recorded but not trigger learning - let trajectory = 
Trajectory { + let trajectory = SonaTrajectory { request_id: "negative".to_string(), session_id: "test".to_string(), - query_embedding: vec![0.1; 128], - response_embedding: vec![0.2; 128], + query_embedding: ruvector_core::types::QuantumVector::F32(vec![0.1; 128]), + response_embedding: ruvector_core::types::QuantumVector::F32(vec![0.2; 128]), quality_score: -0.5, // Negative routing_features: vec![0.5; 4], model_index: 0, diff --git a/crates/rvf/README.md b/crates/rvf/README.md index e04cdd56b..b06392d83 100644 --- a/crates/rvf/README.md +++ b/crates/rvf/README.md @@ -515,7 +515,7 @@ RVF doesn't make you wait for the full index: ## πŸ“Š Comparison -| Feature | RVF | Annoy | FAISS | Qdrant | Milvus | +| Feature | RVF | Annoy | FAISS | LegacyDB | Milvus | |---------|-----|-------|-------|--------|--------| | Single-file format | Yes | Yes | No | No | No | | Crash-safe (no WAL) | Yes | No | No | Needs WAL | Needs WAL | @@ -550,7 +550,7 @@ RVF doesn't make you wait for the full index: ### vs Traditional Vector Databases -| | RVF | Pinecone / Milvus / Qdrant | +| | RVF | Pinecone / Milvus / LegacyDB | |---|---|---| | **Deployment** | Single file, zero dependencies | Server process + storage | | **Branching** | Native COW, 2.6 ms for 10K vectors | Copy entire collection | diff --git a/crates/rvf/rvf-launch/src/qemu.rs b/crates/rvf/rvf-launch/src/qemu.rs index 141478179..6b0668bda 100644 --- a/crates/rvf/rvf-launch/src/qemu.rs +++ b/crates/rvf/rvf-launch/src/qemu.rs @@ -20,15 +20,22 @@ pub struct QemuCommand { /// Check if KVM is available on this host. 
pub fn kvm_available() -> bool { - Path::new("/dev/kvm").exists() - && std::fs::metadata("/dev/kvm") - .map(|m| { - use std::os::unix::fs::PermissionsExt; - let mode = m.permissions().mode(); - // Check if the file is readable+writable by someone - mode & 0o666 != 0 - }) - .unwrap_or(false) + #[cfg(unix)] + { + Path::new("/dev/kvm").exists() + && std::fs::metadata("/dev/kvm") + .map(|m| { + use std::os::unix::fs::PermissionsExt; + let mode = m.permissions().mode(); + // Check if the file is readable+writable by someone + mode & 0o666 != 0 + }) + .unwrap_or(false) + } + #[cfg(not(unix))] + { + false + } } /// Locate the QEMU binary for the given architecture. diff --git a/crates/rvf/rvf-launch/src/qmp.rs b/crates/rvf/rvf-launch/src/qmp.rs index 96967843c..513aa445c 100644 --- a/crates/rvf/rvf-launch/src/qmp.rs +++ b/crates/rvf/rvf-launch/src/qmp.rs @@ -5,6 +5,7 @@ //! graceful or forced VM shutdown. use std::io::{BufRead, BufReader, Write}; +#[cfg(unix)] use std::os::unix::net::UnixStream; use std::path::Path; use std::time::Duration; @@ -12,10 +13,15 @@ use std::time::Duration; use crate::error::LaunchError; /// A minimal QMP client connected via a Unix socket. +#[cfg(unix)] pub struct QmpClient { stream: UnixStream, } +#[cfg(not(unix))] +pub struct QmpClient {} + +#[cfg(unix)] impl QmpClient { /// Connect to the QMP Unix socket and perform the capability /// negotiation handshake. 
@@ -89,6 +95,27 @@ impl QmpClient { } } +#[cfg(not(unix))] +impl QmpClient { + pub fn connect(_socket_path: &Path, _timeout: Duration) -> Result<Self, LaunchError> { + Err(LaunchError::Qmp( + "QMP over Unix sockets is not supported on Windows".to_string(), + )) + } + + pub fn system_powerdown(&mut self) -> Result<(), LaunchError> { + Ok(()) + } + + pub fn quit(&mut self) -> Result<(), LaunchError> { + Ok(()) + } + + pub fn query_status(&mut self) -> Result<String, LaunchError> { + Ok("unknown".to_string()) + } +} + #[cfg(test)] mod tests { // QMP tests require a running QEMU instance, so we only test diff --git a/crates/rvlite/docs/INTEGRATION_SUCCESS.md b/crates/rvlite/docs/INTEGRATION_SUCCESS.md index 66b1d14c2..c773bde42 100644 --- a/crates/rvlite/docs/INTEGRATION_SUCCESS.md +++ b/crates/rvlite/docs/INTEGRATION_SUCCESS.md @@ -53,7 +53,7 @@ Features: | SQLite WASM | ~1 MB | SQL, Relational | | PGlite | ~3 MB | PostgreSQL, Full SQL | | Chroma WASM | N/A | Not available | -| Qdrant WASM | N/A | Not available | +| LegacyDB WASM | N/A | Not available | **RvLite is 10-30x smaller than comparable solutions!** diff --git a/crates/rvlite/src/lib.rs b/crates/rvlite/src/lib.rs index d795ea775..a97c7d1a7 100644 --- a/crates/rvlite/src/lib.rs +++ b/crates/rvlite/src/lib.rs @@ -337,7 +337,7 @@ impl RvLite { let entry = VectorEntry { id: None, - vector, + vector: ruvector_core::types::QuantumVector::F32(vector), metadata: metadata_map, }; @@ -367,7 +367,7 @@ impl RvLite { let entry = VectorEntry { id: Some(id), - vector, + vector: ruvector_core::types::QuantumVector::F32(vector), metadata: metadata_map, }; @@ -379,7 +379,7 @@ impl RvLite { /// Search for similar vectors pub fn search(&self, query_vector: Vec<f32>, k: usize) -> Result { let query = SearchQuery { - vector: query_vector, + vector: ruvector_core::types::QuantumVector::F32(query_vector), k, filter: None, ef_search: None, @@ -412,7 +412,7 @@ impl RvLite { })?; let query = SearchQuery { - vector: query_vector, + vector: 
ruvector_core::types::QuantumVector::F32(query_vector), k, filter: Some(filter_map), ef_search: None, @@ -621,7 +621,7 @@ impl RvLite { .flatten() .map(|entry| storage::state::VectorEntry { id: entry.id.unwrap_or_default(), - vector: entry.vector, + vector: entry.vector.to_f32_vec(), metadata: entry.metadata, }) }) @@ -659,7 +659,7 @@ impl RvLite { for entry in &state.vectors.entries { let vector_entry = VectorEntry { id: Some(entry.id.clone()), - vector: entry.vector.clone(), + vector: ruvector_core::types::QuantumVector::F32(entry.vector.clone()), metadata: entry.metadata.clone(), }; self.db diff --git a/crates/rvlite/src/sql/executor.rs b/crates/rvlite/src/sql/executor.rs index 0bda1fa2c..10f63ff95 100644 --- a/crates/rvlite/src/sql/executor.rs +++ b/crates/rvlite/src/sql/executor.rs @@ -2,7 +2,7 @@ use super::ast::*; use crate::{ErrorKind, RvLiteError}; use parking_lot::RwLock; -use ruvector_core::{SearchQuery, VectorDB, VectorEntry}; +use ruvector_core::{types::QuantumVector, SearchQuery, VectorDB, VectorEntry}; use serde::{Deserialize, Serialize}; use std::collections::HashMap; @@ -228,7 +228,7 @@ impl SqlEngine { // Insert into vector database let entry = VectorEntry { id, - vector, + vector: QuantumVector::F32(vector), metadata: Some(metadata), }; @@ -287,7 +287,7 @@ impl SqlEngine { }; let query = SearchQuery { - vector, + vector: QuantumVector::F32(vector), k, filter, ef_search: None, @@ -307,7 +307,7 @@ impl SqlEngine { // Add vector if present if let Some(vec_col) = &schema.vector_column { if let Some(vector) = result.vector { - row.insert(vec_col.clone(), Value::Vector(vector)); + row.insert(vec_col.clone(), Value::Vector(vector.to_f32_vec())); } } @@ -348,7 +348,7 @@ impl SqlEngine { }; let query = SearchQuery { - vector: query_vector, + vector: QuantumVector::F32(query_vector), k, filter, ef_search: None, @@ -368,7 +368,7 @@ impl SqlEngine { // Add vector if present if let Some(vec_col) = &schema.vector_column { if let Some(vector) = result.vector { 
- row.insert(vec_col.clone(), Value::Vector(vector)); + row.insert(vec_col.clone(), Value::Vector(vector.to_f32_vec())); } } diff --git a/docs/adr/ADR-001-ruvector-core-architecture.md b/docs/adr/ADR-001-ruvector-core-architecture.md index b489a0ce5..98db81052 100644 --- a/docs/adr/ADR-001-ruvector-core-architecture.md +++ b/docs/adr/ADR-001-ruvector-core-architecture.md @@ -35,7 +35,7 @@ Existing solutions fall into several categories: | Category | Examples | Limitations | |----------|----------|-------------| | **Cloud-only** | Pinecone | No edge deployment, vendor lock-in | -| **Heavy native** | Milvus, Qdrant | Complex deployment, high memory | +| **Heavy native** | Milvus, LegacyDB | Complex deployment, high memory | | **Python-first** | ChromaDB, FAISS | Performance overhead, no WASM | | **Learning-capable** | None | No existing solutions learn from usage | @@ -457,7 +457,7 @@ Cryptographically-linked audit trail: - Complex cross-compilation - Build system complexity (CMake) -### Alternative 3: Qdrant/Milvus Integration +### Alternative 3: LegacyDB/Milvus Integration **Rejected because:** - External service dependency diff --git a/docs/architecture/TECHNICAL_PLAN.md b/docs/architecture/TECHNICAL_PLAN.md index 39423a61b..a16483eb2 100644 --- a/docs/architecture/TECHNICAL_PLAN.md +++ b/docs/architecture/TECHNICAL_PLAN.md @@ -31,7 +31,7 @@ Current solutions force you to choose: ## Market Comparison Table -| Feature | Ruvector | Pinecone | Qdrant | ChromaDB | pgvector | Your AgenticDB | +| Feature | Ruvector | Pinecone | LegacyDB | ChromaDB | pgvector | Your AgenticDB | |---------|----------|----------|--------|----------|----------|----------------| | **Speed (QPS)** | 50K+ | 100K+ | 30K+ | 500 | 1K | ~100 | | **Latency (p50)** | <0.5ms | ~2ms | ~1ms | ~50ms | ~10ms | ~5ms | @@ -47,15 +47,15 @@ Current solutions force you to choose: ## Closest Market Equivalents -### 1. **Qdrant** (Rust vector DB) +### 1. 
**LegacyDB** (Rust vector DB) **What it is:** Production Rust vector database, cloud + self-hosted **Similarity:** Same tech stack (Rust + HNSW), similar performance goals **Key differences:** -- Qdrant = server-only, ruvector = anywhere (server, browser, mobile) -- Qdrant = generic API, ruvector = AgenticDB-compatible cognitive features -- Qdrant = separate Node.js client, ruvector = native NAPI-RS bindings +- LegacyDB = server-only, ruvector = anywhere (server, browser, mobile) +- LegacyDB = generic API, ruvector = AgenticDB-compatible cognitive features +- LegacyDB = separate Node.js client, ruvector = native NAPI-RS bindings -**Market position:** Qdrant is your closest competitor on performance, but lacks browser/edge deployment. +**Market position:** LegacyDB is your closest competitor on performance, but lacks browser/edge deployment. ### 2. **LanceDB** (Embedded vector DB) **What it is:** Embedded database in Rust/Python, serverless-friendly @@ -91,7 +91,7 @@ Current solutions force you to choose: **The "triple unlock":** -1. **Speed of compiled languages** (like Qdrant/Milvus) +1. **Speed of compiled languages** (like LegacyDB/Milvus) 2. **Cognitive features of AgenticDB** (reflexion, skills, causal memory) 3. **Browser deployment capability** (like RxDB but 100x faster) @@ -126,7 +126,7 @@ Current solutions force you to choose: ## Technical Differentiators That Matter ### 1. **Multi-Platform from Single Codebase** -**Problem:** Weaviate/Qdrant = separate clients per platform +**Problem:** Weaviate/LegacyDB = separate clients per platform **Ruvector:** Same Rust code compiles to: - `npm install ruvector` (Node.js via NAPI-RS) - `