From fb0573d7d676cc962b68b0ea90b010e706f02944 Mon Sep 17 00:00:00 2001 From: Adesh Gupta Date: Wed, 15 Oct 2025 00:18:36 +0530 Subject: [PATCH 01/25] Resolve merge with refactor --- Cargo.lock | 25 ++ Cargo.toml | 1 + crates/defs/src/error.rs | 1 + crates/index/Cargo.toml | 1 + crates/index/src/kd_tree.rs | 516 ++++++++++++++++++++++-------------- crates/index/src/lib.rs | 3 +- 6 files changed, 343 insertions(+), 204 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1b3e627..4b28e53 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -21,6 +21,12 @@ dependencies = [ "tempfile", ] +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + [[package]] name = "bincode" version = "1.3.3" @@ -181,6 +187,7 @@ name = "index" version = "0.1.0" dependencies = [ "defs", + "ordered-float", ] [[package]] @@ -295,12 +302,30 @@ dependencies = [ "minimal-lexical", ] +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + [[package]] name = "once_cell" version = "1.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" +[[package]] +name = "ordered-float" +version = "5.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2c1f9f56e534ac6a9b8a4600bdf0f530fb393b5f393e7b4d03489c3cf0c3f01" +dependencies = [ + "num-traits", +] + [[package]] name = "peeking_take_while" version = "0.1.2" diff --git a/Cargo.toml b/Cargo.toml index 670d962..1ecb74c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,6 +7,7 @@ members = [ "crates/server", ] + # You can define shared dependencies for all crates here [workspace.dependencies] # tokio = { version = "1.37.0", features = ["full"] } diff --git a/crates/defs/src/error.rs b/crates/defs/src/error.rs index 1079c5e..ff15895 100644 --- a/crates/defs/src/error.rs +++ b/crates/defs/src/error.rs @@ -6,4 +6,5 @@ pub enum DbError { DeserializationError, IndexError(String), LockError, + IndexInitError, //TODO: Change this } diff --git a/crates/index/Cargo.toml b/crates/index/Cargo.toml index b54e3c8..c81c18f 100644 --- a/crates/index/Cargo.toml +++ b/crates/index/Cargo.toml @@ -6,5 +6,6 @@ version = "0.1.0" edition = "2021" [dependencies] +ordered-float = "5.0.0" defs = { path = "../defs" } diff --git a/crates/index/src/kd_tree.rs b/crates/index/src/kd_tree.rs index 444f578..92b30b8 100644 --- a/crates/index/src/kd_tree.rs +++ b/crates/index/src/kd_tree.rs @@ -1,252 +1,362 @@ -use std::cmp::Ordering; -use std::cmp::Ordering::Less; - -use serde_derive::{Deserialize, Serialize}; - -#[derive(Serialize, Deserialize)] -pub struct KDTreeInternals { - pub kd_tree_allow_update: bool, - pub current_number_of_kd_tree_nodes: usize, - pub rebuild_threshold: f32, - pub previous_tree_size: usize, - pub rebuild_counter: usize, +use core::{DbError, DenseVector, IndexedVector, PointId, Similarity}; +use std::{ + cmp::Ordering, + collections::{BinaryHeap, HashMap}, + vec, +}; + +use crate::{distance, VectorIndex}; + +pub struct KDTree { + dim: usize, + root: Option>, + // An in memory point map for lookup during delete + point_map: HashMap, } -#[derive(Serialize, Deserialize)] +// the node which will be the part of the KD Tree pub struct KDTreeNode { - pub left: Option>, - pub right: Option>, - pub key: String, - pub vector: Vec, - pub dim: usize, + indexed_vector: IndexedVector, + split_dim: usize, + left: Option>, + right: Option>, + is_deleted: bool, } -impl KDTreeNode { - // Add the logic here to create a new db and insert the tree into the database - fn new(data: (String, Vec), dim: usize) -> KDTreeNode { - KDTreeNode { - left: None, - right: None, - key: data.0, - vector: data.1, - dim, - } +#[derive(Debug, Clone, PartialEq)] +struct Neighbor { + id: PointId, + distance: f32, +} + +impl Eq for Neighbor {} + +// Custom Ord implementation for the max-heap +impl Ord for Neighbor { + fn cmp(&self, other: &Self) -> Ordering { + self.distance + .partial_cmp(&other.distance) + .unwrap_or(Ordering::Equal) } } -pub struct KDTree { - pub _root: Option>, - pub _internals: KDTreeInternals, - pub is_debug_run: bool, - pub dim: usize, +impl PartialOrd for Neighbor { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } } impl KDTree { - // Create an empty tree with default values - pub fn new() -> KDTree { + pub fn mock() { + //here is the mock code + } + + // Build an empty index with no points + pub fn build_empty(dim: usize) -> Self { KDTree { - _root: None, - _internals: KDTreeInternals { - kd_tree_allow_update: true, - current_number_of_kd_tree_nodes: 0, - rebuild_threshold: 2.0f32, - previous_tree_size: 0, - rebuild_counter: 0, - }, - is_debug_run: true, - dim: 0, + dim, + root: None, + point_map: HashMap::new(), } } - // Add a node - // If the dimension of the tree is zero, then it becomes equal to the input data - pub fn add_node(&mut self, data: (String, Vec), depth: usize) { - if self._root.is_none() { - self.dim = data.1.len(); - self._root = Some(Box::new(KDTreeNode::new(data, 0))); - self._internals.current_number_of_kd_tree_nodes += 1; - return; - } + // Builds the vector index from provided vectors, there should atleast be single vector for dim calculation + pub fn build(mut vectors: Vec) -> Result { + if vectors.is_empty() { + Err(DbError::IndexInitError) + } else { + let dim = vectors[0].vector.len(); - assert_eq!(self.dim, data.1.len()); + let mut point_map = HashMap::with_capacity(vectors.len()); + for iv in vectors.iter() { + point_map.insert(iv.id, iv.vector.clone()); + } + let root_node = Self::build_recursive(&mut vectors, 0, dim); + Ok(KDTree { + dim, + root: Some(root_node), + point_map, + }) + } + } - if !self._internals.kd_tree_allow_update { - println!("KDTree is locked for rebuild"); - return; + // Builds the tree recursively with given vectors and returns the pointer of the root node + pub fn build_recursive( + vectors: &mut [IndexedVector], + depth: usize, + dim: usize, + ) -> Box { + if vectors.is_empty() { + panic!("Cannot build from an empty slice recursively"); } - if self._internals.previous_tree_size != 0 { - let current_ratio: f32 = self._internals.current_number_of_kd_tree_nodes as f32 - / self._internals.previous_tree_size as f32; - if current_ratio > self._internals.rebuild_threshold { - self._internals.previous_tree_size = - self._internals.current_number_of_kd_tree_nodes; - self.rebuild(); - } + let axis = depth % dim; + let mid_idx = vectors.len() / 2; + + vectors.select_nth_unstable_by(mid_idx, |a, b| { + let a_at_axis = a.vector[axis]; + let b_at_axis = b.vector[axis]; + a_at_axis.partial_cmp(&b_at_axis).unwrap_or(Ordering::Equal) + }); + + // Using swap so that we don't need to clone the whole vector + let mut median_vec = IndexedVector { + id: 0, + vector: vec![], + }; // dummy + std::mem::swap(&mut vectors[mid_idx], &mut median_vec); + + let (left_points, right_points_with_median) = vectors.split_at_mut(mid_idx); + let right_points = &mut right_points_with_median[1..]; // Exclude the swapped-out median + + let left = if left_points.is_empty() { + None } else { - self._internals.previous_tree_size = self._internals.current_number_of_kd_tree_nodes; + Some(Self::build_recursive(left_points, depth + 1, dim)) + }; + + let right = if right_points.is_empty() { + None + } else { + Some(Self::build_recursive(right_points, depth + 1, dim)) + }; + + Box::new(KDTreeNode { + indexed_vector: median_vec, + split_dim: axis, + left, + right, + is_deleted: false, + }) + } + + pub fn insert_point(&mut self, new_vector: IndexedVector) { + // use a traverse function to get the final leaf where this belongs + if self.root.is_none() { + self.root = Some(Box::new(KDTreeNode { + indexed_vector: new_vector, + split_dim: 0, + left: None, + right: None, + is_deleted: false, + })); + return; } - self._internals.current_number_of_kd_tree_nodes += 1; - - let mut current_node = self._root.as_deref_mut().unwrap(); - let mut current_depth = depth; - loop { - let current_dimension = current_depth % self.dim; - if data.1[current_dimension] < current_node.vector[current_dimension] { - if current_node.left.is_none() { - current_node.left = Some(Box::new(KDTreeNode::new(data, current_dimension))); - break; - } else { - current_node = current_node.left.as_deref_mut().unwrap(); - current_depth += 1; - } + let mut current_link = &mut self.root; + let mut depth = 0; + let dim = self.dim; + + while let Some(ref mut node_box) = current_link { + let axis = depth % dim; + let current_node = node_box.as_mut(); + + let va = new_vector.vector[axis]; + let vb = current_node.indexed_vector.vector[axis]; + + if va <= vb { + current_link = &mut current_node.left; } else { - if current_node.right.is_none() { - current_node.right = Some(Box::new(KDTreeNode::new(data, current_dimension))); - break; - } else { - current_node = current_node.right.as_deref_mut().unwrap(); - current_depth += 1; - } + current_link = &mut current_node.right; } + depth += 1; } + + // Assign the new node to current link which is &mut Option> + let axis = depth % dim; + *current_link = Some(Box::new(KDTreeNode { + indexed_vector: new_vector, + split_dim: axis, + left: None, + right: None, + is_deleted: false, + })) } - // rebuild tree - fn rebuild(&mut self) { - self._internals.kd_tree_allow_update = false; - self._internals.rebuild_counter += 1; - if self.is_debug_run { - println!( - "Rebuilding tree..., Rebuild counter: {:?}", - self._internals.rebuild_counter + // Deletes the point by first finding the corresponding node using DFS and then deleting + // Returns true if point found and deleted, else false + // First make a lookup of vector from map, then traverse the tree to obtain the point and mark it as deleted + pub fn delete_point(&mut self, point_id: PointId) -> bool { + if let Some(vector_to_delete) = self.point_map.get(&point_id) { + let found_and_deleted = Self::find_and_mark_recursive( + &mut self.root, + vector_to_delete, + point_id, + 0, + self.dim, ); + + if found_and_deleted { + self.point_map.remove(&point_id); + } + + return found_and_deleted; } - let mut points = Vec::into_boxed_slice(self.traversal(0)); - self._root = Some(Box::new(create_tree_helper(points.as_mut(), 0))); - self._internals.kd_tree_allow_update = true; + false } - // traversal - pub fn traversal(&self, k_value: usize) -> Vec<(String, Vec)> { - let mut result: Vec<(String, Vec)> = Vec::new(); - inorder_traversal_helper(self._root.as_deref(), &mut result, k_value); - result - } + // Recursively finds and marks a node as deleted, + fn find_and_mark_recursive( + node_opt: &mut Option>, + target_vector: &DenseVector, + target_id: PointId, + depth: usize, + dim: usize, + ) -> bool { + if let Some(node) = node_opt { + if node.indexed_vector.id == target_id { + node.is_deleted = true; + return true; + } - // delete a node - pub fn delete_node(&mut self, data: String) { - self._internals.kd_tree_allow_update = false; - let mut points = self.traversal(0); - let index = points.iter().position(|x| *x.0 == data).unwrap(); - points.remove(index); - let mut points = Vec::into_boxed_slice(points); - self._root = Some(Box::new(create_tree_helper(points.as_mut(), 0))); - self._internals.kd_tree_allow_update = true; - } + let axis = depth % dim; + let target_val = target_vector[axis]; + let node_val = node.indexed_vector.vector[axis]; - // print data for debug - pub fn print_tree_for_debug(&self) { - let iterated: Vec<(String, Vec)> = self.traversal(0); - for iter in iterated { - println!("{}", iter.0); + if target_val < node_val { + Self::find_and_mark_recursive( + &mut node.left, + target_vector, + target_id, + depth + 1, + dim, + ) + } else if target_val > node_val { + Self::find_and_mark_recursive( + &mut node.right, + target_vector, + target_id, + depth + 1, + dim, + ) + } else { + // Need to check both right and left nodes in this case + let left_found = Self::find_and_mark_recursive( + &mut node.left, + target_vector, + target_id, + depth + 1, + dim, + ); + let right_found = Self::find_and_mark_recursive( + &mut node.right, + target_vector, + target_id, + depth + 1, + dim, + ); + left_found || right_found + } + } else { + false } } - // different methods of knn -} + pub fn search_top_k( + &self, + query_vector: DenseVector, + k: usize, + dist_type: Similarity, + ) -> Vec<(PointId, f32)> { + //Searches for top k closest vectors according to specified metric -// Traversal helper function -fn inorder_traversal_helper( - node: Option<&KDTreeNode>, - result: &mut Vec<(String, Vec)>, - k_value: usize, -) -> Option { - if node.is_none() { - return None; - } - if k_value != 0 && k_value <= result.len() { - return None; - } - let current_node = node.unwrap(); - inorder_traversal_helper(current_node.to_owned().left.as_deref(), result, k_value); - result.push((current_node.key.clone(), current_node.vector.clone())); - inorder_traversal_helper(current_node.to_owned().right.as_deref(), result, k_value); + if self.root.is_none() || k == 0 { + return Vec::new(); + } - Some(true) -} + let mut best_neighbours = BinaryHeap::with_capacity(k); -// Rebuild tree helper functions -fn create_tree_helper(points: &mut [(String, Vec)], dim: usize) -> KDTreeNode { - let points_len = points.len(); - if points_len == 1 { - return KDTreeNode { - key: points[0].0.clone(), - vector: points[0].1.clone(), - left: None, - right: None, - dim, - }; + self.search_recursive( + &self.root, + &query_vector, + k, + &mut best_neighbours, + 0, + dist_type, + ); + + best_neighbours + .into_sorted_vec() + .iter() + .map(|neighbor| (neighbor.id, neighbor.distance)) + .collect() } - // Split around the median - let pivot = quickselect_by(points, points_len / 2, &|a, b| { - a.1[dim].partial_cmp(&b.1[dim]).unwrap() - }); - - let left = Some(Box::new(create_tree_helper( - &mut points[0..points_len / 2], - (dim + 1) % pivot.1.len(), - ))); - let right = if points.len() >= 3 { - Some(Box::new(create_tree_helper( - &mut points[points_len / 2 + 1..points_len], - (dim + 1) % pivot.1.len(), - ))) - } else { - None - }; - - KDTreeNode { - key: pivot.0, - vector: pivot.1, - left, - right, - dim, + fn search_recursive( + &self, + node_opt: &Option>, + query_vector: &DenseVector, + k: usize, + heap: &mut BinaryHeap, + depth: usize, + dist_type: Similarity, + ) { + // Base case is that we hit a leaf node don't do anything + if let Some(node) = node_opt { + let axis = depth % self.dim; + + let (near_side, far_side) = if query_vector[axis] <= node.indexed_vector.vector[axis] { + (&node.left, &node.right) + } else { + (&node.right, &node.left) + }; + + // Recurse on near side first + self.search_recursive(&near_side, query_vector, k, heap, depth + 1, dist_type); + + // Process the current node + if !node.is_deleted { + //TODO: Use square distance in distance, why is there overhead of square + let distance = distance(query_vector, &node.indexed_vector.vector, dist_type); + if heap.len() < k { + heap.push(Neighbor { + id: node.indexed_vector.id, + distance, + }); + } else if distance < heap.peek().unwrap().distance { + heap.pop(); + heap.push(Neighbor { + id: node.indexed_vector.id, + distance, + }); + } + } + + // Pruning on the farther side to check if there are better candidates + //TODO: Change this when implementing square distance + let dist_to_plane = match dist_type { + Similarity::Euclidean => query_vector[axis] - node.indexed_vector.vector[axis], + Similarity::Manhattan => 1.0, + _ => unreachable!(), + }; + + if heap.len() < k || dist_to_plane < heap.peek().unwrap().distance { + self.search_recursive(far_side, query_vector, k, heap, depth + 1, dist_type); + } + } } } -fn quickselect_by(arr: &mut [T], position: usize, cmp: &dyn Fn(&T, &T) -> Ordering) -> T -where - T: Clone, -{ - let mut pivot_index = 0; - // Need to wrap in another closure or we get ownership complaints. - // Tried using an unboxed closure to get around this but couldn't get it to work. - pivot_index = partition_by(arr, pivot_index, &|a: &T, b: &T| cmp(a, b)); - let array_len = arr.len(); - match position.cmp(&pivot_index) { - Ordering::Equal => arr[position].clone(), - Ordering::Less => quickselect_by(&mut arr[0..pivot_index], position, cmp), - Ordering::Greater => quickselect_by( - &mut arr[pivot_index + 1..array_len], - position - pivot_index - 1, - cmp, - ), +impl VectorIndex for KDTree { + fn insert(&mut self, vector: IndexedVector) -> Result<(), DbError> { + self.insert_point(vector); + Ok(()) + } + + fn delete(&mut self, point_id: PointId) -> Result { + Ok(self.delete_point(point_id)) } -} -fn partition_by(arr: &mut [T], pivot_index: usize, cmp: &dyn Fn(&T, &T) -> Ordering) -> usize { - let array_len = arr.len(); - arr.swap(pivot_index, array_len - 1); - let mut store_index = 0; - for i in 0..array_len - 1 { - if cmp(&arr[i], &arr[array_len - 1]) == Less { - arr.swap(i, store_index); - store_index += 1; + fn search( + &self, + query_vector: core::DenseVector, + similarity: Similarity, + k: usize, + ) -> Result, DbError> { + if matches!(similarity, Similarity::Cosine | Similarity::Hamming) { + panic!("Cosine and hamming are not suitable similariyt metric when using a KDTree") } + + Ok(vec![]) } - arr.swap(array_len - 1, store_index); - store_index } diff --git a/crates/index/src/lib.rs b/crates/index/src/lib.rs index ef93755..eac20f0 100644 --- a/crates/index/src/lib.rs +++ b/crates/index/src/lib.rs @@ -1,6 +1,7 @@ use defs::{DbError, DenseVector, IndexedVector, PointId, Similarity}; pub mod flat; +pub mod kd_tree; pub trait VectorIndex { fn insert(&mut self, vector: IndexedVector) -> Result<(), DbError>; @@ -19,7 +20,7 @@ pub trait VectorIndex { } /// Distance function to get the distance between two vectors (taken from old version) -pub fn distance(a: DenseVector, b: DenseVector, dist_type: Similarity) -> f32 { +pub fn distance(a: &DenseVector, b: &DenseVector, dist_type: Similarity) -> f32 { assert_eq!(a.len(), b.len()); match dist_type { Similarity::Euclidean => { From 799dc77e74787a09f3ce2a28d57f5e5a4ac050b1 Mon Sep 17 00:00:00 2001 From: Adesh Gupta Date: Wed, 15 Oct 2025 00:28:15 +0530 Subject: [PATCH 02/25] Fix merge residues --- crates/index/src/flat.rs | 2 +- crates/index/src/kd_tree.rs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/crates/index/src/flat.rs b/crates/index/src/flat.rs index 53dc361..682a4d3 100644 --- a/crates/index/src/flat.rs +++ b/crates/index/src/flat.rs @@ -47,7 +47,7 @@ impl VectorIndex for FlatIndex { .index .iter() .map(|point| DistanceOrderedVector { - distance: distance(point.vector.clone(), query_vector.clone(), similarity), + distance: distance(&point.vector, &query_vector, similarity), query_vector: &query_vector, point_id: Some(point.id), }) diff --git a/crates/index/src/kd_tree.rs b/crates/index/src/kd_tree.rs index 92b30b8..310ee4f 100644 --- a/crates/index/src/kd_tree.rs +++ b/crates/index/src/kd_tree.rs @@ -1,4 +1,4 @@ -use core::{DbError, DenseVector, IndexedVector, PointId, Similarity}; +use defs::{DbError, DenseVector, IndexedVector, PointId, Similarity}; use std::{ cmp::Ordering, collections::{BinaryHeap, HashMap}, @@ -349,7 +349,7 @@ impl VectorIndex for KDTree { fn search( &self, - query_vector: core::DenseVector, + query_vector: DenseVector, similarity: Similarity, k: usize, ) -> Result, DbError> { From 84b3069dd0ba9a8b6f62493b50f027077aac9ec9 Mon Sep 17 00:00:00 2001 From: Adesh Gupta Date: Wed, 17 Dec 2025 12:14:03 +0000 Subject: [PATCH 03/25] Update Cargo.lock --- Cargo.lock | 522 +++++++++++++++++------------------- crates/index/src/kd_tree.rs | 10 +- 2 files changed, 255 insertions(+), 277 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c82b36b..f6a1628 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -19,9 +19,9 @@ checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" [[package]] name = "aho-corasick" -version = "1.1.3" +version = "1.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" dependencies = [ "memchr", ] @@ -82,22 +82,15 @@ dependencies = [ "miniz_oxide", "object", "rustc-demangle", - "windows-link 0.2.1", + "windows-link", ] -[[package]] -name = "autocfg" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" - [[package]] name = "base64" version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" - [[package]] name = "bincode" version = "1.3.3" @@ -134,7 +127,7 @@ version = "0.72.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.10.0", "cexpr", "clang-sys", "itertools 0.13.0", @@ -154,21 +147,21 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.9.4" +version = "2.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2261d10cca569e4643e526d8dc2e62e433cc8aba21ab764233731f8d369bf394" +checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" [[package]] name = "bumpalo" -version = "3.19.0" +version = "3.19.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" +checksum = "5dd9dc738b7a8311c7ade152424974d8115f2cdad61e8dab8dac9f2362298510" [[package]] name = "bytes" -version = "1.10.1" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" +checksum = "b35204fbdc0b3f4446b89fc1ac2cf84a8a68971995d0bf2e925ec7cd960f9cb3" [[package]] name = "bzip2-sys" @@ -197,9 +190,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.40" +version = "1.2.49" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1d05d92f4b1fd76aad469d46cdd858ca761576082cd37df81416691e50199fb" +checksum = "90583009037521a116abf44494efecd645ba48b6622457080f080b85544e2215" dependencies = [ "find-msvc-tools", "jobserver", @@ -218,9 +211,9 @@ dependencies = [ [[package]] name = "cfg-if" -version = "1.0.3" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" [[package]] name = "chrono" @@ -233,7 +226,7 @@ dependencies = [ "num-traits", "serde", "wasm-bindgen", - "windows-link 0.2.1", + "windows-link", ] [[package]] @@ -309,9 +302,7 @@ version = "0.27.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f476fe445d41c9e991fd07515a6f463074b782242ccf4a5b7b1d1012e70824df" dependencies = [ - "defs", - "ordered-float", - "bitflags 2.9.4", + "bitflags 2.10.0", "crossterm_winapi", "libc", "mio 0.8.11", @@ -405,9 +396,9 @@ checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" [[package]] name = "find-msvc-tools" -version = "0.1.3" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0399f9d26e5191ce32c498bebd31e7a3ceabc2745f0ac54af3f335126c3f24b3" +checksum = "3a3076410a55c90011c298b04d0cfa770b00fa04e1e3c97d3f6c9de105a03844" [[package]] name = "fnv" @@ -456,34 +447,12 @@ dependencies = [ ] [[package]] -name = "num-traits" -version = "0.2.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" -dependencies = [ - "autocfg", -] - -[[package]] -name = "once_cell" -version = "1.21.3" name = "futures-core" version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" [[package]] -name = "ordered-float" -version = "5.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2c1f9f56e534ac6a9b8a4600bdf0f530fb393b5f393e7b4d03489c3cf0c3f01" -dependencies = [ - "num-traits", -] - -[[package]] -name = "peeking_take_while" -version = "0.1.2" name = "futures-io" version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -525,19 +494,19 @@ checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" dependencies = [ "cfg-if", "libc", - "wasi 0.11.1+wasi-snapshot-preview1", + "wasi", ] [[package]] name = "getrandom" -version = "0.3.3" +version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4" +checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" dependencies = [ "cfg-if", "libc", "r-efi", - "wasi 0.14.7+wasi-0.2.4", + "wasip2", ] [[package]] @@ -582,6 +551,12 @@ dependencies = [ "foldhash", ] +[[package]] +name = "hashbrown" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" + [[package]] name = "heck" version = "0.5.0" @@ -590,12 +565,11 @@ checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" [[package]] name = "http" -version = "1.3.1" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4a85d31aea989eead29a3aaf9e1115a180df8282431156e533de47660892565" +checksum = "e3ba2a386d7f85a81f119ad7498ebe444d2e22c2af0b86b069416ace48b3311a" dependencies = [ "bytes", - "fnv", "itoa", ] @@ -630,9 +604,9 @@ checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87" [[package]] name = "hyper" -version = "1.7.0" +version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb3aa54a13a0dfe7fbe3a59e0c76093041720fdc77b110cc0fc260fafb4dc51e" +checksum = "2ab2d4f250c3d7b1c9fcdff1cece94ea4e2dfbec68614f7b87cb205f24ca9d11" dependencies = [ "atomic-waker", "bytes", @@ -684,9 +658,9 @@ dependencies = [ [[package]] name = "hyper-util" -version = "0.1.17" +version = "0.1.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c6995591a8f1380fcb4ba966a252a4b29188d51d2b89e3a252f5305be65aea8" +checksum = "727805d60e7938b76b826a6ef209eb70eaa1812794f9424d4a4e2d740662df5f" dependencies = [ "base64", "bytes", @@ -734,9 +708,9 @@ dependencies = [ [[package]] name = "icu_collections" -version = "2.0.0" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "200072f5d0e3614556f94a9930d5dc3e0662a652823904c3a75dc3b0af7fee47" +checksum = "4c6b649701667bbe825c3b7e6388cb521c23d88644678e83c0c4d0a621a34b43" dependencies = [ "displaydoc", "potential_utf", @@ -747,9 +721,9 @@ dependencies = [ [[package]] name = "icu_locale_core" -version = "2.0.0" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0cde2700ccaed3872079a65fb1a78f6c0a36c91570f28755dda67bc8f7d9f00a" +checksum = "edba7861004dd3714265b4db54a3c390e880ab658fec5f7db895fae2046b5bb6" dependencies = [ "displaydoc", "litemap", @@ -760,11 +734,10 @@ dependencies = [ [[package]] name = "icu_normalizer" -version = "2.0.0" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "436880e8e18df4d7bbc06d58432329d6458cc84531f7ac5f024e93deadb37979" +checksum = "5f6c8828b67bf8908d82127b2054ea1b4427ff0230ee9141c54251934ab1b599" dependencies = [ - "displaydoc", "icu_collections", "icu_normalizer_data", "icu_properties", @@ -775,42 +748,38 @@ dependencies = [ [[package]] name = "icu_normalizer_data" -version = "2.0.0" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00210d6893afc98edb752b664b8890f0ef174c8adbb8d0be9710fa66fbbf72d3" +checksum = "7aedcccd01fc5fe81e6b489c15b247b8b0690feb23304303a9e560f37efc560a" [[package]] name = "icu_properties" -version = "2.0.1" +version = "2.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "016c619c1eeb94efb86809b015c58f479963de65bdb6253345c1a1276f22e32b" +checksum = "020bfc02fe870ec3a66d93e677ccca0562506e5872c650f893269e08615d74ec" dependencies = [ - "displaydoc", "icu_collections", "icu_locale_core", "icu_properties_data", "icu_provider", - "potential_utf", "zerotrie", "zerovec", ] [[package]] name = "icu_properties_data" -version = "2.0.1" +version = "2.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "298459143998310acd25ffe6810ed544932242d3f07083eee1084d83a71bd632" +checksum = "616c294cf8d725c6afcd8f55abc17c56464ef6211f9ed59cccffe534129c77af" [[package]] name = "icu_provider" -version = "2.0.0" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03c80da27b5f4187909049ee2d72f276f0d9f99a42c306bd0131ecfe04d8e5af" +checksum = "85962cf0ce02e1e0a629cc34e7ca3e373ce20dda4c4d7294bbd0bf1fdb59e614" dependencies = [ "displaydoc", "icu_locale_core", - "stable_deref_trait", - "tinystr", "writeable", "yoke", "zerofrom", @@ -850,28 +819,18 @@ name = "index" version = "0.1.0" dependencies = [ "defs", + "ordered-float", "uuid", ] [[package]] name = "indexmap" -version = "2.11.4" +version = "2.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b0f83760fb341a774ed326568e19f5a863af4a952def8c39f9ab92fd95b88e5" +checksum = "0ad4bb2b565bca0645f4d68c5c9af97fba094e9791da685bf83cb5f3ce74acf2" dependencies = [ "equivalent", - "hashbrown", -] - -[[package]] -name = "io-uring" -version = "0.7.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "046fa2d4d00aea763528b4950358d0ead425372445dc8ff86312b3c69ff7727b" -dependencies = [ - "bitflags 2.9.4", - "cfg-if", - "libc", + "hashbrown 0.16.1", ] [[package]] @@ -882,9 +841,9 @@ checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130" [[package]] name = "iri-string" -version = "0.7.8" +version = "0.7.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbc5ebe9c3a1a7a5127f920a418f7585e9e758e911d0466ed004f393b0e380b2" +checksum = "4f867b9d1d896b67beb18518eda36fdb77a32ea590de864f1325b294a6d14397" dependencies = [ "memchr", "serde", @@ -920,15 +879,15 @@ version = "0.1.34" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33" dependencies = [ - "getrandom 0.3.3", + "getrandom 0.3.4", "libc", ] [[package]] name = "js-sys" -version = "0.3.81" +version = "0.3.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec48937a97411dcb524a265206ccd4c90bb711fca92b2792c407f268825b9305" +checksum = "464a3709c7f55f1f721e5389aa6ea4e3bc6aba669353300af094b29ffbdde1d8" dependencies = [ "once_cell", "wasm-bindgen", @@ -948,9 +907,9 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" [[package]] name = "libc" -version = "0.2.176" +version = "0.2.178" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "58f929b4d672ea937a23a1ab494143d968337a5f47e56d0815df1e0890ddf174" +checksum = "37c93d8daa9d8a012fd8ab92f088405fb202ea0b6ab73ee2482ae66af4f42091" [[package]] name = "libloading" @@ -959,7 +918,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55" dependencies = [ "cfg-if", - "windows-link 0.2.1", + "windows-link", ] [[package]] @@ -980,9 +939,9 @@ dependencies = [ [[package]] name = "libz-sys" -version = "1.1.22" +version = "1.1.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b70e7a7df205e92a1a4cd9aaae7898dac0aa555503cc0a649494d0d60e7651d" +checksum = "15d118bbf3771060e7311cc7bb0545b01d08a8b4a7de949198dec1fa0ca1c0f7" dependencies = [ "cc", "pkg-config", @@ -997,9 +956,9 @@ checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" [[package]] name = "litemap" -version = "0.8.0" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "241eaef5fd12c88705a01fc1066c48c4b36e0dd4377dcdc7ec3942cea7a69956" +checksum = "6373607a59f0be73a39b6fe456b8192fcc3585f602af20751600e974dd455e77" [[package]] name = "lock_api" @@ -1012,9 +971,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.28" +version = "0.4.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432" +checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" [[package]] name = "lru" @@ -1022,7 +981,7 @@ version = "0.12.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "234cf4f4a04dc1f57e24b96cc0cd600cf2af460d4161ac5ecdd0af8e1f3b2a38" dependencies = [ - "hashbrown", + "hashbrown 0.15.5", ] [[package]] @@ -1080,19 +1039,19 @@ checksum = "a4a650543ca06a924e8b371db273b2756685faae30f8487da1b56505a8f78b0c" dependencies = [ "libc", "log", - "wasi 0.11.1+wasi-snapshot-preview1", + "wasi", "windows-sys 0.48.0", ] [[package]] name = "mio" -version = "1.0.4" +version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78bed444cc8a2160f01cbcf811ef18cac863ad68ae8ca62092e8db51d51c761c" +checksum = "a69bcab0ad47271a0234d9422b131806bf3968021e5dc9328caf2d4cd58557fc" dependencies = [ "libc", - "wasi 0.11.1+wasi-snapshot-preview1", - "windows-sys 0.59.0", + "wasi", + "windows-sys 0.61.2", ] [[package]] @@ -1148,11 +1107,11 @@ checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" [[package]] name = "openssl" -version = "0.10.73" +version = "0.10.75" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8505734d46c8ab1e19a1dce3aef597ad87dcb4c37e7188231769bd6bd51cebf8" +checksum = "08838db121398ad17ab8531ce9de97b244589089e290a384c900cb9ff7434328" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.10.0", "cfg-if", "foreign-types", "libc", @@ -1180,9 +1139,9 @@ checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e" [[package]] name = "openssl-sys" -version = "0.9.109" +version = "0.9.111" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90096e2e47630d78b7d1c20952dc621f957103f8bc2c8359ec81290d75238571" +checksum = "82cab2d520aa75e3c58898289429321eb788c3106963d0dc886ec7a5f4adc321" dependencies = [ "cc", "libc", @@ -1190,6 +1149,15 @@ dependencies = [ "vcpkg", ] +[[package]] +name = "ordered-float" +version = "5.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f4779c6901a562440c3786d08192c6fbda7c1c2060edd10006b05ee35d10f2d" +dependencies = [ + "num-traits", +] + [[package]] name = "owo-colors" version = "4.2.3" @@ -1216,7 +1184,7 @@ dependencies = [ "libc", "redox_syscall", "smallvec", - "windows-link 0.2.1", + "windows-link", ] [[package]] @@ -1257,9 +1225,9 @@ checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" [[package]] name = "potential_utf" -version = "0.1.3" +version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "84df19adbe5b5a0782edcab45899906947ab039ccf4573713735ee7de1e6b08a" +checksum = "b73949432f5e2a09657003c25bca5e19a0e9c84f8058ca374f49e0ebe605af77" dependencies = [ "zerovec", ] @@ -1276,18 +1244,18 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.101" +version = "1.0.103" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de" +checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8" dependencies = [ "unicode-ident", ] [[package]] name = "quote" -version = "1.0.41" +version = "1.0.42" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce25767e7b499d1b604768e7cde645d14cc8584231ea6b295e9c9eb22c02e1d1" +checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f" dependencies = [ "proc-macro2", ] @@ -1304,7 +1272,7 @@ version = "0.26.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f44c9e68fd46eda15c646fbb85e1040b657a58cdc8c98db1d97a55930d991eef" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.10.0", "cassowary", "compact_str", "crossterm", @@ -1324,14 +1292,14 @@ version = "0.5.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.10.0", ] [[package]] name = "regex" -version = "1.11.3" +version = "1.12.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b5288124840bee7b386bc413c487869b360b2b4ec421ea56425128692f2a82c" +checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4" dependencies = [ "aho-corasick", "memchr", @@ -1341,9 +1309,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.11" +version = "0.4.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "833eb9ce86d40ef33cb1306d8accf7bc8ec2bfea4355cbdebb3df68b40925cad" +checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c" dependencies = [ "aho-corasick", "memchr", @@ -1352,15 +1320,15 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.8.6" +version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "caf4aa5b0f434c91fe5c7f1ecb6a5ece2130b02ad2a590589dda5146df959001" +checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" [[package]] name = "reqwest" -version = "0.12.23" +version = "0.12.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d429f34c8092b2d42c7c93cec323bb4adeb7c67698f70839adec842ec10c7ceb" +checksum = "3b4c14b2d9afca6a60277086b0cc6a6ae0b568f6f7916c943a8cdc79f8be240f" dependencies = [ "base64", "bytes", @@ -1447,7 +1415,7 @@ version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cd15f8a2c5551a84d56efdc1cd049089e409ac19a3072d5037a17fd70719ff3e" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.10.0", "errno", "libc", "linux-raw-sys", @@ -1456,9 +1424,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.32" +version = "0.23.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd3c25631629d034ce7cd9940adc9d45762d46de2b0f57193c4443b92c6d4d40" +checksum = "533f54bc6a7d4f647e46ad909549eda97bf5afc1585190ef692b4286b198bd8f" dependencies = [ "once_cell", "rustls-pki-types", @@ -1469,18 +1437,18 @@ dependencies = [ [[package]] name = "rustls-pki-types" -version = "1.12.0" +version = "1.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "229a4a4c221013e7e1f1a043678c5cc39fe5171437c88fb47151a21e6f5b5c79" +checksum = "21e6f2ab2928ca4291b86736a8bd920a277a399bba1589409d72154ff87c1282" dependencies = [ "zeroize", ] [[package]] name = "rustls-webpki" -version = "0.103.7" +version = "0.103.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e10b3f4191e8a80e6b43eebabfac91e5dcecebb27a71f04e820c47ec41d314bf" +checksum = "2ffdfa2f5286e2247234e03f680868ac2815974dc39e00ea15adc445d0aafe52" dependencies = [ "ring", "rustls-pki-types", @@ -1520,7 +1488,7 @@ version = "2.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.10.0", "core-foundation", "core-foundation-sys", "libc", @@ -1628,9 +1596,9 @@ dependencies = [ [[package]] name = "signal-hook-mio" -version = "0.2.4" +version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34db1a06d485c9142248b7a054f034b349b212551f3dfd19c94d45a754a217cd" +checksum = "b75a19a7a740b25bc7944bdee6172368f988763b744e3d4dfe753f6b4ece40cc" dependencies = [ "libc", "mio 0.8.11", @@ -1639,9 +1607,9 @@ dependencies = [ [[package]] name = "signal-hook-registry" -version = "1.4.6" +version = "1.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2a4719bff48cee6b39d12c020eeb490953ad2443b7055bd0b21fca26bd8c28b" +checksum = "7664a098b8e616bdfcc2dc0e9ac44eb231eedf41db4e9fe95d8d32ec728dedad" dependencies = [ "libc", ] @@ -1660,12 +1628,12 @@ checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" [[package]] name = "socket2" -version = "0.6.0" +version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "233504af464074f9d066d7b5416c5f9b894a5862a6506e306f7b816cdd6f1807" +checksum = "17129e116933cf371d018bb80ae557e889637989d8638274fb25622827b03881" dependencies = [ "libc", - "windows-sys 0.59.0", + "windows-sys 0.60.2", ] [[package]] @@ -1732,9 +1700,9 @@ checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" [[package]] name = "syn" -version = "2.0.106" +version = "2.0.111" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ede7c438028d4436d71104916910f5bb611972c5cfd7f89b8300a8186e6fada6" +checksum = "390cc9a294ab71bdb1aa2e99d13be9c753cd2d7bd6560c77118597410c4d2e87" dependencies = [ "proc-macro2", "quote", @@ -1767,7 +1735,7 @@ version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.10.0", "core-foundation", "system-configuration-sys", ] @@ -1789,7 +1757,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2d31c77bdf42a745371d260a26ca7163f1e0924b64afa0b688e61b5a9fa02f16" dependencies = [ "fastrand", - "getrandom 0.3.3", + "getrandom 0.3.4", "once_cell", "rustix", "windows-sys 0.61.2", @@ -1806,9 +1774,9 @@ dependencies = [ [[package]] name = "tinystr" -version = "0.8.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d4f6d1145dcb577acf783d4e601bc1d76a13337bb54e6233add580b07344c8b" +checksum = "42d3e9c45c09de15d06dd8acf5f4e0e399e85927b7f00711024eb7ae10fa4869" dependencies = [ "displaydoc", "zerovec", @@ -1816,29 +1784,26 @@ dependencies = [ [[package]] name = "tokio" -version = "1.47.1" +version = "1.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89e49afdadebb872d3145a5638b59eb0691ea23e46ca484037cfab3b76b95038" +checksum = "ff360e02eab121e0bc37a2d3b4d4dc622e6eda3a8e5253d5435ecf5bd4c68408" dependencies = [ - "backtrace", "bytes", - "io-uring", "libc", - "mio 1.0.4", + "mio 1.1.1", "parking_lot", "pin-project-lite", "signal-hook-registry", - "slab", "socket2", "tokio-macros", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] name = "tokio-macros" -version = "2.5.0" +version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" +checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5" dependencies = [ "proc-macro2", "quote", @@ -1867,9 +1832,9 @@ dependencies = [ [[package]] name = "tokio-util" -version = "0.7.16" +version = "0.7.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14307c986784f72ef81c89db7d9e28d6ac26d16213b109ea501696195e6e3ce5" +checksum = "2efa149fe76073d6e8fd97ef4f4eca7b67f599660115591483572e406e165594" dependencies = [ "bytes", "futures-core", @@ -1895,11 +1860,11 @@ dependencies = [ [[package]] name = "tower-http" -version = "0.6.6" +version = "0.6.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adc82fd73de2a9722ac5da747f12383d2bfdb93591ee6c58486e0097890f05f2" +checksum = "d4e6559d53cc268e5031cd8429d05415bc4cb4aefc4aa5d6cc35fbf5b924a1f8" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.10.0", "bytes", "futures-util", "http", @@ -1925,9 +1890,9 @@ checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" [[package]] name = "tracing" -version = "0.1.41" +version = "0.1.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" +checksum = "2d15d90a0b5c19378952d479dc858407149d7bb45a14de0142f6c534b16fc647" dependencies = [ "pin-project-lite", "tracing-core", @@ -1935,9 +1900,9 @@ dependencies = [ [[package]] name = "tracing-core" -version = "0.1.34" +version = "0.1.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9d12581f227e93f094d3af2ae690a574abb8a2b9b7a96e7cfe9647b2b617678" +checksum = "7a04e24fab5c89c6a36eb8558c9656f30d81de51dfa4d3b45f26b21d61fa0a6c" dependencies = [ "once_cell", "valuable", @@ -1955,9 +1920,9 @@ dependencies = [ [[package]] name = "tracing-subscriber" -version = "0.3.20" +version = "0.3.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2054a14f5307d601f88daf0553e1cbf472acc4f2c51afab632431cdcd72124d5" +checksum = "2f30143827ddab0d256fd843b7a66d164e9f271cfa0dde49142c5ca0ca291f1e" dependencies = [ "sharded-slab", "thread_local", @@ -1999,9 +1964,9 @@ checksum = "75b844d17643ee918803943289730bec8aac480150456169e647ed0b576ba539" [[package]] name = "unicode-ident" -version = "1.0.19" +version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f63a545481291138910575129486daeaf8ac54aee4387fe7906919f7830c7d9d" +checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" [[package]] name = "unicode-segmentation" @@ -2052,13 +2017,13 @@ checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" [[package]] name = "uuid" -version = "1.18.1" +version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f87b8aa10b915a06587d0dec516c282ff295b475d94abf425d62b57710070a2" +checksum = "e2e054861b4bd027cd373e18e8d8d8e6548085000e41290d95ce0c373a654b4a" dependencies = [ - "getrandom 0.3.3", + "getrandom 0.3.4", "js-sys", - "serde", + "serde_core", "wasm-bindgen", ] @@ -2089,15 +2054,6 @@ version = "0.11.1+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" -[[package]] -name = "wasi" -version = "0.14.7+wasi-0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "883478de20367e224c0090af9cf5f9fa85bed63a95c1abf3afc5c083ebc06e8c" -dependencies = [ - "wasip2", -] - [[package]] name = "wasip2" version = "1.0.1+wasi-0.2.4" @@ -2109,9 +2065,9 @@ dependencies = [ [[package]] name = "wasm-bindgen" -version = "0.2.104" +version = "0.2.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1da10c01ae9f1ae40cbfac0bac3b1e724b320abfcf52229f80b547c0d250e2d" +checksum = "0d759f433fa64a2d763d1340820e46e111a7a5ab75f993d1852d70b03dbb80fd" dependencies = [ "cfg-if", "once_cell", @@ -2120,25 +2076,11 @@ dependencies = [ "wasm-bindgen-shared", ] -[[package]] -name = "wasm-bindgen-backend" -version = "0.2.104" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "671c9a5a66f49d8a47345ab942e2cb93c7d1d0339065d4f8139c486121b43b19" -dependencies = [ - "bumpalo", - "log", - "proc-macro2", - "quote", - "syn", - "wasm-bindgen-shared", -] - [[package]] name = "wasm-bindgen-futures" -version = "0.4.54" +version = "0.4.56" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e038d41e478cc73bae0ff9b36c60cff1c98b8f38f8d7e8061e79ee63608ac5c" +checksum = "836d9622d604feee9e5de25ac10e3ea5f2d65b41eac0d9ce72eb5deae707ce7c" dependencies = [ "cfg-if", "js-sys", @@ -2149,9 +2091,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.104" +version = "0.2.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ca60477e4c59f5f2986c50191cd972e3a50d8a95603bc9434501cf156a9a119" +checksum = "48cb0d2638f8baedbc542ed444afc0644a29166f1595371af4fecf8ce1e7eeb3" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -2159,31 +2101,31 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.104" +version = "0.2.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f07d2f20d4da7b26400c9f4a0511e6e0345b040694e8a75bd41d578fa4421d7" +checksum = "cefb59d5cd5f92d9dcf80e4683949f15ca4b511f4ac0a6e14d4e1ac60c6ecd40" dependencies = [ + "bumpalo", "proc-macro2", "quote", "syn", - "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.104" +version = "0.2.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bad67dc8b2a1a6e5448428adec4c3e84c43e561d8c9ee8a9e5aabeb193ec41d1" +checksum = "cbc538057e648b67f72a982e708d485b2efa771e1ac05fec311f9f63e5800db4" dependencies = [ "unicode-ident", ] [[package]] name = "web-sys" -version = "0.3.81" +version = "0.3.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9367c417a924a74cae129e6a2ae3b47fabb1f8995595ab474029da749a8be120" +checksum = "9b32828d774c412041098d182a8b38b16ea816958e07cf40eec2bc080ae137ac" dependencies = [ "js-sys", "wasm-bindgen", @@ -2219,9 +2161,9 @@ checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" dependencies = [ "windows-implement", "windows-interface", - "windows-link 0.2.1", - "windows-result 0.4.1", - "windows-strings 0.5.1", + "windows-link", + "windows-result", + "windows-strings", ] [[package]] @@ -2246,12 +2188,6 @@ dependencies = [ "syn", ] -[[package]] -name = "windows-link" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a" - [[package]] name = "windows-link" version = "0.2.1" @@ -2260,22 +2196,13 @@ checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" [[package]] name = "windows-registry" -version = "0.5.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b8a9ed28765efc97bbc954883f4e6796c33a06546ebafacbabee9696967499e" -dependencies = [ - "windows-link 0.1.3", - "windows-result 0.3.4", - "windows-strings 0.4.2", -] - -[[package]] -name = "windows-result" -version = "0.3.4" +version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56f42bd332cc6c8eac5af113fc0c1fd6a8fd2aa08a0119358686e5160d0586c6" +checksum = "02752bf7fbdcce7f2a27a742f798510f3e5ad88dbe84871e5168e2120c3d5720" dependencies = [ - "windows-link 0.1.3", + "windows-link", + "windows-result", + "windows-strings", ] [[package]] @@ -2284,16 +2211,7 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5" dependencies = [ - "windows-link 0.2.1", -] - -[[package]] -name = "windows-strings" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56e6c93f3a0c3b36176cb1327a4958a0353d5d166c2a35cb268ace15e91d3b57" -dependencies = [ - "windows-link 0.1.3", + "windows-link", ] [[package]] @@ -2302,7 +2220,7 @@ version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091" dependencies = [ - "windows-link 0.2.1", + "windows-link", ] [[package]] @@ -2325,11 +2243,11 @@ dependencies = [ [[package]] name = "windows-sys" -version = "0.59.0" +version = "0.60.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" dependencies = [ - "windows-targets 0.52.6", + "windows-targets 0.53.5", ] [[package]] @@ -2338,7 +2256,7 @@ version = "0.61.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" dependencies = [ - "windows-link 0.2.1", + "windows-link", ] [[package]] @@ -2365,13 +2283,30 @@ dependencies = [ "windows_aarch64_gnullvm 0.52.6", "windows_aarch64_msvc 0.52.6", "windows_i686_gnu 0.52.6", - "windows_i686_gnullvm", + "windows_i686_gnullvm 0.52.6", "windows_i686_msvc 0.52.6", "windows_x86_64_gnu 0.52.6", "windows_x86_64_gnullvm 0.52.6", "windows_x86_64_msvc 0.52.6", ] +[[package]] +name = "windows-targets" +version = "0.53.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3" +dependencies = [ + "windows-link", + "windows_aarch64_gnullvm 0.53.1", + "windows_aarch64_msvc 0.53.1", + "windows_i686_gnu 0.53.1", + "windows_i686_gnullvm 0.53.1", + "windows_i686_msvc 0.53.1", + "windows_x86_64_gnu 0.53.1", + "windows_x86_64_gnullvm 0.53.1", + "windows_x86_64_msvc 0.53.1", +] + [[package]] name = "windows_aarch64_gnullvm" version = "0.48.5" @@ -2384,6 +2319,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53" + [[package]] name = "windows_aarch64_msvc" version = "0.48.5" @@ -2396,6 +2337,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" +[[package]] +name = "windows_aarch64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" + [[package]] name = "windows_i686_gnu" version = "0.48.5" @@ -2408,12 +2355,24 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" +[[package]] +name = "windows_i686_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3" + [[package]] name = "windows_i686_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" +[[package]] +name = "windows_i686_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" + [[package]] name = "windows_i686_msvc" version = "0.48.5" @@ -2426,6 +2385,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" +[[package]] +name = "windows_i686_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" + [[package]] name = "windows_x86_64_gnu" version = "0.48.5" @@ -2438,6 +2403,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" +[[package]] +name = "windows_x86_64_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" + [[package]] name = "windows_x86_64_gnullvm" version = "0.48.5" @@ -2450,6 +2421,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" + [[package]] name = "windows_x86_64_msvc" version = "0.48.5" @@ -2462,6 +2439,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" +[[package]] +name = "windows_x86_64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" + [[package]] name = "wit-bindgen" version = "0.46.0" @@ -2470,17 +2453,16 @@ checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" [[package]] name = "writeable" -version = "0.6.1" +version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea2f10b9bb0928dfb1b42b65e1f9e36f7f54dbdf08457afefb38afcdec4fa2bb" +checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9" [[package]] name = "yoke" -version = "0.8.0" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f41bb01b8226ef4bfd589436a297c53d118f65921786300e427be8d487695cc" +checksum = "72d6e5c6afb84d73944e5cedb052c4680d5657337201555f9f2a16b7406d4954" dependencies = [ - "serde", "stable_deref_trait", "yoke-derive", "zerofrom", @@ -2488,9 +2470,9 @@ dependencies = [ [[package]] name = "yoke-derive" -version = "0.8.0" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38da3c9736e16c5d3c8c597a9aaa5d1fa565d0532ae05e27c24aa62fb32c0ab6" +checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d" dependencies = [ "proc-macro2", "quote", @@ -2527,9 +2509,9 @@ checksum = "b97154e67e32c85465826e8bcc1c59429aaaf107c1e4a9e53c8d8ccd5eff88d0" [[package]] name = "zerotrie" -version = "0.2.2" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36f0bbd478583f79edad978b407914f61b2972f5af6fa089686016be8f9af595" +checksum = "2a59c17a5562d507e4b54960e8569ebee33bee890c70aa3fe7b97e85a9fd7851" dependencies = [ "displaydoc", "yoke", @@ -2538,9 +2520,9 @@ dependencies = [ [[package]] name = "zerovec" -version = "0.11.4" +version = "0.11.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7aa2bd55086f1ab526693ecbe444205da57e25f4489879da80635a46d90e73b" +checksum = "6c28719294829477f525be0186d13efa9a3c602f7ec202ca9e353d310fb9a002" dependencies = [ "yoke", "zerofrom", @@ -2549,9 +2531,9 @@ dependencies = [ [[package]] name = "zerovec-derive" -version = "0.11.1" +version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b96237efa0c878c64bd89c436f661be4e46b2f3eff1ebb976f7ef2321d2f58f" +checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3" dependencies = [ "proc-macro2", "quote", diff --git a/crates/index/src/kd_tree.rs b/crates/index/src/kd_tree.rs index 310ee4f..5f25edd 100644 --- a/crates/index/src/kd_tree.rs +++ b/crates/index/src/kd_tree.rs @@ -1,11 +1,11 @@ +use crate::{distance, VectorIndex}; use defs::{DbError, DenseVector, IndexedVector, PointId, Similarity}; use std::{ cmp::Ordering, collections::{BinaryHeap, HashMap}, vec, }; - -use crate::{distance, VectorIndex}; +use uuid::Uuid; pub struct KDTree { dim: usize, @@ -47,10 +47,6 @@ impl PartialOrd for Neighbor { } impl KDTree { - pub fn mock() { - //here is the mock code - } - // Build an empty index with no points pub fn build_empty(dim: usize) -> Self { KDTree { @@ -101,7 +97,7 @@ impl KDTree { // Using swap so that we don't need to clone the whole vector let mut median_vec = IndexedVector { - id: 0, + id: Uuid::new_v4(), vector: vec![], }; // dummy std::mem::swap(&mut vectors[mid_idx], &mut median_vec); From fd02eb0d05b7548e8718fa0a5b19aff7da2787d9 Mon Sep 17 00:00:00 2001 From: Adesh Gupta Date: Wed, 17 Dec 2025 20:42:23 +0000 Subject: [PATCH 04/25] Debug without rebuild --- crates/defs/src/error.rs | 1 + crates/index/src/kd_tree.rs | 113 +++++++++++------------------------- 2 files changed, 34 insertions(+), 80 deletions(-) diff --git a/crates/defs/src/error.rs b/crates/defs/src/error.rs index ff15895..ef476e0 100644 --- a/crates/defs/src/error.rs +++ b/crates/defs/src/error.rs @@ -7,4 +7,5 @@ pub enum DbError { IndexError(String), LockError, IndexInitError, //TODO: Change this + UnsupportedSimilarity, } diff --git a/crates/index/src/kd_tree.rs b/crates/index/src/kd_tree.rs index 5f25edd..29f8fb3 100644 --- a/crates/index/src/kd_tree.rs +++ b/crates/index/src/kd_tree.rs @@ -2,7 +2,7 @@ use crate::{distance, VectorIndex}; use defs::{DbError, DenseVector, IndexedVector, PointId, Similarity}; use std::{ cmp::Ordering, - collections::{BinaryHeap, HashMap}, + collections::{BinaryHeap, HashSet}, vec, }; use uuid::Uuid; @@ -10,8 +10,8 @@ use uuid::Uuid; pub struct KDTree { dim: usize, root: Option>, - // An in memory point map for lookup during delete - point_map: HashMap, + // In memory point ids, to check existence before O(n) deletion logic + point_ids: HashSet, } // the node which will be the part of the KD Tree @@ -52,7 +52,7 @@ impl KDTree { KDTree { dim, root: None, - point_map: HashMap::new(), + point_ids: HashSet::new(), } } @@ -63,15 +63,16 @@ impl KDTree { } else { let dim = vectors[0].vector.len(); - let mut point_map = HashMap::with_capacity(vectors.len()); - for iv in vectors.iter() { - point_map.insert(iv.id, iv.vector.clone()); + let mut point_ids = HashSet::with_capacity(vectors.len()); + for indexed_vector in vectors.iter() { + point_ids.insert(indexed_vector.id); } + let root_node = Self::build_recursive(&mut vectors, 0, dim); Ok(KDTree { dim, root: Some(root_node), - point_map, + point_ids, }) } } @@ -127,6 +128,9 @@ impl KDTree { } pub fn insert_point(&mut self, new_vector: IndexedVector) { + // Add to point_ids + self.point_ids.insert(new_vector.id); + // use a traverse function to get the final leaf where this belongs if self.root.is_none() { self.root = Some(Box::new(KDTreeNode { @@ -166,83 +170,31 @@ impl KDTree { left: None, right: None, is_deleted: false, - })) + })); } - // Deletes the point by first finding the corresponding node using DFS and then deleting // Returns true if point found and deleted, else false - // First make a lookup of vector from map, then traverse the tree to obtain the point and mark it as deleted - pub fn delete_point(&mut self, point_id: PointId) -> bool { - if let Some(vector_to_delete) = self.point_map.get(&point_id) { - let found_and_deleted = Self::find_and_mark_recursive( - &mut self.root, - vector_to_delete, - point_id, - 0, - self.dim, - ); - - if found_and_deleted { - self.point_map.remove(&point_id); + pub fn delete_point(&mut self, point_id: &PointId) -> bool { + if self.point_ids.contains(point_id) { + let deleted = Self::find_and_mark_deleted(&mut self.root, *point_id); + if deleted { + self.point_ids.remove(point_id); } - - return found_and_deleted; + return deleted; } false } - // Recursively finds and marks a node as deleted, - fn find_and_mark_recursive( - node_opt: &mut Option>, - target_vector: &DenseVector, - target_id: PointId, - depth: usize, - dim: usize, - ) -> bool { + fn find_and_mark_deleted(node_opt: &mut Option>, target_id: PointId) -> bool { if let Some(node) = node_opt { if node.indexed_vector.id == target_id { node.is_deleted = true; return true; } - let axis = depth % dim; - let target_val = target_vector[axis]; - let node_val = node.indexed_vector.vector[axis]; - - if target_val < node_val { - Self::find_and_mark_recursive( - &mut node.left, - target_vector, - target_id, - depth + 1, - dim, - ) - } else if target_val > node_val { - Self::find_and_mark_recursive( - &mut node.right, - target_vector, - target_id, - depth + 1, - dim, - ) - } else { - // Need to check both right and left nodes in this case - let left_found = Self::find_and_mark_recursive( - &mut node.left, - target_vector, - target_id, - depth + 1, - dim, - ); - let right_found = Self::find_and_mark_recursive( - &mut node.right, - target_vector, - target_id, - depth + 1, - dim, - ); - left_found || right_found - } + // Search left first then right + Self::find_and_mark_deleted(&mut node.left, target_id) + || Self::find_and_mark_deleted(&mut node.right, target_id) } else { false } @@ -298,11 +250,11 @@ impl KDTree { }; // Recurse on near side first - self.search_recursive(&near_side, query_vector, k, heap, depth + 1, dist_type); + self.search_recursive(near_side, query_vector, k, heap, depth + 1, dist_type); // Process the current node if !node.is_deleted { - //TODO: Use square distance in distance, why is there overhead of square + // TODO: Possible overhead, here heap stores sqrt euclidean distance, we can eliminate that by storing squared distances in case of euclidean let distance = distance(query_vector, &node.indexed_vector.vector, dist_type); if heap.len() < k { heap.push(Neighbor { @@ -319,11 +271,11 @@ impl KDTree { } // Pruning on the farther side to check if there are better candidates - //TODO: Change this when implementing square distance + let axis_diff = query_vector[axis] - node.indexed_vector.vector[axis]; let dist_to_plane = match dist_type { - Similarity::Euclidean => query_vector[axis] - node.indexed_vector.vector[axis], - Similarity::Manhattan => 1.0, - _ => unreachable!(), + Similarity::Euclidean => axis_diff.abs(), + Similarity::Manhattan => axis_diff.abs(), + _ => 0.0, // Cosine/Hamming - no effective pruning, always search }; if heap.len() < k || dist_to_plane < heap.peek().unwrap().distance { @@ -340,7 +292,7 @@ impl VectorIndex for KDTree { } fn delete(&mut self, point_id: PointId) -> Result { - Ok(self.delete_point(point_id)) + Ok(self.delete_point(&point_id)) } fn search( @@ -350,9 +302,10 @@ impl VectorIndex for KDTree { k: usize, ) -> Result, DbError> { if matches!(similarity, Similarity::Cosine | Similarity::Hamming) { - panic!("Cosine and hamming are not suitable similariyt metric when using a KDTree") + return Err(DbError::UnsupportedSimilarity); } - Ok(vec![]) + let results = self.search_top_k(query_vector, k, similarity); + Ok(results.into_iter().map(|(id, _)| id).collect()) } } From 687e30c57af312aefad919d884d36a7dd3d0d47c Mon Sep 17 00:00:00 2001 From: Adesh Gupta Date: Thu, 18 Dec 2025 10:12:34 +0000 Subject: [PATCH 05/25] Add rebuild --- crates/index/src/kd_tree.rs | 184 ++++++++++++++++++++++++++++++++++-- 1 file changed, 175 insertions(+), 9 deletions(-) diff --git a/crates/index/src/kd_tree.rs b/crates/index/src/kd_tree.rs index 29f8fb3..08d7b48 100644 --- a/crates/index/src/kd_tree.rs +++ b/crates/index/src/kd_tree.rs @@ -12,15 +12,19 @@ pub struct KDTree { root: Option>, // In memory point ids, to check existence before O(n) deletion logic point_ids: HashSet, + // Rebuild tracking + total_nodes: usize, + deleted_count: usize, } // the node which will be the part of the KD Tree pub struct KDTreeNode { indexed_vector: IndexedVector, - split_dim: usize, left: Option>, right: Option>, is_deleted: bool, + + subtree_size: usize, } #[derive(Debug, Clone, PartialEq)] @@ -47,12 +51,18 @@ impl PartialOrd for Neighbor { } impl KDTree { + // Rebuild threshold + const BALANCE_THRESHOLD: f32 = 0.7; + const DELETE_REBUILD_RATIO: f32 = 0.25; + // Build an empty index with no points pub fn build_empty(dim: usize) -> Self { KDTree { dim, root: None, point_ids: HashSet::new(), + total_nodes: 0, + deleted_count: 0, } } @@ -73,6 +83,8 @@ impl KDTree { dim, root: Some(root_node), point_ids, + total_nodes: vectors.len(), + deleted_count: 0, }) } } @@ -120,41 +132,50 @@ impl KDTree { Box::new(KDTreeNode { indexed_vector: median_vec, - split_dim: axis, left, right, is_deleted: false, + subtree_size: vectors.len(), }) } pub fn insert_point(&mut self, new_vector: IndexedVector) { // Add to point_ids self.point_ids.insert(new_vector.id); + self.total_nodes += 1; // use a traverse function to get the final leaf where this belongs if self.root.is_none() { self.root = Some(Box::new(KDTreeNode { indexed_vector: new_vector, - split_dim: 0, left: None, right: None, is_deleted: false, + subtree_size: 1, })); return; } + let mut path: Vec<(usize, bool)> = Vec::new(); + let dim = self.dim; + let mut current_link = &mut self.root; let mut depth = 0; - let dim = self.dim; + // let dim = self.dim; while let Some(ref mut node_box) = current_link { let axis = depth % dim; let current_node = node_box.as_mut(); + current_node.subtree_size += 1; + let va = new_vector.vector[axis]; let vb = current_node.indexed_vector.vector[axis]; - if va <= vb { + let go_left = va <= vb; + path.push((depth, go_left)); + + if go_left { current_link = &mut current_node.left; } else { current_link = &mut current_node.right; @@ -163,14 +184,144 @@ impl KDTree { } // Assign the new node to current link which is &mut Option> - let axis = depth % dim; - *current_link = Some(Box::new(KDTreeNode { + let new_node = Box::new(KDTreeNode { indexed_vector: new_vector, - split_dim: axis, left: None, right: None, is_deleted: false, - })); + subtree_size: 1, + }); + + *current_link = Some(new_node); + + self.check_and_rebalance(&path); + } + + // Rebuild helper methods + fn is_unbalanced(node: &KDTreeNode) -> bool { + let left_size = node.left.as_ref().map_or(0, |n| n.subtree_size); + let right_size = node.right.as_ref().map_or(0, |n| n.subtree_size); + let max_child = left_size.max(right_size); + + max_child as f32 > Self::BALANCE_THRESHOLD * node.subtree_size as f32 + } + + fn collect_recursive(node: KDTreeNode, result: &mut Vec) { + if !node.is_deleted { + result.push(node.indexed_vector); + } + if let Some(left) = node.left { + Self::collect_recursive(*left, result); + } + if let Some(right) = node.right { + Self::collect_recursive(*right, result); + } + } + + fn collect_active_vectors(node: KDTreeNode) -> Vec { + let mut result = Vec::with_capacity(node.subtree_size); + Self::collect_recursive(node, &mut result); + result + } + + fn rebuild_at_depth(&mut self, path: &[(usize, bool)], target_depth: usize) { + let dim = self.dim; + + // Navigate to parent of target node + if target_depth == 0 { + // Rebuild root + if let Some(root) = self.root.take() { + let old_size = root.subtree_size; + let mut vectors = Self::collect_active_vectors(*root); + let new_size = vectors.len(); + if !vectors.is_empty() { + self.root = Some(Self::build_recursive(&mut vectors, 0, dim)); + } + // Update global counts as deleted nodes were purged + self.total_nodes -= old_size - new_size; + self.deleted_count = 0; + } + } else { + // Navigate to target node + let mut current_link = &mut self.root; + for (_depth, go_left) in path.iter().take(target_depth) { + let node = current_link.as_mut().unwrap(); + current_link = if *go_left { + &mut node.left + } else { + &mut node.right + }; + } + + // Rebuild tree at current link + if let Some(subtree_root) = current_link.take() { + let old_size = subtree_root.subtree_size; + let mut vectors = Self::collect_active_vectors(*subtree_root); + let new_size = vectors.len(); + + if !vectors.is_empty() { + *current_link = Some(Self::build_recursive(&mut vectors, target_depth, dim)); + } + + // Only update ancestors if size changed (deleted nodes were purged) + if old_size != new_size { + let size_diff = old_size - new_size; + self.subtract_size_from_ancestors(path, target_depth, size_diff); + + self.total_nodes -= size_diff; + self.deleted_count = self.deleted_count.saturating_sub(size_diff); + } + } + } + } + + fn subtract_size_from_ancestors( + &mut self, + path: &[(usize, bool)], + up_to_depth: usize, + diff: usize, + ) { + let mut current = &mut self.root; + for (_, go_left) in path.iter().take(up_to_depth) { + if let Some(node) = current { + node.subtree_size -= diff; + current = if *go_left { + &mut node.left + } else { + &mut node.right + }; + } + } + } + + fn check_and_rebalance(&mut self, path: &[(usize, bool)]) { + // Find the lowest depth where imbalance occurs + let mut unbalaced_depth: Option = None; + + let mut current = self.root.as_ref(); + + for (depth, go_left) in path { + if let Some(node) = current { + if Self::is_unbalanced(node) { + unbalaced_depth = Some(*depth); + break; + } + current = if *go_left { + node.left.as_ref() + } else { + node.right.as_ref() + }; + } + } + + if let Some(target_depth) = unbalaced_depth { + self.rebuild_at_depth(path, target_depth); + } + } + + fn should_rebuild_global(&self) -> bool { + self.total_nodes > 0 + && (self.deleted_count as f32 / self.total_nodes as f32) > Self::DELETE_REBUILD_RATIO } // Returns true if point found and deleted, else false @@ -178,8 +329,22 @@ impl KDTree { if self.point_ids.contains(point_id) { let deleted = Self::find_and_mark_deleted(&mut self.root, *point_id); if deleted { + self.deleted_count += 1; self.point_ids.remove(point_id); } + + if Self::should_rebuild_global(self) { + if let Some(root) = self.root.take() { + let mut vectors = Self::collect_active_vectors(*root); + if !vectors.is_empty() { + self.root = Some(Self::build_recursive(&mut vectors, 0, self.dim)); + } + + self.total_nodes = vectors.len(); + self.deleted_count = 0; + } + } + return deleted; } false @@ -286,6 +451,7 @@ impl KDTree { } impl VectorIndex for KDTree { + //TODO: Recalculate the total counts and deleted in main KD tree after rebuilds fn insert(&mut self, vector: IndexedVector) -> Result<(), DbError> { self.insert_point(vector); Ok(()) From 496bfc2ae768301fb3b5b7b80b5dfaae7ca4d623 Mon Sep 17 00:00:00 2001 From: Adesh Gupta Date: Thu, 18 Dec 2025 10:51:36 +0000 Subject: [PATCH 06/25] Add tests --- crates/index/src/kd_tree.rs | 500 ++++++++++++++++++++++++++++++++++-- 1 file changed, 484 insertions(+), 16 deletions(-) diff --git a/crates/index/src/kd_tree.rs b/crates/index/src/kd_tree.rs index 08d7b48..3e563b1 100644 --- a/crates/index/src/kd_tree.rs +++ b/crates/index/src/kd_tree.rs @@ -130,12 +130,15 @@ impl KDTree { Some(Self::build_recursive(right_points, depth + 1, dim)) }; + let left_size = left.as_ref().map_or(0, |n| n.subtree_size); + let right_size = right.as_ref().map_or(0, |n| n.subtree_size); + Box::new(KDTreeNode { indexed_vector: median_vec, left, right, is_deleted: false, - subtree_size: vectors.len(), + subtree_size: left_size + right_size + 1, }) } @@ -295,26 +298,44 @@ impl KDTree { } fn check_and_rebalance(&mut self, path: &[(usize, bool)]) { - // Find the lowest depth where imbalance occurs - let mut unbalaced_depth: Option = None; + // Find the shallowest (closest to root) depth where imbalance occurs + // so that rebuilding fixes the largest unbalanced subtree + let mut unbalanced_depth: Option = None; let mut current = self.root.as_ref(); - for (depth, go_left) in path { + // Check root first (depth 0) + if let Some(node) = current { + if Self::is_unbalanced(node) { + unbalanced_depth = Some(0); + } + } + + // Then traverse the path and check each node + // Once we find the shallowest unbalanced node, break immediately + for (idx, (_depth, go_left)) in path.iter().enumerate() { + if unbalanced_depth.is_some() { + break; + } + if let Some(node) = current { - if Self::is_unbalanced(node) { - unbalaced_depth = Some(*depth); - break; - } current = if *go_left { node.left.as_ref() } else { node.right.as_ref() }; + + // Check the child node we just moved to (at depth idx + 1) + if let Some(child) = current { + if Self::is_unbalanced(child) { + unbalanced_depth = Some(idx + 1); + break; + } + } } } - if let Some(target_depth) = unbalaced_depth { + if let Some(target_depth) = unbalanced_depth { self.rebuild_at_depth(path, target_depth); } } @@ -436,14 +457,17 @@ impl KDTree { } // Pruning on the farther side to check if there are better candidates - let axis_diff = query_vector[axis] - node.indexed_vector.vector[axis]; - let dist_to_plane = match dist_type { - Similarity::Euclidean => axis_diff.abs(), - Similarity::Manhattan => axis_diff.abs(), - _ => 0.0, // Cosine/Hamming - no effective pruning, always search + // For Euclidean: the heap stores sqrt distances, so we compare axis_diff with the heap's max distance + // For Manhattan: direct comparison works since it's a sum of absolute differences + let axis_diff = (query_vector[axis] - node.indexed_vector.vector[axis]).abs(); + let should_search_far = match dist_type { + Similarity::Euclidean | Similarity::Manhattan => { + heap.len() < k || axis_diff < heap.peek().unwrap().distance + } + _ => true, // Cosine/Hamming - no effective pruning, always search }; - if heap.len() < k || dist_to_plane < heap.peek().unwrap().distance { + if should_search_far { self.search_recursive(far_side, query_vector, k, heap, depth + 1, dist_type); } } @@ -451,7 +475,6 @@ impl KDTree { } impl VectorIndex for KDTree { - //TODO: Recalculate the total counts and deleted in main KD tree after rebuilds fn insert(&mut self, vector: IndexedVector) -> Result<(), DbError> { self.insert_point(vector); Ok(()) @@ -475,3 +498,448 @@ impl VectorIndex for KDTree { Ok(results.into_iter().map(|(id, _)| id).collect()) } } + +#[cfg(test)] +mod tests { + use super::*; + + fn make_vector(vector: Vec) -> IndexedVector { + IndexedVector { + id: Uuid::new_v4(), + vector, + } + } + + fn make_vector_with_id(id: Uuid, vector: Vec) -> IndexedVector { + IndexedVector { id, vector } + } + + // Build Tests + + #[test] + fn test_build_empty() { + let tree = KDTree::build_empty(3); + assert!(tree.root.is_none()); + assert_eq!(tree.dim, 3); + assert_eq!(tree.total_nodes, 0); + assert!(tree.point_ids.is_empty()); + } + + #[test] + fn test_build_with_empty_vectors_returns_error() { + let result = KDTree::build(vec![]); + assert!(result.is_err()); + } + + #[test] + fn test_build_single_vector() { + let id = Uuid::new_v4(); + let vectors = vec![make_vector_with_id(id, vec![1.0, 2.0, 3.0])]; + let tree = KDTree::build(vectors).unwrap(); + + assert!(tree.root.is_some()); + assert_eq!(tree.dim, 3); + assert_eq!(tree.total_nodes, 1); + assert!(tree.point_ids.contains(&id)); + } + + #[test] + fn test_build_multiple_vectors() { + let id1 = Uuid::new_v4(); + let id2 = Uuid::new_v4(); + let id3 = Uuid::new_v4(); + let vectors = vec![ + make_vector_with_id(id1, vec![1.0, 2.0]), + make_vector_with_id(id2, vec![3.0, 4.0]), + make_vector_with_id(id3, vec![5.0, 6.0]), + ]; + let tree = KDTree::build(vectors).unwrap(); + + assert!(tree.root.is_some()); + assert_eq!(tree.dim, 2); + assert_eq!(tree.total_nodes, 3); + assert!(tree.point_ids.contains(&id1)); + assert!(tree.point_ids.contains(&id2)); + assert!(tree.point_ids.contains(&id3)); + } + + // Insert Tests + + #[test] + fn test_insert_into_empty_tree() { + let mut tree = KDTree::build_empty(2); + let id = Uuid::new_v4(); + let vector = make_vector_with_id(id, vec![1.0, 2.0]); + + let result = tree.insert(vector); + assert!(result.is_ok()); + assert_eq!(tree.total_nodes, 1); + assert!(tree.point_ids.contains(&id)); + assert!(tree.root.is_some()); + } + + #[test] + fn test_insert_multiple_vectors() { + let mut tree = KDTree::build_empty(2); + let id1 = Uuid::new_v4(); + let id2 = Uuid::new_v4(); + let id3 = Uuid::new_v4(); + + tree.insert(make_vector_with_id(id1, vec![1.0, 2.0])) + .unwrap(); + tree.insert(make_vector_with_id(id2, vec![3.0, 4.0])) + .unwrap(); + tree.insert(make_vector_with_id(id3, vec![5.0, 6.0])) + .unwrap(); + + assert_eq!(tree.total_nodes, 3); + assert!(tree.point_ids.contains(&id1)); + assert!(tree.point_ids.contains(&id2)); + assert!(tree.point_ids.contains(&id3)); + } + + // Delete Tests + + #[test] + fn test_delete_existing_point() { + let mut ids = Vec::new(); + let mut vectors = Vec::new(); + + // Create enough vectors so deleting one doesn't trigger global rebuild + for i in 0..10 { + let id = Uuid::new_v4(); + ids.push(id); + vectors.push(make_vector_with_id(id, vec![i as f32, i as f32])); + } + + let mut tree = KDTree::build(vectors).unwrap(); + + let result = tree.delete(ids[0]).unwrap(); + assert!(result); + assert!(!tree.point_ids.contains(&ids[0])); + assert_eq!(tree.deleted_count, 1); + } + + #[test] + fn test_delete_non_existing_point() { + let id1 = Uuid::new_v4(); + let vectors = vec![make_vector_with_id(id1, vec![1.0, 2.0])]; + let mut tree = KDTree::build(vectors).unwrap(); + + let non_existing_id = Uuid::new_v4(); + let result = tree.delete(non_existing_id).unwrap(); + assert!(!result); + assert_eq!(tree.deleted_count, 0); + } + + #[test] + fn test_delete_from_empty_tree() { + let mut tree = KDTree::build_empty(2); + let result = tree.delete(Uuid::new_v4()).unwrap(); + assert!(!result); + } + + #[test] + fn test_deleted_point_not_in_search_results() { + let id1 = Uuid::new_v4(); + let id2 = Uuid::new_v4(); + let id3 = Uuid::new_v4(); + let vectors = vec![ + make_vector_with_id(id1, vec![0.0, 0.0]), + make_vector_with_id(id2, vec![1.0, 1.0]), + make_vector_with_id(id3, vec![10.0, 10.0]), + ]; + let mut tree = KDTree::build(vectors).unwrap(); + + // Delete the closest point + tree.delete(id1).unwrap(); + + // Search should not return the deleted point + let results = tree + .search(vec![0.0, 0.0], Similarity::Euclidean, 2) + .unwrap(); + assert!(!results.contains(&id1)); + assert!(results.contains(&id2)); + } + + // Search Tests (VectorIndex trait) + + #[test] + fn test_search_empty_tree() { + let tree = KDTree::build_empty(2); + let results = tree + .search(vec![1.0, 2.0], Similarity::Euclidean, 5) + .unwrap(); + assert!(results.is_empty()); + } + + #[test] + fn test_search_euclidean() { + let id1 = Uuid::new_v4(); + let id2 = Uuid::new_v4(); + let id3 = Uuid::new_v4(); + let vectors = vec![ + make_vector_with_id(id1, vec![1.0, 1.0]), + make_vector_with_id(id2, vec![2.0, 2.0]), + make_vector_with_id(id3, vec![10.0, 10.0]), + ]; + let tree = KDTree::build(vectors).unwrap(); + + let results = tree + .search(vec![0.0, 0.0], Similarity::Euclidean, 2) + .unwrap(); + assert_eq!(results.len(), 2); + assert_eq!(results[0], id1); // Closest + assert_eq!(results[1], id2); // Second closest + } + + #[test] + fn test_search_manhattan() { + let id1 = Uuid::new_v4(); + let id2 = Uuid::new_v4(); + let id3 = Uuid::new_v4(); + let vectors = vec![ + make_vector_with_id(id1, vec![1.0, 1.0]), + make_vector_with_id(id2, vec![2.0, 2.0]), + make_vector_with_id(id3, vec![5.0, 5.0]), + ]; + let tree = KDTree::build(vectors).unwrap(); + + let results = tree + .search(vec![0.0, 0.0], Similarity::Manhattan, 2) + .unwrap(); + assert_eq!(results.len(), 2); + assert_eq!(results[0], id1); + assert_eq!(results[1], id2); + } + + #[test] + fn test_search_unsupported_similarity_cosine() { + let vectors = vec![make_vector(vec![1.0, 2.0])]; + let tree = KDTree::build(vectors).unwrap(); + + let result = tree.search(vec![1.0, 2.0], Similarity::Cosine, 1); + assert!(matches!(result, Err(DbError::UnsupportedSimilarity))); + } + + #[test] + fn test_search_unsupported_similarity_hamming() { + let vectors = vec![make_vector(vec![1.0, 2.0])]; + let tree = KDTree::build(vectors).unwrap(); + + let result = tree.search(vec![1.0, 2.0], Similarity::Hamming, 1); + assert!(matches!(result, Err(DbError::UnsupportedSimilarity))); + } + + #[test] + fn test_search_k_larger_than_tree_size() { + let id1 = Uuid::new_v4(); + let id2 = Uuid::new_v4(); + let vectors = vec![ + make_vector_with_id(id1, vec![1.0, 1.0]), + make_vector_with_id(id2, vec![2.0, 2.0]), + ]; + let tree = KDTree::build(vectors).unwrap(); + + let results = tree + .search(vec![0.0, 0.0], Similarity::Euclidean, 10) + .unwrap(); + assert_eq!(results.len(), 2); // Should return all available points + } + + #[test] + fn test_search_exact_match() { + let id1 = Uuid::new_v4(); + let id2 = Uuid::new_v4(); + let vectors = vec![ + make_vector_with_id(id1, vec![5.0, 5.0]), + make_vector_with_id(id2, vec![10.0, 10.0]), + ]; + let tree = KDTree::build(vectors).unwrap(); + + let results = tree + .search(vec![5.0, 5.0], Similarity::Euclidean, 1) + .unwrap(); + assert_eq!(results.len(), 1); + assert_eq!(results[0], id1); + } + + // Search Correctness Tests + + #[test] + fn test_search_correctness_3d() { + let id1 = Uuid::new_v4(); + let id2 = Uuid::new_v4(); + let id3 = Uuid::new_v4(); + let id4 = Uuid::new_v4(); + let vectors = vec![ + make_vector_with_id(id1, vec![0.0, 0.0, 0.0]), + make_vector_with_id(id2, vec![1.0, 1.0, 1.0]), + make_vector_with_id(id3, vec![2.0, 2.0, 2.0]), + make_vector_with_id(id4, vec![10.0, 10.0, 10.0]), + ]; + let tree = KDTree::build(vectors).unwrap(); + + let results = tree + .search(vec![0.5, 0.5, 0.5], Similarity::Euclidean, 2) + .unwrap(); + // id1 at distance sqrt(0.75) ≈ 0.866 + // id2 at distance sqrt(0.75) ≈ 0.866 + // Both are equidistant, should return both + assert_eq!(results.len(), 2); + assert!(results.contains(&id1) || results.contains(&id2)); + } + + #[test] + fn test_search_after_insert() { + let mut tree = KDTree::build_empty(2); + let id1 = Uuid::new_v4(); + let id2 = Uuid::new_v4(); + let id3 = Uuid::new_v4(); + + tree.insert(make_vector_with_id(id1, vec![10.0, 10.0])) + .unwrap(); + tree.insert(make_vector_with_id(id2, vec![1.0, 1.0])) + .unwrap(); + tree.insert(make_vector_with_id(id3, vec![5.0, 5.0])) + .unwrap(); + + let results = tree + .search(vec![0.0, 0.0], Similarity::Euclidean, 2) + .unwrap(); + assert_eq!(results[0], id2); // Closest to origin + assert_eq!(results[1], id3); // Second closest + } + + #[test] + fn test_search_high_dimensional() { + let dim = 10; + let id1 = Uuid::new_v4(); + let id2 = Uuid::new_v4(); + + let vectors = vec![ + make_vector_with_id(id1, vec![0.0; dim]), + make_vector_with_id(id2, vec![1.0; dim]), + ]; + let tree = KDTree::build(vectors).unwrap(); + + let query = vec![0.1; dim]; + let results = tree.search(query, Similarity::Euclidean, 1).unwrap(); + assert_eq!(results[0], id1); // Closer to all-zeros + } + + // Rebalancing Tests + + #[test] + fn test_many_inserts_maintains_searchability() { + let mut tree = KDTree::build_empty(2); + let mut ids = Vec::new(); + + // Insert many points that would cause imbalance + for i in 0..20 { + let id = Uuid::new_v4(); + ids.push(id); + tree.insert(make_vector_with_id(id, vec![i as f32, i as f32])) + .unwrap(); + } + + // Search should still work correctly + let results = tree + .search(vec![0.0, 0.0], Similarity::Euclidean, 5) + .unwrap(); + assert_eq!(results.len(), 5); + // First result should be the point at (0, 0) + assert_eq!(results[0], ids[0]); + } + + #[test] + fn test_delete_triggers_rebuild() { + let mut ids = Vec::new(); + let mut vectors = Vec::new(); + + for i in 0..10 { + let id = Uuid::new_v4(); + ids.push(id); + vectors.push(make_vector_with_id(id, vec![i as f32, i as f32])); + } + + let mut tree = KDTree::build(vectors).unwrap(); + + // Delete enough points to trigger rebuild (> 25%) + for id in ids.iter().take(3) { + tree.delete(*id).unwrap(); + } + + // Tree should still function correctly + let results = tree + .search(vec![5.0, 5.0], Similarity::Euclidean, 3) + .unwrap(); + assert_eq!(results.len(), 3); + // Deleted points should not appear + for id in ids.iter().take(3) { + assert!(!results.contains(id)); + } + } + + // ==================== Edge Cases ==================== + + #[test] + fn test_single_point_search() { + let id = Uuid::new_v4(); + let vectors = vec![make_vector_with_id(id, vec![5.0, 5.0])]; + let tree = KDTree::build(vectors).unwrap(); + + let results = tree + .search(vec![0.0, 0.0], Similarity::Euclidean, 1) + .unwrap(); + assert_eq!(results.len(), 1); + assert_eq!(results[0], id); + } + + #[test] + fn test_duplicate_coordinates() { + let id1 = Uuid::new_v4(); + let id2 = Uuid::new_v4(); + let id3 = Uuid::new_v4(); + let vectors = vec![ + make_vector_with_id(id1, vec![1.0, 1.0]), + make_vector_with_id(id2, vec![1.0, 1.0]), // Same coordinates + make_vector_with_id(id3, vec![2.0, 2.0]), + ]; + let tree = KDTree::build(vectors).unwrap(); + + let results = tree + .search(vec![1.0, 1.0], Similarity::Euclidean, 2) + .unwrap(); + assert_eq!(results.len(), 2); + // Both id1 and id2 should be in results (both at distance 0) + assert!(results.contains(&id1) || results.contains(&id2)); + } + + #[test] + fn test_negative_coordinates() { + let id1 = Uuid::new_v4(); + let id2 = Uuid::new_v4(); + let vectors = vec![ + make_vector_with_id(id1, vec![-1.0, -1.0]), + make_vector_with_id(id2, vec![1.0, 1.0]), + ]; + let tree = KDTree::build(vectors).unwrap(); + + let results = tree + .search(vec![-0.5, -0.5], Similarity::Euclidean, 1) + .unwrap(); + assert_eq!(results[0], id1); + } + + #[test] + fn test_search_with_zero_k() { + let vectors = vec![make_vector(vec![1.0, 2.0])]; + let tree = KDTree::build(vectors).unwrap(); + + let results = tree + .search(vec![1.0, 2.0], Similarity::Euclidean, 0) + .unwrap(); + assert!(results.is_empty()); + } +} From 327bb031f08ac3059814b88548130a40e69a998f Mon Sep 17 00:00:00 2001 From: Adesh Gupta Date: Sat, 27 Dec 2025 13:36:56 +0000 Subject: [PATCH 07/25] Refactor code --- Cargo.lock | 25 - crates/index/Cargo.toml | 3 +- .../src/{kd_tree.rs => kd_tree/index.rs} | 497 +------------ crates/index/src/kd_tree/mod.rs | 5 + crates/index/src/kd_tree/tests.rs | 703 ++++++++++++++++++ crates/index/src/kd_tree/types.rs | 37 + 6 files changed, 755 insertions(+), 515 deletions(-) rename crates/index/src/{kd_tree.rs => kd_tree/index.rs} (50%) create mode 100644 crates/index/src/kd_tree/mod.rs create mode 100644 crates/index/src/kd_tree/tests.rs create mode 100644 crates/index/src/kd_tree/types.rs diff --git a/Cargo.lock b/Cargo.lock index 94b4c6b..cf44b74 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -66,12 +66,6 @@ version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" -[[package]] -name = "autocfg" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" - [[package]] name = "axum" version = "0.8.8" @@ -896,7 +890,6 @@ name = "index" version = "0.1.0" dependencies = [ "defs", - "ordered-float", "uuid", ] @@ -1188,15 +1181,6 @@ dependencies = [ "windows-sys 0.61.2", ] -[[package]] -name = "num-traits" -version = "0.2.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" -dependencies = [ - "autocfg", -] - [[package]] name = "object" version = "0.37.3" @@ -1256,15 +1240,6 @@ dependencies = [ "vcpkg", ] -[[package]] -name = "ordered-float" -version = "5.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f4779c6901a562440c3786d08192c6fbda7c1c2060edd10006b05ee35d10f2d" -dependencies = [ - "num-traits", -] - [[package]] name = "owo-colors" version = "4.2.3" diff --git a/crates/index/Cargo.toml b/crates/index/Cargo.toml index bb610cf..35f9957 100644 --- a/crates/index/Cargo.toml +++ b/crates/index/Cargo.toml @@ -7,6 +7,5 @@ edition.workspace = true license.workspace = true [dependencies] -defs = { path = "../defs" } -ordered-float = "5.0.0" +defs.workspace = true uuid.workspace = true diff --git a/crates/index/src/kd_tree.rs b/crates/index/src/kd_tree/index.rs similarity index 50% rename from crates/index/src/kd_tree.rs rename to crates/index/src/kd_tree/index.rs index fbc1943..dc623a2 100644 --- a/crates/index/src/kd_tree.rs +++ b/crates/index/src/kd_tree/index.rs @@ -1,3 +1,4 @@ +use super::types::{KDTreeNode, Neighbor}; use crate::{VectorIndex, distance}; use defs::{DbError, DenseVector, IndexedVector, PointId, Similarity}; use std::{ @@ -8,46 +9,13 @@ use std::{ use uuid::Uuid; pub struct KDTree { - dim: usize, - root: Option>, + pub dim: usize, + pub root: Option>, // In memory point ids, to check existence before O(n) deletion logic - point_ids: HashSet, + pub point_ids: HashSet, // Rebuild tracking - total_nodes: usize, - deleted_count: usize, -} - -// the node which will be the part of the KD Tree -pub struct KDTreeNode { - indexed_vector: IndexedVector, - left: Option>, - right: Option>, - is_deleted: bool, - - subtree_size: usize, -} - -#[derive(Debug, Clone, PartialEq)] -struct Neighbor { - id: PointId, - distance: f32, -} - -impl Eq for Neighbor {} - -// Custom Ord implementation for the max-heap -impl Ord for Neighbor { - fn cmp(&self, other: &Self) -> Ordering { - self.distance - .partial_cmp(&other.distance) - .unwrap_or(Ordering::Equal) - } -} - -impl PartialOrd for Neighbor { - fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) - } + pub total_nodes: usize, + pub deleted_count: usize, } impl KDTree { @@ -164,7 +132,6 @@ impl KDTree { let mut current_link = &mut self.root; let mut depth = 0; - // let dim = self.dim; while let Some(node_box) = current_link { let axis = depth % dim; @@ -438,7 +405,6 @@ impl KDTree { // Recurse on near side first self.search_recursive(near_side, query_vector, k, heap, depth + 1, dist_type); - // Process the current node if !node.is_deleted { // TODO: Possible overhead, here heap stores sqrt euclidean distance, we can eliminate that by storing squared distances in case of euclidean let distance = distance(query_vector, &node.indexed_vector.vector, dist_type); @@ -457,12 +423,12 @@ impl KDTree { } // Pruning on the farther side to check if there are better candidates - // For Euclidean: the heap stores sqrt distances, so we compare axis_diff with the heap's max distance - // For Manhattan: direct comparison works since it's a sum of absolute differences + // Use <= to handle ties: when axis_diff == current worst distance, there could be + // a point on the far side with the same distance that should be included let axis_diff = (query_vector[axis] - node.indexed_vector.vector[axis]).abs(); let should_search_far = match dist_type { Similarity::Euclidean | Similarity::Manhattan => { - heap.len() < k || axis_diff < heap.peek().unwrap().distance + heap.len() < k || axis_diff <= heap.peek().unwrap().distance } _ => true, // Cosine/Hamming - no effective pruning, always search }; @@ -498,448 +464,3 @@ impl VectorIndex for KDTree { Ok(results.into_iter().map(|(id, _)| id).collect()) } } - -#[cfg(test)] -mod tests { - use super::*; - - fn make_vector(vector: Vec) -> IndexedVector { - IndexedVector { - id: Uuid::new_v4(), - vector, - } - } - - fn make_vector_with_id(id: Uuid, vector: Vec) -> IndexedVector { - IndexedVector { id, vector } - } - - // Build Tests - - #[test] - fn test_build_empty() { - let tree = KDTree::build_empty(3); - assert!(tree.root.is_none()); - assert_eq!(tree.dim, 3); - assert_eq!(tree.total_nodes, 0); - assert!(tree.point_ids.is_empty()); - } - - #[test] - fn test_build_with_empty_vectors_returns_error() { - let result = KDTree::build(vec![]); - assert!(result.is_err()); - } - - #[test] - fn test_build_single_vector() { - let id = Uuid::new_v4(); - let vectors = vec![make_vector_with_id(id, vec![1.0, 2.0, 3.0])]; - let tree = KDTree::build(vectors).unwrap(); - - assert!(tree.root.is_some()); - assert_eq!(tree.dim, 3); - assert_eq!(tree.total_nodes, 1); - assert!(tree.point_ids.contains(&id)); - } - - #[test] - fn test_build_multiple_vectors() { - let id1 = Uuid::new_v4(); - let id2 = Uuid::new_v4(); - let id3 = Uuid::new_v4(); - let vectors = vec![ - make_vector_with_id(id1, vec![1.0, 2.0]), - make_vector_with_id(id2, vec![3.0, 4.0]), - make_vector_with_id(id3, vec![5.0, 6.0]), - ]; - let tree = KDTree::build(vectors).unwrap(); - - assert!(tree.root.is_some()); - assert_eq!(tree.dim, 2); - assert_eq!(tree.total_nodes, 3); - assert!(tree.point_ids.contains(&id1)); - assert!(tree.point_ids.contains(&id2)); - assert!(tree.point_ids.contains(&id3)); - } - - // Insert Tests - - #[test] - fn test_insert_into_empty_tree() { - let mut tree = KDTree::build_empty(2); - let id = Uuid::new_v4(); - let vector = make_vector_with_id(id, vec![1.0, 2.0]); - - let result = tree.insert(vector); - assert!(result.is_ok()); - assert_eq!(tree.total_nodes, 1); - assert!(tree.point_ids.contains(&id)); - assert!(tree.root.is_some()); - } - - #[test] - fn test_insert_multiple_vectors() { - let mut tree = KDTree::build_empty(2); - let id1 = Uuid::new_v4(); - let id2 = Uuid::new_v4(); - let id3 = Uuid::new_v4(); - - tree.insert(make_vector_with_id(id1, vec![1.0, 2.0])) - .unwrap(); - tree.insert(make_vector_with_id(id2, vec![3.0, 4.0])) - .unwrap(); - tree.insert(make_vector_with_id(id3, vec![5.0, 6.0])) - .unwrap(); - - assert_eq!(tree.total_nodes, 3); - assert!(tree.point_ids.contains(&id1)); - assert!(tree.point_ids.contains(&id2)); - assert!(tree.point_ids.contains(&id3)); - } - - // Delete Tests - - #[test] - fn test_delete_existing_point() { - let mut ids = Vec::new(); - let mut vectors = Vec::new(); - - // Create enough vectors so deleting one doesn't trigger global rebuild - for i in 0..10 { - let id = Uuid::new_v4(); - ids.push(id); - vectors.push(make_vector_with_id(id, vec![i as f32, i as f32])); - } - - let mut tree = KDTree::build(vectors).unwrap(); - - let result = tree.delete(ids[0]).unwrap(); - assert!(result); - assert!(!tree.point_ids.contains(&ids[0])); - assert_eq!(tree.deleted_count, 1); - } - - #[test] - fn test_delete_non_existing_point() { - let id1 = Uuid::new_v4(); - let vectors = vec![make_vector_with_id(id1, vec![1.0, 2.0])]; - let mut tree = KDTree::build(vectors).unwrap(); - - let non_existing_id = Uuid::new_v4(); - let result = tree.delete(non_existing_id).unwrap(); - assert!(!result); - assert_eq!(tree.deleted_count, 0); - } - - #[test] - fn test_delete_from_empty_tree() { - let mut tree = KDTree::build_empty(2); - let result = tree.delete(Uuid::new_v4()).unwrap(); - assert!(!result); - } - - #[test] - fn test_deleted_point_not_in_search_results() { - let id1 = Uuid::new_v4(); - let id2 = Uuid::new_v4(); - let id3 = Uuid::new_v4(); - let vectors = vec![ - make_vector_with_id(id1, vec![0.0, 0.0]), - make_vector_with_id(id2, vec![1.0, 1.0]), - make_vector_with_id(id3, vec![10.0, 10.0]), - ]; - let mut tree = KDTree::build(vectors).unwrap(); - - // Delete the closest point - tree.delete(id1).unwrap(); - - // Search should not return the deleted point - let results = tree - .search(vec![0.0, 0.0], Similarity::Euclidean, 2) - .unwrap(); - assert!(!results.contains(&id1)); - assert!(results.contains(&id2)); - } - - // Search Tests (VectorIndex trait) - - #[test] - fn test_search_empty_tree() { - let tree = KDTree::build_empty(2); - let results = tree - .search(vec![1.0, 2.0], Similarity::Euclidean, 5) - .unwrap(); - assert!(results.is_empty()); - } - - #[test] - fn test_search_euclidean() { - let id1 = Uuid::new_v4(); - let id2 = Uuid::new_v4(); - let id3 = Uuid::new_v4(); - let vectors = vec![ - make_vector_with_id(id1, vec![1.0, 1.0]), - make_vector_with_id(id2, vec![2.0, 2.0]), - make_vector_with_id(id3, vec![10.0, 10.0]), - ]; - let tree = KDTree::build(vectors).unwrap(); - - let results = tree - .search(vec![0.0, 0.0], Similarity::Euclidean, 2) - .unwrap(); - assert_eq!(results.len(), 2); - assert_eq!(results[0], id1); // Closest - assert_eq!(results[1], id2); // Second closest - } - - #[test] - fn test_search_manhattan() { - let id1 = Uuid::new_v4(); - let id2 = Uuid::new_v4(); - let id3 = Uuid::new_v4(); - let vectors = vec![ - make_vector_with_id(id1, vec![1.0, 1.0]), - make_vector_with_id(id2, vec![2.0, 2.0]), - make_vector_with_id(id3, vec![5.0, 5.0]), - ]; - let tree = KDTree::build(vectors).unwrap(); - - let results = tree - .search(vec![0.0, 0.0], Similarity::Manhattan, 2) - .unwrap(); - assert_eq!(results.len(), 2); - assert_eq!(results[0], id1); - assert_eq!(results[1], id2); - } - - #[test] - fn test_search_unsupported_similarity_cosine() { - let vectors = vec![make_vector(vec![1.0, 2.0])]; - let tree = KDTree::build(vectors).unwrap(); - - let result = tree.search(vec![1.0, 2.0], Similarity::Cosine, 1); - assert!(matches!(result, Err(DbError::UnsupportedSimilarity))); - } - - #[test] - fn test_search_unsupported_similarity_hamming() { - let vectors = vec![make_vector(vec![1.0, 2.0])]; - let tree = KDTree::build(vectors).unwrap(); - - let result = tree.search(vec![1.0, 2.0], Similarity::Hamming, 1); - assert!(matches!(result, Err(DbError::UnsupportedSimilarity))); - } - - #[test] - fn test_search_k_larger_than_tree_size() { - let id1 = Uuid::new_v4(); - let id2 = Uuid::new_v4(); - let vectors = vec![ - make_vector_with_id(id1, vec![1.0, 1.0]), - make_vector_with_id(id2, vec![2.0, 2.0]), - ]; - let tree = KDTree::build(vectors).unwrap(); - - let results = tree - .search(vec![0.0, 0.0], Similarity::Euclidean, 10) - .unwrap(); - assert_eq!(results.len(), 2); // Should return all available points - } - - #[test] - fn test_search_exact_match() { - let id1 = Uuid::new_v4(); - let id2 = Uuid::new_v4(); - let vectors = vec![ - make_vector_with_id(id1, vec![5.0, 5.0]), - make_vector_with_id(id2, vec![10.0, 10.0]), - ]; - let tree = KDTree::build(vectors).unwrap(); - - let results = tree - .search(vec![5.0, 5.0], Similarity::Euclidean, 1) - .unwrap(); - assert_eq!(results.len(), 1); - assert_eq!(results[0], id1); - } - - // Search Correctness Tests - - #[test] - fn test_search_correctness_3d() { - let id1 = Uuid::new_v4(); - let id2 = Uuid::new_v4(); - let id3 = Uuid::new_v4(); - let id4 = Uuid::new_v4(); - let vectors = vec![ - make_vector_with_id(id1, vec![0.0, 0.0, 0.0]), - make_vector_with_id(id2, vec![1.0, 1.0, 1.0]), - make_vector_with_id(id3, vec![2.0, 2.0, 2.0]), - make_vector_with_id(id4, vec![10.0, 10.0, 10.0]), - ]; - let tree = KDTree::build(vectors).unwrap(); - - let results = tree - .search(vec![0.5, 0.5, 0.5], Similarity::Euclidean, 2) - .unwrap(); - // id1 at distance sqrt(0.75) ≈ 0.866 - // id2 at distance sqrt(0.75) ≈ 0.866 - // Both are equidistant, should return both - assert_eq!(results.len(), 2); - assert!(results.contains(&id1) || results.contains(&id2)); - } - - #[test] - fn test_search_after_insert() { - let mut tree = KDTree::build_empty(2); - let id1 = Uuid::new_v4(); - let id2 = Uuid::new_v4(); - let id3 = Uuid::new_v4(); - - tree.insert(make_vector_with_id(id1, vec![10.0, 10.0])) - .unwrap(); - tree.insert(make_vector_with_id(id2, vec![1.0, 1.0])) - .unwrap(); - tree.insert(make_vector_with_id(id3, vec![5.0, 5.0])) - .unwrap(); - - let results = tree - .search(vec![0.0, 0.0], Similarity::Euclidean, 2) - .unwrap(); - assert_eq!(results[0], id2); // Closest to origin - assert_eq!(results[1], id3); // Second closest - } - - #[test] - fn test_search_high_dimensional() { - let dim = 10; - let id1 = Uuid::new_v4(); - let id2 = Uuid::new_v4(); - - let vectors = vec![ - make_vector_with_id(id1, vec![0.0; dim]), - make_vector_with_id(id2, vec![1.0; dim]), - ]; - let tree = KDTree::build(vectors).unwrap(); - - let query = vec![0.1; dim]; - let results = tree.search(query, Similarity::Euclidean, 1).unwrap(); - assert_eq!(results[0], id1); // Closer to all-zeros - } - - // Rebalancing Tests - - #[test] - fn test_many_inserts_maintains_searchability() { - let mut tree = KDTree::build_empty(2); - let mut ids = Vec::new(); - - // Insert many points that would cause imbalance - for i in 0..20 { - let id = Uuid::new_v4(); - ids.push(id); - tree.insert(make_vector_with_id(id, vec![i as f32, i as f32])) - .unwrap(); - } - - // Search should still work correctly - let results = tree - .search(vec![0.0, 0.0], Similarity::Euclidean, 5) - .unwrap(); - assert_eq!(results.len(), 5); - // First result should be the point at (0, 0) - assert_eq!(results[0], ids[0]); - } - - #[test] - fn test_delete_triggers_rebuild() { - let mut ids = Vec::new(); - let mut vectors = Vec::new(); - - for i in 0..10 { - let id = Uuid::new_v4(); - ids.push(id); - vectors.push(make_vector_with_id(id, vec![i as f32, i as f32])); - } - - let mut tree = KDTree::build(vectors).unwrap(); - - // Delete enough points to trigger rebuild (> 25%) - for id in ids.iter().take(3) { - tree.delete(*id).unwrap(); - } - - // Tree should still function correctly - let results = tree - .search(vec![5.0, 5.0], Similarity::Euclidean, 3) - .unwrap(); - assert_eq!(results.len(), 3); - // Deleted points should not appear - for id in ids.iter().take(3) { - assert!(!results.contains(id)); - } - } - - // ==================== Edge Cases ==================== - - #[test] - fn test_single_point_search() { - let id = Uuid::new_v4(); - let vectors = vec![make_vector_with_id(id, vec![5.0, 5.0])]; - let tree = KDTree::build(vectors).unwrap(); - - let results = tree - .search(vec![0.0, 0.0], Similarity::Euclidean, 1) - .unwrap(); - assert_eq!(results.len(), 1); - assert_eq!(results[0], id); - } - - #[test] - fn test_duplicate_coordinates() { - let id1 = Uuid::new_v4(); - let id2 = Uuid::new_v4(); - let id3 = Uuid::new_v4(); - let vectors = vec![ - make_vector_with_id(id1, vec![1.0, 1.0]), - make_vector_with_id(id2, vec![1.0, 1.0]), // Same coordinates - make_vector_with_id(id3, vec![2.0, 2.0]), - ]; - let tree = KDTree::build(vectors).unwrap(); - - let results = tree - .search(vec![1.0, 1.0], Similarity::Euclidean, 2) - .unwrap(); - assert_eq!(results.len(), 2); - // Both id1 and id2 should be in results (both at distance 0) - assert!(results.contains(&id1) || results.contains(&id2)); - } - - #[test] - fn test_negative_coordinates() { - let id1 = Uuid::new_v4(); - let id2 = Uuid::new_v4(); - let vectors = vec![ - make_vector_with_id(id1, vec![-1.0, -1.0]), - make_vector_with_id(id2, vec![1.0, 1.0]), - ]; - let tree = KDTree::build(vectors).unwrap(); - - let results = tree - .search(vec![-0.5, -0.5], Similarity::Euclidean, 1) - .unwrap(); - assert_eq!(results[0], id1); - } - - #[test] - fn test_search_with_zero_k() { - let vectors = vec![make_vector(vec![1.0, 2.0])]; - let tree = KDTree::build(vectors).unwrap(); - - let results = tree - .search(vec![1.0, 2.0], Similarity::Euclidean, 0) - .unwrap(); - assert!(results.is_empty()); - } -} diff --git a/crates/index/src/kd_tree/mod.rs b/crates/index/src/kd_tree/mod.rs new file mode 100644 index 0000000..8765acf --- /dev/null +++ b/crates/index/src/kd_tree/mod.rs @@ -0,0 +1,5 @@ +pub mod index; +pub mod types; + +#[cfg(test)] +mod tests; diff --git a/crates/index/src/kd_tree/tests.rs b/crates/index/src/kd_tree/tests.rs new file mode 100644 index 0000000..faefae5 --- /dev/null +++ b/crates/index/src/kd_tree/tests.rs @@ -0,0 +1,703 @@ +use super::index::KDTree; +use crate::VectorIndex; +use crate::distance; +use crate::flat::FlatIndex; +use defs::{DbError, IndexedVector, Similarity}; +use std::collections::HashSet; +use uuid::Uuid; + +fn make_vector(vector: Vec) -> IndexedVector { + IndexedVector { + id: Uuid::new_v4(), + vector, + } +} + +fn make_vector_with_id(id: Uuid, vector: Vec) -> IndexedVector { + IndexedVector { id, vector } +} + +// Build Tests + +#[test] +fn test_build_empty() { + let tree = KDTree::build_empty(3); + assert!(tree.root.is_none()); + assert_eq!(tree.dim, 3); + assert_eq!(tree.total_nodes, 0); + assert!(tree.point_ids.is_empty()); +} + +#[test] +fn test_build_with_empty_vectors_returns_error() { + let result = KDTree::build(vec![]); + assert!(result.is_err()); +} + +#[test] +fn test_build_single_vector() { + let id = Uuid::new_v4(); + let vectors = vec![make_vector_with_id(id, vec![1.0, 2.0, 3.0])]; + let tree = KDTree::build(vectors).unwrap(); + + assert!(tree.root.is_some()); + assert_eq!(tree.dim, 3); + assert_eq!(tree.total_nodes, 1); + assert!(tree.point_ids.contains(&id)); +} + +#[test] +fn test_build_multiple_vectors() { + let id1 = Uuid::new_v4(); + let id2 = Uuid::new_v4(); + let id3 = Uuid::new_v4(); + let vectors = vec![ + make_vector_with_id(id1, vec![1.0, 2.0]), + make_vector_with_id(id2, vec![3.0, 4.0]), + make_vector_with_id(id3, vec![5.0, 6.0]), + ]; + let tree = KDTree::build(vectors).unwrap(); + + assert!(tree.root.is_some()); + assert_eq!(tree.dim, 2); + assert_eq!(tree.total_nodes, 3); + assert!(tree.point_ids.contains(&id1)); + assert!(tree.point_ids.contains(&id2)); + assert!(tree.point_ids.contains(&id3)); +} + +// Insert Tests + +#[test] +fn test_insert_into_empty_tree() { + let mut tree = KDTree::build_empty(2); + let id = Uuid::new_v4(); + let vector = make_vector_with_id(id, vec![1.0, 2.0]); + + let result = tree.insert(vector); + assert!(result.is_ok()); + assert_eq!(tree.total_nodes, 1); + assert!(tree.point_ids.contains(&id)); + assert!(tree.root.is_some()); +} + +#[test] +fn test_insert_multiple_vectors() { + let mut tree = KDTree::build_empty(2); + let id1 = Uuid::new_v4(); + let id2 = Uuid::new_v4(); + let id3 = Uuid::new_v4(); + + tree.insert(make_vector_with_id(id1, vec![1.0, 2.0])) + .unwrap(); + tree.insert(make_vector_with_id(id2, vec![3.0, 4.0])) + .unwrap(); + tree.insert(make_vector_with_id(id3, vec![5.0, 6.0])) + .unwrap(); + + assert_eq!(tree.total_nodes, 3); + assert!(tree.point_ids.contains(&id1)); + assert!(tree.point_ids.contains(&id2)); + assert!(tree.point_ids.contains(&id3)); +} + +// Delete Tests + +#[test] +fn test_delete_existing_point() { + let mut ids = Vec::new(); + let mut vectors = Vec::new(); + + // Create enough vectors so deleting one doesn't trigger global rebuild + for i in 0..10 { + let id = Uuid::new_v4(); + ids.push(id); + vectors.push(make_vector_with_id(id, vec![i as f32, i as f32])); + } + + let mut tree = KDTree::build(vectors).unwrap(); + + let result = tree.delete(ids[0]).unwrap(); + assert!(result); + assert!(!tree.point_ids.contains(&ids[0])); + assert_eq!(tree.deleted_count, 1); +} + +#[test] +fn test_delete_non_existing_point() { + let id1 = Uuid::new_v4(); + let vectors = vec![make_vector_with_id(id1, vec![1.0, 2.0])]; + let mut tree = KDTree::build(vectors).unwrap(); + + let non_existing_id = Uuid::new_v4(); + let result = tree.delete(non_existing_id).unwrap(); + assert!(!result); + assert_eq!(tree.deleted_count, 0); +} + +#[test] +fn test_delete_from_empty_tree() { + let mut tree = KDTree::build_empty(2); + let result = tree.delete(Uuid::new_v4()).unwrap(); + assert!(!result); +} + +#[test] +fn test_deleted_point_not_in_search_results() { + let id1 = Uuid::new_v4(); + let id2 = Uuid::new_v4(); + let id3 = Uuid::new_v4(); + let vectors = vec![ + make_vector_with_id(id1, vec![0.0, 0.0]), + make_vector_with_id(id2, vec![1.0, 1.0]), + make_vector_with_id(id3, vec![10.0, 10.0]), + ]; + let mut tree = KDTree::build(vectors).unwrap(); + + // Delete the closest point + tree.delete(id1).unwrap(); + + // Search should not return the deleted point + let results = tree + .search(vec![0.0, 0.0], Similarity::Euclidean, 2) + .unwrap(); + assert!(!results.contains(&id1)); + assert!(results.contains(&id2)); +} + +// Search Tests (VectorIndex trait) + +#[test] +fn test_search_empty_tree() { + let tree = KDTree::build_empty(2); + let results = tree + .search(vec![1.0, 2.0], Similarity::Euclidean, 5) + .unwrap(); + assert!(results.is_empty()); +} + +#[test] +fn test_search_euclidean() { + let id1 = Uuid::new_v4(); + let id2 = Uuid::new_v4(); + let id3 = Uuid::new_v4(); + let vectors = vec![ + make_vector_with_id(id1, vec![1.0, 1.0]), + make_vector_with_id(id2, vec![2.0, 2.0]), + make_vector_with_id(id3, vec![10.0, 10.0]), + ]; + let tree = KDTree::build(vectors).unwrap(); + + let results = tree + .search(vec![0.0, 0.0], Similarity::Euclidean, 2) + .unwrap(); + assert_eq!(results.len(), 2); + assert_eq!(results[0], id1); // Closest + assert_eq!(results[1], id2); // Second closest +} + +#[test] +fn test_search_manhattan() { + let id1 = Uuid::new_v4(); + let id2 = Uuid::new_v4(); + let id3 = Uuid::new_v4(); + let vectors = vec![ + make_vector_with_id(id1, vec![1.0, 1.0]), + make_vector_with_id(id2, vec![2.0, 2.0]), + make_vector_with_id(id3, vec![5.0, 5.0]), + ]; + let tree = KDTree::build(vectors).unwrap(); + + let results = tree + .search(vec![0.0, 0.0], Similarity::Manhattan, 2) + .unwrap(); + assert_eq!(results.len(), 2); + assert_eq!(results[0], id1); + assert_eq!(results[1], id2); +} + +#[test] +fn test_search_unsupported_similarity_cosine() { + let vectors = vec![make_vector(vec![1.0, 2.0])]; + let tree = KDTree::build(vectors).unwrap(); + + let result = tree.search(vec![1.0, 2.0], Similarity::Cosine, 1); + assert!(matches!(result, Err(DbError::UnsupportedSimilarity))); +} + +#[test] +fn test_search_unsupported_similarity_hamming() { + let vectors = vec![make_vector(vec![1.0, 2.0])]; + let tree = KDTree::build(vectors).unwrap(); + + let result = tree.search(vec![1.0, 2.0], Similarity::Hamming, 1); + assert!(matches!(result, Err(DbError::UnsupportedSimilarity))); +} + +#[test] +fn test_search_k_larger_than_tree_size() { + let id1 = Uuid::new_v4(); + let id2 = Uuid::new_v4(); + let vectors = vec![ + make_vector_with_id(id1, vec![1.0, 1.0]), + make_vector_with_id(id2, vec![2.0, 2.0]), + ]; + let tree = KDTree::build(vectors).unwrap(); + + let results = tree + .search(vec![0.0, 0.0], Similarity::Euclidean, 10) + .unwrap(); + assert_eq!(results.len(), 2); // Should return all available points +} + +#[test] +fn test_search_exact_match() { + let id1 = Uuid::new_v4(); + let id2 = Uuid::new_v4(); + let vectors = vec![ + make_vector_with_id(id1, vec![5.0, 5.0]), + make_vector_with_id(id2, vec![10.0, 10.0]), + ]; + let tree = KDTree::build(vectors).unwrap(); + + let results = tree + .search(vec![5.0, 5.0], Similarity::Euclidean, 1) + .unwrap(); + assert_eq!(results.len(), 1); + assert_eq!(results[0], id1); +} + +// Search Correctness Tests + +#[test] +fn test_search_correctness_3d() { + let id1 = Uuid::new_v4(); + let id2 = Uuid::new_v4(); + let id3 = Uuid::new_v4(); + let id4 = Uuid::new_v4(); + let vectors = vec![ + make_vector_with_id(id1, vec![0.0, 0.0, 0.0]), + make_vector_with_id(id2, vec![1.0, 1.0, 1.0]), + make_vector_with_id(id3, vec![2.0, 2.0, 2.0]), + make_vector_with_id(id4, vec![10.0, 10.0, 10.0]), + ]; + let tree = KDTree::build(vectors).unwrap(); + + let results = tree + .search(vec![0.5, 0.5, 0.5], Similarity::Euclidean, 2) + .unwrap(); + // id1 at distance sqrt(0.75) ≈ 0.866 + // id2 at distance sqrt(0.75) ≈ 0.866 + // Both are equidistant, should return both + assert_eq!(results.len(), 2); + assert!(results.contains(&id1) || results.contains(&id2)); +} + +#[test] +fn test_search_after_insert() { + let mut tree = KDTree::build_empty(2); + let id1 = Uuid::new_v4(); + let id2 = Uuid::new_v4(); + let id3 = Uuid::new_v4(); + + tree.insert(make_vector_with_id(id1, vec![10.0, 10.0])) + .unwrap(); + tree.insert(make_vector_with_id(id2, vec![1.0, 1.0])) + .unwrap(); + tree.insert(make_vector_with_id(id3, vec![5.0, 5.0])) + .unwrap(); + + let results = tree + .search(vec![0.0, 0.0], Similarity::Euclidean, 2) + .unwrap(); + assert_eq!(results[0], id2); // Closest to origin + assert_eq!(results[1], id3); // Second closest +} + +#[test] +fn test_search_high_dimensional() { + let dim = 10; + let id1 = Uuid::new_v4(); + let id2 = Uuid::new_v4(); + + let vectors = vec![ + make_vector_with_id(id1, vec![0.0; dim]), + make_vector_with_id(id2, vec![1.0; dim]), + ]; + let tree = KDTree::build(vectors).unwrap(); + + let query = vec![0.1; dim]; + let results = tree.search(query, Similarity::Euclidean, 1).unwrap(); + assert_eq!(results[0], id1); // Closer to all-zeros +} + +// Rebalancing Tests + +#[test] +fn test_many_inserts_maintains_searchability() { + let mut tree = KDTree::build_empty(2); + let mut ids = Vec::new(); + + // Insert many points that would cause imbalance + for i in 0..20 { + let id = Uuid::new_v4(); + ids.push(id); + tree.insert(make_vector_with_id(id, vec![i as f32, i as f32])) + .unwrap(); + } + + // Search should still work correctly + let results = tree + .search(vec![0.0, 0.0], Similarity::Euclidean, 5) + .unwrap(); + assert_eq!(results.len(), 5); + // First result should be the point at (0, 0) + assert_eq!(results[0], ids[0]); +} + +#[test] +fn test_delete_triggers_rebuild() { + let mut ids = Vec::new(); + let mut vectors = Vec::new(); + + for i in 0..10 { + let id = Uuid::new_v4(); + ids.push(id); + vectors.push(make_vector_with_id(id, vec![i as f32, i as f32])); + } + + let mut tree = KDTree::build(vectors).unwrap(); + + // Delete enough points to trigger rebuild (> 25%) + for id in ids.iter().take(3) { + tree.delete(*id).unwrap(); + } + + // Tree should still function correctly + let results = tree + .search(vec![5.0, 5.0], Similarity::Euclidean, 3) + .unwrap(); + assert_eq!(results.len(), 3); + // Deleted points should not appear + for id in ids.iter().take(3) { + assert!(!results.contains(id)); + } +} + +// Edge Cases + +#[test] +fn test_single_point_search() { + let id = Uuid::new_v4(); + let vectors = vec![make_vector_with_id(id, vec![5.0, 5.0])]; + let tree = KDTree::build(vectors).unwrap(); + + let results = tree + .search(vec![0.0, 0.0], Similarity::Euclidean, 1) + .unwrap(); + assert_eq!(results.len(), 1); + assert_eq!(results[0], id); +} + +#[test] +fn test_duplicate_coordinates() { + let id1 = Uuid::new_v4(); + let id2 = Uuid::new_v4(); + let id3 = Uuid::new_v4(); + let vectors = vec![ + make_vector_with_id(id1, vec![1.0, 1.0]), + make_vector_with_id(id2, vec![1.0, 1.0]), // Same coordinates + make_vector_with_id(id3, vec![2.0, 2.0]), + ]; + let tree = KDTree::build(vectors).unwrap(); + + let results = tree + .search(vec![1.0, 1.0], Similarity::Euclidean, 2) + .unwrap(); + assert_eq!(results.len(), 2); + // Both id1 and id2 should be in results (both at distance 0) + assert!(results.contains(&id1) || results.contains(&id2)); +} + +#[test] +fn test_negative_coordinates() { + let id1 = Uuid::new_v4(); + let id2 = Uuid::new_v4(); + let vectors = vec![ + make_vector_with_id(id1, vec![-1.0, -1.0]), + make_vector_with_id(id2, vec![1.0, 1.0]), + ]; + let tree = KDTree::build(vectors).unwrap(); + + let results = tree + .search(vec![-0.5, -0.5], Similarity::Euclidean, 1) + .unwrap(); + assert_eq!(results[0], id1); +} + +#[test] +fn test_search_with_zero_k() { + let vectors = vec![make_vector(vec![1.0, 2.0])]; + let tree = KDTree::build(vectors).unwrap(); + + let results = tree + .search(vec![1.0, 2.0], Similarity::Euclidean, 0) + .unwrap(); + assert!(results.is_empty()); +} + +// Comparison Tests: KDTree vs FlatIndex + +/// Helper to create a fixed set of 10 vectors with known UUIDs for comparison tests +fn create_test_vectors_2d() -> Vec { + let ids: Vec = (0..10).map(|_| Uuid::new_v4()).collect(); + vec![ + make_vector_with_id(ids[0], vec![0.5, 0.5]), + make_vector_with_id(ids[1], vec![2.3, 1.7]), + make_vector_with_id(ids[2], vec![-1.0, 3.0]), + make_vector_with_id(ids[3], vec![4.5, -2.0]), + make_vector_with_id(ids[4], vec![7.0, 7.0]), + make_vector_with_id(ids[5], vec![-3.5, -1.5]), + make_vector_with_id(ids[6], vec![1.0, 5.0]), + make_vector_with_id(ids[7], vec![6.0, 2.0]), + make_vector_with_id(ids[8], vec![-2.0, -4.0]), + make_vector_with_id(ids[9], vec![3.0, 3.0]), + ] +} + +fn create_test_vectors_3d() -> Vec { + let ids: Vec = (0..10).map(|_| Uuid::new_v4()).collect(); + vec![ + make_vector_with_id(ids[0], vec![1.0, 2.0, 3.0]), + make_vector_with_id(ids[1], vec![-1.5, 0.5, 2.0]), + make_vector_with_id(ids[2], vec![4.0, 4.0, 4.0]), + make_vector_with_id(ids[3], vec![0.0, 0.0, 0.0]), + make_vector_with_id(ids[4], vec![2.5, -1.0, 3.5]), + make_vector_with_id(ids[5], vec![-2.0, 3.0, -1.0]), + make_vector_with_id(ids[6], vec![5.0, 1.0, 2.0]), + make_vector_with_id(ids[7], vec![3.0, 3.0, 3.0]), + make_vector_with_id(ids[8], vec![-0.5, -0.5, 1.0]), + make_vector_with_id(ids[9], vec![1.5, 2.5, 0.5]), + ] +} + +/// Helper to verify that two result sets are valid k-nearest neighbor results +/// Both should return the k closest points (by distance), but may differ on tie-breaking +fn verify_same_results( + tree_results: &[Uuid], + flat_results: &[Uuid], + vectors: &[IndexedVector], + query: &[f32], + similarity: Similarity, + k: usize, +) { + // Same length + assert_eq!( + tree_results.len(), + flat_results.len(), + "Result lengths differ" + ); + + // Both should return at most k results + assert!(tree_results.len() <= k); + + // Get distances for all results + let query_vec = query.to_vec(); + let get_distance = |id: &Uuid| -> f32 { + let vec = vectors.iter().find(|v| v.id == *id).unwrap(); + distance(&vec.vector, &query_vec, similarity) + }; + + // Verify tree results are sorted by distance + for i in 1..tree_results.len() { + let d1 = get_distance(&tree_results[i - 1]); + let d2 = get_distance(&tree_results[i]); + assert!( + d1 <= d2 + 1e-6, + "KDTree results not sorted: {} > {}", + d1, + d2 + ); + } + + // Verify flat results are sorted by distance + for i in 1..flat_results.len() { + let d1 = get_distance(&flat_results[i - 1]); + let d2 = get_distance(&flat_results[i]); + assert!(d1 <= d2 + 1e-6, "Flat results not sorted: {} > {}", d1, d2); + } + + // The maximum distance in both result sets should be the same (k-th nearest distance) + if !tree_results.is_empty() { + let tree_max_dist = get_distance(tree_results.last().unwrap()); + let flat_max_dist = get_distance(flat_results.last().unwrap()); + assert!( + (tree_max_dist - flat_max_dist).abs() < 1e-6, + "Max distances differ: tree={}, flat={}", + tree_max_dist, + flat_max_dist + ); + } + + // Verify that for each result in tree_results, either: + // 1. It's also in flat_results, OR + // 2. It has the same distance as the last element (tie-breaking difference) + let flat_set: HashSet<_> = flat_results.iter().collect(); + let flat_max_dist = if flat_results.is_empty() { + 0.0 + } else { + get_distance(flat_results.last().unwrap()) + }; + + for id in tree_results { + if !flat_set.contains(id) { + // This ID is not in flat results, verify it's a tie + let dist = get_distance(id); + assert!( + (dist - flat_max_dist).abs() < 1e-6, + "KDTree returned {:?} with distance {} but it's not in flat results and not a tie (flat max: {})", + id, + dist, + flat_max_dist + ); + } + } + + // Similarly verify flat_results + let tree_set: HashSet<_> = tree_results.iter().collect(); + let tree_max_dist = if tree_results.is_empty() { + 0.0 + } else { + get_distance(tree_results.last().unwrap()) + }; + + for id in flat_results { + if !tree_set.contains(id) { + let dist = get_distance(id); + assert!( + (dist - tree_max_dist).abs() < 1e-6, + "Flat returned {:?} with distance {} but it's not in tree results and not a tie (tree max: {})", + id, + dist, + tree_max_dist + ); + } + } +} + +#[test] +fn test_kdtree_vs_flat_euclidean_2d() { + let vectors = create_test_vectors_2d(); + let tree = KDTree::build(vectors.clone()).unwrap(); + let flat = FlatIndex::build(vectors.clone()); + + // Test multiple query points and different k values + let queries = vec![ + vec![0.0, 0.0], + vec![3.0, 3.0], + vec![-1.0, 2.0], + vec![5.0, 5.0], + ]; + + for query in queries { + for k in [1, 3, 5, 10] { + let tree_results = tree + .search(query.clone(), Similarity::Euclidean, k) + .unwrap(); + let flat_results = flat + .search(query.clone(), Similarity::Euclidean, k) + .unwrap(); + + verify_same_results( + &tree_results, + &flat_results, + &vectors, + &query, + Similarity::Euclidean, + k, + ); + } + } +} + +#[test] +fn test_kdtree_vs_flat_euclidean_3d() { + let vectors = create_test_vectors_3d(); + let tree = KDTree::build(vectors.clone()).unwrap(); + let flat = FlatIndex::build(vectors.clone()); + + let queries = vec![ + vec![0.0, 0.0, 0.0], + vec![2.0, 2.0, 2.0], + vec![-1.0, 1.0, 1.0], + vec![4.0, 3.0, 3.0], + ]; + + for query in queries { + for k in [1, 3, 5, 10] { + let tree_results = tree + .search(query.clone(), Similarity::Euclidean, k) + .unwrap(); + let flat_results = flat + .search(query.clone(), Similarity::Euclidean, k) + .unwrap(); + + verify_same_results( + &tree_results, + &flat_results, + &vectors, + &query, + Similarity::Euclidean, + k, + ); + } + } +} + +#[test] +fn test_kdtree_vs_flat_euclidean_5d() { + // Test with higher dimensionality + let ids: Vec = (0..10).map(|_| Uuid::new_v4()).collect(); + let vectors = vec![ + make_vector_with_id(ids[0], vec![1.0, 2.0, 3.0, 4.0, 5.0]), + make_vector_with_id(ids[1], vec![-1.0, 0.0, 1.0, 2.0, 3.0]), + make_vector_with_id(ids[2], vec![5.0, 4.0, 3.0, 2.0, 1.0]), + make_vector_with_id(ids[3], vec![0.0, 0.0, 0.0, 0.0, 0.0]), + make_vector_with_id(ids[4], vec![2.5, 2.5, 2.5, 2.5, 2.5]), + make_vector_with_id(ids[5], vec![-2.0, -1.0, 0.0, 1.0, 2.0]), + make_vector_with_id(ids[6], vec![3.0, 3.0, 3.0, 3.0, 3.0]), + make_vector_with_id(ids[7], vec![1.0, 1.0, 1.0, 1.0, 1.0]), + make_vector_with_id(ids[8], vec![4.0, 0.0, -1.0, 2.0, 5.0]), + make_vector_with_id(ids[9], vec![-0.5, 1.5, 2.5, 3.5, 4.5]), + ]; + + let tree = KDTree::build(vectors.clone()).unwrap(); + let flat = FlatIndex::build(vectors.clone()); + + let queries = vec![ + vec![0.0, 0.0, 0.0, 0.0, 0.0], + vec![2.0, 2.0, 2.0, 2.0, 2.0], + vec![1.0, 2.0, 3.0, 4.0, 5.0], + vec![-1.0, -1.0, 0.0, 1.0, 1.0], + ]; + + for query in queries { + for k in [1, 3, 5, 10] { + let tree_results = tree + .search(query.clone(), Similarity::Euclidean, k) + .unwrap(); + let flat_results = flat + .search(query.clone(), Similarity::Euclidean, k) + .unwrap(); + + verify_same_results( + &tree_results, + &flat_results, + &vectors, + &query, + Similarity::Euclidean, + k, + ); + } + } +} diff --git a/crates/index/src/kd_tree/types.rs b/crates/index/src/kd_tree/types.rs new file mode 100644 index 0000000..9999cb3 --- /dev/null +++ b/crates/index/src/kd_tree/types.rs @@ -0,0 +1,37 @@ +use std::cmp::Ordering; + +use defs::{IndexedVector, PointId}; + +// the node which will be the part of the KD Tree +pub struct KDTreeNode { + pub indexed_vector: IndexedVector, + pub left: Option>, + pub right: Option>, + pub is_deleted: bool, + + pub subtree_size: usize, +} + +// The struct definition which is present in max heap while search +#[derive(Debug, Clone, PartialEq)] +pub struct Neighbor { + pub id: PointId, + pub distance: f32, +} + +impl Eq for Neighbor {} + +// Custom Ord implementation for the max-heap +impl Ord for Neighbor { + fn cmp(&self, other: &Self) -> Ordering { + self.distance + .partial_cmp(&other.distance) + .unwrap_or(Ordering::Equal) + } +} + +impl PartialOrd for Neighbor { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} From 7661e7e2cbb77a3eebf972481940c07f3cfe3585 Mon Sep 17 00:00:00 2001 From: Tanmay Arya Date: Thu, 1 Jan 2026 06:07:06 +0530 Subject: [PATCH 08/25] implement kdtree serialization --- Cargo.lock | 1 + checkpoint/000009.sst | Bin 0 -> 1137 bytes checkpoint/CURRENT | 1 + checkpoint/MANIFEST-000005 | Bin 0 -> 240 bytes checkpoint/OPTIONS-000007 | 199 ++++++++++++++++++++++++++ crates/index/Cargo.toml | 1 + crates/index/src/kd_tree/mod.rs | 1 + crates/index/src/kd_tree/serialize.rs | 34 +++++ crates/index/src/lib.rs | 4 + 9 files changed, 241 insertions(+) create mode 100644 checkpoint/000009.sst create mode 100644 checkpoint/CURRENT create mode 100644 checkpoint/MANIFEST-000005 create mode 100644 checkpoint/OPTIONS-000007 create mode 100644 crates/index/src/kd_tree/serialize.rs diff --git a/Cargo.lock b/Cargo.lock index cf44b74..fa047df 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -890,6 +890,7 @@ name = "index" version = "0.1.0" dependencies = [ "defs", + "serde", "uuid", ] diff --git a/checkpoint/000009.sst b/checkpoint/000009.sst new file mode 100644 index 0000000000000000000000000000000000000000..02a4aa1e3e0109ac593d029d3b076af14f964a60 GIT binary patch literal 1137 zcma)5&rcIU6yA|Sw@?aL#Ykd;2{+kEOMkIJAPNVSC>~4>+18>b-OJ9rH{bi-kMCLYD~p9%p1e(S;`)S^^rZgBy%15G-sKX7Z@L9qFg{pnGgC+LOqScpQYdK?26Rm{Dswl@3V& z6dEMvYSf+yT0{^P2@X5JHUH(Vf;N+|7X9{Q)8hi9WKnn{GS>bjZ$Cx#sHL=BwsnqC zyY4Y1mz`pHX*P+rTw17AYfH0IX$EL^mkUd?0qN8!^Htr`JknTprJ{9X)~{H2Y=_WP zPyYtRjRuJ0H?f3zf=@yw0YEGlmu55hIRQlJ=3reDqYx^lx?as)@O`@Ilqwb1Dbpf# z+)|<8R7>ST#fNf@mMT-@BioTw1I&(2HkMP9r(~}O1IK1%{rb(?!j$kVA zZf)k$!%>J83*hC)FCRwi45CC4EZHvXX2uY3PXpB&NW1 zJ`nm51&Zt(3P3agM=cN}q|(X{rL`8qnDdqr_5=#!04F6RvXm&BM!J=Pfj;I?Pl5Hm z#mY_~9i<75W`RT(mOU{XajJJDk>;Q=nLHA#$wI=7Tmfqu4@Aa3o$hNJI(nGCP6Zpa z_VXE?I$;u3U94rz!MNp}yLMKywv+g#-1+Qizg%-OOWWJcbME~i>*Qd~S|SOh3)x3U gBZF6uPgV9(&+PNN1!MZhH>~|XKY!nQbLr^eFTw#^3;+NC literal 0 HcmV?d00001 diff --git a/checkpoint/CURRENT b/checkpoint/CURRENT new file mode 100644 index 0000000..aa5bb8e --- /dev/null +++ b/checkpoint/CURRENT @@ -0,0 +1 @@ +MANIFEST-000005 diff --git a/checkpoint/MANIFEST-000005 b/checkpoint/MANIFEST-000005 new file mode 100644 index 0000000000000000000000000000000000000000..2d77a89634adc64ecf9e462148fd262483d0b025 GIT binary patch literal 240 zcmZS8)^KKEU<~`Yer`cxQDRAc(HCZ(C>91r zCI%LUKRRkZ*%%l(8JO8v7$$#gXqy2P=KylKSQyh8I6rdeSQr?Zo26J->KY|kB Vec { + let mut buffer = Vec::new(); + self.serialize_topology_recursive(&self.root, &mut buffer); + buffer + } +} + + +impl KDTree { + + fn serialize_topology_recursive(&self, current_opt: &Option>, buffer: &mut Vec) { + if let Some(current) = current_opt { + // push marker byte + buffer.push(1u8); + + let uuid_bytes = current.indexed_vector.id.to_bytes_le(); + buffer.extend_from_slice(&uuid_bytes); + + // serialize left subtree topology + self.serialize_topology_recursive(¤t.left, buffer); + // serialize right subtree topology + self.serialize_topology_recursive(¤t.right, buffer); + } else { + // push skip marker byte + buffer.push(0u8); + } + } +} diff --git a/crates/index/src/lib.rs b/crates/index/src/lib.rs index 0dfb39c..27f8555 100644 --- a/crates/index/src/lib.rs +++ b/crates/index/src/lib.rs @@ -65,3 +65,7 @@ pub enum IndexType { KDTree, HNSW, } + +pub trait SerializableIndexer { + fn serialize_topology(&self) -> Vec; +} From c4590d75e1671c5ce40a83b5d79c0f8484edc1c1 Mon Sep 17 00:00:00 2001 From: Tanmay Arya Date: Fri, 2 Jan 2026 01:03:07 +0530 Subject: [PATCH 09/25] implement serialization and deserialization logic for flat and kdtree --- Cargo.lock | 281 +++++++++++++++++++++++++- Cargo.toml | 2 + crates/api/src/lib.rs | 2 +- crates/defs/src/error.rs | 1 + crates/index/Cargo.toml | 1 + crates/index/src/flat.rs | 270 ------------------------- crates/index/src/flat/index.rs | 71 +++++++ crates/index/src/flat/mod.rs | 5 + crates/index/src/flat/serialize.rs | 72 +++++++ crates/index/src/flat/tests.rs | 239 ++++++++++++++++++++++ crates/index/src/kd_tree/mod.rs | 2 +- crates/index/src/kd_tree/serialize.rs | 158 +++++++++++++-- crates/index/src/kd_tree/tests.rs | 35 +++- crates/index/src/lib.rs | 8 +- crates/snapshot/Cargo.toml | 20 ++ crates/snapshot/src/lib.rs | 89 ++++++++ crates/snapshot/src/types.rs | 27 +++ crates/snapshot/src/util.rs | 117 +++++++++++ 18 files changed, 1102 insertions(+), 298 deletions(-) delete mode 100644 crates/index/src/flat.rs create mode 100644 crates/index/src/flat/index.rs create mode 100644 crates/index/src/flat/mod.rs create mode 100644 crates/index/src/flat/serialize.rs create mode 100644 crates/index/src/flat/tests.rs create mode 100644 crates/snapshot/Cargo.toml create mode 100644 crates/snapshot/src/lib.rs create mode 100644 crates/snapshot/src/types.rs create mode 100644 crates/snapshot/src/util.rs diff --git a/Cargo.lock b/Cargo.lock index fa047df..7b9770d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -32,6 +32,15 @@ version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + [[package]] name = "anyhow" version = "1.0.100" @@ -66,6 +75,12 @@ version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + [[package]] name = "axum" version = "0.8.8" @@ -199,6 +214,15 @@ version = "2.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + [[package]] name = "bumpalo" version = "3.19.1" @@ -263,6 +287,20 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" +[[package]] +name = "chrono" +version = "0.4.42" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "145052bdd345b87320e369255277e3fb5152762ad123a901ef5c262dd38fe8d2" +dependencies = [ + "iana-time-zone", + "js-sys", + "num-traits", + "serde", + "wasm-bindgen", + "windows-link", +] + [[package]] name = "clang-sys" version = "1.8.1" @@ -330,6 +368,24 @@ version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" +[[package]] +name = "cpufeatures" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" +dependencies = [ + "libc", +] + +[[package]] +name = "crc32fast" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" +dependencies = [ + "cfg-if", +] + [[package]] name = "crossterm" version = "0.27.0" @@ -355,6 +411,22 @@ dependencies = [ "winapi", ] +[[package]] +name = "crypto-common" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" +dependencies = [ + "generic-array", + "typenum", +] + +[[package]] +name = "data-encoding" +version = "2.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a2330da5de22e8a3cb63252ce2abb30116bf5265e89c0e01bc17015ce30a476" + [[package]] name = "defs" version = "0.1.0" @@ -363,6 +435,16 @@ dependencies = [ "uuid", ] +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "crypto-common", +] + [[package]] name = "displaydoc" version = "0.2.5" @@ -427,6 +509,18 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" +[[package]] +name = "filetime" +version = "0.2.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc0505cd1b6fa6580283f6bdf70a73fcf4aba1184038c90902b92b3dd0df63ed" +dependencies = [ + "cfg-if", + "libc", + "libredox", + "windows-sys 0.60.2", +] + [[package]] name = "find-msvc-tools" version = "0.1.6" @@ -439,6 +533,16 @@ version = "0.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99" +[[package]] +name = "flate2" +version = "1.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfe33edd8e85a12a67454e37f8c75e730830d83e313556ab9ebf9ee7fbeb3bfb" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + [[package]] name = "fnv" version = "1.0.7" @@ -525,6 +629,16 @@ dependencies = [ "slab", ] +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + [[package]] name = "getrandom" version = "0.2.16" @@ -777,6 +891,30 @@ dependencies = [ "windows-registry", ] +[[package]] +name = "iana-time-zone" +version = "0.1.64" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33e57f83510bb73707521ebaffa789ec8caf86f9657cad665b092b581d40e9fb" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "log", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + [[package]] name = "icu_collections" version = "2.1.1" @@ -889,6 +1027,7 @@ checksum = "964de6e86d545b246d84badc0fef527924ace5134f30641c203ef52ba83f58d5" name = "index" version = "0.1.0" dependencies = [ + "bincode", "defs", "serde", "uuid", @@ -1001,6 +1140,17 @@ dependencies = [ "windows-link", ] +[[package]] +name = "libredox" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d0b95e02c851351f877147b7deea7b1afb1df71b63aa5f8270716e0c5720616" +dependencies = [ + "bitflags 2.10.0", + "libc", + "redox_syscall 0.7.0", +] + [[package]] name = "librocksdb-sys" version = "0.11.0+8.1.1" @@ -1115,6 +1265,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" dependencies = [ "adler2", + "simd-adler32", ] [[package]] @@ -1182,6 +1333,15 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + [[package]] name = "object" version = "0.37.3" @@ -1265,7 +1425,7 @@ checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" dependencies = [ "cfg-if", "libc", - "redox_syscall", + "redox_syscall 0.5.18", "smallvec", "windows-link", ] @@ -1482,6 +1642,15 @@ dependencies = [ "bitflags 2.10.0", ] +[[package]] +name = "redox_syscall" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49f3fe0889e69e2ae9e41f4d6c4c0181701d00e4697b356fb1f74173a5e0ee27" +dependencies = [ + "bitflags 2.10.0", +] + [[package]] name = "regex" version = "1.12.2" @@ -1692,6 +1861,12 @@ dependencies = [ "libc", ] +[[package]] +name = "semver" +version = "1.0.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2" + [[package]] name = "serde" version = "1.0.228" @@ -1782,6 +1957,17 @@ dependencies = [ "uuid", ] +[[package]] +name = "sha2" +version = "0.10.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + [[package]] name = "sharded-slab" version = "0.1.7" @@ -1828,6 +2014,12 @@ dependencies = [ "libc", ] +[[package]] +name = "simd-adler32" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2" + [[package]] name = "slab" version = "0.4.11" @@ -1840,6 +2032,25 @@ version = "1.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" +[[package]] +name = "snapshot" +version = "0.1.0" +dependencies = [ + "chrono", + "data-encoding", + "defs", + "flate2", + "index", + "semver", + "serde", + "serde_json", + "sha2", + "storage", + "tar", + "tempfile", + "uuid", +] + [[package]] name = "socket2" version = "0.6.1" @@ -1963,6 +2174,17 @@ dependencies = [ "libc", ] +[[package]] +name = "tar" +version = "0.4.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d863878d212c87a19c1a610eb53bb01fe12951c0501cf5a0d65f724914a667a" +dependencies = [ + "filetime", + "libc", + "xattr", +] + [[package]] name = "tempfile" version = "3.24.0" @@ -2277,6 +2499,12 @@ dependencies = [ "uuid", ] +[[package]] +name = "typenum" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" + [[package]] name = "unicase" version = "2.8.1" @@ -2360,6 +2588,12 @@ version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + [[package]] name = "want" version = "0.3.1" @@ -2474,6 +2708,41 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +[[package]] +name = "windows-core" +version = "0.62.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-link", + "windows-result", + "windows-strings", +] + +[[package]] +name = "windows-implement" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-interface" +version = "0.59.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "windows-link" version = "0.2.1" @@ -2743,6 +3012,16 @@ version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9" +[[package]] +name = "xattr" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32e45ad4206f6d2479085147f02bc2ef834ac85886624a23575ae137c8aa8156" +dependencies = [ + "libc", + "rustix", +] + [[package]] name = "yoke" version = "0.8.1" diff --git a/Cargo.toml b/Cargo.toml index 091c725..83e1dae 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,6 +8,7 @@ members = [ "crates/http", "crates/tui", "crates/grpc", + "crates/snapshot", ] [workspace.package] @@ -51,3 +52,4 @@ index = { path = "crates/index" } server = { path = "crates/server" } storage = { path = "crates/storage" } tui = { path = "crates/tui" } +snapshot = { path = "crates/snapshot" } diff --git a/crates/api/src/lib.rs b/crates/api/src/lib.rs index 72739ac..642853d 100644 --- a/crates/api/src/lib.rs +++ b/crates/api/src/lib.rs @@ -5,7 +5,7 @@ use std::path::PathBuf; // use std::sync::atomic::{AtomicU64, Ordering}; use std::sync::{Arc, RwLock}; -use index::flat::FlatIndex; +use index::flat::index::FlatIndex; use index::{IndexType, VectorIndex}; use storage::rocks_db::RocksDbStorage; use storage::{StorageEngine, StorageType, VectorPage}; diff --git a/crates/defs/src/error.rs b/crates/defs/src/error.rs index 6ebe861..b8ba5fa 100644 --- a/crates/defs/src/error.rs +++ b/crates/defs/src/error.rs @@ -10,6 +10,7 @@ pub enum DbError { IndexInitError, //TODO: Change this UnsupportedSimilarity, DimensionMismatch, + SnapshotError(String), } #[derive(Debug)] diff --git a/crates/index/Cargo.toml b/crates/index/Cargo.toml index 4a5bc14..e8abf22 100644 --- a/crates/index/Cargo.toml +++ b/crates/index/Cargo.toml @@ -7,6 +7,7 @@ edition.workspace = true license.workspace = true [dependencies] +bincode.workspace = true defs.workspace = true serde.workspace = true uuid.workspace = true diff --git a/crates/index/src/flat.rs b/crates/index/src/flat.rs deleted file mode 100644 index c0910e3..0000000 --- a/crates/index/src/flat.rs +++ /dev/null @@ -1,270 +0,0 @@ -use defs::{DbError, DenseVector, DistanceOrderedVector, IndexedVector, PointId, Similarity}; - -use crate::{VectorIndex, distance}; - -pub struct FlatIndex { - index: Vec, -} - -impl FlatIndex { - pub fn new() -> Self { - Self { index: Vec::new() } - } - - pub fn build(vectors: Vec) -> Self { - FlatIndex { index: vectors } - } -} - -impl Default for FlatIndex { - fn default() -> Self { - Self::new() - } -} - -impl VectorIndex for FlatIndex { - fn insert(&mut self, vector: IndexedVector) -> Result<(), DbError> { - self.index.push(vector); - Ok(()) - } - - fn delete(&mut self, point_id: PointId) -> Result { - if let Some(pos) = self.index.iter().position(|vector| vector.id == point_id) { - self.index.remove(pos); - Ok(true) - } else { - Ok(false) - } - } - - fn search( - &self, - query_vector: DenseVector, - similarity: Similarity, - k: usize, - ) -> Result, DbError> { - let scores = self - .index - .iter() - .map(|point| DistanceOrderedVector { - distance: distance(&point.vector, &query_vector, similarity), - query_vector: &query_vector, - point_id: Some(point.id), - }) - .collect::>(); - - // select k smallest elements in scores using a max heap - let mut heap = std::collections::BinaryHeap::::new(); - for score in scores { - if heap.len() < k { - heap.push(score); - } else if score < *heap.peek().unwrap() { - heap.pop(); - heap.push(score); - } - } - Ok(heap - .into_sorted_vec() - .into_iter() - .map(|v| v.point_id.unwrap()) - .collect()) - } -} - -#[cfg(test)] -mod tests { - use super::*; - use uuid::Uuid; - - #[test] - fn test_flat_index_new() { - let index = FlatIndex::new(); - assert_eq!(index.index.len(), 0); - } - - #[test] - fn test_flat_index_build() { - let vectors = vec![ - IndexedVector { - id: Uuid::new_v4(), - vector: vec![1.0, 2.0, 3.0], - }, - IndexedVector { - id: Uuid::new_v4(), - vector: vec![4.0, 5.0, 6.0], - }, - ]; - let index = FlatIndex::build(vectors.clone()); - assert_eq!(index.index, vectors); - } - - #[test] - fn test_insert() { - let mut index = FlatIndex::new(); - let vector = IndexedVector { - id: Uuid::new_v4(), - vector: vec![1.0, 2.0, 3.0], - }; - - assert!(index.insert(vector.clone()).is_ok()); - assert_eq!(index.index.len(), 1); - assert_eq!(index.index[0], vector); - } - - #[test] - fn test_delete_existing() { - let mut index = FlatIndex::new(); - let existing_id = Uuid::new_v4(); - let vector = IndexedVector { - id: existing_id, - vector: vec![1.0, 2.0, 3.0], - }; - index.insert(vector).unwrap(); - - let result = index.delete(existing_id).unwrap(); - assert!(result); - assert_eq!(index.index.len(), 0); - } - - #[test] - fn test_delete_non_existing() { - let mut index = FlatIndex::new(); - let vector = IndexedVector { - id: Uuid::new_v4(), - vector: vec![1.0, 2.0, 3.0], - }; - index.insert(vector).unwrap(); - - let result = index.delete(Uuid::new_v4()).unwrap(); - assert!(!result); - assert_eq!(index.index.len(), 1); - } - - #[test] - fn test_search_euclidean() { - let mut index = FlatIndex::new(); - let id1 = Uuid::new_v4(); - let id2 = Uuid::new_v4(); - let id3 = Uuid::new_v4(); - index - .insert(IndexedVector { - id: id1, - vector: vec![1.0, 1.0], - }) - .unwrap(); - index - .insert(IndexedVector { - id: id2, - vector: vec![2.0, 2.0], - }) - .unwrap(); - index - .insert(IndexedVector { - id: id3, - vector: vec![10.0, 10.0], - }) - .unwrap(); - - let results = index - .search(vec![0.0, 0.0], Similarity::Euclidean, 2) - .unwrap(); - assert_eq!(results, vec![id1, id2]); - } - - #[test] - fn test_search_cosine() { - let mut index = FlatIndex::new(); - let id1 = Uuid::new_v4(); - let id2 = Uuid::new_v4(); - let id3 = Uuid::new_v4(); - index - .insert(IndexedVector { - id: id1, - vector: vec![1.0, 0.0], - }) - .unwrap(); - index - .insert(IndexedVector { - id: id2, - vector: vec![0.5, 0.5], - }) - .unwrap(); - index - .insert(IndexedVector { - id: id3, - vector: vec![0.0, 1.0], - }) - .unwrap(); - - let results = index.search(vec![1.0, 1.0], Similarity::Cosine, 2).unwrap(); - assert_eq!(results, vec![id2, id1]); - } - - #[test] - fn test_search_manhattan() { - let mut index = FlatIndex::new(); - let id1 = Uuid::new_v4(); - let id2 = Uuid::new_v4(); - let id3 = Uuid::new_v4(); - index - .insert(IndexedVector { - id: id1, - vector: vec![1.0, 1.0], - }) - .unwrap(); - index - .insert(IndexedVector { - id: id2, - vector: vec![2.0, 2.0], - }) - .unwrap(); - index - .insert(IndexedVector { - id: id3, - vector: vec![5.0, 5.0], - }) - .unwrap(); - - let results = index - .search(vec![0.0, 0.0], Similarity::Manhattan, 2) - .unwrap(); - assert_eq!(results, vec![id1, id2]); - } - - #[test] - fn test_search_hamming() { - let mut index = FlatIndex::new(); - let id1 = Uuid::new_v4(); - let id2 = Uuid::new_v4(); - let id3 = Uuid::new_v4(); - index - .insert(IndexedVector { - id: id1, - vector: vec![1.0, 0.0, 1.0, 1.0], - }) - .unwrap(); - index - .insert(IndexedVector { - id: id2, - vector: vec![1.0, 0.0, 0.0, 0.0], - }) - .unwrap(); - index - .insert(IndexedVector { - id: id3, - vector: vec![0.0, 0.0, 0.0, 0.0], - }) - .unwrap(); - - let results = index - .search(vec![1.0, 0.0, 0.0, 0.0], Similarity::Hamming, 2) - .unwrap(); - assert_eq!(results, vec![id2, id3]); - } - - #[test] - fn test_default() { - let index = FlatIndex::default(); - assert_eq!(index.index.len(), 0); - } -} diff --git a/crates/index/src/flat/index.rs b/crates/index/src/flat/index.rs new file mode 100644 index 0000000..87f814f --- /dev/null +++ b/crates/index/src/flat/index.rs @@ -0,0 +1,71 @@ +use crate::{VectorIndex, distance}; +use defs::{DbError, DenseVector, DistanceOrderedVector, IndexedVector, PointId, Similarity}; + +pub struct FlatIndex { + pub index: Vec, +} + +impl FlatIndex { + pub fn new() -> Self { + Self { index: Vec::new() } + } + + pub fn build(vectors: Vec) -> Self { + FlatIndex { index: vectors } + } +} + +impl Default for FlatIndex { + fn default() -> Self { + Self::new() + } +} + +impl VectorIndex for FlatIndex { + fn insert(&mut self, vector: IndexedVector) -> Result<(), DbError> { + self.index.push(vector); + Ok(()) + } + + fn delete(&mut self, point_id: PointId) -> Result { + if let Some(pos) = self.index.iter().position(|vector| vector.id == point_id) { + self.index.remove(pos); + Ok(true) + } else { + Ok(false) + } + } + + fn search( + &self, + query_vector: DenseVector, + similarity: Similarity, + k: usize, + ) -> Result, DbError> { + let scores = self + .index + .iter() + .map(|point| DistanceOrderedVector { + distance: distance(&point.vector, &query_vector, similarity), + query_vector: &query_vector, + point_id: Some(point.id), + }) + .collect::>(); + + // select k smallest elements in scores using a max heap + let mut heap = std::collections::BinaryHeap::::new(); + for score in scores { + if heap.len() < k { + heap.push(score); + } else if score < *heap.peek().unwrap() { + heap.pop(); + heap.push(score); + } + } + Ok(heap + .into_sorted_vec() + .into_iter() + .map(|v| v.point_id.unwrap()) + .collect()) + } +} diff --git a/crates/index/src/flat/mod.rs b/crates/index/src/flat/mod.rs new file mode 100644 index 0000000..2fd3c64 --- /dev/null +++ b/crates/index/src/flat/mod.rs @@ -0,0 +1,5 @@ +pub mod index; +mod serialize; + +#[cfg(test)] +mod tests; diff --git a/crates/index/src/flat/serialize.rs b/crates/index/src/flat/serialize.rs new file mode 100644 index 0000000..191c053 --- /dev/null +++ b/crates/index/src/flat/serialize.rs @@ -0,0 +1,72 @@ +use crate::SerializableIndexer; +use crate::flat::index::FlatIndex; +use defs::{DbError, IndexedVector}; +use serde::{Deserialize, Serialize}; +use std::io::{Cursor, Read}; +use uuid::Uuid; + +const FLAT_MAGIC_BYTES: [u8; 4] = [0x00, 0x00, 0x00, 0x01]; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct FlatIndexMetadata { + total_points: usize, +} + +impl FlatIndex { + pub fn deserialize( + metadata_bytes: Vec, + topology_bytes: Vec, + ) -> Result, DbError> { + let metadata: FlatIndexMetadata = bincode::deserialize(&metadata_bytes).map_err(|e| { + DbError::SerializationError(format!("Failed to deserialize FlatIndex Metadata: {}", e)) + })?; + let total_points = metadata.total_points; + + let mut cursor = Cursor::new(topology_bytes); + let mut vectors = Vec::new(); + + for _ in 0..total_points { + let mut uuid_slice = [0u8; 16]; + cursor.read_exact(&mut uuid_slice).map_err(|e| { + DbError::SerializationError(format!( + "Failed to deserialize FlatIndex Topology: {}", + e + )) + })?; + let id = Uuid::from_bytes_le(uuid_slice); + vectors.push(IndexedVector { + id, + vector: Vec::new(), + }); + } + + Ok(Box::new(FlatIndex { index: vectors })) + } +} + +impl SerializableIndexer for FlatIndex { + fn magic_bytes(&self) -> [u8; 4] { + FLAT_MAGIC_BYTES + } + + fn serialize_topology(&self) -> Result, DbError> { + let mut buffer: Vec = Vec::new(); + for point in &self.index { + buffer.extend_from_slice(&point.id.to_bytes_le()); + } + Ok(buffer) + } + + fn serialize_metadata(&self) -> Result, DbError> { + let mut buffer: Vec = Vec::new(); + let metadata = FlatIndexMetadata { + total_points: self.index.len(), + }; + + let metadata_bytes = bincode::serialize(&metadata).map_err(|e| { + DbError::SerializationError(format!("Failed to serialize FlatIndex Metadata: {}", e)) + })?; + buffer.extend_from_slice(&metadata_bytes); + Ok(buffer) + } +} diff --git a/crates/index/src/flat/tests.rs b/crates/index/src/flat/tests.rs new file mode 100644 index 0000000..c225445 --- /dev/null +++ b/crates/index/src/flat/tests.rs @@ -0,0 +1,239 @@ +use super::index::FlatIndex; +use crate::{SerializableIndexer, VectorIndex}; +use defs::{IndexedVector, Similarity}; +use uuid::Uuid; + +#[test] +fn test_flat_index_new() { + let index = FlatIndex::new(); + assert_eq!(index.index.len(), 0); +} + +#[test] +fn test_flat_index_build() { + let vectors = vec![ + IndexedVector { + id: Uuid::new_v4(), + vector: vec![1.0, 2.0, 3.0], + }, + IndexedVector { + id: Uuid::new_v4(), + vector: vec![4.0, 5.0, 6.0], + }, + ]; + let index = FlatIndex::build(vectors.clone()); + assert_eq!(index.index, vectors); +} + +#[test] +fn test_insert() { + let mut index = FlatIndex::new(); + let vector = IndexedVector { + id: Uuid::new_v4(), + vector: vec![1.0, 2.0, 3.0], + }; + + assert!(index.insert(vector.clone()).is_ok()); + assert_eq!(index.index.len(), 1); + assert_eq!(index.index[0], vector); +} + +#[test] +fn test_delete_existing() { + let mut index = FlatIndex::new(); + let existing_id = Uuid::new_v4(); + let vector = IndexedVector { + id: existing_id, + vector: vec![1.0, 2.0, 3.0], + }; + index.insert(vector).unwrap(); + + let result = index.delete(existing_id).unwrap(); + assert!(result); + assert_eq!(index.index.len(), 0); +} + +#[test] +fn test_delete_non_existing() { + let mut index = FlatIndex::new(); + let vector = IndexedVector { + id: Uuid::new_v4(), + vector: vec![1.0, 2.0, 3.0], + }; + index.insert(vector).unwrap(); + + let result = index.delete(Uuid::new_v4()).unwrap(); + assert!(!result); + assert_eq!(index.index.len(), 1); +} + +#[test] +fn test_search_euclidean() { + let mut index = FlatIndex::new(); + let id1 = Uuid::new_v4(); + let id2 = Uuid::new_v4(); + let id3 = Uuid::new_v4(); + index + .insert(IndexedVector { + id: id1, + vector: vec![1.0, 1.0], + }) + .unwrap(); + index + .insert(IndexedVector { + id: id2, + vector: vec![2.0, 2.0], + }) + .unwrap(); + index + .insert(IndexedVector { + id: id3, + vector: vec![10.0, 10.0], + }) + .unwrap(); + + let results = index + .search(vec![0.0, 0.0], Similarity::Euclidean, 2) + .unwrap(); + assert_eq!(results, vec![id1, id2]); +} + +#[test] +fn test_search_cosine() { + let mut index = FlatIndex::new(); + let id1 = Uuid::new_v4(); + let id2 = Uuid::new_v4(); + let id3 = Uuid::new_v4(); + index + .insert(IndexedVector { + id: id1, + vector: vec![1.0, 0.0], + }) + .unwrap(); + index + .insert(IndexedVector { + id: id2, + vector: vec![0.5, 0.5], + }) + .unwrap(); + index + .insert(IndexedVector { + id: id3, + vector: vec![0.0, 1.0], + }) + .unwrap(); + + let results = index.search(vec![1.0, 1.0], Similarity::Cosine, 2).unwrap(); + assert_eq!(results, vec![id2, id1]); +} + +#[test] +fn test_search_manhattan() { + let mut index = FlatIndex::new(); + let id1 = Uuid::new_v4(); + let id2 = Uuid::new_v4(); + let id3 = Uuid::new_v4(); + index + .insert(IndexedVector { + id: id1, + vector: vec![1.0, 1.0], + }) + .unwrap(); + index + .insert(IndexedVector { + id: id2, + vector: vec![2.0, 2.0], + }) + .unwrap(); + index + .insert(IndexedVector { + id: id3, + vector: vec![5.0, 5.0], + }) + .unwrap(); + + let results = index + .search(vec![0.0, 0.0], Similarity::Manhattan, 2) + .unwrap(); + assert_eq!(results, vec![id1, id2]); +} + +#[test] +fn test_search_hamming() { + let mut index = FlatIndex::new(); + let id1 = Uuid::new_v4(); + let id2 = Uuid::new_v4(); + let id3 = Uuid::new_v4(); + index + .insert(IndexedVector { + id: id1, + vector: vec![1.0, 0.0, 1.0, 1.0], + }) + .unwrap(); + index + .insert(IndexedVector { + id: id2, + vector: vec![1.0, 0.0, 0.0, 0.0], + }) + .unwrap(); + index + .insert(IndexedVector { + id: id3, + vector: vec![0.0, 0.0, 0.0, 0.0], + }) + .unwrap(); + + let results = index + .search(vec![1.0, 0.0, 0.0, 0.0], Similarity::Hamming, 2) + .unwrap(); + assert_eq!(results, vec![id2, id3]); +} + +#[test] +fn test_default() { + let index = FlatIndex::default(); + assert_eq!(index.index.len(), 0); +} + +#[test] +fn test_serialize_and_deserialize() { + // currently fails because vectors arent restored + let id1 = Uuid::new_v4(); + let id2 = Uuid::new_v4(); + let id3 = Uuid::new_v4(); + let id4 = Uuid::new_v4(); + + let v1 = IndexedVector { + id: id1.clone(), + vector: vec![0.0, 0.0, 0.0, 0.0], + }; + let v2 = IndexedVector { + id: id2.clone(), + vector: vec![1.0, 0.0, 0.0, 0.0], + }; + let v3 = IndexedVector { + id: id3.clone(), + vector: vec![2.0, 0.0, 0.0, 0.0], + }; + let v4 = IndexedVector { + id: id4.clone(), + vector: vec![3.0, 0.0, 0.0, 0.0], + }; + + let vectors = vec![v1.clone(), v2.clone(), v3.clone(), v4.clone()]; + let mut index_before = FlatIndex::build(vectors); + index_before.insert(v4).unwrap(); + + index_before.delete(id1).unwrap(); + + let serialized_meta = index_before.serialize_metadata().unwrap(); + let serialized_topo = index_before.serialize_topology().unwrap(); + + let idx = FlatIndex::deserialize(serialized_meta, serialized_topo).unwrap(); + + assert_eq!(idx.index.len(), 3); + assert!(!idx.index.contains(&v1)); + assert!(idx.index.contains(&v2)); + assert!(idx.index.contains(&v3)); + assert!(idx.index.contains(&v3)); +} diff --git a/crates/index/src/kd_tree/mod.rs b/crates/index/src/kd_tree/mod.rs index 04fcff5..715bba0 100644 --- a/crates/index/src/kd_tree/mod.rs +++ b/crates/index/src/kd_tree/mod.rs @@ -1,6 +1,6 @@ pub mod index; -pub mod types; mod serialize; +pub mod types; #[cfg(test)] mod tests; diff --git a/crates/index/src/kd_tree/serialize.rs b/crates/index/src/kd_tree/serialize.rs index b69a9ab..49d3bd3 100644 --- a/crates/index/src/kd_tree/serialize.rs +++ b/crates/index/src/kd_tree/serialize.rs @@ -1,34 +1,148 @@ -use super::types::{KDTreeNode}; +use std::collections::HashSet; +use std::io::{Cursor, Read}; + use super::index::KDTree; +use super::types::KDTreeNode; use crate::SerializableIndexer; +use bincode; +use defs::{DbError, IndexedVector, PointId}; +use serde::{Deserialize, Serialize}; +use uuid::Uuid; + +const KD_TREE_MAGIC_BYTES: [u8; 4] = [0x00, 0x00, 0x00, 0x00]; +#[derive(Serialize, Deserialize)] +pub struct KDTreeMetadata { + pub dim: usize, + pub total_nodes: usize, + pub deleted_count: usize, +} impl SerializableIndexer for KDTree { - fn serialize_topology(&self) -> Vec { - let mut buffer = Vec::new(); - self.serialize_topology_recursive(&self.root, &mut buffer); - buffer - } + fn magic_bytes(&self) -> [u8; 4] { + KD_TREE_MAGIC_BYTES + } + + fn serialize_topology(&self) -> Result, DbError> { + let mut buffer = Vec::new(); + self.serialize_topology_recursive(&self.root, &mut buffer)?; + Ok(buffer) + } + + fn serialize_metadata(&self) -> Result, DbError> { + let mut buffer = Vec::new(); + let km = KDTreeMetadata { + dim: self.dim, + total_nodes: self.total_nodes, + deleted_count: self.deleted_count, + }; + let metadata_bytes = bincode::serialize(&km).map_err(|e| { + DbError::SerializationError(format!("Failed to serailize KD Tree Metadata: {}", e)) + })?; + buffer.extend_from_slice(metadata_bytes.as_slice()); + Ok(buffer) + } } +const NODE_MARKER_BYTE: u8 = 1u8; +const SKIP_MARKER_BYTE: u8 = 0u8; + +const DELETED_MASK: u8 = 2u8; impl KDTree { + pub fn deserialize( + metadata_bytes: Vec, + topology_bytes: Vec, + ) -> Result, DbError> { + let metadata: KDTreeMetadata = + bincode::deserialize(metadata_bytes.as_slice()).map_err(|e| { + DbError::SerializationError(format!( + "Failed to deserailize KD Tree Metadata: {}", + e + )) + })?; + + let mut buf = Cursor::new(topology_bytes); + let mut non_deleted = HashSet::new(); + let root = deserialize_topology_recursive(&mut buf, &mut non_deleted)?; - fn serialize_topology_recursive(&self, current_opt: &Option>, buffer: &mut Vec) { - if let Some(current) = current_opt { - // push marker byte - buffer.push(1u8); - - let uuid_bytes = current.indexed_vector.id.to_bytes_le(); - buffer.extend_from_slice(&uuid_bytes); - - // serialize left subtree topology - self.serialize_topology_recursive(¤t.left, buffer); - // serialize right subtree topology - self.serialize_topology_recursive(¤t.right, buffer); - } else { - // push skip marker byte - buffer.push(0u8); - } + Ok(Box::new(KDTree { + dim: metadata.dim, + root, + point_ids: non_deleted, + total_nodes: metadata.total_nodes, + deleted_count: metadata.deleted_count, + })) } + + fn serialize_topology_recursive( + &self, + current_opt: &Option>, + buffer: &mut Vec, + ) -> Result<(), DbError> { + if let Some(current) = current_opt { + let mut marker = NODE_MARKER_BYTE; + if current.is_deleted { + marker |= DELETED_MASK; + } + buffer.push(marker); + + let uuid_bytes = current.indexed_vector.id.to_bytes_le(); + buffer.extend_from_slice(&uuid_bytes); + + // serialize left subtree topology + self.serialize_topology_recursive(¤t.left, buffer)?; + // serialize right subtree topology + self.serialize_topology_recursive(¤t.right, buffer)?; + } else { + buffer.push(SKIP_MARKER_BYTE); + } + Ok(()) + } +} + +fn deserialize_topology_recursive( + buffer: &mut Cursor>, + non_deleted: &mut HashSet, +) -> Result>, DbError> { + let mut current_marker: [u8; 1] = [0u8; 1]; + buffer.read_exact(&mut current_marker).map_err(|e| { + DbError::SerializationError(format!("Failed to deserialize KD Topology: {}", e)) + })?; + + if current_marker[0] == SKIP_MARKER_BYTE { + return Ok(None); + } + + let mut uuid_bytes = [0u8; 16]; + buffer.read_exact(&mut uuid_bytes).map_err(|e| { + DbError::SerializationError(format!("Failed to deserialize KD Topology: {}", e)) + })?; + let uuid = Uuid::from_bytes_le(uuid_bytes); + let indexed_vector = IndexedVector { + id: uuid, + vector: Vec::new(), + }; + + let is_deleted = current_marker[0] & DELETED_MASK == DELETED_MASK; + if !is_deleted { + non_deleted.insert(uuid); + } + + // pre order deserialization + let left_node = deserialize_topology_recursive(buffer, non_deleted)?; + let right_node = deserialize_topology_recursive(buffer, non_deleted)?; + + let left_size = left_node.as_ref().map_or(0, |n| n.subtree_size); + let right_size = right_node.as_ref().map_or(0, |n| n.subtree_size); + + let current_node = KDTreeNode { + indexed_vector, + left: left_node, + right: right_node, + is_deleted, + subtree_size: left_size + right_size + 1, + }; + + Ok(Some(Box::new(current_node))) } diff --git a/crates/index/src/kd_tree/tests.rs b/crates/index/src/kd_tree/tests.rs index faefae5..43f79a5 100644 --- a/crates/index/src/kd_tree/tests.rs +++ b/crates/index/src/kd_tree/tests.rs @@ -1,7 +1,8 @@ use super::index::KDTree; +use crate::SerializableIndexer; use crate::VectorIndex; use crate::distance; -use crate::flat::FlatIndex; +use crate::flat::index::FlatIndex; use defs::{DbError, IndexedVector, Similarity}; use std::collections::HashSet; use uuid::Uuid; @@ -701,3 +702,35 @@ fn test_kdtree_vs_flat_euclidean_5d() { } } } + +#[test] +fn test_serialize_and_deserialize() { + let id1 = Uuid::new_v4(); + let id2 = Uuid::new_v4(); + let id3 = Uuid::new_v4(); + let id4 = Uuid::new_v4(); + + let vectors = vec![ + make_vector_with_id(id1, vec![1.0, 2.0, 3.0]), + make_vector_with_id(id2, vec![4.0, 5.0, 6.0]), + make_vector_with_id(id3, vec![7.0, 8.0, 9.0]), + ]; + let mut tree_before = KDTree::build(vectors).unwrap(); + tree_before + .insert(make_vector_with_id(id4, vec![10.0, 11.0, 12.0])) + .unwrap(); + tree_before.delete(id1).unwrap(); + + let serialized_meta = tree_before.serialize_metadata().unwrap(); + let serialized_topo = tree_before.serialize_topology().unwrap(); + + let tree = KDTree::deserialize(serialized_meta, serialized_topo).unwrap(); + + assert!(tree.root.is_some()); + assert_eq!(tree.dim, 3); + assert_eq!(tree.total_nodes, 4); + assert!(!tree.point_ids.contains(&id1)); + assert!(tree.point_ids.contains(&id2)); + assert!(tree.point_ids.contains(&id3)); + assert!(tree.point_ids.contains(&id3)); +} diff --git a/crates/index/src/lib.rs b/crates/index/src/lib.rs index 27f8555..8e3a857 100644 --- a/crates/index/src/lib.rs +++ b/crates/index/src/lib.rs @@ -66,6 +66,10 @@ pub enum IndexType { HNSW, } -pub trait SerializableIndexer { - fn serialize_topology(&self) -> Vec; +pub trait SerializableIndexer: VectorIndex { + fn serialize_topology(&self) -> Result, DbError>; + fn serialize_metadata(&self) -> Result, DbError>; + fn magic_bytes(&self) -> [u8; 4]; + + // fn deserialize(metadata: Vec, topology: Vec) -> Result, DbError>; } diff --git a/crates/snapshot/Cargo.toml b/crates/snapshot/Cargo.toml new file mode 100644 index 0000000..97b19e5 --- /dev/null +++ b/crates/snapshot/Cargo.toml @@ -0,0 +1,20 @@ +[package] +name = "snapshot" +version.workspace = true +edition.workspace = true +license.workspace = true + +[dependencies] +semver = "1.0.27" +defs.workspace = true +index.workspace = true +storage.workspace = true +tempfile.workspace = true +uuid.workspace = true +sha2 = "0.10.9" +flate2 = "1.1.5" +chrono.workspace = true +tar = "0.4.44" +serde.workspace = true +serde_json.workspace = true +data-encoding = "2.9.0" diff --git a/crates/snapshot/src/lib.rs b/crates/snapshot/src/lib.rs new file mode 100644 index 0000000..14a26fa --- /dev/null +++ b/crates/snapshot/src/lib.rs @@ -0,0 +1,89 @@ +pub mod types; +mod util; + +use crate::types::{Snapshot, SnapshotManifest}; +use chrono::{DateTime, Local}; +use defs::DbError; +use index::SerializableIndexer; +use semver::Version; +use std::{path::PathBuf, time::SystemTime}; +use storage::StorageEngine; +use uuid::Uuid; + +const SNAPSHOT_PARSER_VER: Version = Version::new(0, 1, 0); + +impl Snapshot { + pub fn create( + index: &impl SerializableIndexer, + _storage: &impl StorageEngine, + path: PathBuf, + ) -> Result { + let id = Uuid::new_v4(); + let date = SystemTime::now(); + + if !path.is_dir() { + return Err(DbError::SnapshotError(format!( + "Invalid path: {}", + path.display() + ))); + } + + let index_metadata_b = index.serialize_metadata()?; + let index_topology_b = index.serialize_topology()?; + + let magic_b = index.magic_bytes(); + + // save index snapshots + let metadata_path = Self::save_metadata(&path, id, &index_metadata_b, &magic_b)?; + let topology_path = Self::save_topology(&path, id, &index_topology_b, &magic_b)?; + + // save storage checkpoint + let storage_checkpoint_path = PathBuf::default(); + + // take checksums + let index_metadata_checksum = util::sha256_digest(&metadata_path) + .map_err(|e| DbError::SnapshotError(e.to_string()))?; + let index_topo_checksum = util::sha256_digest(&topology_path) + .map_err(|e| DbError::SnapshotError(e.to_string()))?; + let storage_checkpoint_checksum = String::new(); // TODO: do this + + let dt_now_local: DateTime = date.into(); + + // create manifest file + let manifest = SnapshotManifest { + id, + date: dt_now_local.timestamp(), + sem_ver: SNAPSHOT_PARSER_VER.to_string(), + index_metadata_checksum, + index_topo_checksum, + storage_checkpoint_checksum, + }; + + Self::save_manifest(&path, &manifest).map_err(|e| DbError::SnapshotError(e.to_string()))?; + + let tar_filename = format!( + "{}-{}-{}.tar", + dt_now_local.to_rfc3339(), + id, + SNAPSHOT_PARSER_VER + ); + let tar_gz_path = path.join(tar_filename); + + Self::compress_archive( + &tar_gz_path, + &[&metadata_path, &topology_path, &storage_checkpoint_path], + ) + .map_err(|e| DbError::SnapshotError(e.to_string()))?; + + Ok(Snapshot { + id, + date, + path: tar_gz_path, + sem_ver: SNAPSHOT_PARSER_VER, + }) + } + + // fn open(path : PathBuf) -> Result; + + // fn load(&self) -> Result((Box, Box)) +} diff --git a/crates/snapshot/src/types.rs b/crates/snapshot/src/types.rs new file mode 100644 index 0000000..a432f12 --- /dev/null +++ b/crates/snapshot/src/types.rs @@ -0,0 +1,27 @@ +use semver::Version; +use serde::{Deserialize, Serialize}; +use std::{path::PathBuf, time::SystemTime}; +use uuid::Uuid; + +pub struct Snapshot { + pub id: Uuid, + pub date: SystemTime, + pub path: PathBuf, + pub sem_ver: Version, +} + +type UnixTimestamp = i64; + +#[derive(Serialize, Deserialize)] +pub struct SnapshotManifest { + pub id: Uuid, + pub date: UnixTimestamp, + pub sem_ver: String, + pub index_metadata_checksum: String, + pub index_topo_checksum: String, + pub storage_checkpoint_checksum: String, +} + +// TODO: NOTES +// manifest should have checksums of topo , metadata , storage, parser version, date created, snapshot metadata: id, date, sem_ver +// shoudl decode filename only when SnapshotEngine is being used otherwise it is pretty much useless diff --git a/crates/snapshot/src/util.rs b/crates/snapshot/src/util.rs new file mode 100644 index 0000000..6f2b485 --- /dev/null +++ b/crates/snapshot/src/util.rs @@ -0,0 +1,117 @@ +use crate::types::{Snapshot, SnapshotManifest}; +use data_encoding::HEXLOWER; +use sha2::{Digest, Sha256}; +use std::fs::File; +use std::io::{BufReader, Error, Read}; +use std::path::PathBuf; + +use defs::DbError; +use flate2::{Compression, read::GzEncoder}; +use std::{ + io::{BufWriter, Write}, + path::Path, +}; +use tar::Builder; +use uuid::Uuid; + +#[inline] +fn metadata_file_name(id: &Uuid) -> String { + format!("{}-index-meta.bin", id) +} + +#[inline] +fn topology_file_name(id: &Uuid) -> String { + format!("{}-index-topo.bin", id) +} + +// sauce: https://stackoverflow.com/questions/69787906/how-to-hash-a-binary-file-in-rust +pub fn sha256_digest(path: &PathBuf) -> Result { + let input = File::open(path)?; + let mut reader = BufReader::new(input); + + let digest = { + let mut hasher = Sha256::new(); + let mut buffer = [0; 1024]; + loop { + let count = reader.read(&mut buffer)?; + if count == 0 { + break; + } + hasher.update(&buffer[..count]); + } + hasher.finalize() + }; + Ok(HEXLOWER.encode(digest.as_ref())) +} + +impl Snapshot { + pub fn save_metadata( + path: &Path, + uuid: Uuid, + bytes: &[u8], + magic: &[u8; 4], + ) -> Result { + let file_name = metadata_file_name(&uuid); + let metadata_file_path = path.join(file_name); + + let mut file = std::fs::File::create(metadata_file_path.clone()).map_err(|e| { + DbError::SnapshotError(format!("Could not create metadata file: {}", e)) + })?; + + file.write_all(magic) + .map_err(|e| DbError::SnapshotError(format!("Could not write metadata file: {}", e)))?; + file.write_all(&bytes.len().to_le_bytes()) + .map_err(|e| DbError::SnapshotError(format!("Could not write metadata file: {}", e)))?; + file.write_all(bytes) + .map_err(|e| DbError::SnapshotError(format!("Could not write metadata file: {}", e)))?; + + Ok(metadata_file_path) + } + + pub fn save_topology( + path: &Path, + uuid: Uuid, + bytes: &[u8], + magic: &[u8; 4], + ) -> Result { + let file_name = topology_file_name(&uuid); + let topology_file_path = path.join(file_name); + + let mut file = std::fs::File::create(topology_file_path.clone()).map_err(|e| { + DbError::SnapshotError(format!("Could not create topology file: {}", e)) + })?; + + file.write_all(magic) + .map_err(|e| DbError::SnapshotError(format!("Could not write topology file: {}", e)))?; + file.write_all(&bytes.len().to_le_bytes()) + .map_err(|e| DbError::SnapshotError(format!("Could not write topology file: {}", e)))?; + file.write_all(bytes) + .map_err(|e| DbError::SnapshotError(format!("Could not write topology file: {}", e)))?; + + Ok(topology_file_path) + } + + pub fn save_manifest(path: &Path, manifest: &SnapshotManifest) -> Result<(), Error> { + let manifest_file = path.join("manifest.json"); + + let file = std::fs::File::create(manifest_file.clone())?; + let mut writer = BufWriter::new(file); + serde_json::to_writer(&mut writer, manifest)?; + writer.flush()?; + + Ok(()) + } + + pub fn compress_archive(path: &Path, files: &[&Path]) -> Result<(), Error> { + let tar_gz = File::create(path)?; + let enc = GzEncoder::new(tar_gz, Compression::default()); + let mut tar = Builder::new(enc); + + for file in files { + tar.append_path(file)?; + } + + tar.into_inner()?; + Ok(()) + } +} From 6097b1977eaec633ed43de297e97ab589ed4b23f Mon Sep 17 00:00:00 2001 From: Tanmay Arya Date: Fri, 2 Jan 2026 07:05:36 +0530 Subject: [PATCH 10/25] implement checkpointing for storage engine and add unit tests --- Cargo.lock | 3 + checkpoint/000009.sst | Bin 1137 -> 0 bytes checkpoint/CURRENT | 1 - checkpoint/MANIFEST-000005 | Bin 240 -> 0 bytes checkpoint/OPTIONS-000007 | 199 -------------------------------- crates/api/Cargo.toml | 1 + crates/api/src/lib.rs | 56 +++++++-- crates/defs/src/error.rs | 1 + crates/index/src/lib.rs | 4 +- crates/snapshot/src/lib.rs | 38 ++++-- crates/snapshot/src/types.rs | 4 - crates/snapshot/src/util.rs | 16 +-- crates/storage/Cargo.toml | 2 + crates/storage/src/in_memory.rs | 7 ++ crates/storage/src/lib.rs | 5 +- crates/storage/src/rocks_db.rs | 152 ++++++++++++++++++------ 16 files changed, 216 insertions(+), 273 deletions(-) delete mode 100644 checkpoint/000009.sst delete mode 100644 checkpoint/CURRENT delete mode 100644 checkpoint/MANIFEST-000005 delete mode 100644 checkpoint/OPTIONS-000007 diff --git a/Cargo.lock b/Cargo.lock index 7b9770d..19f0378 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -53,6 +53,7 @@ version = "0.1.0" dependencies = [ "defs", "index", + "snapshot", "storage", "tempfile", "uuid", @@ -2089,7 +2090,9 @@ version = "0.1.0" dependencies = [ "bincode", "defs", + "flate2", "rocksdb", + "tar", "tempfile", "uuid", ] diff --git a/checkpoint/000009.sst b/checkpoint/000009.sst deleted file mode 100644 index 02a4aa1e3e0109ac593d029d3b076af14f964a60..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1137 zcma)5&rcIU6yA|Sw@?aL#Ykd;2{+kEOMkIJAPNVSC>~4>+18>b-OJ9rH{bi-kMCLYD~p9%p1e(S;`)S^^rZgBy%15G-sKX7Z@L9qFg{pnGgC+LOqScpQYdK?26Rm{Dswl@3V& z6dEMvYSf+yT0{^P2@X5JHUH(Vf;N+|7X9{Q)8hi9WKnn{GS>bjZ$Cx#sHL=BwsnqC zyY4Y1mz`pHX*P+rTw17AYfH0IX$EL^mkUd?0qN8!^Htr`JknTprJ{9X)~{H2Y=_WP zPyYtRjRuJ0H?f3zf=@yw0YEGlmu55hIRQlJ=3reDqYx^lx?as)@O`@Ilqwb1Dbpf# z+)|<8R7>ST#fNf@mMT-@BioTw1I&(2HkMP9r(~}O1IK1%{rb(?!j$kVA zZf)k$!%>J83*hC)FCRwi45CC4EZHvXX2uY3PXpB&NW1 zJ`nm51&Zt(3P3agM=cN}q|(X{rL`8qnDdqr_5=#!04F6RvXm&BM!J=Pfj;I?Pl5Hm z#mY_~9i<75W`RT(mOU{XajJJDk>;Q=nLHA#$wI=7Tmfqu4@Aa3o$hNJI(nGCP6Zpa z_VXE?I$;u3U94rz!MNp}yLMKywv+g#-1+Qizg%-OOWWJcbME~i>*Qd~S|SOh3)x3U gBZF6uPgV9(&+PNN1!MZhH>~|XKY!nQbLr^eFTw#^3;+NC diff --git a/checkpoint/CURRENT b/checkpoint/CURRENT deleted file mode 100644 index aa5bb8e..0000000 --- a/checkpoint/CURRENT +++ /dev/null @@ -1 +0,0 @@ -MANIFEST-000005 diff --git a/checkpoint/MANIFEST-000005 b/checkpoint/MANIFEST-000005 deleted file mode 100644 index 2d77a89634adc64ecf9e462148fd262483d0b025..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 240 zcmZS8)^KKEU<~`Yer`cxQDRAc(HCZ(C>91r zCI%LUKRRkZ*%%l(8JO8v7$$#gXqy2P=KylKSQyh8I6rdeSQr?Zo26J->KY|kB Result { + let index = self.index.read().map_err(|_| DbError::LockError)?; + let storage = self.storage.as_ref(); + + let snapshot = Snapshot::create(&*index, storage, path.to_path_buf())?; + Ok(snapshot) + } + + pub fn restore_snapshot(&self, _path: &Path) -> Result<(), DbError> { + Ok(()) + } } #[derive(Debug)] @@ -168,10 +181,10 @@ mod tests { use super::*; use defs::ContentType; - use tempfile::tempdir; + use tempfile::{TempDir, tempdir}; // Helper function to create a test database - fn create_test_db() -> VectorDb { + fn create_test_db() -> (VectorDb, TempDir) { let temp_dir = tempdir().unwrap(); let config = DbConfig { storage_type: StorageType::RocksDb, @@ -179,12 +192,12 @@ mod tests { data_path: temp_dir.path().to_path_buf(), dimension: 3, }; - init_api(config).unwrap() + (init_api(config).unwrap(), temp_dir) } #[test] fn test_insert_and_get() { - let db = create_test_db(); + let (db, _temp_dir) = create_test_db(); let vector = vec![1.0, 2.0, 3.0]; let payload = Payload { content_type: ContentType::Text, @@ -209,7 +222,7 @@ mod tests { #[test] fn test_dimension_mismatch() { - let db = create_test_db(); + let (db, _temp_dir) = create_test_db(); let v1 = vec![1.0, 2.0, 3.0]; let v2 = vec![1.0, 2.0]; let payload = defs::Payload { @@ -228,7 +241,7 @@ mod tests { #[test] fn test_delete() { - let db = create_test_db(); + let (db, _temp_dir) = create_test_db(); let vector = vec![1.0, 2.0, 3.0]; let payload = Payload { content_type: ContentType::Text, @@ -251,7 +264,7 @@ mod tests { #[test] fn test_search() { - let db = create_test_db(); + let (db, _temp_dir) = create_test_db(); // Insert some points let vectors = vec![ @@ -280,7 +293,7 @@ mod tests { #[test] fn test_search_limit() { - let db = create_test_db(); + let (db, _temp_dir) = create_test_db(); // Insert 5 points let mut ids = Vec::new(); @@ -307,7 +320,7 @@ mod tests { #[test] fn test_empty_database() { - let db = create_test_db(); + let (db, _temp_dir) = create_test_db(); // Get non-existent point assert!(db.get(Uuid::new_v4()).unwrap().is_none()); @@ -319,7 +332,7 @@ mod tests { #[test] fn test_list_vectors() { - let db = create_test_db(); + let (db, _temp_dir) = create_test_db(); // insert some points let mut ids = Vec::new(); for i in 0..10 { @@ -350,7 +363,7 @@ mod tests { #[test] fn test_build_index() { - let db = create_test_db(); + let (db, _temp_dir) = create_test_db(); // insert some points for i in 0..10 { @@ -370,4 +383,23 @@ mod tests { let inserted = db.build_index().unwrap(); assert_eq!(inserted, 10); } + + #[test] + fn test_create_snapshot() { + let (db, _temp_dir) = create_test_db(); + + assert!( + db.insert( + vec![0.0, 1.0, 2.0], + Payload { + content_type: ContentType::Text, + content: format!("Test content {}", 0), + }, + ) + .is_ok() + ); + + let temp_snapshot_dir = tempdir().unwrap(); + assert!(db.create_snapshot(temp_snapshot_dir.path()).is_ok()); + } } diff --git a/crates/defs/src/error.rs b/crates/defs/src/error.rs index b8ba5fa..251a909 100644 --- a/crates/defs/src/error.rs +++ b/crates/defs/src/error.rs @@ -11,6 +11,7 @@ pub enum DbError { UnsupportedSimilarity, DimensionMismatch, SnapshotError(String), + StorageCheckpointError(String), } #[derive(Debug)] diff --git a/crates/index/src/lib.rs b/crates/index/src/lib.rs index 8e3a857..2d0bdc7 100644 --- a/crates/index/src/lib.rs +++ b/crates/index/src/lib.rs @@ -3,7 +3,7 @@ use defs::{DbError, DenseVector, IndexedVector, PointId, Similarity}; pub mod flat; pub mod kd_tree; -pub trait VectorIndex: Send + Sync { +pub trait VectorIndex: Send + Sync + SerializableIndexer { fn insert(&mut self, vector: IndexedVector) -> Result<(), DbError>; // Returns true if point id existed and is deleted, else returns false @@ -66,7 +66,7 @@ pub enum IndexType { HNSW, } -pub trait SerializableIndexer: VectorIndex { +pub trait SerializableIndexer { fn serialize_topology(&self) -> Result, DbError>; fn serialize_metadata(&self) -> Result, DbError>; fn magic_bytes(&self) -> [u8; 4]; diff --git a/crates/snapshot/src/lib.rs b/crates/snapshot/src/lib.rs index 14a26fa..3fec845 100644 --- a/crates/snapshot/src/lib.rs +++ b/crates/snapshot/src/lib.rs @@ -4,18 +4,21 @@ mod util; use crate::types::{Snapshot, SnapshotManifest}; use chrono::{DateTime, Local}; use defs::DbError; -use index::SerializableIndexer; +use index::VectorIndex; use semver::Version; use std::{path::PathBuf, time::SystemTime}; use storage::StorageEngine; +use tempfile::tempdir; use uuid::Uuid; const SNAPSHOT_PARSER_VER: Version = Version::new(0, 1, 0); +// TODO: implement snapshot engine that runs in its own thread and wakes up in regular intervals + impl Snapshot { pub fn create( - index: &impl SerializableIndexer, - _storage: &impl StorageEngine, + index: &dyn VectorIndex, + storage: &dyn StorageEngine, path: PathBuf, ) -> Result { let id = Uuid::new_v4(); @@ -28,24 +31,28 @@ impl Snapshot { ))); } + let temp_dir = tempdir().map_err(|e| DbError::SnapshotError(e.to_string()))?; + let index_metadata_b = index.serialize_metadata()?; let index_topology_b = index.serialize_topology()?; let magic_b = index.magic_bytes(); // save index snapshots - let metadata_path = Self::save_metadata(&path, id, &index_metadata_b, &magic_b)?; - let topology_path = Self::save_topology(&path, id, &index_topology_b, &magic_b)?; + let metadata_path = Self::save_metadata(temp_dir.path(), id, &index_metadata_b, &magic_b)?; + let topology_path = Self::save_topology(temp_dir.path(), id, &index_topology_b, &magic_b)?; // save storage checkpoint - let storage_checkpoint_path = PathBuf::default(); + let storage_checkpoint_path = temp_dir.path().join("storage-checkpoint.tar.gz"); + storage.checkpoint(&storage_checkpoint_path)?; // take checksums let index_metadata_checksum = util::sha256_digest(&metadata_path) .map_err(|e| DbError::SnapshotError(e.to_string()))?; let index_topo_checksum = util::sha256_digest(&topology_path) .map_err(|e| DbError::SnapshotError(e.to_string()))?; - let storage_checkpoint_checksum = String::new(); // TODO: do this + let storage_checkpoint_checksum = util::sha256_digest(&storage_checkpoint_path) + .map_err(|e| DbError::SnapshotError(e.to_string()))?; let dt_now_local: DateTime = date.into(); @@ -59,19 +66,26 @@ impl Snapshot { storage_checkpoint_checksum, }; - Self::save_manifest(&path, &manifest).map_err(|e| DbError::SnapshotError(e.to_string()))?; + let manifest_path = Self::save_manifest(temp_dir.path(), &manifest) + .map_err(|e| DbError::SnapshotError(e.to_string()))?; let tar_filename = format!( - "{}-{}-{}.tar", - dt_now_local.to_rfc3339(), - id, + "{}-{}-{}.tar.gz", + dt_now_local.to_rfc3339_opts(chrono::SecondsFormat::Secs, true), + &(id.to_string()[..5]), SNAPSHOT_PARSER_VER ); let tar_gz_path = path.join(tar_filename); Self::compress_archive( &tar_gz_path, - &[&metadata_path, &topology_path, &storage_checkpoint_path], + &[ + &metadata_path, + &topology_path, + &storage_checkpoint_path, + &manifest_path, + ], + temp_dir.path(), ) .map_err(|e| DbError::SnapshotError(e.to_string()))?; diff --git a/crates/snapshot/src/types.rs b/crates/snapshot/src/types.rs index a432f12..17e092c 100644 --- a/crates/snapshot/src/types.rs +++ b/crates/snapshot/src/types.rs @@ -21,7 +21,3 @@ pub struct SnapshotManifest { pub index_topo_checksum: String, pub storage_checkpoint_checksum: String, } - -// TODO: NOTES -// manifest should have checksums of topo , metadata , storage, parser version, date created, snapshot metadata: id, date, sem_ver -// shoudl decode filename only when SnapshotEngine is being used otherwise it is pretty much useless diff --git a/crates/snapshot/src/util.rs b/crates/snapshot/src/util.rs index 6f2b485..9458d78 100644 --- a/crates/snapshot/src/util.rs +++ b/crates/snapshot/src/util.rs @@ -24,7 +24,7 @@ fn topology_file_name(id: &Uuid) -> String { format!("{}-index-topo.bin", id) } -// sauce: https://stackoverflow.com/questions/69787906/how-to-hash-a-binary-file-in-rust +// source: https://stackoverflow.com/questions/69787906/how-to-hash-a-binary-file-in-rust pub fn sha256_digest(path: &PathBuf) -> Result { let input = File::open(path)?; let mut reader = BufReader::new(input); @@ -91,24 +91,26 @@ impl Snapshot { Ok(topology_file_path) } - pub fn save_manifest(path: &Path, manifest: &SnapshotManifest) -> Result<(), Error> { - let manifest_file = path.join("manifest.json"); + pub fn save_manifest(path: &Path, manifest: &SnapshotManifest) -> Result { + let manifest_path = path.join("manifest.json"); - let file = std::fs::File::create(manifest_file.clone())?; + let file = std::fs::File::create(manifest_path.clone())?; let mut writer = BufWriter::new(file); serde_json::to_writer(&mut writer, manifest)?; writer.flush()?; - Ok(()) + Ok(manifest_path) } - pub fn compress_archive(path: &Path, files: &[&Path]) -> Result<(), Error> { + pub fn compress_archive(path: &Path, files: &[&Path], base_dir: &Path) -> Result<(), Error> { let tar_gz = File::create(path)?; let enc = GzEncoder::new(tar_gz, Compression::default()); let mut tar = Builder::new(enc); for file in files { - tar.append_path(file)?; + let rel_path = file.strip_prefix(base_dir).unwrap_or(file); + let mut f = File::open(file)?; + tar.append_file(rel_path, &mut f)?; } tar.into_inner()?; diff --git a/crates/storage/Cargo.toml b/crates/storage/Cargo.toml index c786373..5cd90e1 100644 --- a/crates/storage/Cargo.toml +++ b/crates/storage/Cargo.toml @@ -9,6 +9,8 @@ license.workspace = true [dependencies] bincode.workspace = true defs.workspace = true +flate2 = "1.1.5" rocksdb.workspace = true +tar = "0.4.44" tempfile.workspace = true uuid.workspace = true diff --git a/crates/storage/src/in_memory.rs b/crates/storage/src/in_memory.rs index 5190082..5f384a1 100644 --- a/crates/storage/src/in_memory.rs +++ b/crates/storage/src/in_memory.rs @@ -1,5 +1,6 @@ use crate::{StorageEngine, VectorPage}; use defs::{DbError, DenseVector, Payload, PointId}; +use std::path::Path; pub struct MemoryStorage { // define here how MemoryStorage will be defined @@ -41,4 +42,10 @@ impl StorageEngine for MemoryStorage { fn list_vectors(&self, _offset: PointId, _limit: usize) -> Result, DbError> { Ok(None) } + fn checkpoint(&self, _path: &Path) -> Result<(), DbError> { + Ok(()) + } + fn restore_checkpoint(&mut self, _path: &Path) -> Result<(), DbError> { + Ok(()) + } } diff --git a/crates/storage/src/lib.rs b/crates/storage/src/lib.rs index f7c067e..c418f6e 100644 --- a/crates/storage/src/lib.rs +++ b/crates/storage/src/lib.rs @@ -1,5 +1,5 @@ use defs::{DbError, DenseVector, Payload, PointId}; -use std::path::PathBuf; +use std::path::{Path, PathBuf}; use std::sync::Arc; use crate::rocks_db::RocksDbStorage; @@ -18,6 +18,9 @@ pub trait StorageEngine: Send + Sync { fn delete_point(&self, id: PointId) -> Result<(), DbError>; fn contains_point(&self, id: PointId) -> Result; fn list_vectors(&self, offset: PointId, limit: usize) -> Result, DbError>; + + fn checkpoint(&self, path: &Path) -> Result<(), DbError>; + fn restore_checkpoint(&mut self, path: &Path) -> Result<(), DbError>; } pub mod in_memory; diff --git a/crates/storage/src/rocks_db.rs b/crates/storage/src/rocks_db.rs index f9c80ab..6c4584c 100644 --- a/crates/storage/src/rocks_db.rs +++ b/crates/storage/src/rocks_db.rs @@ -3,8 +3,17 @@ use crate::{StorageEngine, VectorPage}; use bincode::{deserialize, serialize}; use defs::{DbError, DenseVector, Payload, Point, PointId}; +use flate2::{ + Compression, + write::{GzDecoder, GzEncoder}, +}; use rocksdb::{DB, Error, Options}; -use std::path::PathBuf; +use std::{ + fs::File, + path::{Path, PathBuf}, +}; +use tar::{Archive, Builder}; +use tempfile::tempdir; //TODO: Implement RocksDbStorage with necessary fields and implementations //TODO: Optimize the basic design @@ -21,6 +30,16 @@ pub enum RocksDBStorageError { impl RocksDbStorage { // Creates new db or switches to existing db pub fn new(path: impl Into) -> Result { + let converted_path = path.into(); + let db = Self::initialize_db(&converted_path)?; + + Ok(RocksDbStorage { + path: converted_path, + db, + }) + } + + fn initialize_db(path: &Path) -> Result { // Initialize a db at the given location let mut options = Options::default(); @@ -30,15 +49,8 @@ impl RocksDbStorage { options.create_if_missing(true); - let converted_path = path.into(); - - let db = DB::open(&options, converted_path.clone()) - .map_err(|e| DbError::StorageError(e.into_string()))?; - - Ok(RocksDbStorage { - path: converted_path, - db, - }) + let db = DB::open(&options, path).map_err(|e| DbError::StorageError(e.into_string()))?; + Ok(db) } pub fn get_current_path(&self) -> PathBuf { @@ -152,6 +164,69 @@ impl StorageEngine for RocksDbStorage { } Ok(Some((result, last_id))) } + + fn checkpoint(&self, path: &Path) -> Result<(), DbError> { + // flush db first for durability + self.db.flush().map_err(|e| { + DbError::StorageCheckpointError(format!( + "Failed to flush database: {}", + e.into_string() + )) + })?; + + let temp_dir_parent = tempdir().unwrap(); + let temp_dir = temp_dir_parent.path().join("checkpoint"); + + let checkpoint = rocksdb::checkpoint::Checkpoint::new(&self.db) + .map_err(|e| DbError::StorageCheckpointError(e.into_string()))?; + checkpoint + .create_checkpoint(temp_dir.clone()) + .map_err(|e| DbError::StorageCheckpointError(e.into_string()))?; + + // compress the checkpoint into an archive + let tar_gz = File::create(path).map_err(|e| { + DbError::StorageCheckpointError(format!("Couldn't compress rocksdb checkpoint: {}", e)) + })?; + let enc = GzEncoder::new(tar_gz, Compression::default()); + let mut tar = Builder::new(enc); + tar.append_dir_all("", temp_dir.clone()).map_err(|e| { + DbError::StorageCheckpointError(format!("Couldn't compress rocksdb checkpoint: {}", e)) + })?; + + tar.into_inner().map_err(|e| { + DbError::StorageCheckpointError(format!("Couldn't compress rocksdb checkpoint: {}", e)) + })?; + + Ok(()) + } + + fn restore_checkpoint(&mut self, path: &Path) -> Result<(), DbError> { + let tar_gz = File::open(path).map_err(|e| { + DbError::StorageCheckpointError(format!("Couldn't open rocksdb checkpoint: {}", e)) + })?; + let dec = GzDecoder::new(tar_gz); + let mut tar = Archive::new(dec); + + // remove existing stuff in data path + std::fs::remove_dir_all(&self.path).map_err(|e| { + DbError::StorageCheckpointError(format!("Couldn't remove existing data: {}", e)) + })?; + + // create new data path + std::fs::create_dir_all(&self.path).map_err(|e| { + DbError::StorageCheckpointError(format!("Couldn't create data path: {}", e)) + })?; + + tar.unpack(&self.path).map_err(|e| { + DbError::StorageCheckpointError(format!("Couldn't unpack rocksdb checkpoint: {}", e)) + })?; + + // reinitialize db + self.db.cancel_all_background_work(true); + self.db = Self::initialize_db(&self.path)?; + + Ok(()) + } } #[cfg(test)] @@ -160,26 +235,24 @@ mod tests { use defs::ContentType; use uuid::Uuid; - use tempfile::tempdir; + use tempfile::{TempDir, tempdir}; - fn create_test_db() -> (RocksDbStorage, String) { + fn create_test_db() -> (RocksDbStorage, TempDir) { let temp_dir = tempdir().unwrap(); - let temp_dir_path = temp_dir.path().to_str().unwrap().to_string(); - let db = RocksDbStorage::new(temp_dir_path.clone()).expect("Failed to create RocksDB"); - (db, temp_dir_path) + let db = RocksDbStorage::new(temp_dir.path()).expect("Failed to create RocksDB"); + (db, temp_dir) } #[test] fn test_new_rocksdb_storage() { - let (db, path) = create_test_db(); - assert_eq!(db.get_current_path(), PathBuf::from(path.clone())); - std::fs::remove_dir_all(path).unwrap_or_default(); + let (db, temp_dir) = create_test_db(); + assert_eq!(db.get_current_path(), temp_dir.path()); } #[test] fn test_insert_and_get_vector() { - let (db, path) = create_test_db(); + let (db, _temp_dir) = create_test_db(); let id = Uuid::new_v4(); let vector = Some(vec![0.1, 0.2, 0.3]); let payload = Some(Payload { @@ -190,13 +263,11 @@ mod tests { assert!(db.insert_point(id, vector.clone(), payload).is_ok()); let result = db.get_vector(id).unwrap(); assert_eq!(result, vector); - - std::fs::remove_dir_all(path).unwrap_or_default(); } #[test] fn test_insert_and_get_payload() { - let (db, path) = create_test_db(); + let (db, _temp_dir) = create_test_db(); let id = Uuid::new_v4(); let payload = Some(Payload { content_type: ContentType::Text, @@ -212,13 +283,11 @@ mod tests { content: "Test".to_string(), }); assert_eq!(result, expected); - - std::fs::remove_dir_all(path).unwrap_or_default(); } #[test] fn test_contains_point() { - let (db, path) = create_test_db(); + let (db, _temp_dir) = create_test_db(); let id = Uuid::new_v4(); let payload = Some(Payload { content_type: ContentType::Text, @@ -231,13 +300,11 @@ mod tests { db.insert_point(id, vector, payload).unwrap(); assert!(db.contains_point(id).unwrap()); - - std::fs::remove_dir_all(path).unwrap_or_default(); } #[test] fn test_delete_point() { - let (db, path) = create_test_db(); + let (db, _temp_dir) = create_test_db(); let id = Uuid::new_v4(); let payload = Some(Payload { content_type: ContentType::Text, @@ -255,27 +322,42 @@ mod tests { assert!(!db.contains_point(id).unwrap()); assert_eq!(db.get_vector(id).unwrap(), None); assert_eq!(db.get_payload(id).unwrap(), None); - - std::fs::remove_dir_all(path).unwrap_or_default(); } #[test] fn test_get_nonexistent_vector() { - let (db, path) = create_test_db(); + let (db, _temp_dir) = create_test_db(); let id = Uuid::new_v4(); assert_eq!(db.get_vector(id).unwrap(), None); - - std::fs::remove_dir_all(path).unwrap_or_default(); } #[test] fn test_get_nonexistent_payload() { - let (db, path) = create_test_db(); + let (db, _temp_dir) = create_test_db(); let id = Uuid::new_v4(); assert_eq!(db.get_payload(id).unwrap(), None); + } + + #[test] + fn test_create_and_load_checkpoint() { + let (mut db, temp_dir) = create_test_db(); + let checkpoint_path = temp_dir.path().join("temp-checkpoint.tar.gz"); - std::fs::remove_dir_all(path).unwrap_or_default(); + let id = Uuid::new_v4(); + let vector = Some(vec![0.1, 0.2, 0.3]); + let payload = Some(Payload { + content_type: ContentType::Text, + content: "Test".to_string(), + }); + + assert!(db.insert_point(id, vector.clone(), payload).is_ok()); + + db.checkpoint(&checkpoint_path) + .expect("Failed to create checkpoint"); + db.restore_checkpoint(&checkpoint_path).unwrap(); + + assert!(db.contains_point(id).unwrap()); } } From 026df748b591fb42babddc3b68dc586742be79ef Mon Sep 17 00:00:00 2001 From: Tanmay Arya Date: Fri, 2 Jan 2026 13:42:27 +0530 Subject: [PATCH 11/25] fix(rocksdb): restore from checkpoint function; unit test for the same --- crates/defs/src/error.rs | 1 + crates/storage/src/rocks_db.rs | 112 +++++++++++++++++++++++---------- 2 files changed, 79 insertions(+), 34 deletions(-) diff --git a/crates/defs/src/error.rs b/crates/defs/src/error.rs index 251a909..e3aea98 100644 --- a/crates/defs/src/error.rs +++ b/crates/defs/src/error.rs @@ -11,6 +11,7 @@ pub enum DbError { UnsupportedSimilarity, DimensionMismatch, SnapshotError(String), + StorageInitializationError, StorageCheckpointError(String), } diff --git a/crates/storage/src/rocks_db.rs b/crates/storage/src/rocks_db.rs index 6c4584c..018ecba 100644 --- a/crates/storage/src/rocks_db.rs +++ b/crates/storage/src/rocks_db.rs @@ -3,10 +3,7 @@ use crate::{StorageEngine, VectorPage}; use bincode::{deserialize, serialize}; use defs::{DbError, DenseVector, Payload, Point, PointId}; -use flate2::{ - Compression, - write::{GzDecoder, GzEncoder}, -}; +use flate2::{Compression, read::GzDecoder, write::GzEncoder}; use rocksdb::{DB, Error, Options}; use std::{ fs::File, @@ -19,7 +16,7 @@ use tempfile::tempdir; //TODO: Optimize the basic design pub struct RocksDbStorage { pub path: PathBuf, - pub db: DB, + pub db: Option, } pub enum RocksDBStorageError { @@ -35,7 +32,7 @@ impl RocksDbStorage { Ok(RocksDbStorage { path: converted_path, - db, + db: Some(db), }) } @@ -72,7 +69,12 @@ impl StorageEngine for RocksDbStorage { payload, }; let value = serialize(&point).map_err(|e| DbError::SerializationError(e.to_string()))?; - match self.db.put(key.as_bytes(), value.as_slice()) { + match self + .db + .as_ref() + .ok_or(DbError::StorageInitializationError)? + .put(key.as_bytes(), value.as_slice()) + { Ok(_) => Ok(()), Err(e) => Err(DbError::StorageError(e.into_string())), } @@ -81,9 +83,16 @@ impl StorageEngine for RocksDbStorage { fn contains_point(&self, id: PointId) -> Result { // Efficient lookup inspired from https://github.com/facebook/rocksdb/issues/11586#issuecomment-1890429488 let key = id.to_string(); - if self.db.key_may_exist(key.clone()) { + if self + .db + .as_ref() + .ok_or(DbError::StorageInitializationError)? + .key_may_exist(key.clone()) + { let key_exist = self .db + .as_ref() + .ok_or(DbError::StorageInitializationError)? .get(key) .map_err(|e| DbError::StorageError(e.into_string()))? .is_some(); @@ -96,6 +105,8 @@ impl StorageEngine for RocksDbStorage { fn delete_point(&self, id: PointId) -> Result<(), DbError> { let key = id.to_string(); self.db + .as_ref() + .ok_or(DbError::StorageInitializationError)? .delete(key) .map_err(|e| DbError::StorageError(e.into_string()))?; @@ -106,6 +117,8 @@ impl StorageEngine for RocksDbStorage { let key = id.to_string(); let Some(value_serialized) = self .db + .as_ref() + .ok_or(DbError::StorageInitializationError)? .get(key) .map_err(|e| DbError::StorageError(e.into_string()))? else { @@ -122,6 +135,8 @@ impl StorageEngine for RocksDbStorage { let key = id.to_string(); let Some(value_serialized) = self .db + .as_ref() + .ok_or(DbError::StorageInitializationError)? .get(key) .map_err(|e| DbError::StorageError(e.into_string()))? else { @@ -140,10 +155,14 @@ impl StorageEngine for RocksDbStorage { } let mut result = Vec::with_capacity(limit); - let iter = self.db.iterator(rocksdb::IteratorMode::From( - offset.to_string().as_bytes(), - rocksdb::Direction::Forward, - )); + let iter = self + .db + .as_ref() + .ok_or(DbError::StorageInitializationError)? + .iterator(rocksdb::IteratorMode::From( + offset.to_string().as_bytes(), + rocksdb::Direction::Forward, + )); let mut last_id = offset; for item in iter { @@ -167,17 +186,25 @@ impl StorageEngine for RocksDbStorage { fn checkpoint(&self, path: &Path) -> Result<(), DbError> { // flush db first for durability - self.db.flush().map_err(|e| { - DbError::StorageCheckpointError(format!( - "Failed to flush database: {}", - e.into_string() - )) - })?; + self.db + .as_ref() + .ok_or(DbError::StorageInitializationError)? + .flush() + .map_err(|e| { + DbError::StorageCheckpointError(format!( + "Failed to flush database: {}", + e.into_string() + )) + })?; let temp_dir_parent = tempdir().unwrap(); let temp_dir = temp_dir_parent.path().join("checkpoint"); - let checkpoint = rocksdb::checkpoint::Checkpoint::new(&self.db) + let db_ref = self + .db + .as_ref() + .ok_or(DbError::StorageInitializationError)?; + let checkpoint = rocksdb::checkpoint::Checkpoint::new(db_ref) .map_err(|e| DbError::StorageCheckpointError(e.into_string()))?; checkpoint .create_checkpoint(temp_dir.clone()) @@ -185,16 +212,21 @@ impl StorageEngine for RocksDbStorage { // compress the checkpoint into an archive let tar_gz = File::create(path).map_err(|e| { - DbError::StorageCheckpointError(format!("Couldn't compress rocksdb checkpoint: {}", e)) + DbError::StorageCheckpointError(format!("Couldn't create tar archive file: {}", e)) })?; let enc = GzEncoder::new(tar_gz, Compression::default()); - let mut tar = Builder::new(enc); - tar.append_dir_all("", temp_dir.clone()).map_err(|e| { - DbError::StorageCheckpointError(format!("Couldn't compress rocksdb checkpoint: {}", e)) + let mut archive = Builder::new(enc); + + archive.append_dir_all("", temp_dir).map_err(|e| { + DbError::StorageCheckpointError(format!("Couldn't append directory to archive: {}", e)) })?; - tar.into_inner().map_err(|e| { - DbError::StorageCheckpointError(format!("Couldn't compress rocksdb checkpoint: {}", e)) + let enc = archive.into_inner().map_err(|e| { + DbError::StorageCheckpointError(format!("Couldn't compress tar archive: {}", e)) + })?; + + enc.finish().map_err(|e| { + DbError::StorageCheckpointError(format!("Couldn't compress tar archive: {}", e)) })?; Ok(()) @@ -204,10 +236,16 @@ impl StorageEngine for RocksDbStorage { let tar_gz = File::open(path).map_err(|e| { DbError::StorageCheckpointError(format!("Couldn't open rocksdb checkpoint: {}", e)) })?; - let dec = GzDecoder::new(tar_gz); - let mut tar = Archive::new(dec); + let tar = GzDecoder::new(tar_gz); + let mut archive = Archive::new(tar); + archive.set_unpack_xattrs(false); + archive.set_preserve_permissions(false); // remove existing stuff in data path + self.db + .as_ref() + .ok_or(DbError::StorageInitializationError)? + .cancel_all_background_work(true); std::fs::remove_dir_all(&self.path).map_err(|e| { DbError::StorageCheckpointError(format!("Couldn't remove existing data: {}", e)) })?; @@ -217,13 +255,13 @@ impl StorageEngine for RocksDbStorage { DbError::StorageCheckpointError(format!("Couldn't create data path: {}", e)) })?; - tar.unpack(&self.path).map_err(|e| { - DbError::StorageCheckpointError(format!("Couldn't unpack rocksdb checkpoint: {}", e)) + archive.unpack(&self.path).map_err(|e| { + DbError::StorageCheckpointError(format!("Couldn't unpack tar.gz archive: {}", e)) })?; // reinitialize db - self.db.cancel_all_background_work(true); - self.db = Self::initialize_db(&self.path)?; + self.db = None; + self.db = Some(Self::initialize_db(&self.path)?); Ok(()) } @@ -343,21 +381,27 @@ mod tests { #[test] fn test_create_and_load_checkpoint() { let (mut db, temp_dir) = create_test_db(); + let checkpoint_path = temp_dir.path().join("temp-checkpoint.tar.gz"); + let id1 = Uuid::new_v4(); + let id2 = Uuid::new_v4(); - let id = Uuid::new_v4(); let vector = Some(vec![0.1, 0.2, 0.3]); let payload = Some(Payload { content_type: ContentType::Text, content: "Test".to_string(), }); - assert!(db.insert_point(id, vector.clone(), payload).is_ok()); + assert!(db.insert_point(id1, vector.clone(), payload.clone()).is_ok()); db.checkpoint(&checkpoint_path) .expect("Failed to create checkpoint"); + + assert!(db.insert_point(id2, vector.clone(), payload.clone()).is_ok()); + db.restore_checkpoint(&checkpoint_path).unwrap(); - assert!(db.contains_point(id).unwrap()); + assert!(db.contains_point(id1).unwrap()); + assert!(!db.contains_point(id2).unwrap()); } } From 9ce032ce658a0ceff3f467d689a6688e562aeab5 Mon Sep 17 00:00:00 2001 From: Tanmay Arya Date: Sat, 3 Jan 2026 17:01:07 +0530 Subject: [PATCH 12/25] refactor snapshots and checkpoints --- Cargo.lock | 1 + crates/api/src/lib.rs | 96 +++++++++--- crates/defs/src/error.rs | 2 + crates/defs/src/types.rs | 2 + crates/index/Cargo.toml | 1 + crates/index/src/deserialize.rs | 15 ++ crates/index/src/flat/mod.rs | 4 + crates/index/src/flat/serialize.rs | 45 ++++-- crates/index/src/flat/tests.rs | 7 +- crates/index/src/kd_tree/mod.rs | 4 + crates/index/src/kd_tree/serialize.rs | 110 ++++++++----- crates/index/src/kd_tree/tests.rs | 8 +- crates/index/src/lib.rs | 23 ++- crates/snapshot/src/constants.rs | 7 + crates/snapshot/src/lib.rs | 218 ++++++++++++++++++++------ crates/snapshot/src/manifest.rs | 41 +++++ crates/snapshot/src/metadata.rs | 102 ++++++++++++ crates/snapshot/src/types.rs | 23 --- crates/snapshot/src/util.rs | 186 +++++++++++++--------- crates/storage/src/in_memory.rs | 15 +- crates/storage/src/lib.rs | 32 +++- crates/storage/src/rocks_db.rs | 46 ++++-- 22 files changed, 741 insertions(+), 247 deletions(-) create mode 100644 crates/index/src/deserialize.rs create mode 100644 crates/snapshot/src/constants.rs create mode 100644 crates/snapshot/src/manifest.rs create mode 100644 crates/snapshot/src/metadata.rs delete mode 100644 crates/snapshot/src/types.rs diff --git a/Cargo.lock b/Cargo.lock index 19f0378..8bc65f6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1031,6 +1031,7 @@ dependencies = [ "bincode", "defs", "serde", + "storage", "uuid", ] diff --git a/crates/api/src/lib.rs b/crates/api/src/lib.rs index 8e6b83e..81bd61e 100644 --- a/crates/api/src/lib.rs +++ b/crates/api/src/lib.rs @@ -1,13 +1,14 @@ use defs::{DbError, IndexedVector, Similarity}; use defs::{DenseVector, Payload, Point, PointId}; +use index::kd_tree::index::KDTree; use std::path::{Path, PathBuf}; // use std::sync::atomic::{AtomicU64, Ordering}; use std::sync::{Arc, RwLock}; use index::flat::index::FlatIndex; use index::{IndexType, VectorIndex}; -use snapshot::types::Snapshot; +use snapshot::Snapshot; use storage::rocks_db::RocksDbStorage; use storage::{StorageEngine, StorageType, VectorPage}; @@ -131,17 +132,13 @@ impl VectorDb { Ok(inserted) } - pub fn create_snapshot(&self, path: &Path) -> Result { - let index = self.index.read().map_err(|_| DbError::LockError)?; - let storage = self.storage.as_ref(); - - let snapshot = Snapshot::create(&*index, storage, path.to_path_buf())?; - Ok(snapshot) - } + // pub fn create_snapshot(&self, path: &Path) -> Result { + // let index = self.index.read().map_err(|_| DbError::LockError)?; + // let storage = self.storage.as_ref(); - pub fn restore_snapshot(&self, _path: &Path) -> Result<(), DbError> { - Ok(()) - } + // let snapshot = Snapshot::create(self)?; + // Ok(snapshot) + // } } #[derive(Debug)] @@ -152,6 +149,56 @@ pub struct DbConfig { pub dimension: usize, } +#[derive(Debug)] +pub struct DbRestoreConfig { + pub data_path: PathBuf, + pub snapshot_path: PathBuf, +} + +impl DbRestoreConfig { + pub fn new(data_path: PathBuf, snapshot_path: PathBuf) -> Self { + Self { + data_path, + snapshot_path, + } + } +} + +pub fn create_snapshot(db: &VectorDb, path: &Path) -> Result<(), DbError> { + if !path.is_dir() { + return Err(DbError::SnapshotError(format!( + "Invalid path: {}", + path.display() + ))); + } + + let index_snapshot = db.index.snapshot()?; + // let storage_snapshot = db.storage.snapshot()?; + + // let storage_checkpoint_path = temp_dir.path().join("storage-checkpoint.tar.gz"); + // storage.checkpoint(&storage_checkpoint_path)?; + + + // let snapshot = Snapshot::create(db)?; + Ok(()) +} + +// pub fn restore_from_snapshot(config: &DbRestoreConfig) -> Result { +// // snapshots only support rocksdb +// let mut storage = RocksDbStorage::new(config.data_path.clone())?; + +// // restore the index from the snapshot +// let (index_restored, restored_storage, dimension) = Snapshot::load(&config.snapshot_path, &config.data_path)?; + +// let index: RwLock = index_restored.into(); +// let storage : Arc = restored_storage.into(); + +// // Init the db +// let db = VectorDb::_new(storage, index, dimension); + +// Ok(db) +// } + pub fn init_api(config: DbConfig) -> Result { // Initialize the storage engine let storage = match config.storage_type { @@ -162,7 +209,8 @@ pub fn init_api(config: DbConfig) -> Result { // Initialize the vector index let index: Arc> = match config.index_type { IndexType::Flat => Arc::new(RwLock::new(FlatIndex::new())), - _ => Arc::new(RwLock::new(FlatIndex::new())), + IndexType::KDTree => Arc::new(RwLock::new(KDTree::build_empty(config.dimension))), + _ => Arc::new(RwLock::new(FlatIndex::new())), // TODO: add hnsw here }; // Init the db @@ -385,21 +433,31 @@ mod tests { } #[test] - fn test_create_snapshot() { - let (db, _temp_dir) = create_test_db(); + fn test_create_and_load_snapshot() { + let (old_db, temp_dir) = create_test_db(); - assert!( - db.insert( + let point_id = old_db + .insert( vec![0.0, 1.0, 2.0], Payload { content_type: ContentType::Text, content: format!("Test content {}", 0), }, ) - .is_ok() - ); + .unwrap(); let temp_snapshot_dir = tempdir().unwrap(); - assert!(db.create_snapshot(temp_snapshot_dir.path()).is_ok()); + + let snapshot = old_db.create_snapshot(temp_snapshot_dir.path()).unwrap(); + + let reload_config = DbRestoreConfig { + data_path: temp_dir.path().to_path_buf(), + snapshot_path: snapshot.path, + }; + + std::mem::drop(old_db); + let loaded_db = restore_from_snapshot(&reload_config).unwrap(); + + assert!(loaded_db.get(point_id).is_ok()); } } diff --git a/crates/defs/src/error.rs b/crates/defs/src/error.rs index e3aea98..ee2379b 100644 --- a/crates/defs/src/error.rs +++ b/crates/defs/src/error.rs @@ -13,6 +13,8 @@ pub enum DbError { SnapshotError(String), StorageInitializationError, StorageCheckpointError(String), + InvalidMagicBytes(String), + VectorNotFound(uuid::Uuid), } #[derive(Debug)] diff --git a/crates/defs/src/types.rs b/crates/defs/src/types.rs index ae69f17..faccf92 100644 --- a/crates/defs/src/types.rs +++ b/crates/defs/src/types.rs @@ -51,6 +51,8 @@ pub enum Similarity { Cosine, } +pub type Magic = [u8; 4]; + // Struct which stores the distance between a vector and query vector and implements ordering traits #[derive(Copy, Clone)] pub struct DistanceOrderedVector<'q> { diff --git a/crates/index/Cargo.toml b/crates/index/Cargo.toml index e8abf22..16a38ff 100644 --- a/crates/index/Cargo.toml +++ b/crates/index/Cargo.toml @@ -11,3 +11,4 @@ bincode.workspace = true defs.workspace = true serde.workspace = true uuid.workspace = true +storage.workspace = true diff --git a/crates/index/src/deserialize.rs b/crates/index/src/deserialize.rs new file mode 100644 index 0000000..92793fe --- /dev/null +++ b/crates/index/src/deserialize.rs @@ -0,0 +1,15 @@ +use defs::{DbError}; + +use crate::flat::index::FlatIndex; +use crate::kd_tree::index::KDTree; +use crate::{IndexSnapshot, IndexType, VectorIndex}; + +pub fn deserialize( + snapshot: &IndexSnapshot +) -> Result, DbError> { + match snapshot.index_type { + IndexType::Flat => Ok(Box::new(FlatIndex::deserialize(snapshot)?)), + IndexType::KDTree => Ok(Box::new(KDTree::deserialize(snapshot)?)), + IndexType::HNSW => Ok(Box::new(FlatIndex::deserialize(snapshot)?)), // TODO: change this for hnsw + } +} diff --git a/crates/index/src/flat/mod.rs b/crates/index/src/flat/mod.rs index 2fd3c64..5e3f726 100644 --- a/crates/index/src/flat/mod.rs +++ b/crates/index/src/flat/mod.rs @@ -1,5 +1,9 @@ +use defs::Magic; + pub mod index; mod serialize; #[cfg(test)] mod tests; + +pub const FLAT_MAGIC_BYTES: Magic = [0x00, 0x00, 0x00, 0x01]; diff --git a/crates/index/src/flat/serialize.rs b/crates/index/src/flat/serialize.rs index 191c053..bafa008 100644 --- a/crates/index/src/flat/serialize.rs +++ b/crates/index/src/flat/serialize.rs @@ -1,11 +1,12 @@ -use crate::SerializableIndexer; +use super::FLAT_MAGIC_BYTES; +use crate::{IndexSnapshot, SerializableIndex}; use crate::flat::index::FlatIndex; use defs::{DbError, IndexedVector}; use serde::{Deserialize, Serialize}; use std::io::{Cursor, Read}; use uuid::Uuid; +use crate::IndexType; -const FLAT_MAGIC_BYTES: [u8; 4] = [0x00, 0x00, 0x00, 0x01]; #[derive(Debug, Clone, Serialize, Deserialize)] pub struct FlatIndexMetadata { @@ -14,15 +15,19 @@ pub struct FlatIndexMetadata { impl FlatIndex { pub fn deserialize( - metadata_bytes: Vec, - topology_bytes: Vec, - ) -> Result, DbError> { - let metadata: FlatIndexMetadata = bincode::deserialize(&metadata_bytes).map_err(|e| { + IndexSnapshot { index_type, magic, topology_b, metadata_b }: &IndexSnapshot + ) -> Result { + + if magic != &FLAT_MAGIC_BYTES { + return Err(DbError::SerializationError(format!("Invalid magic bytes"))); + } + + let metadata: FlatIndexMetadata = bincode::deserialize(metadata_b).map_err(|e| { DbError::SerializationError(format!("Failed to deserialize FlatIndex Metadata: {}", e)) })?; let total_points = metadata.total_points; - let mut cursor = Cursor::new(topology_bytes); + let mut cursor = Cursor::new(topology_b); let mut vectors = Vec::new(); for _ in 0..total_points { @@ -40,14 +45,11 @@ impl FlatIndex { }); } - Ok(Box::new(FlatIndex { index: vectors })) + Ok(FlatIndex { index: vectors }) } } -impl SerializableIndexer for FlatIndex { - fn magic_bytes(&self) -> [u8; 4] { - FLAT_MAGIC_BYTES - } +impl SerializableIndex for FlatIndex { fn serialize_topology(&self) -> Result, DbError> { let mut buffer: Vec = Vec::new(); @@ -69,4 +71,23 @@ impl SerializableIndexer for FlatIndex { buffer.extend_from_slice(&metadata_bytes); Ok(buffer) } + + fn populate_vectors(&mut self, storage: &dyn storage::StorageEngine) -> Result<(), DbError> { + for item in &mut self.index { + item.vector = storage.get_vector(item.id)?.ok_or(DbError::VectorNotFound(item.id))?; + } + Ok(()) + } + + fn snapshot(&self) -> Result { + let topology = self.serialize_topology()?; + let metadata = self.serialize_metadata()?; + + Ok(IndexSnapshot { + metadata_b: metadata, + topology_b: topology, + magic: FLAT_MAGIC_BYTES, + index_type: IndexType::Flat, + }) + } } diff --git a/crates/index/src/flat/tests.rs b/crates/index/src/flat/tests.rs index c225445..d2134ff 100644 --- a/crates/index/src/flat/tests.rs +++ b/crates/index/src/flat/tests.rs @@ -1,5 +1,5 @@ use super::index::FlatIndex; -use crate::{SerializableIndexer, VectorIndex}; +use crate::{SerializableIndex, VectorIndex}; use defs::{IndexedVector, Similarity}; use uuid::Uuid; @@ -226,10 +226,9 @@ fn test_serialize_and_deserialize() { index_before.delete(id1).unwrap(); - let serialized_meta = index_before.serialize_metadata().unwrap(); - let serialized_topo = index_before.serialize_topology().unwrap(); + let snapshot = index_before.snapshot().unwrap(); - let idx = FlatIndex::deserialize(serialized_meta, serialized_topo).unwrap(); + let idx = FlatIndex::deserialize(&snapshot).unwrap(); assert_eq!(idx.index.len(), 3); assert!(!idx.index.contains(&v1)); diff --git a/crates/index/src/kd_tree/mod.rs b/crates/index/src/kd_tree/mod.rs index 715bba0..6ff5fb0 100644 --- a/crates/index/src/kd_tree/mod.rs +++ b/crates/index/src/kd_tree/mod.rs @@ -1,6 +1,10 @@ +use defs::Magic; + pub mod index; mod serialize; pub mod types; #[cfg(test)] mod tests; + +pub const KD_TREE_MAGIC_BYTES: Magic = [0x00, 0x00, 0x00, 0x00]; diff --git a/crates/index/src/kd_tree/serialize.rs b/crates/index/src/kd_tree/serialize.rs index 49d3bd3..c5ef025 100644 --- a/crates/index/src/kd_tree/serialize.rs +++ b/crates/index/src/kd_tree/serialize.rs @@ -1,16 +1,16 @@ use std::collections::HashSet; -use std::io::{Cursor, Read}; +use std::io::{Cursor, Read, Write}; +use super::KD_TREE_MAGIC_BYTES; use super::index::KDTree; use super::types::KDTreeNode; -use crate::SerializableIndexer; +use crate::{SerializableIndex, IndexSnapshot}; use bincode; use defs::{DbError, IndexedVector, PointId}; use serde::{Deserialize, Serialize}; +use storage::StorageEngine; use uuid::Uuid; -const KD_TREE_MAGIC_BYTES: [u8; 4] = [0x00, 0x00, 0x00, 0x00]; - #[derive(Serialize, Deserialize)] pub struct KDTreeMetadata { pub dim: usize, @@ -18,14 +18,12 @@ pub struct KDTreeMetadata { pub deleted_count: usize, } -impl SerializableIndexer for KDTree { - fn magic_bytes(&self) -> [u8; 4] { - KD_TREE_MAGIC_BYTES - } +impl SerializableIndex for KDTree { fn serialize_topology(&self) -> Result, DbError> { let mut buffer = Vec::new(); - self.serialize_topology_recursive(&self.root, &mut buffer)?; + let mut cursor = Cursor::new(&mut buffer); + serialize_topology_recursive(&self.root, &mut cursor)?; Ok(buffer) } @@ -42,6 +40,23 @@ impl SerializableIndexer for KDTree { buffer.extend_from_slice(metadata_bytes.as_slice()); Ok(buffer) } + + fn snapshot(&self) -> Result { + let topology_bytes = self.serialize_topology()?; + let metadata_bytes = self.serialize_metadata()?; + Ok(IndexSnapshot { + index_type: crate::IndexType::KDTree, + magic: KD_TREE_MAGIC_BYTES, + topology_b: topology_bytes, + metadata_b: metadata_bytes, + }) + } + + fn populate_vectors(&mut self, storage: &dyn StorageEngine) -> Result<(), DbError> { + populate_vectors_recursive(&mut self.root, storage)?; + Ok(()) + } + } const NODE_MARKER_BYTE: u8 = 1u8; @@ -51,58 +66,75 @@ const DELETED_MASK: u8 = 2u8; impl KDTree { pub fn deserialize( - metadata_bytes: Vec, - topology_bytes: Vec, - ) -> Result, DbError> { + IndexSnapshot { index_type, magic, topology_b, metadata_b }: &IndexSnapshot + ) -> Result { + + if magic != &KD_TREE_MAGIC_BYTES { + return Err(DbError::SerializationError(format!("Invalid magic bytes"))); + } + let metadata: KDTreeMetadata = - bincode::deserialize(metadata_bytes.as_slice()).map_err(|e| { + bincode::deserialize(metadata_b.as_slice()).map_err(|e| { DbError::SerializationError(format!( "Failed to deserailize KD Tree Metadata: {}", e )) })?; - let mut buf = Cursor::new(topology_bytes); + let mut buf = Cursor::new(topology_b); let mut non_deleted = HashSet::new(); let root = deserialize_topology_recursive(&mut buf, &mut non_deleted)?; - Ok(Box::new(KDTree { + Ok(KDTree { dim: metadata.dim, root, point_ids: non_deleted, total_nodes: metadata.total_nodes, deleted_count: metadata.deleted_count, - })) + }) } +} + + +// helper functions - fn serialize_topology_recursive( - &self, - current_opt: &Option>, - buffer: &mut Vec, - ) -> Result<(), DbError> { - if let Some(current) = current_opt { - let mut marker = NODE_MARKER_BYTE; - if current.is_deleted { - marker |= DELETED_MASK; - } - buffer.push(marker); - - let uuid_bytes = current.indexed_vector.id.to_bytes_le(); - buffer.extend_from_slice(&uuid_bytes); - - // serialize left subtree topology - self.serialize_topology_recursive(¤t.left, buffer)?; - // serialize right subtree topology - self.serialize_topology_recursive(¤t.right, buffer)?; - } else { - buffer.push(SKIP_MARKER_BYTE); +fn serialize_topology_recursive( + current_opt: &Option>, + buffer: &mut Cursor<&mut Vec>, +) -> Result<(), DbError> { + if let Some(current) = current_opt { + let mut marker = NODE_MARKER_BYTE; + if current.is_deleted { + marker |= DELETED_MASK; } - Ok(()) + buffer.write_all(&[marker]).map_err(|e| DbError::SerializationError(e.to_string()))?; + + let uuid_bytes = current.indexed_vector.id.to_bytes_le(); + buffer.write_all(&uuid_bytes).map_err(|e| DbError::SerializationError(e.to_string()))?; + + // serialize left subtree topology + serialize_topology_recursive(¤t.left, buffer)?; + // serialize right subtree topology + serialize_topology_recursive(¤t.right, buffer)?; + } else { + buffer.write_all(&[SKIP_MARKER_BYTE]).map_err(|e| DbError::SerializationError(e.to_string()))?; + } + Ok(()) +} + +fn populate_vectors_recursive(node: &mut Option>, storage: &dyn StorageEngine) -> Result<(), DbError> { + if let Some(node) = node { + let vector = storage.get_vector(node.indexed_vector.id)?.ok_or(DbError::VectorNotFound(node.indexed_vector.id))?; + node.indexed_vector.vector = vector; + + populate_vectors_recursive(&mut node.left, storage)?; + populate_vectors_recursive(&mut node.right, storage)?; } + Ok(()) } fn deserialize_topology_recursive( - buffer: &mut Cursor>, + buffer: &mut Cursor<&Vec>, non_deleted: &mut HashSet, ) -> Result>, DbError> { let mut current_marker: [u8; 1] = [0u8; 1]; diff --git a/crates/index/src/kd_tree/tests.rs b/crates/index/src/kd_tree/tests.rs index 43f79a5..caa8ca2 100644 --- a/crates/index/src/kd_tree/tests.rs +++ b/crates/index/src/kd_tree/tests.rs @@ -1,5 +1,5 @@ use super::index::KDTree; -use crate::SerializableIndexer; +use crate::SerializableIndex; use crate::VectorIndex; use crate::distance; use crate::flat::index::FlatIndex; @@ -721,10 +721,8 @@ fn test_serialize_and_deserialize() { .unwrap(); tree_before.delete(id1).unwrap(); - let serialized_meta = tree_before.serialize_metadata().unwrap(); - let serialized_topo = tree_before.serialize_topology().unwrap(); - - let tree = KDTree::deserialize(serialized_meta, serialized_topo).unwrap(); + let snapshot = tree_before.snapshot().unwrap(); + let tree = KDTree::deserialize(&snapshot).unwrap(); assert!(tree.root.is_some()); assert_eq!(tree.dim, 3); diff --git a/crates/index/src/lib.rs b/crates/index/src/lib.rs index 2d0bdc7..cd95e0a 100644 --- a/crates/index/src/lib.rs +++ b/crates/index/src/lib.rs @@ -1,9 +1,14 @@ -use defs::{DbError, DenseVector, IndexedVector, PointId, Similarity}; +use defs::{DbError, DenseVector, IndexedVector, Magic, PointId, Similarity}; +use storage::StorageEngine; pub mod flat; pub mod kd_tree; -pub trait VectorIndex: Send + Sync + SerializableIndexer { +mod deserialize; +pub use crate::deserialize::*; + + +pub trait VectorIndex: Send + Sync + SerializableIndex { fn insert(&mut self, vector: IndexedVector) -> Result<(), DbError>; // Returns true if point id existed and is deleted, else returns false @@ -66,10 +71,18 @@ pub enum IndexType { HNSW, } -pub trait SerializableIndexer { +pub struct IndexSnapshot { + pub index_type: IndexType, + pub magic: Magic, + pub topology_b: Vec, + pub metadata_b: Vec, +} + +pub trait SerializableIndex { fn serialize_topology(&self) -> Result, DbError>; fn serialize_metadata(&self) -> Result, DbError>; - fn magic_bytes(&self) -> [u8; 4]; - // fn deserialize(metadata: Vec, topology: Vec) -> Result, DbError>; + fn snapshot(&self) -> Result; + + fn populate_vectors(&mut self, storage: &dyn StorageEngine) -> Result<(), DbError>; } diff --git a/crates/snapshot/src/constants.rs b/crates/snapshot/src/constants.rs new file mode 100644 index 0000000..f202470 --- /dev/null +++ b/crates/snapshot/src/constants.rs @@ -0,0 +1,7 @@ +use semver::Version; + +pub const SNAPSHOT_PARSER_VER: Version = Version::new(0, 1, 0); +pub const SMALL_ID_LEN: usize = 5; +pub const MANIFEST_FILE: &str = "manifest.json"; + +pub const STORAGE_CHECKPOINT_FILE: &str = "storage-checkpoint.tar.gz"; diff --git a/crates/snapshot/src/lib.rs b/crates/snapshot/src/lib.rs index 3fec845..0960b42 100644 --- a/crates/snapshot/src/lib.rs +++ b/crates/snapshot/src/lib.rs @@ -1,29 +1,56 @@ -pub mod types; +pub mod constants; +pub mod manifest; +pub mod metadata; mod util; -use crate::types::{Snapshot, SnapshotManifest}; +use crate::{ + constants::{MANIFEST_FILE, SNAPSHOT_PARSER_VER, STORAGE_CHECKPOINT_FILE}, + manifest::Manifest, + util::{compress_archive, save_index_metadata, save_topology}, +}; + use chrono::{DateTime, Local}; use defs::DbError; -use index::VectorIndex; +use flate2::read::GzDecoder; +use index::{IndexSnapshot, VectorIndex}; use semver::Version; -use std::{path::PathBuf, time::SystemTime}; -use storage::StorageEngine; +use std::{ + fs::File, path::{Path}, time::SystemTime +}; +use storage::{StorageCheckpoint, StorageEngine, rocks_db::RocksDbStorage}; +use tar::Archive; use tempfile::tempdir; use uuid::Uuid; -const SNAPSHOT_PARSER_VER: Version = Version::new(0, 1, 0); - // TODO: implement snapshot engine that runs in its own thread and wakes up in regular intervals +pub struct Snapshot { + pub id: Uuid, + pub date: SystemTime, + pub sem_ver: Version, + pub index_snapshot: IndexSnapshot, + pub storage_snapshot: StorageCheckpoint, +} + impl Snapshot { - pub fn create( - index: &dyn VectorIndex, - storage: &dyn StorageEngine, - path: PathBuf, - ) -> Result { + pub fn new(index_snapshot : IndexSnapshot, storage_snapshot : StorageCheckpoint) -> Self { let id = Uuid::new_v4(); let date = SystemTime::now(); + Snapshot { + id, + date, + sem_ver: SNAPSHOT_PARSER_VER, + index_snapshot, + storage_snapshot, + } + } + + pub fn save( + &self, + path: &Path + ) -> Result<(), DbError> { + if !path.is_dir() { return Err(DbError::SnapshotError(format!( "Invalid path: {}", @@ -33,71 +60,172 @@ impl Snapshot { let temp_dir = tempdir().map_err(|e| DbError::SnapshotError(e.to_string()))?; - let index_metadata_b = index.serialize_metadata()?; - let index_topology_b = index.serialize_topology()?; - - let magic_b = index.magic_bytes(); - // save index snapshots - let metadata_path = Self::save_metadata(temp_dir.path(), id, &index_metadata_b, &magic_b)?; - let topology_path = Self::save_topology(temp_dir.path(), id, &index_topology_b, &magic_b)?; + let index_metadata_path = + save_index_metadata(temp_dir.path(), self.id, &self.index_snapshot.metadata_b, &self.index_snapshot.magic, dimensions)?; - // save storage checkpoint - let storage_checkpoint_path = temp_dir.path().join("storage-checkpoint.tar.gz"); - storage.checkpoint(&storage_checkpoint_path)?; + let topology_path = save_topology(temp_dir.path(), self.id, &self.index_snapshot.topology_b, &self.index_snapshot.magic)?; // take checksums - let index_metadata_checksum = util::sha256_digest(&metadata_path) + let index_metadata_checksum = util::sha256_digest(&index_metadata_path) .map_err(|e| DbError::SnapshotError(e.to_string()))?; let index_topo_checksum = util::sha256_digest(&topology_path) .map_err(|e| DbError::SnapshotError(e.to_string()))?; - let storage_checkpoint_checksum = util::sha256_digest(&storage_checkpoint_path) + let storage_checkpoint_checksum = util::sha256_digest(&self.storage_snapshot.path) .map_err(|e| DbError::SnapshotError(e.to_string()))?; - let dt_now_local: DateTime = date.into(); + let dt_now_local: DateTime = self.date.into(); // create manifest file - let manifest = SnapshotManifest { - id, + let manifest = Manifest { + id: self.id, date: dt_now_local.timestamp(), - sem_ver: SNAPSHOT_PARSER_VER.to_string(), + sem_ver: constants::SNAPSHOT_PARSER_VER.to_string(), index_metadata_checksum, index_topo_checksum, storage_checkpoint_checksum, }; - let manifest_path = Self::save_manifest(temp_dir.path(), &manifest) + let manifest_path = manifest + .save(temp_dir.path()) .map_err(|e| DbError::SnapshotError(e.to_string()))?; + let tar_filename = format!( - "{}-{}-{}.tar.gz", - dt_now_local.to_rfc3339_opts(chrono::SecondsFormat::Secs, true), - &(id.to_string()[..5]), - SNAPSHOT_PARSER_VER + "{}.tar.gz", + metadata::Metadata::new( + self.id, + self.date, + index_metadata_path.clone(), + constants::SNAPSHOT_PARSER_VER + ) ); let tar_gz_path = path.join(tar_filename); - Self::compress_archive( + compress_archive( &tar_gz_path, &[ - &metadata_path, + &index_metadata_path, &topology_path, - &storage_checkpoint_path, + &self.storage_snapshot.path, &manifest_path, ], temp_dir.path(), ) .map_err(|e| DbError::SnapshotError(e.to_string()))?; - - Ok(Snapshot { - id, - date, - path: tar_gz_path, - sem_ver: SNAPSHOT_PARSER_VER, - }) + Ok(()) } - // fn open(path : PathBuf) -> Result; + pub fn load( + path: &Path, + storage_data_path : &Path + ) -> Result<(Box, Box, usize), DbError> { + + // only rocksdb is supported for snapshots as of now + let mut storage_engine = Box::new(RocksDbStorage::new(storage_data_path)).map_err(|e| DbError::SnapshotError(format!("Failed to reinitialize storage engine: {}",e)))?; + + let tar_gz = File::open(path) + .map_err(|e| DbError::SnapshotError(format!("Couldn't open snapshot: {}", e)))?; + let tar = GzDecoder::new(tar_gz); + let mut archive = Archive::new(tar); + + let snapshot_filename = path.file_name().ok_or(DbError::SnapshotError( + "Invalid snapshot filename".to_string(), + ))?; + let temp_dir = std::env::temp_dir().join(snapshot_filename); + + // remove any existing data + if temp_dir.exists() && !temp_dir.is_dir() { + std::fs::remove_file(temp_dir.clone()).map_err(|e| { + DbError::SnapshotError(format!("Couldn't remove existing file: {}", e)) + })?; + } else if temp_dir.is_dir() { + std::fs::remove_dir_all(temp_dir.clone()).map_err(|e| { + DbError::SnapshotError(format!("Couldn't remove existing directory: {}", e)) + })?; + } + + std::fs::create_dir(temp_dir.clone()).map_err(|e| { + DbError::SnapshotError(format!("Couldn't create temporary directory: {}", e)) + })?; + + archive + .unpack(temp_dir.clone()) + .map_err(|e| DbError::SnapshotError(format!("Couldn't unpack archive: {}", e)))?; + + // read manifest and validate + let manifest_path = temp_dir.join(MANIFEST_FILE); + if !manifest_path.is_file() { + return Err(DbError::SnapshotError( + "Manifest file not found".to_string(), + )); + } + + let manifest = Manifest::load(&manifest_path) + .map_err(|e| DbError::SnapshotError(format!("Couldn't load manifest: {}", e)))?; + + if manifest.sem_ver != SNAPSHOT_PARSER_VER.to_string() { + return Err(DbError::SnapshotError( + "Incompatible snapshot version".to_string(), + )); + } + + let id = manifest.id; + let index_metadata_path = temp_dir.join(util::metadata_file_name(&id)); + let topology_path = temp_dir.join(util::topology_file_name(&id)); + let storage_checkpoint_path = temp_dir.join(STORAGE_CHECKPOINT_FILE); + + if !index_metadata_path.exists() + || !topology_path.exists() + || !storage_checkpoint_path.exists() + { + return Err(DbError::SnapshotError("Missing snapshot files".to_string())); + } + + // match checksums + if util::sha256_digest(&index_metadata_path).map_err(|_| { + DbError::SnapshotError("Could not calculate index metadata hash".to_string()) + })? != manifest.index_metadata_checksum + { + return Err(DbError::SnapshotError( + "Index metadata hash mismatch".to_string(), + )); + } + if util::sha256_digest(&topology_path) + .map_err(|_| DbError::SnapshotError("Could not calculate topology hash".to_string()))? + != manifest.index_topo_checksum + { + return Err(DbError::SnapshotError("Topology hash mismatch".to_string())); + } + if util::sha256_digest(&storage_checkpoint_path).map_err(|_| { + DbError::SnapshotError("Could not calculate storage checkpoint hash".to_string()) + })? != manifest.storage_checkpoint_checksum + { + return Err(DbError::SnapshotError( + "Storage checkpoint hash mismatch".to_string(), + )); + } + + let (mgmeta, dimensions, meta_bytes) = util::read_index_metadata(&index_metadata_path) + .map_err(|_| DbError::SnapshotError("Could not read metadata".to_string()))?; + let (mgtopo, topo_bytes) = util::read_index_topology(&topology_path) + .map_err(|_| DbError::SnapshotError("Could not read topology".to_string()))?; - // fn load(&self) -> Result((Box, Box)) + if mgtopo != mgmeta { + return Err(DbError::InvalidMagicBytes( + "Magic bytes don't match".to_string(), + )); + } + + storage_engine.restore_checkpoint(&storage_checkpoint_path)?; + let storage_engine_boxed: Box = Box::new(storage_engine); + + let vector_index : Box = index::deserialize( + meta_bytes, + topo_bytes, + index::index_type_from_magic(mgmeta)?, + )?; + + Ok((vector_index, storage_engine_boxed, dimensions)) + } } diff --git a/crates/snapshot/src/manifest.rs b/crates/snapshot/src/manifest.rs new file mode 100644 index 0000000..38c5a75 --- /dev/null +++ b/crates/snapshot/src/manifest.rs @@ -0,0 +1,41 @@ +use serde::{Deserialize, Serialize}; +use std::path::Path; +use std::{ + io::{BufReader, BufWriter, Error, Write}, + path::PathBuf, +}; +use uuid::Uuid; + +use crate::constants::MANIFEST_FILE; + +type UnixTimestamp = i64; + +#[derive(Serialize, Deserialize)] +pub struct Manifest { + pub id: Uuid, + pub date: UnixTimestamp, + pub sem_ver: String, + pub index_metadata_checksum: String, + pub index_topo_checksum: String, + pub storage_checkpoint_checksum: String, +} + +impl Manifest { + pub fn save(&self, path: &Path) -> Result { + let manifest_path = path.join(MANIFEST_FILE); + + let file = std::fs::File::create(manifest_path.clone())?; + let mut writer = BufWriter::new(file); + serde_json::to_writer(&mut writer, self)?; + writer.flush()?; + + Ok(manifest_path) + } + + pub fn load(path: &Path) -> Result { + let file = std::fs::File::open(path)?; + let mut reader = BufReader::new(file); + let manifest: Manifest = serde_json::from_reader(&mut reader)?; + Ok(manifest) + } +} diff --git a/crates/snapshot/src/metadata.rs b/crates/snapshot/src/metadata.rs new file mode 100644 index 0000000..352654d --- /dev/null +++ b/crates/snapshot/src/metadata.rs @@ -0,0 +1,102 @@ +use crate::constants::SMALL_ID_LEN; +use chrono::DateTime; +use chrono::Local; +use defs::DbError; +use semver::Version; +use std::{fmt::Display, path::PathBuf, time::SystemTime}; +use std::{fs, path::Path}; +use uuid::Uuid; + +// Metadata is the data that can be parsed from the snapshot filename +pub struct Metadata { + pub small_id: String, + pub date: SystemTime, + pub path: PathBuf, + pub sem_ver: Version, +} + +impl Metadata { + pub fn new(id: Uuid, date: SystemTime, path: PathBuf, sem_ver: Version) -> Self { + Metadata { + small_id: id.to_string()[..SMALL_ID_LEN].to_string(), + date, + path, + sem_ver, + } + } + + pub fn parse(path: &Path) -> Result { + if !path.is_file() { + return Err(DbError::SnapshotError("File not found".to_string())); + } + + let filename = path + .file_name() + .ok_or(DbError::SnapshotError("No filename".to_string()))? + .to_str() + .ok_or(DbError::SnapshotError( + "Invalid UTF-8 in filename".to_string(), + ))?; + + let parts = filename.split('-').collect::>(); + + if parts.len() != 3 { + return Err(DbError::SnapshotError("Invalid filename".to_string())); + } + + let id = parts[0]; + if id.len() != SMALL_ID_LEN { + return Err(DbError::SnapshotError("Invalid UUID".to_string())); + } + + let date = chrono::DateTime::parse_from_rfc3339(parts[1]) + .map_err(|_| DbError::SnapshotError("Invalid date".to_string()))?; + let version = Version::parse(parts[2]) + .map_err(|_| DbError::SnapshotError("Invalid version".to_string()))?; + + Ok(Metadata { + small_id: id.to_string(), + date: date.into(), + path: path.to_path_buf(), + sem_ver: version, + }) + } + + pub fn snapshot_dir_metadata(path: &Path) -> Result, DbError> { + if !path.is_dir() { + return Err(DbError::SnapshotError( + "Path is not a directory".to_string(), + )); + } + + let mut metadata_vec = Vec::new(); + + for item in fs::read_dir(path).map_err(|_| { + DbError::SnapshotError(format!("Cannot read directory: {}", path.display())) + })? { + let entry = item.map_err(|_| { + DbError::SnapshotError(format!("Invalid entry: {}", path.display())) + })?; + let path = entry.path(); + if path.is_file() + && let Ok(metadata) = Self::parse(&path) + { + metadata_vec.push(metadata); + } + } + Ok(metadata_vec) + } +} + +impl Display for Metadata { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let dt_now_local: DateTime = self.date.into(); + write!( + f, + "{}-{}-{}", + self.small_id, + dt_now_local.to_rfc3339_opts(chrono::SecondsFormat::Secs, true), + self.sem_ver + ) + } +} diff --git a/crates/snapshot/src/types.rs b/crates/snapshot/src/types.rs deleted file mode 100644 index 17e092c..0000000 --- a/crates/snapshot/src/types.rs +++ /dev/null @@ -1,23 +0,0 @@ -use semver::Version; -use serde::{Deserialize, Serialize}; -use std::{path::PathBuf, time::SystemTime}; -use uuid::Uuid; - -pub struct Snapshot { - pub id: Uuid, - pub date: SystemTime, - pub path: PathBuf, - pub sem_ver: Version, -} - -type UnixTimestamp = i64; - -#[derive(Serialize, Deserialize)] -pub struct SnapshotManifest { - pub id: Uuid, - pub date: UnixTimestamp, - pub sem_ver: String, - pub index_metadata_checksum: String, - pub index_topo_checksum: String, - pub storage_checkpoint_checksum: String, -} diff --git a/crates/snapshot/src/util.rs b/crates/snapshot/src/util.rs index 9458d78..439a9bb 100644 --- a/crates/snapshot/src/util.rs +++ b/crates/snapshot/src/util.rs @@ -1,26 +1,22 @@ -use crate::types::{Snapshot, SnapshotManifest}; use data_encoding::HEXLOWER; use sha2::{Digest, Sha256}; use std::fs::File; use std::io::{BufReader, Error, Read}; use std::path::PathBuf; -use defs::DbError; -use flate2::{Compression, read::GzEncoder}; -use std::{ - io::{BufWriter, Write}, - path::Path, -}; +use defs::{DbError, Magic}; +use flate2::{Compression, write::GzEncoder}; +use std::{io::Write, path::Path}; use tar::Builder; use uuid::Uuid; #[inline] -fn metadata_file_name(id: &Uuid) -> String { +pub fn metadata_file_name(id: &Uuid) -> String { format!("{}-index-meta.bin", id) } #[inline] -fn topology_file_name(id: &Uuid) -> String { +pub fn topology_file_name(id: &Uuid) -> String { format!("{}-index-topo.bin", id) } @@ -44,76 +40,120 @@ pub fn sha256_digest(path: &PathBuf) -> Result { Ok(HEXLOWER.encode(digest.as_ref())) } -impl Snapshot { - pub fn save_metadata( - path: &Path, - uuid: Uuid, - bytes: &[u8], - magic: &[u8; 4], - ) -> Result { - let file_name = metadata_file_name(&uuid); - let metadata_file_path = path.join(file_name); - - let mut file = std::fs::File::create(metadata_file_path.clone()).map_err(|e| { - DbError::SnapshotError(format!("Could not create metadata file: {}", e)) - })?; - - file.write_all(magic) - .map_err(|e| DbError::SnapshotError(format!("Could not write metadata file: {}", e)))?; - file.write_all(&bytes.len().to_le_bytes()) - .map_err(|e| DbError::SnapshotError(format!("Could not write metadata file: {}", e)))?; - file.write_all(bytes) - .map_err(|e| DbError::SnapshotError(format!("Could not write metadata file: {}", e)))?; - - Ok(metadata_file_path) - } +pub fn save_index_metadata( + path: &Path, + uuid: Uuid, + bytes: &[u8], + magic: &Magic, + dimensions: usize, +) -> Result { + let file_name = metadata_file_name(&uuid); + let metadata_file_path = path.join(file_name); + + let mut file = std::fs::File::create(metadata_file_path.clone()) + .map_err(|e| DbError::SnapshotError(format!("Could not create metadata file: {}", e)))?; + + file.write_all(magic) + .map_err(|e| DbError::SnapshotError(format!("Could not write metadata file: {}", e)))?; + file.write_all(&dimensions.to_le_bytes()) + .map_err(|e| DbError::SnapshotError(format!("Could not write metadata file: {}", e)))?; + file.write_all(&bytes.len().to_le_bytes()) + .map_err(|e| DbError::SnapshotError(format!("Could not write metadata file: {}", e)))?; + file.write_all(bytes) + .map_err(|e| DbError::SnapshotError(format!("Could not write metadata file: {}", e)))?; + + Ok(metadata_file_path) +} + +pub fn save_topology( + path: &Path, + uuid: Uuid, + bytes: &[u8], + magic: &Magic, +) -> Result { + let file_name = topology_file_name(&uuid); + let topology_file_path = path.join(file_name); + + let mut file = std::fs::File::create(topology_file_path.clone()) + .map_err(|e| DbError::SnapshotError(format!("Could not create topology file: {}", e)))?; + + file.write_all(magic) + .map_err(|e| DbError::SnapshotError(format!("Could not write topology file: {}", e)))?; + file.write_all(&bytes.len().to_le_bytes()) + .map_err(|e| DbError::SnapshotError(format!("Could not write topology file: {}", e)))?; + file.write_all(bytes) + .map_err(|e| DbError::SnapshotError(format!("Could not write topology file: {}", e)))?; + + Ok(topology_file_path) +} + +pub fn compress_archive(path: &Path, files: &[&Path], base_dir: &Path) -> Result<(), Error> { + let tar_gz = File::create(path)?; + let enc = GzEncoder::new(tar_gz, Compression::default()); + let mut tar = Builder::new(enc); - pub fn save_topology( - path: &Path, - uuid: Uuid, - bytes: &[u8], - magic: &[u8; 4], - ) -> Result { - let file_name = topology_file_name(&uuid); - let topology_file_path = path.join(file_name); - - let mut file = std::fs::File::create(topology_file_path.clone()).map_err(|e| { - DbError::SnapshotError(format!("Could not create topology file: {}", e)) - })?; - - file.write_all(magic) - .map_err(|e| DbError::SnapshotError(format!("Could not write topology file: {}", e)))?; - file.write_all(&bytes.len().to_le_bytes()) - .map_err(|e| DbError::SnapshotError(format!("Could not write topology file: {}", e)))?; - file.write_all(bytes) - .map_err(|e| DbError::SnapshotError(format!("Could not write topology file: {}", e)))?; - - Ok(topology_file_path) + for file in files { + let rel_path = file.file_name().unwrap(); + let mut f = File::open(file)?; + tar.append_file(rel_path, &mut f)?; } - pub fn save_manifest(path: &Path, manifest: &SnapshotManifest) -> Result { - let manifest_path = path.join("manifest.json"); + tar.into_inner()?; + Ok(()) +} - let file = std::fs::File::create(manifest_path.clone())?; - let mut writer = BufWriter::new(file); - serde_json::to_writer(&mut writer, manifest)?; - writer.flush()?; +pub fn read_index_topology(path: &Path) -> Result<(Magic, Vec), DbError> { + let mut file = File::open(path) + .map_err(|e| DbError::SnapshotError(format!("Couldn't open topology file: {}", e)))?; - Ok(manifest_path) - } + let mut magic = Magic::default(); + file.read_exact(&mut magic).map_err(|e| { + DbError::SnapshotError(format!("Couldn't read magic from topology file: {}", e)) + })?; - pub fn compress_archive(path: &Path, files: &[&Path], base_dir: &Path) -> Result<(), Error> { - let tar_gz = File::create(path)?; - let enc = GzEncoder::new(tar_gz, Compression::default()); - let mut tar = Builder::new(enc); + let mut len_bytes = [0u8; size_of::()]; + file.read_exact(&mut len_bytes).map_err(|e| { + DbError::SnapshotError(format!("Couldn't read length from topology file: {}", e)) + })?; - for file in files { - let rel_path = file.strip_prefix(base_dir).unwrap_or(file); - let mut f = File::open(file)?; - tar.append_file(rel_path, &mut f)?; - } + let len = usize::from_le_bytes(len_bytes); + let mut bytes = vec![0u8; len]; + file.read_exact(&mut bytes).map_err(|e| { + DbError::SnapshotError(format!("Couldn't read bytes from topology file: {}", e)) + })?; - tar.into_inner()?; - Ok(()) - } + Ok((magic, bytes)) +} + +pub fn read_index_metadata(path: &Path) -> Result<(Magic, usize, Vec), DbError> { + let mut file = File::open(path) + .map_err(|e| DbError::SnapshotError(format!("Couldn't open metadata file: {}", e)))?; + + let mut magic = Magic::default(); + file.read_exact(&mut magic).map_err(|e| { + DbError::SnapshotError(format!("Couldn't read magic from metadata file: {}", e)) + })?; + + let mut dimensions_bytes = [0u8; size_of::()]; + file.read_exact(&mut dimensions_bytes).map_err(|e| { + DbError::SnapshotError(format!( + "Couldn't read dimensions from metadata file: {}", + e + )) + })?; + + let dimensions = usize::from_le_bytes(dimensions_bytes); + + let mut len_bytes = [0u8; size_of::()]; + file.read_exact(&mut len_bytes).map_err(|e| { + DbError::SnapshotError(format!("Couldn't read length from metadata file: {}", e)) + })?; + + let len = usize::from_le_bytes(len_bytes); + let mut bytes = vec![0u8; len]; + file.read_exact(&mut bytes).map_err(|e| { + DbError::SnapshotError(format!("Couldn't read bytes from metadata file: {}", e)) + })?; + + Ok((magic, dimensions, bytes)) } diff --git a/crates/storage/src/in_memory.rs b/crates/storage/src/in_memory.rs index 5f384a1..19df632 100644 --- a/crates/storage/src/in_memory.rs +++ b/crates/storage/src/in_memory.rs @@ -1,6 +1,9 @@ -use crate::{StorageEngine, VectorPage}; +use crate::{StorageCheckpoint, StorageEngine, VectorPage}; use defs::{DbError, DenseVector, Payload, PointId}; -use std::path::Path; +use std::path::{Path, PathBuf}; +use crate::StorageType; + + pub struct MemoryStorage { // define here how MemoryStorage will be defined @@ -42,10 +45,12 @@ impl StorageEngine for MemoryStorage { fn list_vectors(&self, _offset: PointId, _limit: usize) -> Result, DbError> { Ok(None) } - fn checkpoint(&self, _path: &Path) -> Result<(), DbError> { - Ok(()) + + fn checkpoint_at(&self, _path: &Path) -> Result { + Ok(StorageCheckpoint { path: PathBuf::default(), storage_type: StorageType::InMemory}) } - fn restore_checkpoint(&mut self, _path: &Path) -> Result<(), DbError> { + + fn restore_checkpoint(&mut self, _checkpoint: &StorageCheckpoint) -> Result<(), DbError> { Ok(()) } } diff --git a/crates/storage/src/lib.rs b/crates/storage/src/lib.rs index c418f6e..134dc35 100644 --- a/crates/storage/src/lib.rs +++ b/crates/storage/src/lib.rs @@ -1,8 +1,9 @@ use defs::{DbError, DenseVector, Payload, PointId}; +use tempfile::TempDir; use std::path::{Path, PathBuf}; use std::sync::Arc; -use crate::rocks_db::RocksDbStorage; +use crate::rocks_db::{ROCKSDB_CHECKPOINT_FILENAME_MARKER, RocksDbStorage}; pub type VectorPage = (Vec<(PointId, DenseVector)>, PointId); @@ -19,14 +20,14 @@ pub trait StorageEngine: Send + Sync { fn contains_point(&self, id: PointId) -> Result; fn list_vectors(&self, offset: PointId, limit: usize) -> Result, DbError>; - fn checkpoint(&self, path: &Path) -> Result<(), DbError>; - fn restore_checkpoint(&mut self, path: &Path) -> Result<(), DbError>; + fn checkpoint_at(&self, path: &Path) -> Result; + fn restore_checkpoint(&mut self, checkpoint: &StorageCheckpoint) -> Result<(), DbError>; } pub mod in_memory; pub mod rocks_db; -#[derive(Debug, Clone, Copy)] +#[derive(Debug, Clone, Copy, Eq, PartialEq)] pub enum StorageType { InMemory, RocksDb, @@ -44,3 +45,26 @@ pub fn create_storage_engine( }, } } + + +pub struct StorageCheckpoint { + pub path: PathBuf, + pub storage_type: StorageType, +} + +impl StorageCheckpoint { + fn open(path: &Path) -> Result { + let filename = path.file_name().ok_or_else(|| DbError::StorageCheckpointError("Invalid filename".to_string()))?.to_str().ok_or_else(|| DbError::StorageCheckpointError("Invalid UTF-8 in filename".to_string()))?.to_owned(); + let marker = filename.split_once("-").ok_or_else(|| DbError::StorageCheckpointError("Invalid filename".to_string()))?.0; + + let storage_type = match marker { + ROCKSDB_CHECKPOINT_FILENAME_MARKER => StorageType::RocksDb, + _ => return Err(DbError::StorageCheckpointError("Invalid storage type".to_string())), + }; + + Ok(StorageCheckpoint { + path: path.to_path_buf(), + storage_type, + }) + } +} diff --git a/crates/storage/src/rocks_db.rs b/crates/storage/src/rocks_db.rs index 018ecba..bcc795a 100644 --- a/crates/storage/src/rocks_db.rs +++ b/crates/storage/src/rocks_db.rs @@ -1,6 +1,6 @@ // Rewrite needed -use crate::{StorageEngine, VectorPage}; +use crate::{StorageCheckpoint, StorageEngine, VectorPage}; use bincode::{deserialize, serialize}; use defs::{DbError, DenseVector, Payload, Point, PointId}; use flate2::{Compression, read::GzDecoder, write::GzEncoder}; @@ -11,6 +11,7 @@ use std::{ }; use tar::{Archive, Builder}; use tempfile::tempdir; +use crate::StorageType; //TODO: Implement RocksDbStorage with necessary fields and implementations //TODO: Optimize the basic design @@ -21,9 +22,10 @@ pub struct RocksDbStorage { pub enum RocksDBStorageError { RocksDBError(Error), - SerializationError, } +pub const ROCKSDB_CHECKPOINT_FILENAME_MARKER: &str = "rocksdb"; + impl RocksDbStorage { // Creates new db or switches to existing db pub fn new(path: impl Into) -> Result { @@ -184,7 +186,7 @@ impl StorageEngine for RocksDbStorage { Ok(Some((result, last_id))) } - fn checkpoint(&self, path: &Path) -> Result<(), DbError> { + fn checkpoint_at(&self, path: &Path) -> Result { // flush db first for durability self.db .as_ref() @@ -197,6 +199,10 @@ impl StorageEngine for RocksDbStorage { )) })?; + // filename is rocksdb-{uuid}.tar.gz + let checkpoint_filename = format!("{}-{}.tar.gz",ROCKSDB_CHECKPOINT_FILENAME_MARKER,uuid::Uuid::new_v4().to_string()); + let checkpoint_path = path.join(checkpoint_filename); + let temp_dir_parent = tempdir().unwrap(); let temp_dir = temp_dir_parent.path().join("checkpoint"); @@ -211,7 +217,7 @@ impl StorageEngine for RocksDbStorage { .map_err(|e| DbError::StorageCheckpointError(e.into_string()))?; // compress the checkpoint into an archive - let tar_gz = File::create(path).map_err(|e| { + let tar_gz = File::create(checkpoint_path.clone()).map_err(|e| { DbError::StorageCheckpointError(format!("Couldn't create tar archive file: {}", e)) })?; let enc = GzEncoder::new(tar_gz, Compression::default()); @@ -229,17 +235,25 @@ impl StorageEngine for RocksDbStorage { DbError::StorageCheckpointError(format!("Couldn't compress tar archive: {}", e)) })?; - Ok(()) + Ok(StorageCheckpoint { path: checkpoint_path, storage_type: crate::StorageType::RocksDb }) } - fn restore_checkpoint(&mut self, path: &Path) -> Result<(), DbError> { - let tar_gz = File::open(path).map_err(|e| { + fn restore_checkpoint(&mut self, checkpoint: &StorageCheckpoint) -> Result<(), DbError> { + // enforce storage type + if checkpoint.storage_type != StorageType::RocksDb { + return Err(DbError::StorageCheckpointError(format!("Invalid storage type"))); + } + // enforce filename marker - should have been enforced during StoraegCheckpoint::open anyway + let checkpoint_filename = checkpoint.path.file_name().ok_or(DbError::StorageCheckpointError("Could not read checkpoint filename".to_string()))?.to_str().ok_or(DbError::StorageCheckpointError("Could not read checkpoint filename".to_string()))?; + if !checkpoint.path.ends_with(".tar.gz") && checkpoint_filename.starts_with(ROCKSDB_CHECKPOINT_FILENAME_MARKER) { + return Err(DbError::StorageCheckpointError(format!("Invalid filename"))); + } + + let tar_gz = File::open(&checkpoint.path).map_err(|e| { DbError::StorageCheckpointError(format!("Couldn't open rocksdb checkpoint: {}", e)) })?; let tar = GzDecoder::new(tar_gz); let mut archive = Archive::new(tar); - archive.set_unpack_xattrs(false); - archive.set_preserve_permissions(false); // remove existing stuff in data path self.db @@ -392,14 +406,20 @@ mod tests { content: "Test".to_string(), }); - assert!(db.insert_point(id1, vector.clone(), payload.clone()).is_ok()); + assert!( + db.insert_point(id1, vector.clone(), payload.clone()) + .is_ok() + ); - db.checkpoint(&checkpoint_path) + let checkpoint = db.checkpoint_at(&checkpoint_path) .expect("Failed to create checkpoint"); - assert!(db.insert_point(id2, vector.clone(), payload.clone()).is_ok()); + assert!( + db.insert_point(id2, vector.clone(), payload.clone()) + .is_ok() + ); - db.restore_checkpoint(&checkpoint_path).unwrap(); + db.restore_checkpoint(&checkpoint).unwrap(); assert!(db.contains_point(id1).unwrap()); assert!(!db.contains_point(id2).unwrap()); From 43cb3967e0910f5bf968b428225c471479206941 Mon Sep 17 00:00:00 2001 From: Tanmay Arya Date: Sun, 4 Jan 2026 08:13:52 +0530 Subject: [PATCH 13/25] implement repopulate vector on snapshot reload --- Cargo.lock | 1 + crates/api/src/lib.rs | 79 +++++++------- crates/index/src/deserialize.rs | 15 --- crates/index/src/flat/serialize.rs | 31 ++++-- crates/index/src/flat/tests.rs | 17 +-- crates/index/src/kd_tree/serialize.rs | 42 ++++++-- crates/index/src/kd_tree/tests.rs | 3 +- crates/index/src/lib.rs | 7 +- crates/snapshot/src/lib.rs | 147 ++++++++++++++++++-------- crates/snapshot/src/manifest.rs | 6 ++ crates/snapshot/src/metadata.rs | 12 ++- crates/snapshot/src/util.rs | 33 ++---- crates/storage/Cargo.toml | 1 + crates/storage/src/checkpoint.rs | 51 +++++++++ crates/storage/src/in_memory.rs | 11 +- crates/storage/src/lib.rs | 38 ++----- crates/storage/src/rocks_db.rs | 42 ++++++-- 17 files changed, 336 insertions(+), 200 deletions(-) delete mode 100644 crates/index/src/deserialize.rs create mode 100644 crates/storage/src/checkpoint.rs diff --git a/Cargo.lock b/Cargo.lock index 8bc65f6..b338615 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2093,6 +2093,7 @@ dependencies = [ "defs", "flate2", "rocksdb", + "serde", "tar", "tempfile", "uuid", diff --git a/crates/api/src/lib.rs b/crates/api/src/lib.rs index 81bd61e..ccadea8 100644 --- a/crates/api/src/lib.rs +++ b/crates/api/src/lib.rs @@ -3,6 +3,7 @@ use defs::{DbError, IndexedVector, Similarity}; use defs::{DenseVector, Payload, Point, PointId}; use index::kd_tree::index::KDTree; use std::path::{Path, PathBuf}; +use tempfile::tempdir; // use std::sync::atomic::{AtomicU64, Ordering}; use std::sync::{Arc, RwLock}; @@ -132,13 +133,29 @@ impl VectorDb { Ok(inserted) } - // pub fn create_snapshot(&self, path: &Path) -> Result { - // let index = self.index.read().map_err(|_| DbError::LockError)?; - // let storage = self.storage.as_ref(); + // create snapshot at specificied directory + pub fn create_snapshot(&self, dir_path: &Path) -> Result { + if !dir_path.is_dir() { + return Err(DbError::SnapshotError(format!( + "Invalid path: {}", + dir_path.display() + ))); + } + + let index_snapshot = self + .index + .read() + .map_err(|_| DbError::LockError)? + .snapshot()?; + + let tempdir = tempdir().unwrap(); + let storage_checkpoint = self.storage.checkpoint_at(tempdir.path())?; - // let snapshot = Snapshot::create(self)?; - // Ok(snapshot) - // } + let snapshot = Snapshot::new(index_snapshot, storage_checkpoint, self.dimension)?; + let snapshot_path = snapshot.save(dir_path)?; + + Ok(snapshot_path) + } } #[derive(Debug)] @@ -164,41 +181,13 @@ impl DbRestoreConfig { } } -pub fn create_snapshot(db: &VectorDb, path: &Path) -> Result<(), DbError> { - if !path.is_dir() { - return Err(DbError::SnapshotError(format!( - "Invalid path: {}", - path.display() - ))); - } - - let index_snapshot = db.index.snapshot()?; - // let storage_snapshot = db.storage.snapshot()?; - - // let storage_checkpoint_path = temp_dir.path().join("storage-checkpoint.tar.gz"); - // storage.checkpoint(&storage_checkpoint_path)?; - - - // let snapshot = Snapshot::create(db)?; - Ok(()) +pub fn restore_from_snapshot(config: &DbRestoreConfig) -> Result { + // restore the index from the snapshot + let (storage_engine, index, dimensions) = + Snapshot::load(&config.snapshot_path, &config.data_path)?; + Ok(VectorDb::_new(storage_engine, index, dimensions)) } -// pub fn restore_from_snapshot(config: &DbRestoreConfig) -> Result { -// // snapshots only support rocksdb -// let mut storage = RocksDbStorage::new(config.data_path.clone())?; - -// // restore the index from the snapshot -// let (index_restored, restored_storage, dimension) = Snapshot::load(&config.snapshot_path, &config.data_path)?; - -// let index: RwLock = index_restored.into(); -// let storage : Arc = restored_storage.into(); - -// // Init the db -// let db = VectorDb::_new(storage, index, dimension); - -// Ok(db) -// } - pub fn init_api(config: DbConfig) -> Result { // Initialize the storage engine let storage = match config.storage_type { @@ -236,7 +225,7 @@ mod tests { let temp_dir = tempdir().unwrap(); let config = DbConfig { storage_type: StorageType::RocksDb, - index_type: IndexType::Flat, + index_type: IndexType::KDTree, data_path: temp_dir.path().to_path_buf(), dimension: 3, }; @@ -436,9 +425,10 @@ mod tests { fn test_create_and_load_snapshot() { let (old_db, temp_dir) = create_test_db(); + let vec1 = vec![0.0, 1.0, 2.0]; let point_id = old_db .insert( - vec![0.0, 1.0, 2.0], + vec1.clone(), Payload { content_type: ContentType::Text, content: format!("Test content {}", 0), @@ -448,16 +438,19 @@ mod tests { let temp_snapshot_dir = tempdir().unwrap(); - let snapshot = old_db.create_snapshot(temp_snapshot_dir.path()).unwrap(); + let snapshot_path = old_db.create_snapshot(temp_snapshot_dir.path()).unwrap(); let reload_config = DbRestoreConfig { data_path: temp_dir.path().to_path_buf(), - snapshot_path: snapshot.path, + snapshot_path: snapshot_path, }; std::mem::drop(old_db); let loaded_db = restore_from_snapshot(&reload_config).unwrap(); assert!(loaded_db.get(point_id).is_ok()); + + // check if vectors was restored + assert!(loaded_db.get(point_id).unwrap().unwrap().vector.unwrap() == vec1); } } diff --git a/crates/index/src/deserialize.rs b/crates/index/src/deserialize.rs deleted file mode 100644 index 92793fe..0000000 --- a/crates/index/src/deserialize.rs +++ /dev/null @@ -1,15 +0,0 @@ -use defs::{DbError}; - -use crate::flat::index::FlatIndex; -use crate::kd_tree::index::KDTree; -use crate::{IndexSnapshot, IndexType, VectorIndex}; - -pub fn deserialize( - snapshot: &IndexSnapshot -) -> Result, DbError> { - match snapshot.index_type { - IndexType::Flat => Ok(Box::new(FlatIndex::deserialize(snapshot)?)), - IndexType::KDTree => Ok(Box::new(KDTree::deserialize(snapshot)?)), - IndexType::HNSW => Ok(Box::new(FlatIndex::deserialize(snapshot)?)), // TODO: change this for hnsw - } -} diff --git a/crates/index/src/flat/serialize.rs b/crates/index/src/flat/serialize.rs index bafa008..32af97e 100644 --- a/crates/index/src/flat/serialize.rs +++ b/crates/index/src/flat/serialize.rs @@ -1,12 +1,12 @@ use super::FLAT_MAGIC_BYTES; -use crate::{IndexSnapshot, SerializableIndex}; +use crate::IndexType; use crate::flat::index::FlatIndex; +use crate::{IndexSnapshot, SerializableIndex}; use defs::{DbError, IndexedVector}; use serde::{Deserialize, Serialize}; use std::io::{Cursor, Read}; +use storage::StorageEngine; use uuid::Uuid; -use crate::IndexType; - #[derive(Debug, Clone, Serialize, Deserialize)] pub struct FlatIndexMetadata { @@ -15,11 +15,23 @@ pub struct FlatIndexMetadata { impl FlatIndex { pub fn deserialize( - IndexSnapshot { index_type, magic, topology_b, metadata_b }: &IndexSnapshot + IndexSnapshot { + index_type, + magic, + topology_b, + metadata_b, + }: &IndexSnapshot, ) -> Result { + if index_type != &IndexType::Flat { + return Err(DbError::SerializationError( + "Invalid index type".to_string(), + )); + } if magic != &FLAT_MAGIC_BYTES { - return Err(DbError::SerializationError(format!("Invalid magic bytes"))); + return Err(DbError::SerializationError( + "Invalid magic bytes".to_string(), + )); } let metadata: FlatIndexMetadata = bincode::deserialize(metadata_b).map_err(|e| { @@ -50,12 +62,12 @@ impl FlatIndex { } impl SerializableIndex for FlatIndex { - fn serialize_topology(&self) -> Result, DbError> { let mut buffer: Vec = Vec::new(); for point in &self.index { buffer.extend_from_slice(&point.id.to_bytes_le()); } + Ok(buffer) } @@ -68,13 +80,16 @@ impl SerializableIndex for FlatIndex { let metadata_bytes = bincode::serialize(&metadata).map_err(|e| { DbError::SerializationError(format!("Failed to serialize FlatIndex Metadata: {}", e)) })?; + buffer.extend_from_slice(&metadata_bytes); Ok(buffer) } - fn populate_vectors(&mut self, storage: &dyn storage::StorageEngine) -> Result<(), DbError> { + fn populate_vectors(&mut self, storage: &dyn StorageEngine) -> Result<(), DbError> { for item in &mut self.index { - item.vector = storage.get_vector(item.id)?.ok_or(DbError::VectorNotFound(item.id))?; + item.vector = storage + .get_vector(item.id)? + .ok_or(DbError::VectorNotFound(item.id))?; } Ok(()) } diff --git a/crates/index/src/flat/tests.rs b/crates/index/src/flat/tests.rs index d2134ff..ca5f239 100644 --- a/crates/index/src/flat/tests.rs +++ b/crates/index/src/flat/tests.rs @@ -196,8 +196,8 @@ fn test_default() { } #[test] -fn test_serialize_and_deserialize() { - // currently fails because vectors arent restored +fn test_serialize_and_deserialize_topo() { + // TODO: currently only tests topology and not vector restore; requires InMemory storage for vector restore testing (RocksDB seems to heavy to be used here for testing) let id1 = Uuid::new_v4(); let id2 = Uuid::new_v4(); let id3 = Uuid::new_v4(); @@ -222,7 +222,7 @@ fn test_serialize_and_deserialize() { let vectors = vec![v1.clone(), v2.clone(), v3.clone(), v4.clone()]; let mut index_before = FlatIndex::build(vectors); - index_before.insert(v4).unwrap(); + index_before.insert(v4.clone()).unwrap(); index_before.delete(id1).unwrap(); @@ -230,9 +230,10 @@ fn test_serialize_and_deserialize() { let idx = FlatIndex::deserialize(&snapshot).unwrap(); - assert_eq!(idx.index.len(), 3); - assert!(!idx.index.contains(&v1)); - assert!(idx.index.contains(&v2)); - assert!(idx.index.contains(&v3)); - assert!(idx.index.contains(&v3)); + assert_eq!(idx.index.len(), 4); + assert!(!idx.index.iter().any(|v| v.id == id1)); + assert!(idx.index.iter().any(|v| v.id == id2)); + assert!(idx.index.iter().any(|v| v.id == id3)); + assert!(idx.index.iter().any(|v| v.id == id3)); + assert!(idx.index.iter().any(|v| v.id == id4)); } diff --git a/crates/index/src/kd_tree/serialize.rs b/crates/index/src/kd_tree/serialize.rs index c5ef025..99ccc20 100644 --- a/crates/index/src/kd_tree/serialize.rs +++ b/crates/index/src/kd_tree/serialize.rs @@ -4,7 +4,7 @@ use std::io::{Cursor, Read, Write}; use super::KD_TREE_MAGIC_BYTES; use super::index::KDTree; use super::types::KDTreeNode; -use crate::{SerializableIndex, IndexSnapshot}; +use crate::{IndexSnapshot, IndexType, SerializableIndex}; use bincode; use defs::{DbError, IndexedVector, PointId}; use serde::{Deserialize, Serialize}; @@ -19,7 +19,6 @@ pub struct KDTreeMetadata { } impl SerializableIndex for KDTree { - fn serialize_topology(&self) -> Result, DbError> { let mut buffer = Vec::new(); let mut cursor = Cursor::new(&mut buffer); @@ -56,7 +55,6 @@ impl SerializableIndex for KDTree { populate_vectors_recursive(&mut self.root, storage)?; Ok(()) } - } const NODE_MARKER_BYTE: u8 = 1u8; @@ -66,11 +64,23 @@ const DELETED_MASK: u8 = 2u8; impl KDTree { pub fn deserialize( - IndexSnapshot { index_type, magic, topology_b, metadata_b }: &IndexSnapshot + IndexSnapshot { + index_type, + magic, + topology_b, + metadata_b, + }: &IndexSnapshot, ) -> Result { + if index_type != &IndexType::KDTree { + return Err(DbError::SerializationError( + "Invalid index type".to_string(), + )); + } if magic != &KD_TREE_MAGIC_BYTES { - return Err(DbError::SerializationError(format!("Invalid magic bytes"))); + return Err(DbError::SerializationError( + "Invalid magic bytes".to_string(), + )); } let metadata: KDTreeMetadata = @@ -95,7 +105,6 @@ impl KDTree { } } - // helper functions fn serialize_topology_recursive( @@ -107,24 +116,35 @@ fn serialize_topology_recursive( if current.is_deleted { marker |= DELETED_MASK; } - buffer.write_all(&[marker]).map_err(|e| DbError::SerializationError(e.to_string()))?; + buffer + .write_all(&[marker]) + .map_err(|e| DbError::SerializationError(e.to_string()))?; let uuid_bytes = current.indexed_vector.id.to_bytes_le(); - buffer.write_all(&uuid_bytes).map_err(|e| DbError::SerializationError(e.to_string()))?; + buffer + .write_all(&uuid_bytes) + .map_err(|e| DbError::SerializationError(e.to_string()))?; // serialize left subtree topology serialize_topology_recursive(¤t.left, buffer)?; // serialize right subtree topology serialize_topology_recursive(¤t.right, buffer)?; } else { - buffer.write_all(&[SKIP_MARKER_BYTE]).map_err(|e| DbError::SerializationError(e.to_string()))?; + buffer + .write_all(&[SKIP_MARKER_BYTE]) + .map_err(|e| DbError::SerializationError(e.to_string()))?; } Ok(()) } -fn populate_vectors_recursive(node: &mut Option>, storage: &dyn StorageEngine) -> Result<(), DbError> { +fn populate_vectors_recursive( + node: &mut Option>, + storage: &dyn StorageEngine, +) -> Result<(), DbError> { if let Some(node) = node { - let vector = storage.get_vector(node.indexed_vector.id)?.ok_or(DbError::VectorNotFound(node.indexed_vector.id))?; + let vector = storage + .get_vector(node.indexed_vector.id)? + .ok_or(DbError::VectorNotFound(node.indexed_vector.id))?; node.indexed_vector.vector = vector; populate_vectors_recursive(&mut node.left, storage)?; diff --git a/crates/index/src/kd_tree/tests.rs b/crates/index/src/kd_tree/tests.rs index caa8ca2..0d44cc1 100644 --- a/crates/index/src/kd_tree/tests.rs +++ b/crates/index/src/kd_tree/tests.rs @@ -704,7 +704,8 @@ fn test_kdtree_vs_flat_euclidean_5d() { } #[test] -fn test_serialize_and_deserialize() { +fn test_serialize_and_deserialize_topo() { + // TODO: currently only tests topology and not vector restore; requires InMemory storage for vector restore testing (RocksDB seems to heavy to be used here for testing) let id1 = Uuid::new_v4(); let id2 = Uuid::new_v4(); let id3 = Uuid::new_v4(); diff --git a/crates/index/src/lib.rs b/crates/index/src/lib.rs index cd95e0a..e494591 100644 --- a/crates/index/src/lib.rs +++ b/crates/index/src/lib.rs @@ -1,13 +1,10 @@ use defs::{DbError, DenseVector, IndexedVector, Magic, PointId, Similarity}; +use serde::{Deserialize, Serialize}; use storage::StorageEngine; pub mod flat; pub mod kd_tree; -mod deserialize; -pub use crate::deserialize::*; - - pub trait VectorIndex: Send + Sync + SerializableIndex { fn insert(&mut self, vector: IndexedVector) -> Result<(), DbError>; @@ -64,7 +61,7 @@ pub fn distance(a: &DenseVector, b: &DenseVector, dist_type: Similarity) -> f32 } } -#[derive(Debug, Clone, Copy)] +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)] pub enum IndexType { Flat, KDTree, diff --git a/crates/snapshot/src/lib.rs b/crates/snapshot/src/lib.rs index 0960b42..1933628 100644 --- a/crates/snapshot/src/lib.rs +++ b/crates/snapshot/src/lib.rs @@ -4,7 +4,7 @@ pub mod metadata; mod util; use crate::{ - constants::{MANIFEST_FILE, SNAPSHOT_PARSER_VER, STORAGE_CHECKPOINT_FILE}, + constants::{MANIFEST_FILE, SNAPSHOT_PARSER_VER}, manifest::Manifest, util::{compress_archive, save_index_metadata, save_topology}, }; @@ -12,59 +12,79 @@ use crate::{ use chrono::{DateTime, Local}; use defs::DbError; use flate2::read::GzDecoder; -use index::{IndexSnapshot, VectorIndex}; +use index::{ + IndexSnapshot, IndexType, VectorIndex, flat::index::FlatIndex, kd_tree::index::KDTree, +}; use semver::Version; use std::{ - fs::File, path::{Path}, time::SystemTime + fs::File, + path::{Path, PathBuf}, + sync::{Arc, RwLock}, + time::SystemTime, +}; +use storage::{ + StorageEngine, StorageType, checkpoint::StorageCheckpoint, rocks_db::RocksDbStorage, }; -use storage::{StorageCheckpoint, StorageEngine, rocks_db::RocksDbStorage}; use tar::Archive; use tempfile::tempdir; use uuid::Uuid; // TODO: implement snapshot engine that runs in its own thread and wakes up in regular intervals +type VectorDbRestore = (Arc, Arc>, usize); + pub struct Snapshot { pub id: Uuid, pub date: SystemTime, pub sem_ver: Version, pub index_snapshot: IndexSnapshot, pub storage_snapshot: StorageCheckpoint, + pub dimensions: usize, } impl Snapshot { - pub fn new(index_snapshot : IndexSnapshot, storage_snapshot : StorageCheckpoint) -> Self { + pub fn new( + index_snapshot: IndexSnapshot, + storage_snapshot: StorageCheckpoint, + dimensions: usize, + ) -> Result { let id = Uuid::new_v4(); let date = SystemTime::now(); - Snapshot { + Ok(Snapshot { id, date, sem_ver: SNAPSHOT_PARSER_VER, index_snapshot, storage_snapshot, - } + dimensions, + }) } - pub fn save( - &self, - path: &Path - ) -> Result<(), DbError> { - - if !path.is_dir() { + pub fn save(&self, dir_path: &Path) -> Result { + if !dir_path.is_dir() { return Err(DbError::SnapshotError(format!( "Invalid path: {}", - path.display() + dir_path.display() ))); } let temp_dir = tempdir().map_err(|e| DbError::SnapshotError(e.to_string()))?; // save index snapshots - let index_metadata_path = - save_index_metadata(temp_dir.path(), self.id, &self.index_snapshot.metadata_b, &self.index_snapshot.magic, dimensions)?; + let index_metadata_path = save_index_metadata( + temp_dir.path(), + self.id, + &self.index_snapshot.metadata_b, + &self.index_snapshot.magic, + )?; - let topology_path = save_topology(temp_dir.path(), self.id, &self.index_snapshot.topology_b, &self.index_snapshot.magic)?; + let topology_path = save_topology( + temp_dir.path(), + self.id, + &self.index_snapshot.topology_b, + &self.index_snapshot.magic, + )?; // take checksums let index_metadata_checksum = util::sha256_digest(&index_metadata_path) @@ -76,6 +96,18 @@ impl Snapshot { let dt_now_local: DateTime = self.date.into(); + // need this for manifest + let storage_checkpoint_filename = self + .storage_snapshot + .path + .file_name() + .ok_or(DbError::SnapshotError( + "Storage checkpoint was not properly made".to_string(), + ))? + .to_str() + .unwrap() + .to_string(); + // create manifest file let manifest = Manifest { id: self.id, @@ -84,13 +116,16 @@ impl Snapshot { index_metadata_checksum, index_topo_checksum, storage_checkpoint_checksum, + storage_type: self.storage_snapshot.storage_type, + index_type: self.index_snapshot.index_type, + dimensions: self.dimensions, + storage_checkpoint_filename, }; let manifest_path = manifest .save(temp_dir.path()) .map_err(|e| DbError::SnapshotError(e.to_string()))?; - let tar_filename = format!( "{}.tar.gz", metadata::Metadata::new( @@ -100,7 +135,7 @@ impl Snapshot { constants::SNAPSHOT_PARSER_VER ) ); - let tar_gz_path = path.join(tar_filename); + let tar_gz_path = dir_path.join(tar_filename); compress_archive( &tar_gz_path, @@ -110,22 +145,15 @@ impl Snapshot { &self.storage_snapshot.path, &manifest_path, ], - temp_dir.path(), ) .map_err(|e| DbError::SnapshotError(e.to_string()))?; - Ok(()) + Ok(tar_gz_path.to_path_buf()) } - pub fn load( - path: &Path, - storage_data_path : &Path - ) -> Result<(Box, Box, usize), DbError> { - - // only rocksdb is supported for snapshots as of now - let mut storage_engine = Box::new(RocksDbStorage::new(storage_data_path)).map_err(|e| DbError::SnapshotError(format!("Failed to reinitialize storage engine: {}",e)))?; - + pub fn load(path: &Path, storage_data_path: &Path) -> Result { let tar_gz = File::open(path) .map_err(|e| DbError::SnapshotError(format!("Couldn't open snapshot: {}", e)))?; + let tar = GzDecoder::new(tar_gz); let mut archive = Archive::new(tar); @@ -170,16 +198,31 @@ impl Snapshot { )); } + // only rocksdb is supported for snapshots as of now + let mut storage_engine: Box = match manifest.storage_type { + StorageType::RocksDb => Box::new(RocksDbStorage::new(storage_data_path)?), + _ => { + return Err(DbError::SnapshotError( + "Unsupported storage type".to_string(), + )); + } + }; + let id = manifest.id; - let index_metadata_path = temp_dir.join(util::metadata_file_name(&id)); - let topology_path = temp_dir.join(util::topology_file_name(&id)); - let storage_checkpoint_path = temp_dir.join(STORAGE_CHECKPOINT_FILE); + let index_metadata_path = temp_dir.join(util::metadata_filename(&id)); + let topology_path = temp_dir.join(util::topology_filename(&id)); + let storage_checkpoint_path = temp_dir.join(manifest.storage_checkpoint_filename); if !index_metadata_path.exists() || !topology_path.exists() || !storage_checkpoint_path.exists() { - return Err(DbError::SnapshotError("Missing snapshot files".to_string())); + return Err(DbError::SnapshotError(format!( + "Missing snapshot files {} , {}, {}", + index_metadata_path.display(), + topology_path.display(), + storage_checkpoint_path.display() + ))); } // match checksums @@ -206,7 +249,7 @@ impl Snapshot { )); } - let (mgmeta, dimensions, meta_bytes) = util::read_index_metadata(&index_metadata_path) + let (mgmeta, meta_bytes) = util::read_index_metadata(&index_metadata_path) .map_err(|_| DbError::SnapshotError("Could not read metadata".to_string()))?; let (mgtopo, topo_bytes) = util::read_index_topology(&topology_path) .map_err(|_| DbError::SnapshotError("Could not read topology".to_string()))?; @@ -217,15 +260,35 @@ impl Snapshot { )); } - storage_engine.restore_checkpoint(&storage_checkpoint_path)?; - let storage_engine_boxed: Box = Box::new(storage_engine); + // validates if manifest storage type matches that in the filename of storage checkpoint + let storage_checkpoint = StorageCheckpoint::open(storage_checkpoint_path.as_path())?; + if storage_checkpoint.storage_type != manifest.storage_type { + return Err(DbError::SnapshotError( + "Storage type mismatch from manifest and checkpoint".to_string(), + )); + } - let vector_index : Box = index::deserialize( - meta_bytes, - topo_bytes, - index::index_type_from_magic(mgmeta)?, - )?; + storage_engine.restore_checkpoint(&storage_checkpoint)?; + + let index_snapshot = IndexSnapshot { + index_type: manifest.index_type, + magic: mgmeta, + metadata_b: meta_bytes, + topology_b: topo_bytes, + }; + + // dynamic dispatch based on index type + let vector_index: Arc> = match manifest.index_type { + IndexType::Flat => Arc::new(RwLock::new(FlatIndex::deserialize(&index_snapshot)?)), + IndexType::KDTree => Arc::new(RwLock::new(KDTree::deserialize(&index_snapshot)?)), + _ => return Err(DbError::SnapshotError("Unsupported index type".to_string())), + }; + + vector_index + .write() + .map_err(|_| DbError::LockError)? + .populate_vectors(&*storage_engine)?; - Ok((vector_index, storage_engine_boxed, dimensions)) + Ok((storage_engine.into(), vector_index, manifest.dimensions)) } } diff --git a/crates/snapshot/src/manifest.rs b/crates/snapshot/src/manifest.rs index 38c5a75..0993435 100644 --- a/crates/snapshot/src/manifest.rs +++ b/crates/snapshot/src/manifest.rs @@ -1,9 +1,11 @@ +use index::IndexType; use serde::{Deserialize, Serialize}; use std::path::Path; use std::{ io::{BufReader, BufWriter, Error, Write}, path::PathBuf, }; +use storage::StorageType; use uuid::Uuid; use crate::constants::MANIFEST_FILE; @@ -18,6 +20,10 @@ pub struct Manifest { pub index_metadata_checksum: String, pub index_topo_checksum: String, pub storage_checkpoint_checksum: String, + pub index_type: IndexType, + pub storage_type: StorageType, + pub dimensions: usize, + pub storage_checkpoint_filename: String, } impl Manifest { diff --git a/crates/snapshot/src/metadata.rs b/crates/snapshot/src/metadata.rs index 352654d..d7cfd00 100644 --- a/crates/snapshot/src/metadata.rs +++ b/crates/snapshot/src/metadata.rs @@ -15,6 +15,8 @@ pub struct Metadata { pub sem_ver: Version, } +const FILENAME_METADATA_SEPARATOR: &str = "-x"; + impl Metadata { pub fn new(id: Uuid, date: SystemTime, path: PathBuf, sem_ver: Version) -> Self { Metadata { @@ -38,10 +40,12 @@ impl Metadata { "Invalid UTF-8 in filename".to_string(), ))?; - let parts = filename.split('-').collect::>(); + let parts = filename + .split(FILENAME_METADATA_SEPARATOR) + .collect::>(); if parts.len() != 3 { - return Err(DbError::SnapshotError("Invalid filename".to_string())); + return Err(DbError::SnapshotError("Invalid filename1".to_string())); } let id = parts[0]; @@ -93,9 +97,11 @@ impl Display for Metadata { let dt_now_local: DateTime = self.date.into(); write!( f, - "{}-{}-{}", + "{}{}{}{}{}", self.small_id, + FILENAME_METADATA_SEPARATOR, dt_now_local.to_rfc3339_opts(chrono::SecondsFormat::Secs, true), + FILENAME_METADATA_SEPARATOR, self.sem_ver ) } diff --git a/crates/snapshot/src/util.rs b/crates/snapshot/src/util.rs index 439a9bb..7d0510e 100644 --- a/crates/snapshot/src/util.rs +++ b/crates/snapshot/src/util.rs @@ -10,13 +10,15 @@ use std::{io::Write, path::Path}; use tar::Builder; use uuid::Uuid; +type BinFileContent = (Magic, Vec); + #[inline] -pub fn metadata_file_name(id: &Uuid) -> String { +pub fn metadata_filename(id: &Uuid) -> String { format!("{}-index-meta.bin", id) } #[inline] -pub fn topology_file_name(id: &Uuid) -> String { +pub fn topology_filename(id: &Uuid) -> String { format!("{}-index-topo.bin", id) } @@ -45,9 +47,8 @@ pub fn save_index_metadata( uuid: Uuid, bytes: &[u8], magic: &Magic, - dimensions: usize, ) -> Result { - let file_name = metadata_file_name(&uuid); + let file_name = metadata_filename(&uuid); let metadata_file_path = path.join(file_name); let mut file = std::fs::File::create(metadata_file_path.clone()) @@ -55,8 +56,6 @@ pub fn save_index_metadata( file.write_all(magic) .map_err(|e| DbError::SnapshotError(format!("Could not write metadata file: {}", e)))?; - file.write_all(&dimensions.to_le_bytes()) - .map_err(|e| DbError::SnapshotError(format!("Could not write metadata file: {}", e)))?; file.write_all(&bytes.len().to_le_bytes()) .map_err(|e| DbError::SnapshotError(format!("Could not write metadata file: {}", e)))?; file.write_all(bytes) @@ -71,7 +70,7 @@ pub fn save_topology( bytes: &[u8], magic: &Magic, ) -> Result { - let file_name = topology_file_name(&uuid); + let file_name = topology_filename(&uuid); let topology_file_path = path.join(file_name); let mut file = std::fs::File::create(topology_file_path.clone()) @@ -87,7 +86,7 @@ pub fn save_topology( Ok(topology_file_path) } -pub fn compress_archive(path: &Path, files: &[&Path], base_dir: &Path) -> Result<(), Error> { +pub fn compress_archive(path: &Path, files: &[&Path]) -> Result<(), Error> { let tar_gz = File::create(path)?; let enc = GzEncoder::new(tar_gz, Compression::default()); let mut tar = Builder::new(enc); @@ -102,7 +101,7 @@ pub fn compress_archive(path: &Path, files: &[&Path], base_dir: &Path) -> Result Ok(()) } -pub fn read_index_topology(path: &Path) -> Result<(Magic, Vec), DbError> { +pub fn read_index_topology(path: &Path) -> Result { let mut file = File::open(path) .map_err(|e| DbError::SnapshotError(format!("Couldn't open topology file: {}", e)))?; @@ -115,8 +114,8 @@ pub fn read_index_topology(path: &Path) -> Result<(Magic, Vec), DbError> { file.read_exact(&mut len_bytes).map_err(|e| { DbError::SnapshotError(format!("Couldn't read length from topology file: {}", e)) })?; - let len = usize::from_le_bytes(len_bytes); + let mut bytes = vec![0u8; len]; file.read_exact(&mut bytes).map_err(|e| { DbError::SnapshotError(format!("Couldn't read bytes from topology file: {}", e)) @@ -125,7 +124,7 @@ pub fn read_index_topology(path: &Path) -> Result<(Magic, Vec), DbError> { Ok((magic, bytes)) } -pub fn read_index_metadata(path: &Path) -> Result<(Magic, usize, Vec), DbError> { +pub fn read_index_metadata(path: &Path) -> Result { let mut file = File::open(path) .map_err(|e| DbError::SnapshotError(format!("Couldn't open metadata file: {}", e)))?; @@ -134,16 +133,6 @@ pub fn read_index_metadata(path: &Path) -> Result<(Magic, usize, Vec), DbErr DbError::SnapshotError(format!("Couldn't read magic from metadata file: {}", e)) })?; - let mut dimensions_bytes = [0u8; size_of::()]; - file.read_exact(&mut dimensions_bytes).map_err(|e| { - DbError::SnapshotError(format!( - "Couldn't read dimensions from metadata file: {}", - e - )) - })?; - - let dimensions = usize::from_le_bytes(dimensions_bytes); - let mut len_bytes = [0u8; size_of::()]; file.read_exact(&mut len_bytes).map_err(|e| { DbError::SnapshotError(format!("Couldn't read length from metadata file: {}", e)) @@ -155,5 +144,5 @@ pub fn read_index_metadata(path: &Path) -> Result<(Magic, usize, Vec), DbErr DbError::SnapshotError(format!("Couldn't read bytes from metadata file: {}", e)) })?; - Ok((magic, dimensions, bytes)) + Ok((magic, bytes)) } diff --git a/crates/storage/Cargo.toml b/crates/storage/Cargo.toml index 5cd90e1..4d2afb3 100644 --- a/crates/storage/Cargo.toml +++ b/crates/storage/Cargo.toml @@ -11,6 +11,7 @@ bincode.workspace = true defs.workspace = true flate2 = "1.1.5" rocksdb.workspace = true +serde.workspace = true tar = "0.4.44" tempfile.workspace = true uuid.workspace = true diff --git a/crates/storage/src/checkpoint.rs b/crates/storage/src/checkpoint.rs new file mode 100644 index 0000000..e43c5f7 --- /dev/null +++ b/crates/storage/src/checkpoint.rs @@ -0,0 +1,51 @@ +use crate::StorageType; +use crate::in_memory::INMEMORY_CHECKPOINT_FILENAME_MARKER; +use crate::rocks_db::ROCKSDB_CHECKPOINT_FILENAME_MARKER; +use defs::DbError; +use std::path::{Path, PathBuf}; + +impl StorageType { + #[inline] + pub fn checkpoint_filename_marker(&self) -> &str { + match self { + StorageType::InMemory => INMEMORY_CHECKPOINT_FILENAME_MARKER, + StorageType::RocksDb => ROCKSDB_CHECKPOINT_FILENAME_MARKER, + } + } +} + +pub struct StorageCheckpoint { + pub path: PathBuf, + pub storage_type: StorageType, +} + +impl StorageCheckpoint { + pub fn open(path: &Path) -> Result { + let filename = path + .file_name() + .ok_or_else(|| DbError::StorageCheckpointError("Invalid filename2".to_string()))? + .to_str() + .ok_or_else(|| { + DbError::StorageCheckpointError("Invalid UTF-8 in filename".to_string()) + })? + .to_owned(); + let marker = filename + .split_once("-") + .ok_or_else(|| DbError::StorageCheckpointError("Invalid filename3".to_string()))? + .0; + + let storage_type = match marker { + ROCKSDB_CHECKPOINT_FILENAME_MARKER => StorageType::RocksDb, + _ => { + return Err(DbError::StorageCheckpointError( + "Invalid storage type".to_string(), + )); + } + }; + + Ok(StorageCheckpoint { + path: path.to_path_buf(), + storage_type, + }) + } +} diff --git a/crates/storage/src/in_memory.rs b/crates/storage/src/in_memory.rs index 19df632..647627d 100644 --- a/crates/storage/src/in_memory.rs +++ b/crates/storage/src/in_memory.rs @@ -1,9 +1,9 @@ -use crate::{StorageCheckpoint, StorageEngine, VectorPage}; +use crate::StorageType; +use crate::{StorageEngine, VectorPage, checkpoint::StorageCheckpoint}; use defs::{DbError, DenseVector, Payload, PointId}; use std::path::{Path, PathBuf}; -use crate::StorageType; - +pub const INMEMORY_CHECKPOINT_FILENAME_MARKER: &str = "inmemory"; pub struct MemoryStorage { // define here how MemoryStorage will be defined @@ -47,7 +47,10 @@ impl StorageEngine for MemoryStorage { } fn checkpoint_at(&self, _path: &Path) -> Result { - Ok(StorageCheckpoint { path: PathBuf::default(), storage_type: StorageType::InMemory}) + Ok(StorageCheckpoint { + path: PathBuf::default(), + storage_type: StorageType::InMemory, + }) } fn restore_checkpoint(&mut self, _checkpoint: &StorageCheckpoint) -> Result<(), DbError> { diff --git a/crates/storage/src/lib.rs b/crates/storage/src/lib.rs index 134dc35..8228f72 100644 --- a/crates/storage/src/lib.rs +++ b/crates/storage/src/lib.rs @@ -1,9 +1,9 @@ +use crate::rocks_db::RocksDbStorage; use defs::{DbError, DenseVector, Payload, PointId}; -use tempfile::TempDir; +use serde::{Deserialize, Serialize}; use std::path::{Path, PathBuf}; use std::sync::Arc; - -use crate::rocks_db::{ROCKSDB_CHECKPOINT_FILENAME_MARKER, RocksDbStorage}; +pub mod checkpoint; pub type VectorPage = (Vec<(PointId, DenseVector)>, PointId); @@ -20,14 +20,17 @@ pub trait StorageEngine: Send + Sync { fn contains_point(&self, id: PointId) -> Result; fn list_vectors(&self, offset: PointId, limit: usize) -> Result, DbError>; - fn checkpoint_at(&self, path: &Path) -> Result; - fn restore_checkpoint(&mut self, checkpoint: &StorageCheckpoint) -> Result<(), DbError>; + fn checkpoint_at(&self, path: &Path) -> Result; + fn restore_checkpoint( + &mut self, + checkpoint: &checkpoint::StorageCheckpoint, + ) -> Result<(), DbError>; } pub mod in_memory; pub mod rocks_db; -#[derive(Debug, Clone, Copy, Eq, PartialEq)] +#[derive(Debug, Clone, Copy, Eq, PartialEq, Serialize, Deserialize)] pub enum StorageType { InMemory, RocksDb, @@ -45,26 +48,3 @@ pub fn create_storage_engine( }, } } - - -pub struct StorageCheckpoint { - pub path: PathBuf, - pub storage_type: StorageType, -} - -impl StorageCheckpoint { - fn open(path: &Path) -> Result { - let filename = path.file_name().ok_or_else(|| DbError::StorageCheckpointError("Invalid filename".to_string()))?.to_str().ok_or_else(|| DbError::StorageCheckpointError("Invalid UTF-8 in filename".to_string()))?.to_owned(); - let marker = filename.split_once("-").ok_or_else(|| DbError::StorageCheckpointError("Invalid filename".to_string()))?.0; - - let storage_type = match marker { - ROCKSDB_CHECKPOINT_FILENAME_MARKER => StorageType::RocksDb, - _ => return Err(DbError::StorageCheckpointError("Invalid storage type".to_string())), - }; - - Ok(StorageCheckpoint { - path: path.to_path_buf(), - storage_type, - }) - } -} diff --git a/crates/storage/src/rocks_db.rs b/crates/storage/src/rocks_db.rs index bcc795a..4e0489d 100644 --- a/crates/storage/src/rocks_db.rs +++ b/crates/storage/src/rocks_db.rs @@ -1,6 +1,7 @@ // Rewrite needed -use crate::{StorageCheckpoint, StorageEngine, VectorPage}; +use crate::StorageType; +use crate::{StorageEngine, VectorPage, checkpoint::StorageCheckpoint}; use bincode::{deserialize, serialize}; use defs::{DbError, DenseVector, Payload, Point, PointId}; use flate2::{Compression, read::GzDecoder, write::GzEncoder}; @@ -11,7 +12,6 @@ use std::{ }; use tar::{Archive, Builder}; use tempfile::tempdir; -use crate::StorageType; //TODO: Implement RocksDbStorage with necessary fields and implementations //TODO: Optimize the basic design @@ -200,7 +200,11 @@ impl StorageEngine for RocksDbStorage { })?; // filename is rocksdb-{uuid}.tar.gz - let checkpoint_filename = format!("{}-{}.tar.gz",ROCKSDB_CHECKPOINT_FILENAME_MARKER,uuid::Uuid::new_v4().to_string()); + let checkpoint_filename = format!( + "{}-{}.tar.gz", + ROCKSDB_CHECKPOINT_FILENAME_MARKER, + uuid::Uuid::new_v4() + ); let checkpoint_path = path.join(checkpoint_filename); let temp_dir_parent = tempdir().unwrap(); @@ -235,18 +239,37 @@ impl StorageEngine for RocksDbStorage { DbError::StorageCheckpointError(format!("Couldn't compress tar archive: {}", e)) })?; - Ok(StorageCheckpoint { path: checkpoint_path, storage_type: crate::StorageType::RocksDb }) + Ok(StorageCheckpoint { + path: checkpoint_path, + storage_type: crate::StorageType::RocksDb, + }) } fn restore_checkpoint(&mut self, checkpoint: &StorageCheckpoint) -> Result<(), DbError> { // enforce storage type if checkpoint.storage_type != StorageType::RocksDb { - return Err(DbError::StorageCheckpointError(format!("Invalid storage type"))); + return Err(DbError::StorageCheckpointError( + "Invalid storage type".to_string(), + )); } // enforce filename marker - should have been enforced during StoraegCheckpoint::open anyway - let checkpoint_filename = checkpoint.path.file_name().ok_or(DbError::StorageCheckpointError("Could not read checkpoint filename".to_string()))?.to_str().ok_or(DbError::StorageCheckpointError("Could not read checkpoint filename".to_string()))?; - if !checkpoint.path.ends_with(".tar.gz") && checkpoint_filename.starts_with(ROCKSDB_CHECKPOINT_FILENAME_MARKER) { - return Err(DbError::StorageCheckpointError(format!("Invalid filename"))); + let checkpoint_filename = checkpoint + .path + .file_name() + .ok_or(DbError::StorageCheckpointError( + "Could not read checkpoint filename".to_string(), + ))? + .to_str() + .ok_or(DbError::StorageCheckpointError( + "Could not read checkpoint filename".to_string(), + ))?; + if !checkpoint_filename.ends_with(".tar.gz") + || !checkpoint_filename.starts_with(ROCKSDB_CHECKPOINT_FILENAME_MARKER) + { + return Err(DbError::StorageCheckpointError(format!( + "Invalid filename4 {}", + checkpoint_filename + ))); } let tar_gz = File::open(&checkpoint.path).map_err(|e| { @@ -411,7 +434,8 @@ mod tests { .is_ok() ); - let checkpoint = db.checkpoint_at(&checkpoint_path) + let checkpoint = db + .checkpoint_at(&checkpoint_path) .expect("Failed to create checkpoint"); assert!( From 789ebf06f6581696c13e848ea630743d830fd974 Mon Sep 17 00:00:00 2001 From: Tanmay Arya Date: Sun, 4 Jan 2026 08:22:19 +0530 Subject: [PATCH 14/25] fix clippy warnings --- crates/api/src/lib.rs | 2 +- crates/index/src/flat/tests.rs | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/crates/api/src/lib.rs b/crates/api/src/lib.rs index ccadea8..5f7c050 100644 --- a/crates/api/src/lib.rs +++ b/crates/api/src/lib.rs @@ -442,7 +442,7 @@ mod tests { let reload_config = DbRestoreConfig { data_path: temp_dir.path().to_path_buf(), - snapshot_path: snapshot_path, + snapshot_path, }; std::mem::drop(old_db); diff --git a/crates/index/src/flat/tests.rs b/crates/index/src/flat/tests.rs index ca5f239..2f08daa 100644 --- a/crates/index/src/flat/tests.rs +++ b/crates/index/src/flat/tests.rs @@ -204,19 +204,19 @@ fn test_serialize_and_deserialize_topo() { let id4 = Uuid::new_v4(); let v1 = IndexedVector { - id: id1.clone(), + id: id1, vector: vec![0.0, 0.0, 0.0, 0.0], }; let v2 = IndexedVector { - id: id2.clone(), + id: id2, vector: vec![1.0, 0.0, 0.0, 0.0], }; let v3 = IndexedVector { - id: id3.clone(), + id: id3, vector: vec![2.0, 0.0, 0.0, 0.0], }; let v4 = IndexedVector { - id: id4.clone(), + id: id4, vector: vec![3.0, 0.0, 0.0, 0.0], }; From a1d6a512ea3b98e282365093fa718f1239cae56f Mon Sep 17 00:00:00 2001 From: Tanmay Arya Date: Sun, 4 Jan 2026 08:29:16 +0530 Subject: [PATCH 15/25] format manifest --- Cargo.toml | 2 +- crates/index/Cargo.toml | 2 +- crates/snapshot/Cargo.toml | 16 ++++++++-------- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 83e1dae..cd35677 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -50,6 +50,6 @@ grpc = { path = "crates/grpc" } http = { path = "crates/http" } index = { path = "crates/index" } server = { path = "crates/server" } +snapshot = { path = "crates/snapshot" } storage = { path = "crates/storage" } tui = { path = "crates/tui" } -snapshot = { path = "crates/snapshot" } diff --git a/crates/index/Cargo.toml b/crates/index/Cargo.toml index 16a38ff..a8c9ca9 100644 --- a/crates/index/Cargo.toml +++ b/crates/index/Cargo.toml @@ -10,5 +10,5 @@ license.workspace = true bincode.workspace = true defs.workspace = true serde.workspace = true -uuid.workspace = true storage.workspace = true +uuid.workspace = true diff --git a/crates/snapshot/Cargo.toml b/crates/snapshot/Cargo.toml index 97b19e5..63973e7 100644 --- a/crates/snapshot/Cargo.toml +++ b/crates/snapshot/Cargo.toml @@ -5,16 +5,16 @@ edition.workspace = true license.workspace = true [dependencies] -semver = "1.0.27" +chrono.workspace = true +data-encoding = "2.9.0" defs.workspace = true +flate2 = "1.1.5" index.workspace = true +semver = "1.0.27" +serde.workspace = true +serde_json.workspace = true +sha2 = "0.10.9" storage.workspace = true +tar = "0.4.44" tempfile.workspace = true uuid.workspace = true -sha2 = "0.10.9" -flate2 = "1.1.5" -chrono.workspace = true -tar = "0.4.44" -serde.workspace = true -serde_json.workspace = true -data-encoding = "2.9.0" From ae8e4a4b397ffa5b604f4c76da2215515582f192 Mon Sep 17 00:00:00 2001 From: Tanmay Arya Date: Sun, 4 Jan 2026 11:24:02 +0530 Subject: [PATCH 16/25] implement basic local snapshot registry for snapshot engine --- crates/defs/src/error.rs | 1 + crates/snapshot/src/constants.rs | 2 +- crates/snapshot/src/engine.rs | 14 +++ crates/snapshot/src/lib.rs | 1 + crates/snapshot/src/metadata.rs | 4 +- crates/snapshot/src/registry/local.rs | 133 ++++++++++++++++++++++++++ crates/snapshot/src/registry/mod.rs | 36 +++++++ 7 files changed, 189 insertions(+), 2 deletions(-) create mode 100644 crates/snapshot/src/engine.rs create mode 100644 crates/snapshot/src/registry/local.rs create mode 100644 crates/snapshot/src/registry/mod.rs diff --git a/crates/defs/src/error.rs b/crates/defs/src/error.rs index ee2379b..a56142e 100644 --- a/crates/defs/src/error.rs +++ b/crates/defs/src/error.rs @@ -15,6 +15,7 @@ pub enum DbError { StorageCheckpointError(String), InvalidMagicBytes(String), VectorNotFound(uuid::Uuid), + SnapshotRegistryError(String), } #[derive(Debug)] diff --git a/crates/snapshot/src/constants.rs b/crates/snapshot/src/constants.rs index f202470..06c7c9f 100644 --- a/crates/snapshot/src/constants.rs +++ b/crates/snapshot/src/constants.rs @@ -1,7 +1,7 @@ use semver::Version; pub const SNAPSHOT_PARSER_VER: Version = Version::new(0, 1, 0); -pub const SMALL_ID_LEN: usize = 5; +pub const SMALL_ID_LEN: usize = 8; pub const MANIFEST_FILE: &str = "manifest.json"; pub const STORAGE_CHECKPOINT_FILE: &str = "storage-checkpoint.tar.gz"; diff --git a/crates/snapshot/src/engine.rs b/crates/snapshot/src/engine.rs new file mode 100644 index 0000000..b6b64b5 --- /dev/null +++ b/crates/snapshot/src/engine.rs @@ -0,0 +1,14 @@ +// what do i need this to do? +// manage a source of snapshots and load and unload metadata +// load latest snapshot, load latest snapshot before x (ok but) +// manage a worker thread that is woken up at regular intervals or at the call of a function to take snapshot +// it accepts an arc +// +// broad architecture - what i want: +// - abstract snapshot source -> can be local directory or remote(define protocol) +// source operations: +// - add snapshot (with Snapshot) +// - read snapshot metadatas with paging +// - read Snapshot of specific snapshot - internal implementation: unpack and read manifest file`(dont bother with checksums verification) +// - make a proxy wrapper that deletes the temp file on destroy - caching is internal implementation +// diff --git a/crates/snapshot/src/lib.rs b/crates/snapshot/src/lib.rs index 1933628..cfacfcb 100644 --- a/crates/snapshot/src/lib.rs +++ b/crates/snapshot/src/lib.rs @@ -1,6 +1,7 @@ pub mod constants; pub mod manifest; pub mod metadata; +pub mod registry; mod util; use crate::{ diff --git a/crates/snapshot/src/metadata.rs b/crates/snapshot/src/metadata.rs index d7cfd00..2e15a97 100644 --- a/crates/snapshot/src/metadata.rs +++ b/crates/snapshot/src/metadata.rs @@ -7,9 +7,11 @@ use std::{fmt::Display, path::PathBuf, time::SystemTime}; use std::{fs, path::Path}; use uuid::Uuid; +pub type SmallID = String; + // Metadata is the data that can be parsed from the snapshot filename pub struct Metadata { - pub small_id: String, + pub small_id: SmallID, pub date: SystemTime, pub path: PathBuf, pub sem_ver: Version, diff --git a/crates/snapshot/src/registry/local.rs b/crates/snapshot/src/registry/local.rs new file mode 100644 index 0000000..65aa691 --- /dev/null +++ b/crates/snapshot/src/registry/local.rs @@ -0,0 +1,133 @@ +use std::{ + collections::HashMap, + fs, + path::{Path, PathBuf}, +}; + +use crate::registry::SnapshotMetaPage; +use crate::registry::SnapshotRegistry; +use crate::{ + Snapshot, VectorDbRestore, + metadata::{Metadata, SmallID}, +}; +use defs::DbError; + +pub struct LocalRegistry { + pub dir: PathBuf, + filename_cache: HashMap, +} + +impl LocalRegistry { + pub fn new(dir: &Path) -> Result { + fs::create_dir_all(dir).map_err(|e| DbError::SnapshotRegistryError(e.to_string()))?; + Ok(LocalRegistry { + dir: dir.to_path_buf(), + filename_cache: HashMap::new(), + }) + } +} + +impl SnapshotRegistry for LocalRegistry { + fn add_snapshot(&mut self, snapshot: &Snapshot) -> Result<(), DbError> { + snapshot.save(self.dir.as_path())?; + Ok(()) + } + + fn list_snapshots(&mut self, limit: usize, offset: usize) -> Result { + let mut res = Vec::new(); + let filtered_files = fs::read_dir(self.dir.as_path()) + .map_err(|e| { + DbError::SnapshotRegistryError(format!("Cannot read local registry dir: {}", e)) + })? + .skip(offset) + .take(limit); + + for file in filtered_files { + let file = match file { + Ok(file) => file, + Err(_) => continue, + }; + let file_path = file.path(); + let metadata = Metadata::parse(file_path.as_path())?; + + let filename = file_path + .file_name() + .ok_or(DbError::SnapshotRegistryError( + "Could not load filename of snapshot".to_string(), + ))? + .to_string_lossy(); + self.filename_cache + .insert(metadata.small_id.clone(), filename.to_string()); + + res.push(metadata); + } + Ok(res) + } + + fn remove_snapshot(&mut self, small_id: SmallID) -> Result { + if let Some(filename) = self.filename_cache.get(&small_id) { + let snapshot_filepath = self.dir.join(filename); + + let metadata = Metadata::parse(snapshot_filepath.as_path())?; + fs::remove_file(snapshot_filepath.as_path()).map_err(|e| { + DbError::SnapshotRegistryError(format!("Failed to remove snapshot: {}", e)) + })?; + self.filename_cache.remove_entry(&small_id); + Ok(metadata) + } else { + for file in fs::read_dir(self.dir.as_path()).map_err(|e| { + DbError::SnapshotRegistryError(format!("Cannot read local registry dir: {}", e)) + })? { + let file = match file { + Ok(file) => file, + Err(_) => continue, + }; + let file_path = file.path(); + let metadata = Metadata::parse(file_path.as_path())?; + + if metadata.small_id == small_id { + return Ok(metadata); + } + } + Err(DbError::SnapshotRegistryError( + "Snapshot not found".to_string(), + )) + } + } + + fn load( + &mut self, + small_id: String, + storage_data_path: &Path, + ) -> Result { + if let Some(filename) = self.filename_cache.get(&small_id) { + let snapshot_filepath = self.dir.join(filename); + Snapshot::load(snapshot_filepath.as_path(), storage_data_path) + } else { + for file in fs::read_dir(self.dir.as_path()).map_err(|e| { + DbError::SnapshotRegistryError(format!("Cannot read local registry dir: {}", e)) + })? { + let file = match file { + Ok(file) => file, + Err(_) => continue, + }; + let file_path = file.path(); + let metadata = Metadata::parse(file_path.as_path())?; + let filename = file_path + .file_name() + .ok_or(DbError::SnapshotRegistryError( + "Could not load filename of snapshot".to_string(), + ))? + .to_string_lossy(); + self.filename_cache + .insert(metadata.small_id.clone(), filename.to_string()); + if metadata.small_id == small_id { + return Snapshot::load(file_path.as_path(), storage_data_path); + } + } + Err(DbError::SnapshotRegistryError( + "Snapshot not found".to_string(), + )) + } + } +} diff --git a/crates/snapshot/src/registry/mod.rs b/crates/snapshot/src/registry/mod.rs new file mode 100644 index 0000000..f566108 --- /dev/null +++ b/crates/snapshot/src/registry/mod.rs @@ -0,0 +1,36 @@ +// what do i need this to do? +// manage a source of snapshots and load and unload metadata +// load latest snapshot, load latest snapshot before x (ok but) +// manage a worker thread that is woken up at regular intervals or at the call of a function to take snapshot +// it accepts an arc +// +// broad architecture - what i want: +// - abstract snapshot source -> can be local directory or remote(define protocol) +// source operations: +// - add snapshot (with Snapshot) +// - read snapshot metadatas with paging +// - read Snapshot of specific snapshot - internal implementation: unpack and read manifest file`(dont bother with checksums verification) +// - make a proxy wrapper that deletes the temp file on destroy - caching is internal implementation +// + +use std::path::Path; + +use defs::DbError; +pub mod local; +use crate::{Snapshot, VectorDbRestore, metadata::Metadata}; + +pub type SnapshotMetaPage = Vec; + +pub const INFINITY_LIMIT: usize = 100000; +pub const NO_OFFSET: usize = 0; + +pub trait SnapshotRegistry { + fn add_snapshot(&mut self, snapshot: &Snapshot) -> Result<(), DbError>; + fn list_snapshots(&mut self, limit: usize, offset: usize) -> Result; + fn remove_snapshot(&mut self, small_id: String) -> Result; + fn load( + &mut self, + small_id: String, + storage_data_path: &Path, + ) -> Result; +} From 809ec0c1313ec8e69d4602a26fcb1472db396bc0 Mon Sep 17 00:00:00 2001 From: Tanmay Arya Date: Sun, 4 Jan 2026 12:45:15 +0530 Subject: [PATCH 17/25] implement lockfile for snapshot registry --- Cargo.lock | 11 +++++++ crates/snapshot/Cargo.toml | 1 + crates/snapshot/src/constants.rs | 2 -- crates/snapshot/src/engine.rs | 14 --------- crates/snapshot/src/registry/constants.rs | 1 + crates/snapshot/src/registry/local.rs | 37 ++++++++++++++++++++++- crates/snapshot/src/registry/mod.rs | 1 + 7 files changed, 50 insertions(+), 17 deletions(-) delete mode 100644 crates/snapshot/src/engine.rs create mode 100644 crates/snapshot/src/registry/constants.rs diff --git a/Cargo.lock b/Cargo.lock index b338615..69efb5c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -580,6 +580,16 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "fs2" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9564fc758e15025b46aa6643b1b77d047d1a56a1aea6e01002ac0c7026876213" +dependencies = [ + "libc", + "winapi", +] + [[package]] name = "futures-channel" version = "0.3.31" @@ -2042,6 +2052,7 @@ dependencies = [ "data-encoding", "defs", "flate2", + "fs2", "index", "semver", "serde", diff --git a/crates/snapshot/Cargo.toml b/crates/snapshot/Cargo.toml index 63973e7..10328f1 100644 --- a/crates/snapshot/Cargo.toml +++ b/crates/snapshot/Cargo.toml @@ -9,6 +9,7 @@ chrono.workspace = true data-encoding = "2.9.0" defs.workspace = true flate2 = "1.1.5" +fs2 = "0.4.3" index.workspace = true semver = "1.0.27" serde.workspace = true diff --git a/crates/snapshot/src/constants.rs b/crates/snapshot/src/constants.rs index 06c7c9f..3dd46d3 100644 --- a/crates/snapshot/src/constants.rs +++ b/crates/snapshot/src/constants.rs @@ -3,5 +3,3 @@ use semver::Version; pub const SNAPSHOT_PARSER_VER: Version = Version::new(0, 1, 0); pub const SMALL_ID_LEN: usize = 8; pub const MANIFEST_FILE: &str = "manifest.json"; - -pub const STORAGE_CHECKPOINT_FILE: &str = "storage-checkpoint.tar.gz"; diff --git a/crates/snapshot/src/engine.rs b/crates/snapshot/src/engine.rs deleted file mode 100644 index b6b64b5..0000000 --- a/crates/snapshot/src/engine.rs +++ /dev/null @@ -1,14 +0,0 @@ -// what do i need this to do? -// manage a source of snapshots and load and unload metadata -// load latest snapshot, load latest snapshot before x (ok but) -// manage a worker thread that is woken up at regular intervals or at the call of a function to take snapshot -// it accepts an arc -// -// broad architecture - what i want: -// - abstract snapshot source -> can be local directory or remote(define protocol) -// source operations: -// - add snapshot (with Snapshot) -// - read snapshot metadatas with paging -// - read Snapshot of specific snapshot - internal implementation: unpack and read manifest file`(dont bother with checksums verification) -// - make a proxy wrapper that deletes the temp file on destroy - caching is internal implementation -// diff --git a/crates/snapshot/src/registry/constants.rs b/crates/snapshot/src/registry/constants.rs new file mode 100644 index 0000000..9138454 --- /dev/null +++ b/crates/snapshot/src/registry/constants.rs @@ -0,0 +1 @@ +pub const LOCAL_REGISTRY_LOCKFILE: &str = "LOCKFILE"; diff --git a/crates/snapshot/src/registry/local.rs b/crates/snapshot/src/registry/local.rs index 65aa691..13c66bd 100644 --- a/crates/snapshot/src/registry/local.rs +++ b/crates/snapshot/src/registry/local.rs @@ -4,13 +4,14 @@ use std::{ path::{Path, PathBuf}, }; -use crate::registry::SnapshotMetaPage; use crate::registry::SnapshotRegistry; +use crate::registry::{SnapshotMetaPage, constants::LOCAL_REGISTRY_LOCKFILE}; use crate::{ Snapshot, VectorDbRestore, metadata::{Metadata, SmallID}, }; use defs::DbError; +use fs2::FileExt; pub struct LocalRegistry { pub dir: PathBuf, @@ -20,6 +21,26 @@ pub struct LocalRegistry { impl LocalRegistry { pub fn new(dir: &Path) -> Result { fs::create_dir_all(dir).map_err(|e| DbError::SnapshotRegistryError(e.to_string()))?; + let lock_file_path = dir.join(LOCAL_REGISTRY_LOCKFILE); + let lock_file = if !lock_file_path.exists() { + fs::File::create(&lock_file_path).map_err(|e| { + DbError::SnapshotRegistryError(format!("Couldn't create LOCKFILE : {}", e)) + })? + } else { + fs::OpenOptions::new() + .read(true) + .write(true) + .open(&lock_file_path) + .map_err(|e| { + DbError::SnapshotRegistryError(format!("Couldn't open LOCKFILE : {}", e)) + })? + }; + + // try to acquire lockfile + lock_file + .try_lock_exclusive() + .map_err(|_| DbError::SnapshotRegistryError("Couldn't acquire LOCKFILE".to_string()))?; + Ok(LocalRegistry { dir: dir.to_path_buf(), filename_cache: HashMap::new(), @@ -131,3 +152,17 @@ impl SnapshotRegistry for LocalRegistry { } } } + +impl Drop for LocalRegistry { + fn drop(&mut self) { + // remove exclusive lock on lockfile + let lock_file_path = self.dir.join(LOCAL_REGISTRY_LOCKFILE); + if let Ok(lock_file) = fs::OpenOptions::new() + .read(true) + .write(true) + .open(&lock_file_path) + { + let _ = lock_file.unlock(); + } + } +} diff --git a/crates/snapshot/src/registry/mod.rs b/crates/snapshot/src/registry/mod.rs index f566108..9c61df0 100644 --- a/crates/snapshot/src/registry/mod.rs +++ b/crates/snapshot/src/registry/mod.rs @@ -16,6 +16,7 @@ use std::path::Path; use defs::DbError; +pub mod constants; pub mod local; use crate::{Snapshot, VectorDbRestore, metadata::Metadata}; From 2dd4e0ea14d316e7b0e2abc21f4f4ebc26a3e0a0 Mon Sep 17 00:00:00 2001 From: Tanmay Arya Date: Sun, 4 Jan 2026 12:55:06 +0530 Subject: [PATCH 18/25] change order of metadata entries in filename --- crates/snapshot/src/engine/mod.rs | 0 crates/snapshot/src/metadata.rs | 10 +++++----- crates/storage/src/checkpoint.rs | 4 ++-- crates/storage/src/rocks_db.rs | 7 +++---- 4 files changed, 10 insertions(+), 11 deletions(-) create mode 100644 crates/snapshot/src/engine/mod.rs diff --git a/crates/snapshot/src/engine/mod.rs b/crates/snapshot/src/engine/mod.rs new file mode 100644 index 0000000..e69de29 diff --git a/crates/snapshot/src/metadata.rs b/crates/snapshot/src/metadata.rs index 2e15a97..16f38b5 100644 --- a/crates/snapshot/src/metadata.rs +++ b/crates/snapshot/src/metadata.rs @@ -47,15 +47,15 @@ impl Metadata { .collect::>(); if parts.len() != 3 { - return Err(DbError::SnapshotError("Invalid filename1".to_string())); + return Err(DbError::SnapshotError("Invalid filename".to_string())); } - let id = parts[0]; + let id = parts[1]; if id.len() != SMALL_ID_LEN { return Err(DbError::SnapshotError("Invalid UUID".to_string())); } - let date = chrono::DateTime::parse_from_rfc3339(parts[1]) + let date = chrono::DateTime::parse_from_rfc3339(parts[0]) .map_err(|_| DbError::SnapshotError("Invalid date".to_string()))?; let version = Version::parse(parts[2]) .map_err(|_| DbError::SnapshotError("Invalid version".to_string()))?; @@ -100,10 +100,10 @@ impl Display for Metadata { write!( f, "{}{}{}{}{}", - self.small_id, - FILENAME_METADATA_SEPARATOR, dt_now_local.to_rfc3339_opts(chrono::SecondsFormat::Secs, true), FILENAME_METADATA_SEPARATOR, + self.small_id, + FILENAME_METADATA_SEPARATOR, self.sem_ver ) } diff --git a/crates/storage/src/checkpoint.rs b/crates/storage/src/checkpoint.rs index e43c5f7..09827fd 100644 --- a/crates/storage/src/checkpoint.rs +++ b/crates/storage/src/checkpoint.rs @@ -23,7 +23,7 @@ impl StorageCheckpoint { pub fn open(path: &Path) -> Result { let filename = path .file_name() - .ok_or_else(|| DbError::StorageCheckpointError("Invalid filename2".to_string()))? + .ok_or_else(|| DbError::StorageCheckpointError("Invalid filename".to_string()))? .to_str() .ok_or_else(|| { DbError::StorageCheckpointError("Invalid UTF-8 in filename".to_string()) @@ -31,7 +31,7 @@ impl StorageCheckpoint { .to_owned(); let marker = filename .split_once("-") - .ok_or_else(|| DbError::StorageCheckpointError("Invalid filename3".to_string()))? + .ok_or_else(|| DbError::StorageCheckpointError("Invalid filename".to_string()))? .0; let storage_type = match marker { diff --git a/crates/storage/src/rocks_db.rs b/crates/storage/src/rocks_db.rs index 4e0489d..571ffaf 100644 --- a/crates/storage/src/rocks_db.rs +++ b/crates/storage/src/rocks_db.rs @@ -266,10 +266,9 @@ impl StorageEngine for RocksDbStorage { if !checkpoint_filename.ends_with(".tar.gz") || !checkpoint_filename.starts_with(ROCKSDB_CHECKPOINT_FILENAME_MARKER) { - return Err(DbError::StorageCheckpointError(format!( - "Invalid filename4 {}", - checkpoint_filename - ))); + return Err(DbError::StorageCheckpointError( + "Invalid filename".to_string(), + )); } let tar_gz = File::open(&checkpoint.path).map_err(|e| { From 8380b2bedd487132ab6e803b7618d8575ff68fca Mon Sep 17 00:00:00 2001 From: Tanmay Arya Date: Mon, 5 Jan 2026 03:18:53 +0530 Subject: [PATCH 19/25] implement snapshot engine worker thread; add dead/alive snapshots --- crates/api/src/lib.rs | 7 +- crates/defs/src/error.rs | 1 + crates/defs/src/lib.rs | 7 ++ crates/snapshot/src/engine/mod.rs | 135 ++++++++++++++++++++++++++ crates/snapshot/src/lib.rs | 1 + crates/snapshot/src/registry/local.rs | 14 ++- crates/snapshot/src/registry/mod.rs | 9 +- 7 files changed, 168 insertions(+), 6 deletions(-) diff --git a/crates/api/src/lib.rs b/crates/api/src/lib.rs index 5f7c050..6b62a12 100644 --- a/crates/api/src/lib.rs +++ b/crates/api/src/lib.rs @@ -1,4 +1,4 @@ -use defs::{DbError, IndexedVector, Similarity}; +use defs::{DbError, IndexedVector, Similarity, SnapshottableDb}; use defs::{DenseVector, Payload, Point, PointId}; use index::kd_tree::index::KDTree; @@ -132,9 +132,10 @@ impl VectorDb { Ok(inserted) } +} - // create snapshot at specificied directory - pub fn create_snapshot(&self, dir_path: &Path) -> Result { +impl SnapshottableDb for VectorDb { + fn create_snapshot(&self, dir_path: &Path) -> Result { if !dir_path.is_dir() { return Err(DbError::SnapshotError(format!( "Invalid path: {}", diff --git a/crates/defs/src/error.rs b/crates/defs/src/error.rs index a56142e..5f35c1e 100644 --- a/crates/defs/src/error.rs +++ b/crates/defs/src/error.rs @@ -16,6 +16,7 @@ pub enum DbError { InvalidMagicBytes(String), VectorNotFound(uuid::Uuid), SnapshotRegistryError(String), + StorageEngineError(String), } #[derive(Debug)] diff --git a/crates/defs/src/lib.rs b/crates/defs/src/lib.rs index c2a79bf..074c352 100644 --- a/crates/defs/src/lib.rs +++ b/crates/defs/src/lib.rs @@ -1,6 +1,13 @@ pub mod error; pub mod types; +use std::path::{Path, PathBuf}; + // Without re-exports, users would need to write defs::types::SomeType instead of just defs::SomeType. Re-exports simplify the API by flattening the module hierarchy. The * means "everything public" from that module. pub use error::*; pub use types::*; + +// hoisted trait so it can be used by the snapshots crate +pub trait SnapshottableDb: Send + Sync { + fn create_snapshot(&self, dir_path: &Path) -> Result; +} diff --git a/crates/snapshot/src/engine/mod.rs b/crates/snapshot/src/engine/mod.rs index e69de29..eb38bac 100644 --- a/crates/snapshot/src/engine/mod.rs +++ b/crates/snapshot/src/engine/mod.rs @@ -0,0 +1,135 @@ +use std::{ + collections::VecDeque, + sync::{Arc, Condvar, Mutex}, + time::Duration, +}; + +use defs::{DbError, SnapshottableDb}; + +use crate::{metadata::Metadata, registry::SnapshotRegistry}; + +pub struct SnapshotEngine { + interval: Duration, + last_k: usize, + snapshot_queue: Arc>>, + db: Arc, + registry: Arc>, + worker_cv: Arc, + worker_running: Arc>, +} +impl SnapshotEngine { + pub fn new( + interval: usize, + last_k: usize, + db: Arc, + registry: Arc>, + ) -> Self { + Self { + interval: Duration::from_secs(interval as u64), + last_k, + snapshot_queue: Arc::new(Mutex::new(VecDeque::new())), + db, + registry, + worker_cv: Arc::new(Condvar::new()), + worker_running: Arc::new(Mutex::new(false)), + } + } + + pub fn stop_worker(&mut self) -> Result<(), DbError> { + // acquire lock for worker_running + let mut worker_running = self.worker_running.lock().map_err(|_| DbError::LockError)?; + if !*worker_running { + return Err(DbError::StorageEngineError( + "Worker thread not running".to_string(), + )); + } + *worker_running = false; + self.worker_cv.notify_one(); + Ok(()) + } + + pub fn snapshot(&mut self) -> Result<(), DbError> { + // acquire lock for worker_running + let worker_running = self.worker_running.lock().map_err(|_| DbError::LockError)?; + if !*worker_running { + return Err(DbError::StorageEngineError( + "Worker thread not running".to_string(), + )); + } + self.worker_cv.notify_one(); + Ok(()) + } + + pub fn start_worker(&mut self) -> Result<(), DbError> { + // acquire lock for worker_running + let mut worker_running = self.worker_running.lock().map_err(|_| DbError::LockError)?; + if *worker_running { + return Err(DbError::StorageEngineError( + "Worker thread already running".to_string(), + )); + } + *worker_running = true; + + let worker_running_clone = Arc::clone(&self.worker_running); + let db_clone = Arc::clone(&self.db); + let registry_clone = Arc::clone(&self.registry); + let worker_cv_clone = Arc::clone(&self.worker_cv); + let snapshot_queue_clone = Arc::clone(&self.snapshot_queue); + let interval_clone = self.interval; + let last_k_clone = self.last_k; + + let _ = std::thread::spawn(move || { + Self::worker( + interval_clone, + last_k_clone, + worker_running_clone, + db_clone, + registry_clone, + worker_cv_clone, + snapshot_queue_clone, + ); + }); + Ok(()) + } + + // TODO: ask someone about sync issues (i dont think there are any) + fn worker( + interval: Duration, + last_k: usize, + worker_running: Arc>, + db: Arc, + registry: Arc>, + worker_cv: Arc, + snapshot_queue: Arc>>, + ) { + loop { + // acquire the lock and exit if its false + let worker_running = worker_running + .lock() + .map_err(|_| DbError::LockError) + .unwrap(); + if !*worker_running { + break; + } + + let snapshot_path = db + .create_snapshot(registry.lock().unwrap().dir().as_path()) + .unwrap(); + let snapshot_metadata = Metadata::parse(&snapshot_path).unwrap(); + + { + let mut queue = snapshot_queue.lock().unwrap(); + queue.push_back(snapshot_metadata); + + while queue.len() > last_k { + let old = queue.pop_front().unwrap(); + registry.lock().unwrap().mark_dead(old.small_id).unwrap(); + } + + // drop queue lock + } + + let _ = worker_cv.wait_timeout(worker_running, interval).unwrap(); + } + } +} diff --git a/crates/snapshot/src/lib.rs b/crates/snapshot/src/lib.rs index cfacfcb..1d77940 100644 --- a/crates/snapshot/src/lib.rs +++ b/crates/snapshot/src/lib.rs @@ -1,4 +1,5 @@ pub mod constants; +pub mod engine; pub mod manifest; pub mod metadata; pub mod registry; diff --git a/crates/snapshot/src/registry/local.rs b/crates/snapshot/src/registry/local.rs index 13c66bd..9acd64a 100644 --- a/crates/snapshot/src/registry/local.rs +++ b/crates/snapshot/src/registry/local.rs @@ -4,7 +4,7 @@ use std::{ path::{Path, PathBuf}, }; -use crate::registry::SnapshotRegistry; +use crate::registry::{INFINITY_LIMIT, NO_OFFSET, SnapshotRegistry}; use crate::registry::{SnapshotMetaPage, constants::LOCAL_REGISTRY_LOCKFILE}; use crate::{ Snapshot, VectorDbRestore, @@ -85,6 +85,10 @@ impl SnapshotRegistry for LocalRegistry { Ok(res) } + fn list_alive_snapshots(&mut self) -> Result { + self.list_snapshots(INFINITY_LIMIT, NO_OFFSET) + } + fn remove_snapshot(&mut self, small_id: SmallID) -> Result { if let Some(filename) = self.filename_cache.get(&small_id) { let snapshot_filepath = self.dir.join(filename); @@ -116,6 +120,10 @@ impl SnapshotRegistry for LocalRegistry { } } + fn mark_dead(&mut self, small_id: String) -> Result { + self.remove_snapshot(small_id) + } + fn load( &mut self, small_id: String, @@ -151,6 +159,10 @@ impl SnapshotRegistry for LocalRegistry { )) } } + + fn dir(&self) -> PathBuf { + self.dir.clone() + } } impl Drop for LocalRegistry { diff --git a/crates/snapshot/src/registry/mod.rs b/crates/snapshot/src/registry/mod.rs index 9c61df0..039e5d0 100644 --- a/crates/snapshot/src/registry/mod.rs +++ b/crates/snapshot/src/registry/mod.rs @@ -13,7 +13,7 @@ // - make a proxy wrapper that deletes the temp file on destroy - caching is internal implementation // -use std::path::Path; +use std::path::{Path, PathBuf}; use defs::DbError; pub mod constants; @@ -25,7 +25,7 @@ pub type SnapshotMetaPage = Vec; pub const INFINITY_LIMIT: usize = 100000; pub const NO_OFFSET: usize = 0; -pub trait SnapshotRegistry { +pub trait SnapshotRegistry: Send + Sync { fn add_snapshot(&mut self, snapshot: &Snapshot) -> Result<(), DbError>; fn list_snapshots(&mut self, limit: usize, offset: usize) -> Result; fn remove_snapshot(&mut self, small_id: String) -> Result; @@ -34,4 +34,9 @@ pub trait SnapshotRegistry { small_id: String, storage_data_path: &Path, ) -> Result; + fn dir(&self) -> PathBuf; + + // in the future this could be used to maybe move an old/stale snapshot to cold storage or to a remote registry + fn mark_dead(&mut self, small_id: String) -> Result; // current behaviour is to call remove_snapshot; + fn list_alive_snapshots(&mut self) -> Result; // current behaviour is to call list_snapshots; } From dc7f0fd927d53d3d97b927ff802aabf8cadaddb4 Mon Sep 17 00:00:00 2001 From: Tanmay Arya Date: Mon, 5 Jan 2026 04:57:31 +0530 Subject: [PATCH 20/25] refactor registry api; bug fixes and dummy test for snapshot engine --- crates/api/src/lib.rs | 50 +++++++ ...-05T04:54:07+05:30-x7e90fe57-x0.1.0.tar.gz | Bin 0 -> 6248 bytes ...-05T04:54:12+05:30-xd3aeb3c8-x0.1.0.tar.gz | Bin 0 -> 6443 bytes ...-05T04:54:17+05:30-x4703aff0-x0.1.0.tar.gz | Bin 0 -> 6777 bytes ...-05T04:54:22+05:30-xf1ff6ec9-x0.1.0.tar.gz | Bin 0 -> 7102 bytes ...-05T04:54:27+05:30-xc8652469-x0.1.0.tar.gz | Bin 0 -> 7217 bytes crates/api/src/temp/LOCKFILE | 0 crates/snapshot/src/engine/mod.rs | 15 ++- crates/snapshot/src/metadata.rs | 5 +- crates/snapshot/src/registry/local.rs | 125 +++++++++++++++--- crates/snapshot/src/registry/mod.rs | 9 +- 11 files changed, 181 insertions(+), 23 deletions(-) create mode 100644 crates/api/src/temp/2026-01-05T04:54:07+05:30-x7e90fe57-x0.1.0.tar.gz create mode 100644 crates/api/src/temp/2026-01-05T04:54:12+05:30-xd3aeb3c8-x0.1.0.tar.gz create mode 100644 crates/api/src/temp/2026-01-05T04:54:17+05:30-x4703aff0-x0.1.0.tar.gz create mode 100644 crates/api/src/temp/2026-01-05T04:54:22+05:30-xf1ff6ec9-x0.1.0.tar.gz create mode 100644 crates/api/src/temp/2026-01-05T04:54:27+05:30-xc8652469-x0.1.0.tar.gz create mode 100644 crates/api/src/temp/LOCKFILE diff --git a/crates/api/src/lib.rs b/crates/api/src/lib.rs index 6b62a12..390fb12 100644 --- a/crates/api/src/lib.rs +++ b/crates/api/src/lib.rs @@ -217,8 +217,14 @@ mod tests { // TODO: Add more exhaustive tests + use std::{sync::Mutex, thread::sleep, time::Duration}; + use super::*; use defs::ContentType; + use snapshot::{ + engine::SnapshotEngine, + registry::{SnapshotRegistry, local::LocalRegistry}, + }; use tempfile::{TempDir, tempdir}; // Helper function to create a test database @@ -454,4 +460,48 @@ mod tests { // check if vectors was restored assert!(loaded_db.get(point_id).unwrap().unwrap().vector.unwrap() == vec1); } + + #[test] + fn test_snapshot_engine() { + //TODO: write proper unit test + let (_db, _temp_dir) = create_test_db(); + + let db = Arc::new(Mutex::new(_db)); + let registry = Arc::new(Mutex::new( + LocalRegistry::new(Path::new( + "/home/tanmay/Documents/CodingRepos/vector-db/crates/api/src/temp", + )) + .unwrap(), + )); + + let interval = 5; + let last_k = 5; + let mut se = SnapshotEngine::new(interval, last_k, db.clone(), registry.clone()); + sleep(Duration::from_secs(1)); + + se.start_worker().unwrap(); + let vec1 = vec![0.0, 1.0, 2.0]; + + for _ in 0..30 { + sleep(Duration::from_secs(2)); + + println!( + "{}", + registry.lock().unwrap().get_latest_snapshot().unwrap() + ); + + let _ = db + .lock() + .unwrap() + .insert( + vec1.clone(), + Payload { + content_type: ContentType::Text, + content: format!("Test content {}", 0), + }, + ) + .unwrap(); + } + se.stop_worker().unwrap(); + } } diff --git a/crates/api/src/temp/2026-01-05T04:54:07+05:30-x7e90fe57-x0.1.0.tar.gz b/crates/api/src/temp/2026-01-05T04:54:07+05:30-x7e90fe57-x0.1.0.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..d51bb76d94fa559ee1cca3fde94934d802061562 GIT binary patch literal 6248 zcmV-u7?7tOkl`CKr(^?l3@~ghBQNz zEFc*P10p%+oO2XVkes6;l0hY@AS~{Cwfky!Yxlj~+E*KXpFjF^oxXi;Jl$RQ+c2~k z&;kvG2_P*HPyvXT2t)vm0>cH+CO1_DJy zAVfeQP#6XU{-ymB5D1YlR2U=zg@HjJ7}4*`ATWpsNc49A?~H>xad8p-SNU^S{>lC? zg6+?i^w0JR(I5PO0ruDDKhEC4{=X&vAu!m#&i@PFANl|H0ZjZSOKeL~hJESn|EMWi0FI!??){MJ=0pW*93S?GAFoHbsTSq{ZHE>$J6}LklJN3SinCwKFH)7jB-aqx8 z5&wn|lvp$l;n2#*q7WK3&l78LqD!oMMWl%r4pNIq3+i=sA^iMZ(T^f=v*DT%g#=vQ zaDZOxjSH7rw}<5Bkd;D$k2tVX^Yr+_m6fO_zvj@S8d>}r^{12b z9YfkUo(Y4}q@C1PJFij};4^{p5RrE@#JNd@o12?hUJS}?yHi^9#J?P25M?B_ihGJL zEOfPyW#rMm{60ojnJST#&pg~eDVT}^5OPEEin{OhkeKU3SH!F?!L_-`s7Ah@AM5Ep zJwDid9|@;+4DH1&H6EWW{aQNrF}kX#Ey&`}^=YwMDNaFK_bR=z%4$sF+5N}gY(ipH zV~PoH6%7Qt>UjvE_xPOnmaQo>cGvdj&ksZBV^6)BB$dB48DQ{HUJqopCVLqn>N+G7qF6+}}tZb3&Yj^MwmL-4x z86#Qb+U|h^f!&}cTSrBExvaV^ejgzUo8W>x!QEVtC@a@Y{okagM)<_9HK%edBfQ^70dzzP#$#14dKjX9D?Yc}7j15i3%6{{Rpg#uSH0m< zz|_$hkcn{Ux?g45MdF#U&iJ0+Z16;L!SV2+ABeb*ty>1|HmH?7k7CKTCW#{{w0+J8 zllGPEY9Lw12P|(bw=;KT$>M`{qk3EHAh7nxuqEkH;p!5hmTzYBDj^v)&l*{m&raL- z1N!+7Q)SL%KS!?SwRNb?3#aj)9&g49olgd^O1etb9dt9$wT~KcoU7&FP7|`e3kmj| zGksOJyO;y*l6A!l{+P=2^`KiiFG^`$6pP#2D%I7)%sMQ3{oGc1((3aLQ^IbvZY)j( zl{M6UIaKUEue_&cH&;%cgGy-cQe)0Q=wjqRI*(tf-bNVE$V_S+Eq{q>YyJ_xlXs(W zi*wh0SK{;l(8vBzJKo$=jy#a*YStZaCG!M-SC*cx(b`j&STB9frMxuvif8MkOJ|z{ z9Z}~FBi6n95|!7mzaDTo1XIT7mZ+IcH^sS}7Uyun4s$t|Zt5$~I=-9Z+tIJ>t6s_F z&f~OYdF65`dn}UStBHFrIHh@|Ye=H(Xi(zJe5MWeE%KdD){(+z!u-b4O}CRZPPU@# zO(XVm=JFcN(*#xiv-_iuPi3w!jGyb8#JpwJ25K0vT&XEV;WwH6zO>&+b)_gamzH&);hOhPZ{T$Kl6GUdOP=#_u34a_vXs$p=e);htn zzxCv;l92beK#eDLUdzwEmbsj*Pbk)KZa;27C6H@Fiu@laJQ=>Vc7>;+IIPwtkx+UZ zGhu4o_ExIa`QUih2*3Jvqim^(s?w%Ghw#qAU>MI?$!T zA*rpRV{ebCq#-DM^Z3u$PN`lsf{d%J?kIYO>^&nXgYEs{Sz$#vn4gW}@KpMS|~)-2_&_p|*r@1xC)9-g?&dr#vLiqY>Vhy!DEGBTfD>L9@svfK*g$ayR@LvXZR zEmjgKY5DNR$M{_PB*H676I%DtUH&3TqiRQyPxu&i zgzQcyPFgE={Io~H1wL2QD#z%?qZIG3hQ-?yAV~R(61MZC>-Jr- zExVmxJ|D`hB5$^?tg+rQ?0O!VZu+o=JgY=CNUP+nZbzYEsIGRXLTyot&fdwatBmt< zAZD-r*S5XXuYosfre^A+kzVlr@xaV{{~NFM6oFL9iTZ#7mXb=9aQBBNd0UIe_3L&% zpI?6YGOcv}AtabZj<8NJy~~Q}V8oER*{S-Q>ZZ2>cV ziB$<);pgqEDYbYIUN z+q?c_qlC%DxZHV;Fz9(i1F;oww149qe%hw5)3bOYL)|K{dd|1ppL$XnkzRUYcD_I5 z9l7#jEONWv%v8j~J!plp{?c()2-xiAr|+xtl^vIk+XI<~yh+{!f3OtONtoC}Al-DQm*bL6WRnW|k{g9-a#7R;=y%RV7ERS|qhEDRk;-}AdxURpu zG||A!bwaMIA8@6aJV#&N?%T4Ki02@jW-lQ4ZZq2JgNktTdGpta*lVy}r>s{Q&ZeH{ zS*{~x_rdZ3^WWsKJzNZuoSrzlk8YvMAuIi~9o>k)2-)MO`eGDblgq5FA(AH4qP^e- z@w!b1EJCj5+c&C3|H@Ukh`VUQp!bWf2ObncW2}n`m-aG)TdQAv#YxtIVo$axZkFktx?3)$zt`D&w2dXjeoVD|bs;!uoIhURJu5Po>i*ak~M?&#B+M3&DkHf0Z znAFscd;qE7Z=QL{Tny)%KNhCnPgma>nssoW)$C7sDZ$;=v%k+mjb|%gcUa8T$a*#m z_^i-6!8L3?Am0@D{m1aT;3qtfEDkMb`8w^UI~-SDE;r}fGU87l$r*}584?>rJ@bCQ ziaGF#f@x8CN_2&WIb6&9En{R=oXSoPfX`(s?CnW(|P;9JdjxzBUr#@JK`*A zD$s@GPj)Vgzrt3U4I#u|na46M$|b0!aT7#VByNqA_Zr0NC{LnPBJ>`_hE&`)^h3(c ztzW+!-44Y14r;!+rWykk-^jSFT@d%Sel>h?IMS>AV3&Ggm=-RKjRwYuY`mb@{<2OP z%H--IY1Jp5yGITcRlWZ0?$#lYNDpfCwc0lk3O%HUFtIZ@E42@OuGab{C+b7{(LW}M zOs@gIOu9T)-XGc0p3d?U6Q|Vm+Rq}BLP}FKAFFwVF3HUsyDc(I=_@TPr8U%`K+SI2lEw>aFrv7l zz3w+{+#7_6(e1OV8WU0qFE|p551Wg8{^0PQ0d;MFl+|E`4g0U>URoLfun*y^1#>5} z*>pC{LLIv$Q{%Y$ZEF9l{L5oMYPT4Z^Rx1!P4Z776Si5@B?Raa-CzC4=YV;t1yALSMhPlcJJ9Gp$x@P+ zrG{~s-!H#(mbjh-NVj7%`ss$f=Nri)jPx6OX;41=UUacapFW)|0&wG?VGmsrv2vKG&%MUvJOUKNnkxAfSCB}R9A=>(O@ zp#x@}v+qrsT#d`63^fxn3juA33UX*f+EA&%<4mPbE>Gvf_#-_O>b0nIaZ8y~j`fs> z(;9`;;hxryn5;2xjb2cb648l23~0Ywj2eO5DtYhc$Bxf-9v02!juiK;Q^}{4Hl|Yai#DjB&=eW;tyB&tuC`r_gWcZGwwx7Sx=}Tn;jo z&Nsnld(M5gN$5;TcD)_m%3AuJol`zhvh%j%&!mQP6JV+p2v5A=`NRtv3YnCtXTC&T zHKLhB?i!}^#k?9H!zkt!{&2mTJ<(fP$IK{N%J(cQY`eDd2QM!(vw^>QMGUXZL@6mn zOR5uqAD*51T>Nfx@ylrR zLz82=Pe`1udKgQRA)wQo2AiF+s~pqTN3)?yD0aoX;fa*yNC8XuPnU?qEdZ~nhZF`y zMdtCkD2rl?GDv^jff5^7A4sR8|IB3-sK8#4*#{g&H{ z2o_-WyN!s~tD)nxd)-2-!sIh`rPeFjY}{e5v`k{AerKXIl-t?$0W5^@oXiq;69B9^ z*KA-vcq8-q@d=m4$-6k~Y$)9?0dP4MFVS|qb@VPiZTT2U5_H<`6wR#XH@^`l>!y7p+d^ce6xi^IFl7T9{`txeUa0qav0JC`HABsYROzD4AYOi(TO|KQ zc@#;0sgrSE<7*<4R$xWWHMJd*dfQI2FIlnbqv^jWuQ)S(dl~1P7hBP^GOZc+phpe; zvOkRifO%8VS&#&H8eeEgD9TaUeB$p=O3XKx*uFrT<>}xcBy#JheK*#MXhZ5#f80`B zJ`HF8%ds+Z-)JwZi7GHXK(L0$AwL=8(iLEUnpfJZRB<^H`*$XpLLA<9Fbh`FBj ziK@+`Q&f zE4f~*m)!T3vP?4K_NcZK0i~_(%`iRf!c`vV6=m|waCY+gfqMC?Xk{nw4CyGJXn)yv zaT~-*`b_wk2KZ;z++-a+SB0dP$W*ndB$L%8$rE3{o%(#wkIj{3d|_0x0_tKCAN<&_ z{<>=hBW-F>Ize)Lq0IHDeIyl2asYlho(V!E;c_j zF&@DyyCiFqL`qb`4loZ2ei?eLVQpe3(;Z*3cnml z(;m%z*!@%aV$+3uprC1E9H?57u1#$k*GQxi9!qq`+jcC$q_#%cakyI)C;Z0Tj+8&MM53MAO427H_&_OM&`+99skh54fj=# z5ovzq+X9yx{ac7B1Fkt1Cd@{ds|H_vv0=iIRH^dNIFi%lk*xZQ8~}hOcomcX^bX-F z$!zV`8&(ZFce-Bnt(#k+2Q;unUMIQ3yE+m>N~AB?*bk}3QX7nZ?!USWJ3A>zE`Gp4 zwy#aNSt-Ca-ECF7e4n>O>{499@D!&^g6PV$0RH;G2>pzbt!=1&j@1Xko!fN>MXiFD zlPeNzbUJS{xoiS#*1!9?^9F&J?CTmo0-F3(1-(_RKVI^Rn;b8-X-df_C4asGs+u9O zrZvT>yG#SmP0GZE(j1{BlNDQ}m)J3fxNT0l$>mkkP@ZkeO!a)|I7Ge-2Obje6jD(B8K~OXT2u2{FKqO4~Vm(|4K%ycjVWb6G z3?d3fiiutv!T%Kew-WuAfk9w2NE8NzL!d~I2pA0miy|+Aiz1O?Xt*d01%X09!Vm}u zA|eb0T`V`~MgQP1APn)lZy+%!^dhh`&fW=biT+bm2YYKQ?k_8_u(%MUQD6iTA%a9- zEKV^b5D7;@AQmVv6bcs>1p{FyiwmI)0)$_zbc7fT2!bGh77#EHg$9AZ(0^{ZyTio* z0@M_AoY3gMsP@nP$G_D$1;p=>w#J~b&erx=XMi~P-)x?RwH+D@$NawSKOy1)yomnb S5B}i)IsX6w1H7;RC;$Ls6GUGC literal 0 HcmV?d00001 diff --git a/crates/api/src/temp/2026-01-05T04:54:12+05:30-xd3aeb3c8-x0.1.0.tar.gz b/crates/api/src/temp/2026-01-05T04:54:12+05:30-xd3aeb3c8-x0.1.0.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..922140b5a0429aede4104c5cbe3ab58e829f3512 GIT binary patch literal 6443 zcmV+`8Pw(gn+C}s836%lazl4RlO-xa zkfefui#}|>~(6@+H3E#POUn1 z_V=J6D2yeMOzxgP|C7KG{>2j7k(D_{ zJr8(To870hSmtX-=zl%yE&vaDkgP~zQv{D!Ak=4oHS(jt>fsq7-JTxtr-USD(){7; zNQHoOL1Up6Q8B{4Cx9v(a6+TSZG$dm{7VC2-mSV15wci?n3I0PwkDZ6!pjA{VrzAj zaw_Y+!!cp0t$$luJl4{=O*siV1!azz(hRqbt|DM0JHF=SH=N!ea{m0Flpx;gW30 zA_is(MMJ8!FPjDRu3}m*DLf98*fhd3ZaTiu-x$4e9l0Nv_Y%-(-ne)me z^YYv3*2cQ$y)qq9|TUtRe!Vm-s41r)QVF(0RCdti1^SQU1^U4+29$;QzJ%VPY`IpZ)(& zfIE!hFBg{^M$~U_{_-_Xd*4{))3W!Z_K8Lc)xzAfdmfL^7_Z08*0oQ(@r98jI3Dek zR3mVY*Gqk*O$36gwewL=N#&D(x2_qb;)Y}8DPrH!;fPfIt5wy$eW~Uf8K&15p&_Bc zja(l2HWtOX)eb!7$vUvK$F+`cowah$i$^g1w4@KhOCCDk@9xd+Y13%}JX!JExvdi2 zwaqBn6Rpdr_LOTt74QbK)iFXPKP}3fc)7u3%71dyy!*y!SW{JzZwjlVU|^^d^{m!@ z7krPc*1k7Dzw&JwQq5ZQwSlk4F1bu=W`j=ZVth_fOR3l}8dGah*X63;n0Zq4y{`iA_BpWNkxlMJGPg-IsM)J% zj@6-gZqzz7;-k<2UA-4E;mCo36#iPNJ=ER&npA7VVWvcCE0$kzM9Y87Yo}+h&61n+ z#li<;sdLyV81_{C!Cvw$ADUdkckq%MT5NiTW~l;irv>@sqT*i5(|1VcOFWw6r=s;O z!y>TxH=MQnXG|D^;zP_kg}NPDl|@>bzkbNaO!YVo;7C<&uk{K{5}dF+&n|biuZi5J z&({u;M6d$;D6RFVC$hHr3|M)SSTw(4-I29ORpHkX^6^O4k3%0X#6&4##Jnw*41f)2 zoNQSd%{rqfYpv6yq3(z9r6-d0d!paFiaLoAi3}37ZsYCTQqwZ^A$3pbLB8bcEa>7w;Do{Lu{lnRVR8PnR~J3ihRa9Tz^ zxwZ@9!8#*DLMhUJk|^?MTu=VP5A zJAk=oiT@2_Ac2sl@qrg)pIjO2Sfs=2$>VYZ9YO|n_MMSy56GIMi4zm{>ZA!rTO#eYJBNQu_0r4WnqG0umb(ga!7lj$~e;am)`Mhik|9*NK@~2(+vBTr@!C9 zrM5rKejR6S6^pduYs^_ABhe2*KXEn=>#k!#npTL{@b;3;>l{^lLy*&5d~{uF@wI+> z={mmYb$-$9Wj~XYVy_xyAN)(a3Lp9kg5-3>@1|zE*4ih=mmYDP8m+l67L+JtNRwrc%xtgA+I-S`@%P!{ z6Tx8zt$11kISkikAyx6MH6pv!>>%{Cfzaf#^w$fM%TfJ0`^Lqrvvjjdr;J7yTP|J$ zail;PUmOdM3h9#oUb(OOfi<$Oee`WpA){7zlRs^&;v&n8pLd2|)}O-8O49b~i}3}u z4zf8ylWm4EkC9&mr-Yn(RDSVMBI74~SJT@w{GMId85^v(9`PP|dUBnAxi3O}s49HJ zf`Jd&cJexiN0ywFu|I@Hu7Py1VrioBvBvt0zR6h%c}w$2*4pn~eD5=kqCJbyC?jk? z{VRDG**Bv_(kOj3z8awdSlem^bH?%cm`wdeL-?F<5%EDp%GU3%&ju%e?ZXOSvTsD# z3j9s`4??lXBKz0zg7G|jcRe$_W>aLE{EpO99-dCzn7RD2XJ;-B&ETLv^Zqe9$#^I5 zFfqQ5ox<^r5M7*lyd~gWx8C_lVSmPAs_=je7Z*p%##Z{tu#bpw^fP)6gOEhx${*3P zq-t@Rq+B0_rlj(u1tT{{A2!QQ_-$?ZId}~y&!l!^q8wKWbt{o!RkX9ky;WYwSi>)1 z#RB`dHnjt`g@oU~8(cHI?G_&YMqkX6%k0;Q(hc0%E2ag~sPbRLl%A1e4@@k8GVJZt zJ)aW%Y~4C{*7q)r4o^lB8Y>65Bk_D^O?%BTz7bzQXLvVE27WzhDYWkPo)2O?>e+K& zR5pfORB|FwZ0{T&?T52YH@{fp?wORizw?tjvdJ`QDR{|Yx(Rg=p&FlSiI?2r;Jajc zrnO#rx!csnJ#hIW_>Sk;ik)1&2j}Vh&DIVy_}(=qthp)B4Hw_L4?-?lFXVBpT7#*R zNH36!S>X$pKG&PLgUU|q&Et^A-#T^>N};~Y9mlEi={*qasSCog@znL@woDi#i4f~- zktg}fYh-gUy1QjN$-Hr=;4D4r$7T4nrg@Cty}cEEKd6LC`Ql#SWXtCn-|aIt2QD)j zjH^C&sQ=^%O5}r$i2S_)zQc&``vE2*NBl}>Gt3S%#Wrzl8@*wrkh|Zqe56y(s)Q$B z$~3KN5(CH<8Z1e6-SNJ{7-JbGD8!CI}=-N-0$K1Tz zrBW)h!ig+g+?{6wH_gq}3J|H>eRQcOpY$L8P;ZGqlb4k=2iko@$6g>u4= zmr`7k>|%N4vW9!a?e45eI%fg6UzTW5PtJ%#z0Cxi${_JHSIxv)N_w_Lo<4 z)7}pSbdyB}L_bHQo;KexYqLq{3}cxb5_APz?a4M&CmJH1C-_w$tqBcHXiUp)j>+as zWZ6K1j1Fh6OQ{T%_J|Hdv>keJl!=EeQL4LsXcW-g%GxHg4)(OA+3JLwcthT= zsGb~Q62RRVN9$D{ONSe4bz0!9pRTdT-0a#W16wj`Z<#nEFK15j?p^jnx-%{x57dV7 z!=0uY0xQi?Kcbd;P6yP%cX&?+`uULwN=Ie3c!bO za=@Q6rE-3)&;C{%Vr?_TGaAh)4r=wjUczY>FUmZ>qJyZ&W<3F1+j=)FV#uIWb?3r!5iuZ7GKGCy4Xsll8Dvu&$t&Evr66jppa_T$(ZVZc!7_*25t1NH`l z{y}rR9gh3(O=br*Fmr2%L%nLPBr_K&Y%)6*V0162p(%eO~}cT!b@GB@?DV-)*5`w4DQNm-<r3>$bJ%3G?q+ud#?KW>#1MDqeK;<3A09x7`rR4Q z^Q{a}E~rV*CUl)eAI?_RM1r1x8Ibg}OqRHYx0ax$CTNv@5MNSkfF;Yyiq!Au}VN9&=AL!tse8$wH*Jh--ESfp#Y>7-B_SXX$ zWcm(P7~f|~&^FZ&ln+)sSFt0V^M9q#k2F!tTwR-h*A9HBAB8Y`cbMRU$tS# zL65ArpQe6=&T6)()otqL2MjT|M5?gZSvYJ5#6IfWQ8%1ya5%eq1HF!qd2gr(Y8FtI zLtCaiOtHK>X7$tK)8IItf6rj8;P2R{#*YWOlBS& zT}oq#O>NCGLgdobTL5L18bjjSH+!YB;4QH|3Z8*XQ(g*NoVbSoG6)^?ktKzpY-f3d zX)JjOzH5?ysTx7|em5GhTgda^ewe+m`DebBGA10U)WWwTcdd1E_4`VBSYUP?wkVg4#zS`Usq+8CG`Msm*< zOK#TUjK{W&BHyzF)YRO}wkRnVTsCqBi(+9QiQr%Y!wLLvHDxzZBx9xj1=pSXsXqn?|*DU;!^K3X5 zKIHlBU@W@Sv(JUijKZw;#k;U0p;pG3)ZOR}lFZE;M*2sz8r}I752INbjVy!YJ`=Mn zg{f{|$K67z1|CFw7U-_{{OMDY*^4#ioAQo{G=++ao!udM9W{~8-?64I_udVXJbfQ! zE;C3Em0sz29r=jVA#X*_7Qmvn9zFNQfRS#+0_n~^<^IzDCmF~0o-iqZ_Dn&*bZP8Q zXsn_5_O)QIcjHfQ0@cN>R)46qy{))T#OET+PUiN4-?0_oZ>=2*hPEi$OA-&zMb?ru zXjpufdqwes>tmOUdtndNH3wFt{q68H6|Zp;ha8=0a(+rq6e9^o(3}@_la|D1H6m`I zFdh5YZ&Vtu9?-Zmf-KV7(2n=K=81O(N_#?oT0c(fwdrtj5%xkBknqm+@fMY=Wlp&X zDbk+bOD+FJe3`u{YyEsW;*&#JrRX6(jc}S|jMrzjdNYum|8z#{aiHU+vgWFg%E%%3 zJv&ad^Dth}Ft0vs2eMYhE!;J%&91Bh=P#`?&jE$1sGH`Nud(5LfOXAfnisMUd_ys=L-19)J2=0QkY|<} zW1L*BDC*z9uR$OwA%~zHZY8&Jl)WDf-b8~-H^pn2g0**aU%WfN6QPpgQCx1o5L{^( z&yppiNuRpD5*8v9>-Gq)*K3ik;Kfo|mm2CDc{7`UR!52t_&} z>CNjlNPDRymxHWu>&M%3&MHofYWHuLap*bQKWrAV_lJo6Ooo`AxLPi9Tqg)=yC{F~ zDCc&UP;d6}F=wWg@vJ}XzPJ7S$oE=oCN?09lJbr>(*f7190d{IBSCV*kL-s01vhd% zvKDi)8#>c(+*nLBgS!2p@2bzX}*2P(ey zw7(JCXtbBj&w#!64NSu};~LME3=rqn@Yq1fus8c}QHv8@Lmf+^ry_JjJSQa@rY^m?jO{P#NPitEM9&63%ggZ0eDzi?M^({C zl}59xgh_m8eX=8vf^W!-DlA=Pq1JsnIlA{r){i>kl@_GB_7K8Uq4fHOLYkf~(XD&S z6l`o940*N%X%w~+L9%+AM%uBO082jDgee^eA@qv@ha8#PcY)fkjd5cpWPKUmC9|E3~AP)|OF^6_f+ zNckN3e9}bP$@8Mgx$8J`LEO2@O6T2Ef#QO7yAKX!#FRk`x;2voC~8MHjq2V($+40z z&5%;wNr}=^S!8kAvO)w>^?jh5d-6$?A;(rdw_k)zcZT`ko=k+iVJ(3Q!M`XDC`TL? z+1B6D|>$*|2O>)4*w75{~$OF_~-fmKLKw54h@h1{5>)-KnQ?FxnTejR}Tj+ z215YFz(N35jDxuc#^ovnPy{6MN6f#8#pZwfEOHf%GPkn9Tq#WM4p$K%7z7RnU8%iL zaTFSiK|l})6bOR-V?VANI?F#+r#4QF^(vQKc@XZhgbOONvx|5X6}^Y!m!YiIlarT!se!2h}a{}{*l6ZotCaUyX4W^pZVRoKM5 z@zbiy8C3dS;bn;njYrtE^3W@WhRt!sVJ`G=2kkklEdW#Mtxi8F;wDEMarLMnO%LFr zu!yXnJ{RZ2b6g*Ww|tz#z{jbI1m-2;i3+&ONw*$gUZQ0ipH}~#{M|?O$3g@bG7loQ zjTHPc`HclPM8G#)*Q>gvW-z#NI=#U0S0U+L+;{it8zN;f3UOychV9Ld>Ti57isJZ_ ztLut!D?0aTMPzQiQ@FXwFokuyCbh`oQ(_-K;St=AJRTim)WM1gu#(sh9xE5PXC;y* z2{6Lr*#V4 zeokhKhRh>PzxL7{SN@0`oIec*Tco8U3Lzv0g@C~Tpr{a737>pS^iFiGgtXKeQUCMj? z`uFwNp43>F+C5_V@3%c%eb?z!5vBri31>%br_^X841p09osm$el9c8Wn&dnMe-Gq8 zzIW*M{rW-?tNPokN$L&{Q=;J8oK?HnqQdk%emsNs1c7%nl&k0_glGEUNgpS|a*a$D zj=OC&i_*=XgWqf)K3X2U9uYP-J}N!wGGRB}AB!opU?J^n{`N&dm-0}`$}&-1kCuEY zW|E4BjxJDuE`!D_`VX9dr#efUCkX4BFsV$wOP?gnsT9;r(W?C2@|rk7Uh-T{1N^SW+nX4n;zZS$E5 zFmJBkHMex|tZOe~{IJBFr?~TXiJ1au^_F=A!wEU1;K38B4==squ*SJb-2Mzzjg!R7 zW&nJ~7N&WpuIyowkEqO2ec{mJ9L?|kRA1#1rbQp6R4_F;&DP4#2YBJlIXtGCk+cbV zV$<>^a`W`a`691im7@(@z{HU;%+rFxT}uiaxEznwlM$8Amd71kn`!42Pyb?Nish%3 zP>QG1d=J8k(e^B(%6OTGr}b51d+C2^xydCDPm>6U`&aeA@ZGobkWEMT?1y^qgfV-s zkX-8jUVExVQxoGW7*0IT+=v(xx%-2ib+bBRoR?sS>4$-r0-2Gn(!D*pDO!L(7Uio~ zXiA+uQB$&46X-8!7d?z{xW6qdnX9ezOAXpXE`*S7!vq9o^!NEBcc2C7sHOUElgiSv zUt^R-1z##KF`2nAe|gZE0V&4g(5;tZnciL^Hdc5;NY{MpAfk~C0HY_z8QL0oRZD&D zpKH}(BoHk|)j&SAj7uN6+r(=Kkm}Si`jktLag7hu;}5Rhm#Z=b_~#l`yz+0Qaw0(o zC04c?+iyLtl?}$3P3||5`C!FCV^c%+!2&lPkA9L^%chhP|F#(!$E03BlRq0f8|%(t z1k-yNnH{s~<>&0uB5%CHH9$X4G_KI!qZL0f%NUXSTz~Vs{-O2QYmIV<5-mK+c2*+e z>o&qecHOqOSMBXNOA?he)cou2ichWh@eGFX3{}EYc;{9vFVAoj6R{3%RT_TLs zA0fm$^c<2Zm2o~CAY$9MO}pRP_=&o$bF_!rv6tZaeU=5L@6+N{p+2W0R`I`>BaF$m+e;t*2<%Tb8n-B&xzLzUwE$# zD9*3(_f3_bD@G2$Sl@$KXzUxhEf6I3Irq|X0@bK+`>jBd_T9T4+=oXyV0M{{O{BB> z2&0uy<&*!?5zQ5uL^}cqd!-r^n$gs{KBF+^ZuiZ7d9XI6 zDDCrDS&H)KCpEfVu^iKNtSH&aT-lwnVhRILXfwB3e-&%3-hqcHb9hhehw-Tl-pk<6<`nxPfZUZ_jd%n^na8SEZF2Y3x4<9-fMLrZnt@w;yGxti1PiH+Sw zXjCr4WEVgb;SMQ!3h&4q{d(BHZD+P54 zgboxHau$iWwhIq;XoY+ffb`-h{~NqR}pigZX+7k~42=mXtB z6cM8(3G=&YZtNX(LJL2^`?HCXF7pznX|`n^HmmTa3Zp02xu1?N&F^9fFY@R#Duds1 zuAJwEXUh_iawVt}hB`r#`^v2=q^ulHjyqGM=8ybu*JmEq_gSU&)dk@cFj{bz6YPuV zBIS%>e!TXtstOZOBe;ey5?5x@#IH8;*V7qa*6@!DH|-6&4Ey&M5GLLYt;!ZMrHA; zFJV)(SL^DN#IE;R!kxA&gDv{`gJpIZ3DK+omsdV;72vdq{pcgHlJ9u_XFa@`vh=^Z z8G666tGj#8#xAU9?WnkWOGqZ=ogCC_eS8S%%S-yH;-0cVvJ0yqHIzTe)9Pa#M-dUW(yKofEZ@kZ+`rnhc9Q! z=$fwm8{6#Cm^f~|VxTC@y$57e}-hdE^K&cor)Wv%vMALb-$g($m)i#b8)b5#A ztr6%ryS`!h zV0KKfw9;1S!}W~I?9b%XE+0ECF>N?K?k?Jl;>^UGz*bXk0*7txQS?Y=Ew=q)_%5EU zoR+IR`L6eSKr)r-hgr~hiqe(fxYOD~Ov6O{H2b&^T^Jz* zZ{FCnhw*6Q%H#rAu)({;I<&?=ydvEYB9R~z#Odl7T1>RD)bSj0VWjrSk*`@@%k>Vh zId1oZC0qW6p$#bRVzi7h9ykLuH;iQ}2Vi{&hWY4B7~f_*%>=6xiLj1h#&XSFp4Lof znv(%%E_mL#XxG3TkSWWy(skP)eq5MX*VXXKoA1$Yobx(`8>|L!V|?Z>VtrN-<1Vci z-Y&>gVZ!v)lnK&maxPO%x;&*Sf?@71rhI>f2uwADcIr#@pf3FUk$&#+Kwgln2N({Zfo6hYNfF|YvnT4BX=JdB-`x< zNweCO_Y0N*hUD;$*B_RJT%8SXMCVU$)5RAUJaP4xUS^}~x;S;qYKZ>mq_nd){84%} zfspYhsky5%rn!?9m!|Sj{otJoM}}rAskD~VoR-8Kw#^U-d#=~sTNe5!HM{*~{o&Vp zyBD?vGIW*4V#>Nx_4Lob`^Z>o&b~*_7g!IooHakbq-4?Z(vNI&$9-<4f!5h{R)&afqcql7^a#E!NzwL2XHm9zLIV%X zlb%>E`2v{r?Y8V;cF%QJLtj-(-hHc_p$&6}_svEAtZ_b_crL?E88sM!F5{pjFppAe za(Xit(&X^)Yx#ou!+lRO1AyTBgedBna|e7Of4lQs>+4ps`mOuV zV}oQ0Rhg^%znXc9D@vBDBoPw&y?&ydlcG(Nw-$<7s>`;hqaaqvW)ks%eTo-F!JZ{P zz|y;_#_2V$g+KXXa^bV_CGLfSNWg%rZ$NZjUZ@iXR{0H8x+{9p|DeJ0?4@>4Zntn& z^a3$Hhh7;qC0VqdzK$11ineR&JlPw`0$&OCfO%N9BymxoseTKd6ON~3D}6`8Y&Pq; z#&A@zIDWnf8~;Z;?XFaL$@u422S*PoTIWmTr<+3cR~ru?<~=Lga;>{(9LjRlx1{9n z`8X~Y_h3_nIl{CsD@rl=7x=R5Mt6hPtvCtvDn7z!_!h121n?gDSihe1MaLQ?Pw8(t z<0I8yI8(KLPNRMr{G3}%&SfvccV`BjaoMS7`2w41*BjRT^Nz-6f~wGWOV64NKHdI6 zOYY|tOqFnkX^GsE#jhrfBO7Yn;w~ZlWTQZ|@-Y%8n`)rjl2EdcyNa+dpIaB}gDDp) znbeL@t0r7w>_<+n|gD+EDhY^S4P~>mMqkl#BlaQ5=Qy1(tLBxoKahTg#QPry1%!e zRVrt84U^0S`+APv| zW&#HuAL}48h8y+>5Bir3cTYdKsDV=CJMbzKr}8REh3P%3=x8x_JNLia-w|{U z@oDDXw2-s-a9cMp>^pC~xmz@-8|P_6d)LAkKSLGkXX(Yqzgu|_-9>JA@C5j7Koh65 z1B)ydf+yFpX(^awek1=PNMlQUi)GT<1e;IiR}0?CH5FM$^;r-WJ)t8;MaF1qIA*5?UP35KpDBLpC_wRMixW^%Z zOUZM9h$8;%B4AoSq{Mxjo1WtGt+vY%vj{$h#7sQ{7Q!fcET66TIxPs7EjwgZBy5RE zx0c|S5y|XmyJP2MDqg+1>4;n~@q%zqca~%VUEz;cshqXZy_#Gtulyh%3rBCq1w;Z! z@F+U%a!FqDWMH^`DHD5mA!F+-x>Ezh(}A9Q?TXjN z8?VMUt_xLv`hu)Qo)*U>f~;(d{A$By_cJP39&xrr?pBa6uHeu1&~~V8jkFr6QUQ03<ps*^PFabMMy|iI3=?2soZnfR|WxJEZ!gfe84_bTWD@q z#yg;f&H8cct6Glp@=rbiJF8jq6~{U$Ewy70k5xr}oy&LB1}I7QQ!gFgb^`7K@-aUe zqMUq+=x59V?^JOVN?&~nI&DOYsOV2U=3#{)`>^6zjm9|bm4Usr>~N&VCK`^LUi7vR zJR9?f2QylR-%j;`U2}rgf*lAGm zxoni$H=Gi~+e2L7uFt~i4Ml}E6&{a++#ng2b@|>e&T((E?B^Z(zOXP+rlkh4K-a7q zh;p>ox`v$b>Yhe@?$ci#^Q~7Zz&;j+niMzQi*PPm6L^2H@CdL)SdR^Nib?WqqtB4qL=jXixaZNUu$QP(>!DOP&~5>cz=w9wMe$MjlTzDQ=c3DMFKSpIm$Fi1O=63-keN%)7GE{tH)}`VX}xH_#*bNjHGWA%LtL-< z1tT3Gbee;FuN1V?s`+~7?!Ibl*=jLme9yBGnYH9^x{@kp?^4)9dOBE6{Kq=hmku9D zUlZlX();3QcI;`P0zf5RHC*fC)w*R`a`_J)z(tpLpMgP72b2AS4OaJ*3@ZhvS zaY}OjGuHq(1vyc0Cp4!dP#kjedL#f~aY0T;w6%#d+TkVyKo}?txbgY#mjz7zxGZ$z z4L3oWqi+<(*VZ>4XuwS@2u2hM0RtuAP=L6ogcuqC2O-6P5@O;A2<9dS1O$wN!l5YO zjV}a(fk1E6K>$=74g`V3{w??)Hxd3Zu&5|R0*;ipNgD;e*$4^*0RtdNBnk;ZiXxCe z07L?W0U?k#!No}+Ns)_-{+t6>b`T%J@>x)zJJca zgYkF-&KeCwqi|>-3=4w-Q3!}N5P^n)uy8a63qwMHwvJewC(r@shLNzcbtL(F0f9jv z1Oi3^0)fCtIOwnKpMb$gz;G}G0Y^e1FvPEQFaiOAkbp@30X%ed!?^J9ko<4?^HKg{ z{~vMr&-TKf?G=(g_-6tCx9i`{$=T`uME^)I@;~bTw?D~0fIsw4(ggUE1@sGhH}o)t zDZfxJh^_RVI{|cmc{o+6p`=zTkf4$Vuxjd9SQYIr5AQO}T?GO0MY(b+M zpWW8!<3_qa0bajSS6XteR*TL*r1B4x=mHQoA&GS*7B@4UbwsQ(Cb)lZWWa=|7Q#T( zqtZiKJlqqGe=W_vq!7ZJ*Pk>hjUz&qtx?JD1keC&QZwFn#8yvb+1Q*3X#;an*38W1IYIZ49M zT&bY%c6n&6s6ODRK+2-`W1sR%x7^+!!_U_M>djs2ftN^?hmaINCZ(0Di1=K`*%7tfNoen++PaoAR+?P>+qzuWK>jmiQL=k?#& zsAE)9*u?gSfj2fu{s*`?S=+f{t$;`*;+GCF5Fi8!vjW1xFf0l^gvuUp-H*=&?miWZOYoUs^j^> z&Rj|JW0F~XVq)ruOQE8<03&nqldv4A)E2+}uj7-(Hh5R`TeUv60y;KEg?7q|q8}_* zIrSe~7O?6tt*o(mktAK9c)PoIcrJdXy%) z6U;-EXtOp;o@OKv9`K&b;QN5wHh+6*CsiqrXsHQ{Ao&zc&Alh-oew;d6bOe$GCzqv zSxo|R>5@IohD=vtzpx8xJrIwIqfK5?BXx3Nc2UdE=P}iN*0Rb~B>k?OZr)=fN-@`t1qFhf9_=v>HOVlHC#b0~p`)7;16Q15`J_{L z-^x%1W?au|7*CifE*P)yd3iW6?Xv|n-8JR(a}GHgnZEHR)WIRUJ;9z=t5oB$NnUGW zxyS=Sm00yK_V}zfB?V&@XBFRn6R>WwP_$!)}(z@*3t))`N!l-p;&`Vmwa|FPzNnV}@=X4m^}wtLA}dT0luI7**e!wN8t&YJHr zhc}mx3_frhV2E^Oyh#r(eCUg;qh%1^)?utP_X!Q1zUe24AKOnb@G_sBpcL1f8O3OA#jC)TvhW;bX6A zAKifmG1D9av@u%Wefis>kFA^3EA*V@p=iyJQ5|y1R_S zHV@5JfwETiC?q8aHhR7%7(XWu<{I)hjWvb*$&RRpU6ZMG;)WJqa(y`ig|0C0J&};izW3d=0 zV-@Y_EPyR~vs&aXOr}xSc;Gd%b4!G#K%dcOKFF%Ul0vo2e2Bt_cbU1J7HI8LF*h7_ zKY$TX4u0;pGZPVTtFjL;lS8h3@$Nl-nW_?5T38Cu|DF-k`PDV=y&30pr_PwY?FZAH z^h?LfkG9hp%EEJQJnFKkfCvjD4R5tsk(@fQo!R(7ei-=O^98wTZVaX$P#<~<^F-3;+r3$$dY7)#CCz@(m&4-}6m2UqBqw8gM(JGcklV&$&;b{0EOZtEYR7&-lP`5pl{c#kEh^-S4o?N$x24 z%?;@}WZ_*uuHV(ZviR896jJ7HUOW?KL5M#5uBCcPCe5w-c=cxs)4|?yn3HTNzif%B zYG)4DwxEOWp`>Yq`?I>^7iMQ$lqbHuJ5BA?#Eqy#&QL9%K%euIS({Z}`nz6+XLaw? z?spI)@5d7*2Z_%*g181O~?xoR48>oxB6SE227Y5a1|0*9;rV`2wi-eI{tZ~zsKxw zh}$9Q(|Od=7O{A~;@Ls*d;R#%SUd~*Md+3qL?dn>2ICz4)o!augs)>`!hJp%SyR2e zOnzYgaH)t=vMy`u#kKMmsacp;+WKJJ@{e1wz?lKHl+3Xj!+6$9)Nfr)&r+)C{dO}o zEvjW}<};D&XS;1JpB$zWO}UI;aJ`s+R6u4XFf~Vu0t;{DH%Ld=ehUHH3|JtzZ9Z38 zAKvf)*1EYJi+Suj8>+l8(D|s}4Wdnqy9x+Io0p)zDc@S8BXcN3lNvnwvSLyBvsT*Z zamJ@j;ix*vnre118?DZh&&Je=hsDuk?bh2Vn1;e60->DOed9-=u?Mk>dwM%kY{Pr{ z4t>g_t@3N`Dj`i`5p8*I1?r9A&!MbJ3*t(F+QZKJreJB0_jPrC$8<)UHFF!|*G5k* zHvP`r7z7L-sfgcJiMo_|i+qkuMX{?_qno1Zx&ojFyogT7!hBYeJ{w5>I(=NeSy(sx zXm0KreZf}J{LHn?H47V89oJndJfZYENbIVc`21rAXt(O;vYqL) z+n)|2drt^diDzXR5yp{`OKJ1uVMVpG%Nk_5zwLiDh@(7ULwx>TSmaQB(tankDmT=D z4xj<*;6Z?|nuh>wZRsB4u4FUvhx< z4+3${Dz}>bKbC(@q|$n7f9(Yi=f@ayi~eKJ$<=E!lj{tAQZg`?^$PA&Wyw&WC@B?v zu6!rJU^D9$xtg9h>$Nx8C|CDdqbt>;sCC8d8KB>OaN^nKsU}0HN{9pDe&4M9RKUik z`_w5qvDoMe&E=CZm114jr`ylY7u#Rn*lj*aKO65-m)&VEl9TFJZWJJ!J)%*lUwb>T5kcv&?kINh2)vJ*!6dmrKg6*cu7*9e!g@TRtNOqh z#%i4wzLd8kcR2BgsLrU2;U!Ab-`Rf$MFcvp-#9I?NN+7(OX5%dW+BeuYY>7tc}4qk zJbZxu0J_Fl_ExTG!soo%Z-Qi#a<=DQPEjN6nG3fw8ipn;0@ zWbPySo5?}!O^Now#;1236;tj=Bt`X#{tU0>N7jlYcb$i1`NU?euSOU~^`(6}REx~w z>RyC=V{q)5_$hI0BQ)M4URc}%tRzl(^t6ECp@7+2_>4wzz*I-K%ud}o)l^Mj7C@@f zk0-hIlri8SM~Q4It8nYd(OlEcro>e9NM$IxSgT&-XzZ)o+Fss);hNS37dA1P{-mGs{;4J2T zMxK(vT3TZFQKNC>a`h-q*nV=qgR&{NTc5r$UG3Qonf8-w${b6{+?xB+vvm5 z<@J}`HP+NB-nxzs_|sM5C=u{O4`3(fY8C9`*l=U9U6V@hr+~bt8vbE0`iiHYxW}$= z^kT$G`_Q*F2bQUI7V|=}nupYI%6+20?$7v%+lfP+jX!IA5#}{^Bj35aU)k(Dia2H9 zh;iMK``juyK}27>qk5ih&6YM@V6)V5U_V}Ejl|Wp%kcN~?ODXInrrNGr(DV-ZtW-% ziJmf&?VE`%K{BUy3m&s`^Eq^nOkh26b2b|)RqTrup1xBzGXawJJ5rl5%uccd9y`A2 zw|oT3g-le$>Eq?art9&lluu1AFg)oi4m5o>O=hDC?|zFYnrHIbW%8nySbH^I@p3rY zBDG^_epUxv^^WyPAj#(Ie%VxRzvgt86NJO)ZMnH4wn9r|tQa@ty55!?mytz>%;VrO zKi&0_%!Pc)vWEq0mxo9iMxsYNNelOqL*QoHwfM?-s#Cm)xGKzkr*Tat)KpN0svo{I`}t2)EB zoSy2|4BVWgLP!MHn(zUrKo8uC6)Ny-yYf0;ZVxa}AY2_{P8GLJO3i!wHrI+g{j*a= z=eZYV3}eEDi{1)XqM557D|+$M`s;tZDI+d#Xnfn}LfezZ`8B4k0We+X5l>1rS$Jv(?_`{y`kWEd7}a8!vM`JojqNS;wv!d;lD< z?$?`&)3Yv|q%pX#!0hVzSx8arCJwTLy8z?IkV}#gS4X)?j$3!+Wo$0-zV&+H7<=o; zF_}j>lhVGRnqEAvv$46{WD@ukJhaC5I1kC6!&s$;58RlEe!ek=OFa##X8S~M(Rf9Y zxncyabE#Q0Ezlciu8GXJew9p>iC6SzmlxM;00o{^h&{n)MBR`kXEKDo`3*OzzoNjU zBG%=Mgl4vkgamCa(xj^HcjiiP(Jr&wt?fN6Gh^c0?q$~D#?15al)q7X^Yd75eL zF*_*FI%63*(RWM+VJ*gAI97klsU@yHAQdC}_2LJHF~dmJIc6`exCm`tCb@nSrNgUm zWW5WAUp-6iQ*cyoQh$jAf~p~F%1W4Kth4)SIR9X#Dbmi1+?1)mE(zGuRq%MQd$8%c zc#;&FgCbtL?BXZVftk=?O+B*s&tK_Kj~yHR6IgU`0Rx#AGBV#EREpiN_GiZS4=AYf z9o|f?x*ejbA8{*tAfZId%v$B-8HAjlP1w|me^ z^!)Jso3E`+eq6w4pXABsDexRz7{!OtyJ;|U0$b(+TN^P2=JEnGTqI>L=58RE!lPqc zmGe>L!xVRJq2X8So-A@ndziUR?GvODBUD%iB3)RNh02dUTu-vPadYFtQSGhyFqJ3! zHXz&~!ZvLl9`>b>>2}7Tx@pp_`+CEHcc@4R#TXb#{q42+Uo1o$ciq ziB63EZ86c#&ovkI)fOJtr;nJHh zn-aBE_;TdST#Jo>tM&z(fY$0PNuUT_ML@pe4SGS7YXWrOdVT(_;)Akw6>-XkJr&RW zcEc|}xSu9}7dLv6Y4ldr@j{sL8rIE`{VF(9y4J14p(P>pc)!aVX>A*9=`f!la<`)> z($VK#WK(%-6-JCZUQ)Ur7T(YUe`rSkmz-O#S1sBZihW@G!ip$87-Hn-Id=>o$~69GiA8mf$`$?{KgS2 zkydf4_Sm&uy`0%U-{uEv6B5kzI+9DkmN!%I=4FjX8FPxf)aQIa0jO>9RQcsaRQb12 z)Byk1x|LEZU68_yFv8*JR{YU0n`5mG+>$5)1DLNY?9F`0^plRH9s`g&SSle(CFkM?sd0{N?=x9px`}6#`vyj*!@E`!_nOUKb@eA8+DE<^U+41 z#rHec0h0p50yVQwXM-q(j;Ga(g0zULn)lhWZM_q9qdsI~ywzFXnN4)BoVuPAWan;g z=91BF2drB~%w%t;49b;+-;8-RsKlN-f?@#d2SOOdNcuB%;wH8@xx zIqN2ZA{nbOd$-?_tiYSw;wM?csJPl<;^Y(nT$Hr$L{xRjFeb+F>U?LB32ZzsHB=|_ zcqDi}iZ%X#eLHfGd$wqz4 z4}^(9xXeyv^W-f9ne{?!6v8j`=~CVTM!gsi@aHxBXsh&ivXosw%>MPI=r<+vP039{ zu5sFPZCeW&Pp!Rw;4pxcyv$GV@#k{F`0n=?>KQA!wfb0AM8N8h>r=kS_qJTX>-dv&=``CXbE*_Nu*9%ZYqTss_r9#sokv_X*Xq2moC@FTPGq z9iJ0^TWI4Ge@6ckW*wGZA^S3sj@CCex`Sc2xZ7yyUi+h7cuB#YDvF8d2eJwZ$(mrg%9T z<~UAPbUkrGYVgFJzo{aJS>H{gCHE8id!#Zfa3+X#Ca{p|mfN+k)7g6(jHq>RzmIT(AkBp!3NNmUyQe*4 z<>4df^J|!Xci-p{RJ?b%DddtxietR`tp6}ZLZLxOt&H3+Ya;1O^v>dfYe%=>R}?}W zc)|69lKb`CdXuDzB6s)~#TK7gV^6j;o0TIum#=ACBqk*PUva=V+TwApZW8xho&NsL?%&3L+y6yE z{(b)kj6^{H=>Pl!@aD6{@=5XiEn@(D;(S<)8;(!v*T+G^5oizsBF^WEbFgs7x%^52 zk^oD9e);_CuzadC0VB|G zS)sttU+;gP%+<}w1!IHz zQ&wjuTSvElQ#lxnMdRRDuoVQ0K;o>Szv2;AFa!b%0l_gyBn*Q@gRNmOYd8*tg20e4 z5C(z9!*IW@2#d%5`Y-;a@<-0UE|5=MS>FYR`>SpLysrDZ8>?*f`=)IjaE`9FPL8g8 oQqcdjJiM(v&JpAAJM4eZ#O3oL`GY_BgMV`V1@gIQmjEgN0P-6z2><{9 literal 0 HcmV?d00001 diff --git a/crates/api/src/temp/2026-01-05T04:54:27+05:30-xc8652469-x0.1.0.tar.gz b/crates/api/src/temp/2026-01-05T04:54:27+05:30-xc8652469-x0.1.0.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..37d60b0da92fc56b3442b6714c790f8e9239804a GIT binary patch literal 7217 zcmV-19M0n(iwFP!00000|LvM*P*m-fsA+N#0R@zdWXaHpMi3-Ora^)P2@Ty%ZkmiB zsZkJ+AVD$$A~^|?Bqzxrl0k_IA~|RJ>YP(^&pmf)=A4$5l0PAi*;Ek-?T>TRFQ{!%R@ zlJcR=QSu-3DlaE4yJ!KRmK@P;=TGV{skBeLFr(Ptq4I9k)J4go72{6&j9MDxY5>G% zcljdMjTLbjf~LZ&Vqkz&ycM>IpHV41;tNk4`pn?B7g`BQG_kaz6u0=`R+ZRYMt}op z8Rn%w4qlzztGrz1=kVL8i~pg`+d>9-xufYOwp8>DAo9-56j4KS#iC)g>d;0(eZWbP zw8f{YVWmX(yidV~P3!=T4j-AHL$*qZ0n*}a+UG~V!!+Z`Jr9&`YDT_h!2}s>i~^24 z4lH^Iu4v~J(fZIh3>5x0k_(;wdG@JQr9hebz46G(RY2Zqf6-4<7PaKtQ=^lLizk3E z?eeV9cz2bTJZ@46(J5@dy}G%W?dmf2wQ()Qo8E$v*#P{KezA=PLM4q+=x|&_cI)r& z-qqRK!3}980~XSk|+_FwKWnU1++p)NFYRib&9Bs55Ygn zzu*57K!_yd|L=eBpa1_q0*(mszdYbKOsL{n4U$VU+>bP+`AyBA<%qxFSM zro73-gos7HTbVEu(`ZxK@9thzW@E`$SQ9$UbYa8FO7RCRILcBN-6ymp_r8q1;o=fK zwvw$6&p`Pp4Q*!Ax_IDyoeexZj(9YE5|1Zu)8!UzHsZbTM;G`$7gqjok~x~B8ygGW zQF=UKfCmkNZaw%#6DV3EkCG$DFz{7q2Gj^OI;;jMxao_PJS*XRugK|aS;DAP%XLwdhiw$M}n3@KYK>K?cL>0M_^1EqmyD1Ch*Cc5j$Ya~>bDg+! zV@3L2K5&@nxw{=7Yj!alR=+jraUqAt&TtQ|F!s6@O8J z(mjLGcMTjZ()luaiywClmNGH*hMB4IU0u$lyF{j4Wr%oFhej1}gKxdxgAo^$|cc9r9U%@Bo5Z*~Q5CIxo?! z8A0{W+l%WtD`Y!qMCGm{9OfU|@@Ass*5gWPJ+~TEM{6TQza3%|A9+fYQ6Jb?D$N@X zheloQFRZt;-JKzSf!i{FmH`)e6tBQQANc5==3QX9Fj2MwNdy6P9^1zeM(P}S9Fj4P zMV&1NNXeitL>AZI)_J>37mRBIjJ5FTV zZ50*+0@_03jL7xT7M;RyW|aQ+&wRwF<7?xlOb#V;2SdSwOo?(H0|!GYB#n~T#i(Tf zW%V5P4ivv5=W87(T>;UnEh7+`KOf3x&HJUPWg5jy$*_nl`IJtgaC)!>h}uKRkEJ znlVh%u%oYgJe9(b_ddbJB(+V#27p zv@~T0)u_u#=p1{%>Bl@z=kw7MF}BrPmsGUg5rQ}wB7jqv9&#Lndp-Lb@|JO$z`Jm3 zyJF-dtLZn`^}$PRT>}QBhTw!+)pk%Q&sJZejAwgxnDs~%|4-R((%EmBU-2Xtf9HH; zr3+&0h85dBPva$U>C?T_)AX%S?9u7L>lg6Z^N&=u0ZGRLm9jAjNDki`?_B zx#o}$$qTC>9OE5TGi;vUeg~I@Tt=n_*R%VMG?BsOj4S+B%-6dM*YkzDjFn^fZQhZt zCw+&F&9ipCcWnQ`b=tRF+5eD_R1WLCYM_zZbhyBNSeRcSnE$GmIpkJlY`Y?5&uvDb zRPutfAY!C@fy+sFWz+rw&8Er8yWY#!6I-GU%{P`7sP7R_i*j9e9)H<_6o~>0XBJX~ zpjj074ZJpT#pX_v!)!|ZesIgld2eRj@sVT?)uWFy(0v)&Xiw-M7}8=ALQL^~A-~$n z^FfIp=*~e1i>I0I78{N8Sxt14e<2xTQ5qLLudX%8m{O*8Lz{gFx&mUPt@||RQ+)lo z-q$CIp5wtw+|KovzNyac8O;r$1A=MGQhf0wC^BM)9p<7b<OeX#e8m8M&L4cU$ur~$4O+X!TFm+Z(a^$aj_nYAU#hy;f_9E+tcdz?$D5`gksJQ z-y9t*W@;MD2X$S`Lp;MRdFVcUZkb$xcFEwg*P)p)*Pn$8mKchDPp_pE}u zQPoi2yIjjK2zl*Tm~s>i?tHMIo0&XZHF1`(&=(05W4QYDa9K=?!~3Yr*YaIwQEuim zVc#C(^M{{X#QJF0sVfDNEbnIOnECJQJ$!utZ@1tG=+jDM_g~2?^Z$9m-aW0FATmXG zXu>y$;t_Z>X(<~6>rw953AeRNN8;a}bgFpypT?HZIQkz|o9c}x`f^5GnfatYjBpwZ z>QJll9v4uT?3?2IRHaPrYc_nr4>v!m=*npRu@_L|w8L+4ay+8gIO^1Uab+YFFSS>w zpFSmX<7R)C+ElpkdDa@o*}#*HWq66_UeRlhQA|{En#RWRpj^iEKp|`#^MI|21%3Eo zAcTDl;@9(G0C?f_EOmv{T4=`eTQPSfBPOc!{9X|jXySL?v%i$*Bvawqgx&M?7p=!y zbsakS9#-Vo-vWw#+#@{T=!U%=JmZiN8=MHx?u>(Nt(RCPoJIMa){P|T+i&bFjR2`g zEfUZV{4`Te27V-WRcPAZs^m;XZ#LTQpKS#l5p4q#H-&&y0d-h+ZJX-5ln8h2u0Zj( zss|{^j#V3A*LDD7-h+#NdHnaPf)uD3%cIk&=()Ou)3Pr+`A<$CTI{-sU91$)U`|sP z061_Wi|WzF@iKpN#PITbr=_$Wn(;yyD%k&)F@a)O&<#csdNP31A9nmFpZY|zECZ7#>gkPuZ9mx7(`2qiBWp?}7O!pA)>ZNd{ zc%SfKZeX6G#my8Jcyb)ztK>UXgyN@4p<>#gyFzC>&Q=+sfVB42N6E%MAgFX6;bPmb zEKyI0+_&XL<4!bsM{0DuSB)1U?{ACNZB@<)z(d_MKG%uOo?$88u!Y>{8wp0CnQU9q z!mT0{-gnlGvXidPD+ed{&j?H_(mSCI-#;VYc=$2*Q15-8Tl;G3ke|)>@A^cBIb@an zpM@)hQ<(+xUd_EfkGE~fx^CBULigir=yFRjDNl%CMb$?w*_eV^JkMlvQI%-fmDXH= zOw2sCry+fJKb_v()6jcGX&d8f!Ks@O_02FRG3}%zG^5Q{Wxn(0J~r8yJKk*u5~2r5mx&Nv_wI=*J&Ov85Vi0zij*M9cc}CUkA7dRkUYYz>ae ze0%J@BS*U4M3rrKfViXSn#NVO_`Ee&htEP$QSmn{->=Pgp6;tA!>B>?NKC?ujDEA2 z1n%Vi68en|`i(Z@;brdk(K0?0R^gt07&D)5I|2RvFFxwGq@c8g@-EBgF|`<01g4bi zDa&Vv-u_iiUxcV%k-@D77(UMyh?m3i|g8m!&N!# zNZe?~?kda`&uZ@CpnDwwPFXTvpM&hQ;diY@OEuE<4Q zuPr^pbEPS-38lsG_7Hy0hxjz zXMEl~jJw(E^TFPB^vk)(qUt5n-S;yw{02*3nCgJLGmZCW?*{?I9CxL+Vy3-k%eWmt z7G<12-h9g{e8GIu(nl625Pg+-Oq!ki4T7YGv2kaJCZZjor`W~4zs(IoJjzU(X^v+6 zYLVWz@@4_S)|A6=Oh~iUofn(Zlq&<6CtxW!tsf7p%rV3!s*r>_N|3$?G9 z#le_OIE2V{a&+-PnlG-E{K$2lg?=xR3Ar{x4V9P1e2@1gx#~bz_7s@)N_&c7ux0B! zcWRoGGibsa@MR<`?G}%nT&QEp3XhiIPVC5i<$wihzK78Tq~-*1JA@M4YGQ<|Emxz@ z?_n1c%xF%vOUJg)uQ~Eko;*FcKTlLtcwn0pT^t*GOqxRRtSfnGKF|86(XRieM&iPt zSQB#eSZ@niOWu+{7v}Nt+sxsP-F)wnPr%`<1R@r2v09E|S+;IU%C`Mk1Lb~%pe+@q zd;M9B5nFHm@HTU%ihm`gqyiw&ElXu85KYb*;fY>Ueo3?sup}~O&tK6jCT1v;N#%P` zH^V2OJ0CP@UrN;1Q7K+oVrWWht?5n?_Q-xfM?k4;G1>A??O1NTry762f@gGTd!pqu z)!2rI)+ljRc5Z~wc%27Zl7`$d!82Pv z^S+5z;V?0I5uq#yv5Z?fQ)76ceyWI07w5G!q9#=@w#ESp+J%lH`2pt9(%>5l**6s{ zm)lB%gg9Fn>)Q_~kcE~5(xBx}j#BSLwN_gS$%VFOolQlU!!&+?`vu#c8C|HtuPylL zn{lk!?E~D>C_jP-9UgR58}VPEO07uevUR&-#;j7Ss0Je4Kh)*{u1=B-wbd zT5*c>6f-HkTP0yN;mEukqo>v_ol|Bn(VeC+0uvLZND|M`tGH*?vrC>88-(hg)Zk=7 z6bmd)n|$o-uD6jdwS70FSF7qN(i<1s(&H5wYJQTQqD-F~@Y*g!R)whe2U9W$4|-k{ zL~R%r-EXuuStO#7@&yto!TyGm%C<6|nVk53eh>`WAdlwnw z_cT|EVwM!HN(@eGCVm@cRxehMwUJ8K>?{mFQRbx-yngazT1)#mJGa;LqPeY02e-o9 zo9Tem$wrZzPqUB8`iYCZxjT%-er@E5?dZyD@g~+_mYDfQkT2}M` zKqyua%O@DiEJmI2)gHq9>6SC>3L)>Jpl!0`J38Xx%zHYmw|a|$`JHKd4t5dVXeD(KF*#~#VqtU5_{fFrqeR~=PcgBI{C;YP!ocO9~+ zgH*7xF0RP^ve|7yC8zfZXss+JE^qN^N%iNfGWU70YYb`r+e#K|F|vV|);FdwsIPn< zBVt<=bMtw7;E=w+RssGra;=+T{I|3z=hW<05i=k5tI6Ma`Isq1mqqZ%gszg%qKc&I zkKZIbWKF%UTj4>F@11K7Vdh)5_T~;@HBq$>qdSdbPm;2D9@qSdvPUzexJKkYZfqaN&9(knB7}w|u@A@6TAqsO_N_VE#)ZlA$4Q3%>Tja0k^M5#` za8#P17>FEG{i$>qEG(!}sADXQ`Z01AH5D#h zLS}5r%ZE=fS4^+*NjZTy39p|yvpu{RJXSILDXbcIDT!cK(`vtb#cWjn!R*tC+e%Rm zzI{%N{N^svOebq==trt+_=ra!T(Fu5c4vqAwW553<&_SqKu_CI-ibz^N?gfn=9}7M z>UvB1Z>LXmQl|BQKM#^UpI1Jo)>f*GBFr!KbcpeI2p~Ssw9=4#HA#Y-lILCeQ){p93>D+jsF_*~zrWO#h-gPboY zs*3AYK#I$=QtMZ>a6#Intd4{QW1U6$_oDz44Y%(+RF=;T?51NZYR*Yu4AnanE1#vd z5sUIUXSC{EOd0EmSPcA)uy7#UJ|8{cp7z=rx-w2+N${D5Za}hH6D-lQ8CRAVa1!ZAjK}3I2ng( zHvETmBJ=f*xM?y^wlnDp@@C_%+crWnw831^S2IkLgR&1eW^h}>ThwMW1CO>J7c7#3 zW~vrVpT1qP5Kp7t6qf$n>1wr>eg$5v?_B$~pzq7HC>FzjeM(E@Wjz!a&2dKYW1GO$ zzsr1@hDRrbPc-@ITGUhZrHBrlmXz|LY%cDaPBmR_q+dP|6iK)o@6auMv{3P`64`O^;mze&BHb(T!RxydO>)V)TSfM#QqLWjpzYq zp_i6G??6ZB}ZWoTz%wCkrhf9=ep|Tn-ni=3(EU6mIBe zbl9(e%K=ZXP&+(T8~P%8f#FknhM&^c*2%nr+tW?q_{=LXIcZzH1lKnKO6fG=o?uSu zCYe+)1>Y^T-}e~5ujX-4#xZ_SqNwAQ8~91s%H$iSzza&oz7@?0VUrUdYFj8CwjG~Y z}QM}={JQ8oGLqGTkkUZo);ZQNyytkknM%WT(dOE#-KI0zS(TUAva)3X42v0Cjd^t>DNbm>ZguvLLQEu*{_HNGq{ABkZ z<3F7LLM8ux{v!c}Nc=hf`2+CfvO{u7bNwT009?XcNQ66zOZwO4K*120I0P!p<%V*y z@I<-(3L!2kA^O|qUzP5iAW2Ci zL=p@^AwW`4*e?M^T0>FR5-=nfh_pfgkrLuoXfz6j27=KD2?Y9Iga6({_}johDHs@q zl$1b2t$_$APzs2IN}{cS5(p_tYY7Py1S$ao10gV&1QLV*O2QxrYlJuwB#xB$J>3+C zN&E`z=I-o@utEJLD%RN!YMaz<;wmw4EaggK+xo_MaHya`_Sb$)Ehm|8o8ho^akh04e|g(y%7J literal 0 HcmV?d00001 diff --git a/crates/api/src/temp/LOCKFILE b/crates/api/src/temp/LOCKFILE new file mode 100644 index 0000000..e69de29 diff --git a/crates/snapshot/src/engine/mod.rs b/crates/snapshot/src/engine/mod.rs index eb38bac..ba24979 100644 --- a/crates/snapshot/src/engine/mod.rs +++ b/crates/snapshot/src/engine/mod.rs @@ -12,7 +12,7 @@ pub struct SnapshotEngine { interval: Duration, last_k: usize, snapshot_queue: Arc>>, - db: Arc, + db: Arc>, registry: Arc>, worker_cv: Arc, worker_running: Arc>, @@ -21,7 +21,7 @@ impl SnapshotEngine { pub fn new( interval: usize, last_k: usize, - db: Arc, + db: Arc>, registry: Arc>, ) -> Self { Self { @@ -97,7 +97,7 @@ impl SnapshotEngine { interval: Duration, last_k: usize, worker_running: Arc>, - db: Arc, + db: Arc>, registry: Arc>, worker_cv: Arc, snapshot_queue: Arc>>, @@ -113,10 +113,19 @@ impl SnapshotEngine { } let snapshot_path = db + .lock() + .unwrap() .create_snapshot(registry.lock().unwrap().dir().as_path()) .unwrap(); let snapshot_metadata = Metadata::parse(&snapshot_path).unwrap(); + // add the snapshot to registry + registry + .lock() + .unwrap() + .add_snapshot(&snapshot_path) + .unwrap(); + { let mut queue = snapshot_queue.lock().unwrap(); queue.push_back(snapshot_metadata); diff --git a/crates/snapshot/src/metadata.rs b/crates/snapshot/src/metadata.rs index 16f38b5..8884b6f 100644 --- a/crates/snapshot/src/metadata.rs +++ b/crates/snapshot/src/metadata.rs @@ -33,13 +33,16 @@ impl Metadata { if !path.is_file() { return Err(DbError::SnapshotError("File not found".to_string())); } - let filename = path .file_name() .ok_or(DbError::SnapshotError("No filename".to_string()))? .to_str() .ok_or(DbError::SnapshotError( "Invalid UTF-8 in filename".to_string(), + ))? + .strip_suffix(".tar.gz") + .ok_or(DbError::SnapshotError( + "Snapshot filename doesnt end with .tar.gz".to_string(), ))?; let parts = filename diff --git a/crates/snapshot/src/registry/local.rs b/crates/snapshot/src/registry/local.rs index 9acd64a..8f79a73 100644 --- a/crates/snapshot/src/registry/local.rs +++ b/crates/snapshot/src/registry/local.rs @@ -49,9 +49,29 @@ impl LocalRegistry { } impl SnapshotRegistry for LocalRegistry { - fn add_snapshot(&mut self, snapshot: &Snapshot) -> Result<(), DbError> { - snapshot.save(self.dir.as_path())?; - Ok(()) + fn add_snapshot(&mut self, snapshot_path: &Path) -> Result { + // move the snapshot file to the directory and cache its metadata + + let filename = snapshot_path + .file_name() + .ok_or(DbError::SnapshotRegistryError( + "Invalid snapshot path".to_string(), + ))?; + let final_snapshot_path = self.dir.join(filename); + + // if the snapshot is already in the managed directory then do nothing + if snapshot_path != final_snapshot_path.as_path() { + fs::rename(snapshot_path, final_snapshot_path.clone()).map_err(|e| { + DbError::SnapshotRegistryError(format!("Failed to move snapshot: {}", e)) + })?; + } + + let metadata = Metadata::parse(final_snapshot_path.as_path())?; + self.filename_cache.insert( + metadata.small_id.clone(), + filename.to_string_lossy().to_string(), + ); + Ok(metadata) } fn list_snapshots(&mut self, limit: usize, offset: usize) -> Result { @@ -69,22 +89,64 @@ impl SnapshotRegistry for LocalRegistry { Err(_) => continue, }; let file_path = file.path(); - let metadata = Metadata::parse(file_path.as_path())?; - - let filename = file_path - .file_name() - .ok_or(DbError::SnapshotRegistryError( - "Could not load filename of snapshot".to_string(), - ))? - .to_string_lossy(); - self.filename_cache - .insert(metadata.small_id.clone(), filename.to_string()); - - res.push(metadata); + + if let Ok(metadata) = Metadata::parse(file_path.as_path()) { + let filename = file_path + .file_name() + .ok_or(DbError::SnapshotRegistryError( + "Could not load filename of snapshot".to_string(), + ))? + .to_string_lossy(); + self.filename_cache + .insert(metadata.small_id.clone(), filename.to_string()); + + res.push(metadata); + } } Ok(res) } + fn get_latest_snapshot(&mut self) -> Result { + let mut latest_record: Option = None; + for file in fs::read_dir(self.dir.as_path()).map_err(|e| { + DbError::SnapshotRegistryError(format!("Cannot read local registry dir: {}", e)) + })? { + let file = match file { + Ok(file) => file, + Err(_) => continue, + }; + let file_path = file.path(); + + if let Ok(metadata) = Metadata::parse(file_path.as_path()) { + let filename = file_path + .file_name() + .ok_or(DbError::SnapshotRegistryError( + "Could not load filename of snapshot".to_string(), + ))? + .to_string_lossy(); + self.filename_cache + .insert(metadata.small_id.clone(), filename.to_string()); + + latest_record = match latest_record { + None => Some(metadata), + Some(existing) => { + if metadata.date > existing.date { + Some(metadata) + } else { + Some(existing) + } + } + }; + } + } + match latest_record { + Some(metadata) => Ok(metadata), + None => Err(DbError::SnapshotRegistryError( + "No snapshots found".to_string(), + )), + } + } + fn list_alive_snapshots(&mut self) -> Result { self.list_snapshots(INFINITY_LIMIT, NO_OFFSET) } @@ -108,9 +170,38 @@ impl SnapshotRegistry for LocalRegistry { Err(_) => continue, }; let file_path = file.path(); - let metadata = Metadata::parse(file_path.as_path())?; + if let Ok(metadata) = Metadata::parse(file_path.as_path()) + && metadata.small_id == small_id + { + fs::remove_file(metadata.path.as_path()).map_err(|e| { + DbError::SnapshotRegistryError(format!("Failed to remove snapshot: {}", e)) + })?; + return Ok(metadata); + } + } + Err(DbError::SnapshotRegistryError( + "Snapshot not found".to_string(), + )) + } + } - if metadata.small_id == small_id { + fn get_metadata(&mut self, small_id: SmallID) -> Result { + if let Some(filename) = self.filename_cache.get(&small_id) { + let snapshot_filepath = self.dir.join(filename); + let metadata = Metadata::parse(snapshot_filepath.as_path())?; + Ok(metadata) + } else { + for file in fs::read_dir(self.dir.as_path()).map_err(|e| { + DbError::SnapshotRegistryError(format!("Cannot read local registry dir: {}", e)) + })? { + let file = match file { + Ok(file) => file, + Err(_) => continue, + }; + let file_path = file.path(); + if let Ok(metadata) = Metadata::parse(file_path.as_path()) + && metadata.small_id == small_id + { return Ok(metadata); } } diff --git a/crates/snapshot/src/registry/mod.rs b/crates/snapshot/src/registry/mod.rs index 039e5d0..3590bf1 100644 --- a/crates/snapshot/src/registry/mod.rs +++ b/crates/snapshot/src/registry/mod.rs @@ -18,7 +18,7 @@ use std::path::{Path, PathBuf}; use defs::DbError; pub mod constants; pub mod local; -use crate::{Snapshot, VectorDbRestore, metadata::Metadata}; +use crate::{VectorDbRestore, metadata::Metadata}; pub type SnapshotMetaPage = Vec; @@ -26,9 +26,14 @@ pub const INFINITY_LIMIT: usize = 100000; pub const NO_OFFSET: usize = 0; pub trait SnapshotRegistry: Send + Sync { - fn add_snapshot(&mut self, snapshot: &Snapshot) -> Result<(), DbError>; + fn add_snapshot(&mut self, snapshot_path: &Path) -> Result; + fn list_snapshots(&mut self, limit: usize, offset: usize) -> Result; + fn get_latest_snapshot(&mut self) -> Result; + + fn get_metadata(&mut self, small_id: String) -> Result; fn remove_snapshot(&mut self, small_id: String) -> Result; + fn load( &mut self, small_id: String, From 6ba7f97eaba39cbd7ed012d46021422cc9c53d2f Mon Sep 17 00:00:00 2001 From: Tanmay Arya Date: Mon, 5 Jan 2026 04:58:56 +0530 Subject: [PATCH 21/25] remove todo comments --- ...-01-05T04:54:07+05:30-x7e90fe57-x0.1.0.tar.gz | Bin 6248 -> 0 bytes ...-01-05T04:54:12+05:30-xd3aeb3c8-x0.1.0.tar.gz | Bin 6443 -> 0 bytes ...-01-05T04:54:17+05:30-x4703aff0-x0.1.0.tar.gz | Bin 6777 -> 0 bytes ...-01-05T04:54:22+05:30-xf1ff6ec9-x0.1.0.tar.gz | Bin 7102 -> 0 bytes ...-01-05T04:54:27+05:30-xc8652469-x0.1.0.tar.gz | Bin 7217 -> 0 bytes crates/api/src/temp/LOCKFILE | 0 crates/snapshot/src/engine/mod.rs | 2 +- crates/snapshot/src/lib.rs | 2 -- crates/snapshot/src/registry/mod.rs | 15 --------------- 9 files changed, 1 insertion(+), 18 deletions(-) delete mode 100644 crates/api/src/temp/2026-01-05T04:54:07+05:30-x7e90fe57-x0.1.0.tar.gz delete mode 100644 crates/api/src/temp/2026-01-05T04:54:12+05:30-xd3aeb3c8-x0.1.0.tar.gz delete mode 100644 crates/api/src/temp/2026-01-05T04:54:17+05:30-x4703aff0-x0.1.0.tar.gz delete mode 100644 crates/api/src/temp/2026-01-05T04:54:22+05:30-xf1ff6ec9-x0.1.0.tar.gz delete mode 100644 crates/api/src/temp/2026-01-05T04:54:27+05:30-xc8652469-x0.1.0.tar.gz delete mode 100644 crates/api/src/temp/LOCKFILE diff --git a/crates/api/src/temp/2026-01-05T04:54:07+05:30-x7e90fe57-x0.1.0.tar.gz b/crates/api/src/temp/2026-01-05T04:54:07+05:30-x7e90fe57-x0.1.0.tar.gz deleted file mode 100644 index d51bb76d94fa559ee1cca3fde94934d802061562..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6248 zcmV-u7?7tOkl`CKr(^?l3@~ghBQNz zEFc*P10p%+oO2XVkes6;l0hY@AS~{Cwfky!Yxlj~+E*KXpFjF^oxXi;Jl$RQ+c2~k z&;kvG2_P*HPyvXT2t)vm0>cH+CO1_DJy zAVfeQP#6XU{-ymB5D1YlR2U=zg@HjJ7}4*`ATWpsNc49A?~H>xad8p-SNU^S{>lC? zg6+?i^w0JR(I5PO0ruDDKhEC4{=X&vAu!m#&i@PFANl|H0ZjZSOKeL~hJESn|EMWi0FI!??){MJ=0pW*93S?GAFoHbsTSq{ZHE>$J6}LklJN3SinCwKFH)7jB-aqx8 z5&wn|lvp$l;n2#*q7WK3&l78LqD!oMMWl%r4pNIq3+i=sA^iMZ(T^f=v*DT%g#=vQ zaDZOxjSH7rw}<5Bkd;D$k2tVX^Yr+_m6fO_zvj@S8d>}r^{12b z9YfkUo(Y4}q@C1PJFij};4^{p5RrE@#JNd@o12?hUJS}?yHi^9#J?P25M?B_ihGJL zEOfPyW#rMm{60ojnJST#&pg~eDVT}^5OPEEin{OhkeKU3SH!F?!L_-`s7Ah@AM5Ep zJwDid9|@;+4DH1&H6EWW{aQNrF}kX#Ey&`}^=YwMDNaFK_bR=z%4$sF+5N}gY(ipH zV~PoH6%7Qt>UjvE_xPOnmaQo>cGvdj&ksZBV^6)BB$dB48DQ{HUJqopCVLqn>N+G7qF6+}}tZb3&Yj^MwmL-4x z86#Qb+U|h^f!&}cTSrBExvaV^ejgzUo8W>x!QEVtC@a@Y{okagM)<_9HK%edBfQ^70dzzP#$#14dKjX9D?Yc}7j15i3%6{{Rpg#uSH0m< zz|_$hkcn{Ux?g45MdF#U&iJ0+Z16;L!SV2+ABeb*ty>1|HmH?7k7CKTCW#{{w0+J8 zllGPEY9Lw12P|(bw=;KT$>M`{qk3EHAh7nxuqEkH;p!5hmTzYBDj^v)&l*{m&raL- z1N!+7Q)SL%KS!?SwRNb?3#aj)9&g49olgd^O1etb9dt9$wT~KcoU7&FP7|`e3kmj| zGksOJyO;y*l6A!l{+P=2^`KiiFG^`$6pP#2D%I7)%sMQ3{oGc1((3aLQ^IbvZY)j( zl{M6UIaKUEue_&cH&;%cgGy-cQe)0Q=wjqRI*(tf-bNVE$V_S+Eq{q>YyJ_xlXs(W zi*wh0SK{;l(8vBzJKo$=jy#a*YStZaCG!M-SC*cx(b`j&STB9frMxuvif8MkOJ|z{ z9Z}~FBi6n95|!7mzaDTo1XIT7mZ+IcH^sS}7Uyun4s$t|Zt5$~I=-9Z+tIJ>t6s_F z&f~OYdF65`dn}UStBHFrIHh@|Ye=H(Xi(zJe5MWeE%KdD){(+z!u-b4O}CRZPPU@# zO(XVm=JFcN(*#xiv-_iuPi3w!jGyb8#JpwJ25K0vT&XEV;WwH6zO>&+b)_gamzH&);hOhPZ{T$Kl6GUdOP=#_u34a_vXs$p=e);htn zzxCv;l92beK#eDLUdzwEmbsj*Pbk)KZa;27C6H@Fiu@laJQ=>Vc7>;+IIPwtkx+UZ zGhu4o_ExIa`QUih2*3Jvqim^(s?w%Ghw#qAU>MI?$!T zA*rpRV{ebCq#-DM^Z3u$PN`lsf{d%J?kIYO>^&nXgYEs{Sz$#vn4gW}@KpMS|~)-2_&_p|*r@1xC)9-g?&dr#vLiqY>Vhy!DEGBTfD>L9@svfK*g$ayR@LvXZR zEmjgKY5DNR$M{_PB*H676I%DtUH&3TqiRQyPxu&i zgzQcyPFgE={Io~H1wL2QD#z%?qZIG3hQ-?yAV~R(61MZC>-Jr- zExVmxJ|D`hB5$^?tg+rQ?0O!VZu+o=JgY=CNUP+nZbzYEsIGRXLTyot&fdwatBmt< zAZD-r*S5XXuYosfre^A+kzVlr@xaV{{~NFM6oFL9iTZ#7mXb=9aQBBNd0UIe_3L&% zpI?6YGOcv}AtabZj<8NJy~~Q}V8oER*{S-Q>ZZ2>cV ziB$<);pgqEDYbYIUN z+q?c_qlC%DxZHV;Fz9(i1F;oww149qe%hw5)3bOYL)|K{dd|1ppL$XnkzRUYcD_I5 z9l7#jEONWv%v8j~J!plp{?c()2-xiAr|+xtl^vIk+XI<~yh+{!f3OtONtoC}Al-DQm*bL6WRnW|k{g9-a#7R;=y%RV7ERS|qhEDRk;-}AdxURpu zG||A!bwaMIA8@6aJV#&N?%T4Ki02@jW-lQ4ZZq2JgNktTdGpta*lVy}r>s{Q&ZeH{ zS*{~x_rdZ3^WWsKJzNZuoSrzlk8YvMAuIi~9o>k)2-)MO`eGDblgq5FA(AH4qP^e- z@w!b1EJCj5+c&C3|H@Ukh`VUQp!bWf2ObncW2}n`m-aG)TdQAv#YxtIVo$axZkFktx?3)$zt`D&w2dXjeoVD|bs;!uoIhURJu5Po>i*ak~M?&#B+M3&DkHf0Z znAFscd;qE7Z=QL{Tny)%KNhCnPgma>nssoW)$C7sDZ$;=v%k+mjb|%gcUa8T$a*#m z_^i-6!8L3?Am0@D{m1aT;3qtfEDkMb`8w^UI~-SDE;r}fGU87l$r*}584?>rJ@bCQ ziaGF#f@x8CN_2&WIb6&9En{R=oXSoPfX`(s?CnW(|P;9JdjxzBUr#@JK`*A zD$s@GPj)Vgzrt3U4I#u|na46M$|b0!aT7#VByNqA_Zr0NC{LnPBJ>`_hE&`)^h3(c ztzW+!-44Y14r;!+rWykk-^jSFT@d%Sel>h?IMS>AV3&Ggm=-RKjRwYuY`mb@{<2OP z%H--IY1Jp5yGITcRlWZ0?$#lYNDpfCwc0lk3O%HUFtIZ@E42@OuGab{C+b7{(LW}M zOs@gIOu9T)-XGc0p3d?U6Q|Vm+Rq}BLP}FKAFFwVF3HUsyDc(I=_@TPr8U%`K+SI2lEw>aFrv7l zz3w+{+#7_6(e1OV8WU0qFE|p551Wg8{^0PQ0d;MFl+|E`4g0U>URoLfun*y^1#>5} z*>pC{LLIv$Q{%Y$ZEF9l{L5oMYPT4Z^Rx1!P4Z776Si5@B?Raa-CzC4=YV;t1yALSMhPlcJJ9Gp$x@P+ zrG{~s-!H#(mbjh-NVj7%`ss$f=Nri)jPx6OX;41=UUacapFW)|0&wG?VGmsrv2vKG&%MUvJOUKNnkxAfSCB}R9A=>(O@ zp#x@}v+qrsT#d`63^fxn3juA33UX*f+EA&%<4mPbE>Gvf_#-_O>b0nIaZ8y~j`fs> z(;9`;;hxryn5;2xjb2cb648l23~0Ywj2eO5DtYhc$Bxf-9v02!juiK;Q^}{4Hl|Yai#DjB&=eW;tyB&tuC`r_gWcZGwwx7Sx=}Tn;jo z&Nsnld(M5gN$5;TcD)_m%3AuJol`zhvh%j%&!mQP6JV+p2v5A=`NRtv3YnCtXTC&T zHKLhB?i!}^#k?9H!zkt!{&2mTJ<(fP$IK{N%J(cQY`eDd2QM!(vw^>QMGUXZL@6mn zOR5uqAD*51T>Nfx@ylrR zLz82=Pe`1udKgQRA)wQo2AiF+s~pqTN3)?yD0aoX;fa*yNC8XuPnU?qEdZ~nhZF`y zMdtCkD2rl?GDv^jff5^7A4sR8|IB3-sK8#4*#{g&H{ z2o_-WyN!s~tD)nxd)-2-!sIh`rPeFjY}{e5v`k{AerKXIl-t?$0W5^@oXiq;69B9^ z*KA-vcq8-q@d=m4$-6k~Y$)9?0dP4MFVS|qb@VPiZTT2U5_H<`6wR#XH@^`l>!y7p+d^ce6xi^IFl7T9{`txeUa0qav0JC`HABsYROzD4AYOi(TO|KQ zc@#;0sgrSE<7*<4R$xWWHMJd*dfQI2FIlnbqv^jWuQ)S(dl~1P7hBP^GOZc+phpe; zvOkRifO%8VS&#&H8eeEgD9TaUeB$p=O3XKx*uFrT<>}xcBy#JheK*#MXhZ5#f80`B zJ`HF8%ds+Z-)JwZi7GHXK(L0$AwL=8(iLEUnpfJZRB<^H`*$XpLLA<9Fbh`FBj ziK@+`Q&f zE4f~*m)!T3vP?4K_NcZK0i~_(%`iRf!c`vV6=m|waCY+gfqMC?Xk{nw4CyGJXn)yv zaT~-*`b_wk2KZ;z++-a+SB0dP$W*ndB$L%8$rE3{o%(#wkIj{3d|_0x0_tKCAN<&_ z{<>=hBW-F>Ize)Lq0IHDeIyl2asYlho(V!E;c_j zF&@DyyCiFqL`qb`4loZ2ei?eLVQpe3(;Z*3cnml z(;m%z*!@%aV$+3uprC1E9H?57u1#$k*GQxi9!qq`+jcC$q_#%cakyI)C;Z0Tj+8&MM53MAO427H_&_OM&`+99skh54fj=# z5ovzq+X9yx{ac7B1Fkt1Cd@{ds|H_vv0=iIRH^dNIFi%lk*xZQ8~}hOcomcX^bX-F z$!zV`8&(ZFce-Bnt(#k+2Q;unUMIQ3yE+m>N~AB?*bk}3QX7nZ?!USWJ3A>zE`Gp4 zwy#aNSt-Ca-ECF7e4n>O>{499@D!&^g6PV$0RH;G2>pzbt!=1&j@1Xko!fN>MXiFD zlPeNzbUJS{xoiS#*1!9?^9F&J?CTmo0-F3(1-(_RKVI^Rn;b8-X-df_C4asGs+u9O zrZvT>yG#SmP0GZE(j1{BlNDQ}m)J3fxNT0l$>mkkP@ZkeO!a)|I7Ge-2Obje6jD(B8K~OXT2u2{FKqO4~Vm(|4K%ycjVWb6G z3?d3fiiutv!T%Kew-WuAfk9w2NE8NzL!d~I2pA0miy|+Aiz1O?Xt*d01%X09!Vm}u zA|eb0T`V`~MgQP1APn)lZy+%!^dhh`&fW=biT+bm2YYKQ?k_8_u(%MUQD6iTA%a9- zEKV^b5D7;@AQmVv6bcs>1p{FyiwmI)0)$_zbc7fT2!bGh77#EHg$9AZ(0^{ZyTio* z0@M_AoY3gMsP@nP$G_D$1;p=>w#J~b&erx=XMi~P-)x?RwH+D@$NawSKOy1)yomnb S5B}i)IsX6w1H7;RC;$Ls6GUGC diff --git a/crates/api/src/temp/2026-01-05T04:54:12+05:30-xd3aeb3c8-x0.1.0.tar.gz b/crates/api/src/temp/2026-01-05T04:54:12+05:30-xd3aeb3c8-x0.1.0.tar.gz deleted file mode 100644 index 922140b5a0429aede4104c5cbe3ab58e829f3512..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6443 zcmV+`8Pw(gn+C}s836%lazl4RlO-xa zkfefui#}|>~(6@+H3E#POUn1 z_V=J6D2yeMOzxgP|C7KG{>2j7k(D_{ zJr8(To870hSmtX-=zl%yE&vaDkgP~zQv{D!Ak=4oHS(jt>fsq7-JTxtr-USD(){7; zNQHoOL1Up6Q8B{4Cx9v(a6+TSZG$dm{7VC2-mSV15wci?n3I0PwkDZ6!pjA{VrzAj zaw_Y+!!cp0t$$luJl4{=O*siV1!azz(hRqbt|DM0JHF=SH=N!ea{m0Flpx;gW30 zA_is(MMJ8!FPjDRu3}m*DLf98*fhd3ZaTiu-x$4e9l0Nv_Y%-(-ne)me z^YYv3*2cQ$y)qq9|TUtRe!Vm-s41r)QVF(0RCdti1^SQU1^U4+29$;QzJ%VPY`IpZ)(& zfIE!hFBg{^M$~U_{_-_Xd*4{))3W!Z_K8Lc)xzAfdmfL^7_Z08*0oQ(@r98jI3Dek zR3mVY*Gqk*O$36gwewL=N#&D(x2_qb;)Y}8DPrH!;fPfIt5wy$eW~Uf8K&15p&_Bc zja(l2HWtOX)eb!7$vUvK$F+`cowah$i$^g1w4@KhOCCDk@9xd+Y13%}JX!JExvdi2 zwaqBn6Rpdr_LOTt74QbK)iFXPKP}3fc)7u3%71dyy!*y!SW{JzZwjlVU|^^d^{m!@ z7krPc*1k7Dzw&JwQq5ZQwSlk4F1bu=W`j=ZVth_fOR3l}8dGah*X63;n0Zq4y{`iA_BpWNkxlMJGPg-IsM)J% zj@6-gZqzz7;-k<2UA-4E;mCo36#iPNJ=ER&npA7VVWvcCE0$kzM9Y87Yo}+h&61n+ z#li<;sdLyV81_{C!Cvw$ADUdkckq%MT5NiTW~l;irv>@sqT*i5(|1VcOFWw6r=s;O z!y>TxH=MQnXG|D^;zP_kg}NPDl|@>bzkbNaO!YVo;7C<&uk{K{5}dF+&n|biuZi5J z&({u;M6d$;D6RFVC$hHr3|M)SSTw(4-I29ORpHkX^6^O4k3%0X#6&4##Jnw*41f)2 zoNQSd%{rqfYpv6yq3(z9r6-d0d!paFiaLoAi3}37ZsYCTQqwZ^A$3pbLB8bcEa>7w;Do{Lu{lnRVR8PnR~J3ihRa9Tz^ zxwZ@9!8#*DLMhUJk|^?MTu=VP5A zJAk=oiT@2_Ac2sl@qrg)pIjO2Sfs=2$>VYZ9YO|n_MMSy56GIMi4zm{>ZA!rTO#eYJBNQu_0r4WnqG0umb(ga!7lj$~e;am)`Mhik|9*NK@~2(+vBTr@!C9 zrM5rKejR6S6^pduYs^_ABhe2*KXEn=>#k!#npTL{@b;3;>l{^lLy*&5d~{uF@wI+> z={mmYb$-$9Wj~XYVy_xyAN)(a3Lp9kg5-3>@1|zE*4ih=mmYDP8m+l67L+JtNRwrc%xtgA+I-S`@%P!{ z6Tx8zt$11kISkikAyx6MH6pv!>>%{Cfzaf#^w$fM%TfJ0`^Lqrvvjjdr;J7yTP|J$ zail;PUmOdM3h9#oUb(OOfi<$Oee`WpA){7zlRs^&;v&n8pLd2|)}O-8O49b~i}3}u z4zf8ylWm4EkC9&mr-Yn(RDSVMBI74~SJT@w{GMId85^v(9`PP|dUBnAxi3O}s49HJ zf`Jd&cJexiN0ywFu|I@Hu7Py1VrioBvBvt0zR6h%c}w$2*4pn~eD5=kqCJbyC?jk? z{VRDG**Bv_(kOj3z8awdSlem^bH?%cm`wdeL-?F<5%EDp%GU3%&ju%e?ZXOSvTsD# z3j9s`4??lXBKz0zg7G|jcRe$_W>aLE{EpO99-dCzn7RD2XJ;-B&ETLv^Zqe9$#^I5 zFfqQ5ox<^r5M7*lyd~gWx8C_lVSmPAs_=je7Z*p%##Z{tu#bpw^fP)6gOEhx${*3P zq-t@Rq+B0_rlj(u1tT{{A2!QQ_-$?ZId}~y&!l!^q8wKWbt{o!RkX9ky;WYwSi>)1 z#RB`dHnjt`g@oU~8(cHI?G_&YMqkX6%k0;Q(hc0%E2ag~sPbRLl%A1e4@@k8GVJZt zJ)aW%Y~4C{*7q)r4o^lB8Y>65Bk_D^O?%BTz7bzQXLvVE27WzhDYWkPo)2O?>e+K& zR5pfORB|FwZ0{T&?T52YH@{fp?wORizw?tjvdJ`QDR{|Yx(Rg=p&FlSiI?2r;Jajc zrnO#rx!csnJ#hIW_>Sk;ik)1&2j}Vh&DIVy_}(=qthp)B4Hw_L4?-?lFXVBpT7#*R zNH36!S>X$pKG&PLgUU|q&Et^A-#T^>N};~Y9mlEi={*qasSCog@znL@woDi#i4f~- zktg}fYh-gUy1QjN$-Hr=;4D4r$7T4nrg@Cty}cEEKd6LC`Ql#SWXtCn-|aIt2QD)j zjH^C&sQ=^%O5}r$i2S_)zQc&``vE2*NBl}>Gt3S%#Wrzl8@*wrkh|Zqe56y(s)Q$B z$~3KN5(CH<8Z1e6-SNJ{7-JbGD8!CI}=-N-0$K1Tz zrBW)h!ig+g+?{6wH_gq}3J|H>eRQcOpY$L8P;ZGqlb4k=2iko@$6g>u4= zmr`7k>|%N4vW9!a?e45eI%fg6UzTW5PtJ%#z0Cxi${_JHSIxv)N_w_Lo<4 z)7}pSbdyB}L_bHQo;KexYqLq{3}cxb5_APz?a4M&CmJH1C-_w$tqBcHXiUp)j>+as zWZ6K1j1Fh6OQ{T%_J|Hdv>keJl!=EeQL4LsXcW-g%GxHg4)(OA+3JLwcthT= zsGb~Q62RRVN9$D{ONSe4bz0!9pRTdT-0a#W16wj`Z<#nEFK15j?p^jnx-%{x57dV7 z!=0uY0xQi?Kcbd;P6yP%cX&?+`uULwN=Ie3c!bO za=@Q6rE-3)&;C{%Vr?_TGaAh)4r=wjUczY>FUmZ>qJyZ&W<3F1+j=)FV#uIWb?3r!5iuZ7GKGCy4Xsll8Dvu&$t&Evr66jppa_T$(ZVZc!7_*25t1NH`l z{y}rR9gh3(O=br*Fmr2%L%nLPBr_K&Y%)6*V0162p(%eO~}cT!b@GB@?DV-)*5`w4DQNm-<r3>$bJ%3G?q+ud#?KW>#1MDqeK;<3A09x7`rR4Q z^Q{a}E~rV*CUl)eAI?_RM1r1x8Ibg}OqRHYx0ax$CTNv@5MNSkfF;Yyiq!Au}VN9&=AL!tse8$wH*Jh--ESfp#Y>7-B_SXX$ zWcm(P7~f|~&^FZ&ln+)sSFt0V^M9q#k2F!tTwR-h*A9HBAB8Y`cbMRU$tS# zL65ArpQe6=&T6)()otqL2MjT|M5?gZSvYJ5#6IfWQ8%1ya5%eq1HF!qd2gr(Y8FtI zLtCaiOtHK>X7$tK)8IItf6rj8;P2R{#*YWOlBS& zT}oq#O>NCGLgdobTL5L18bjjSH+!YB;4QH|3Z8*XQ(g*NoVbSoG6)^?ktKzpY-f3d zX)JjOzH5?ysTx7|em5GhTgda^ewe+m`DebBGA10U)WWwTcdd1E_4`VBSYUP?wkVg4#zS`Usq+8CG`Msm*< zOK#TUjK{W&BHyzF)YRO}wkRnVTsCqBi(+9QiQr%Y!wLLvHDxzZBx9xj1=pSXsXqn?|*DU;!^K3X5 zKIHlBU@W@Sv(JUijKZw;#k;U0p;pG3)ZOR}lFZE;M*2sz8r}I752INbjVy!YJ`=Mn zg{f{|$K67z1|CFw7U-_{{OMDY*^4#ioAQo{G=++ao!udM9W{~8-?64I_udVXJbfQ! zE;C3Em0sz29r=jVA#X*_7Qmvn9zFNQfRS#+0_n~^<^IzDCmF~0o-iqZ_Dn&*bZP8Q zXsn_5_O)QIcjHfQ0@cN>R)46qy{))T#OET+PUiN4-?0_oZ>=2*hPEi$OA-&zMb?ru zXjpufdqwes>tmOUdtndNH3wFt{q68H6|Zp;ha8=0a(+rq6e9^o(3}@_la|D1H6m`I zFdh5YZ&Vtu9?-Zmf-KV7(2n=K=81O(N_#?oT0c(fwdrtj5%xkBknqm+@fMY=Wlp&X zDbk+bOD+FJe3`u{YyEsW;*&#JrRX6(jc}S|jMrzjdNYum|8z#{aiHU+vgWFg%E%%3 zJv&ad^Dth}Ft0vs2eMYhE!;J%&91Bh=P#`?&jE$1sGH`Nud(5LfOXAfnisMUd_ys=L-19)J2=0QkY|<} zW1L*BDC*z9uR$OwA%~zHZY8&Jl)WDf-b8~-H^pn2g0**aU%WfN6QPpgQCx1o5L{^( z&yppiNuRpD5*8v9>-Gq)*K3ik;Kfo|mm2CDc{7`UR!52t_&} z>CNjlNPDRymxHWu>&M%3&MHofYWHuLap*bQKWrAV_lJo6Ooo`AxLPi9Tqg)=yC{F~ zDCc&UP;d6}F=wWg@vJ}XzPJ7S$oE=oCN?09lJbr>(*f7190d{IBSCV*kL-s01vhd% zvKDi)8#>c(+*nLBgS!2p@2bzX}*2P(ey zw7(JCXtbBj&w#!64NSu};~LME3=rqn@Yq1fus8c}QHv8@Lmf+^ry_JjJSQa@rY^m?jO{P#NPitEM9&63%ggZ0eDzi?M^({C zl}59xgh_m8eX=8vf^W!-DlA=Pq1JsnIlA{r){i>kl@_GB_7K8Uq4fHOLYkf~(XD&S z6l`o940*N%X%w~+L9%+AM%uBO082jDgee^eA@qv@ha8#PcY)fkjd5cpWPKUmC9|E3~AP)|OF^6_f+ zNckN3e9}bP$@8Mgx$8J`LEO2@O6T2Ef#QO7yAKX!#FRk`x;2voC~8MHjq2V($+40z z&5%;wNr}=^S!8kAvO)w>^?jh5d-6$?A;(rdw_k)zcZT`ko=k+iVJ(3Q!M`XDC`TL? z+1B6D|>$*|2O>)4*w75{~$OF_~-fmKLKw54h@h1{5>)-KnQ?FxnTejR}Tj+ z215YFz(N35jDxuc#^ovnPy{6MN6f#8#pZwfEOHf%GPkn9Tq#WM4p$K%7z7RnU8%iL zaTFSiK|l})6bOR-V?VANI?F#+r#4QF^(vQKc@XZhgbOONvx|5X6}^Y!m!YiIlarT!se!2h}a{}{*l6ZotCaUyX4W^pZVRoKM5 z@zbiy8C3dS;bn;njYrtE^3W@WhRt!sVJ`G=2kkklEdW#Mtxi8F;wDEMarLMnO%LFr zu!yXnJ{RZ2b6g*Ww|tz#z{jbI1m-2;i3+&ONw*$gUZQ0ipH}~#{M|?O$3g@bG7loQ zjTHPc`HclPM8G#)*Q>gvW-z#NI=#U0S0U+L+;{it8zN;f3UOychV9Ld>Ti57isJZ_ ztLut!D?0aTMPzQiQ@FXwFokuyCbh`oQ(_-K;St=AJRTim)WM1gu#(sh9xE5PXC;y* z2{6Lr*#V4 zeokhKhRh>PzxL7{SN@0`oIec*Tco8U3Lzv0g@C~Tpr{a737>pS^iFiGgtXKeQUCMj? z`uFwNp43>F+C5_V@3%c%eb?z!5vBri31>%br_^X841p09osm$el9c8Wn&dnMe-Gq8 zzIW*M{rW-?tNPokN$L&{Q=;J8oK?HnqQdk%emsNs1c7%nl&k0_glGEUNgpS|a*a$D zj=OC&i_*=XgWqf)K3X2U9uYP-J}N!wGGRB}AB!opU?J^n{`N&dm-0}`$}&-1kCuEY zW|E4BjxJDuE`!D_`VX9dr#efUCkX4BFsV$wOP?gnsT9;r(W?C2@|rk7Uh-T{1N^SW+nX4n;zZS$E5 zFmJBkHMex|tZOe~{IJBFr?~TXiJ1au^_F=A!wEU1;K38B4==squ*SJb-2Mzzjg!R7 zW&nJ~7N&WpuIyowkEqO2ec{mJ9L?|kRA1#1rbQp6R4_F;&DP4#2YBJlIXtGCk+cbV zV$<>^a`W`a`691im7@(@z{HU;%+rFxT}uiaxEznwlM$8Amd71kn`!42Pyb?Nish%3 zP>QG1d=J8k(e^B(%6OTGr}b51d+C2^xydCDPm>6U`&aeA@ZGobkWEMT?1y^qgfV-s zkX-8jUVExVQxoGW7*0IT+=v(xx%-2ib+bBRoR?sS>4$-r0-2Gn(!D*pDO!L(7Uio~ zXiA+uQB$&46X-8!7d?z{xW6qdnX9ezOAXpXE`*S7!vq9o^!NEBcc2C7sHOUElgiSv zUt^R-1z##KF`2nAe|gZE0V&4g(5;tZnciL^Hdc5;NY{MpAfk~C0HY_z8QL0oRZD&D zpKH}(BoHk|)j&SAj7uN6+r(=Kkm}Si`jktLag7hu;}5Rhm#Z=b_~#l`yz+0Qaw0(o zC04c?+iyLtl?}$3P3||5`C!FCV^c%+!2&lPkA9L^%chhP|F#(!$E03BlRq0f8|%(t z1k-yNnH{s~<>&0uB5%CHH9$X4G_KI!qZL0f%NUXSTz~Vs{-O2QYmIV<5-mK+c2*+e z>o&qecHOqOSMBXNOA?he)cou2ichWh@eGFX3{}EYc;{9vFVAoj6R{3%RT_TLs zA0fm$^c<2Zm2o~CAY$9MO}pRP_=&o$bF_!rv6tZaeU=5L@6+N{p+2W0R`I`>BaF$m+e;t*2<%Tb8n-B&xzLzUwE$# zD9*3(_f3_bD@G2$Sl@$KXzUxhEf6I3Irq|X0@bK+`>jBd_T9T4+=oXyV0M{{O{BB> z2&0uy<&*!?5zQ5uL^}cqd!-r^n$gs{KBF+^ZuiZ7d9XI6 zDDCrDS&H)KCpEfVu^iKNtSH&aT-lwnVhRILXfwB3e-&%3-hqcHb9hhehw-Tl-pk<6<`nxPfZUZ_jd%n^na8SEZF2Y3x4<9-fMLrZnt@w;yGxti1PiH+Sw zXjCr4WEVgb;SMQ!3h&4q{d(BHZD+P54 zgboxHau$iWwhIq;XoY+ffb`-h{~NqR}pigZX+7k~42=mXtB z6cM8(3G=&YZtNX(LJL2^`?HCXF7pznX|`n^HmmTa3Zp02xu1?N&F^9fFY@R#Duds1 zuAJwEXUh_iawVt}hB`r#`^v2=q^ulHjyqGM=8ybu*JmEq_gSU&)dk@cFj{bz6YPuV zBIS%>e!TXtstOZOBe;ey5?5x@#IH8;*V7qa*6@!DH|-6&4Ey&M5GLLYt;!ZMrHA; zFJV)(SL^DN#IE;R!kxA&gDv{`gJpIZ3DK+omsdV;72vdq{pcgHlJ9u_XFa@`vh=^Z z8G666tGj#8#xAU9?WnkWOGqZ=ogCC_eS8S%%S-yH;-0cVvJ0yqHIzTe)9Pa#M-dUW(yKofEZ@kZ+`rnhc9Q! z=$fwm8{6#Cm^f~|VxTC@y$57e}-hdE^K&cor)Wv%vMALb-$g($m)i#b8)b5#A ztr6%ryS`!h zV0KKfw9;1S!}W~I?9b%XE+0ECF>N?K?k?Jl;>^UGz*bXk0*7txQS?Y=Ew=q)_%5EU zoR+IR`L6eSKr)r-hgr~hiqe(fxYOD~Ov6O{H2b&^T^Jz* zZ{FCnhw*6Q%H#rAu)({;I<&?=ydvEYB9R~z#Odl7T1>RD)bSj0VWjrSk*`@@%k>Vh zId1oZC0qW6p$#bRVzi7h9ykLuH;iQ}2Vi{&hWY4B7~f_*%>=6xiLj1h#&XSFp4Lof znv(%%E_mL#XxG3TkSWWy(skP)eq5MX*VXXKoA1$Yobx(`8>|L!V|?Z>VtrN-<1Vci z-Y&>gVZ!v)lnK&maxPO%x;&*Sf?@71rhI>f2uwADcIr#@pf3FUk$&#+Kwgln2N({Zfo6hYNfF|YvnT4BX=JdB-`x< zNweCO_Y0N*hUD;$*B_RJT%8SXMCVU$)5RAUJaP4xUS^}~x;S;qYKZ>mq_nd){84%} zfspYhsky5%rn!?9m!|Sj{otJoM}}rAskD~VoR-8Kw#^U-d#=~sTNe5!HM{*~{o&Vp zyBD?vGIW*4V#>Nx_4Lob`^Z>o&b~*_7g!IooHakbq-4?Z(vNI&$9-<4f!5h{R)&afqcql7^a#E!NzwL2XHm9zLIV%X zlb%>E`2v{r?Y8V;cF%QJLtj-(-hHc_p$&6}_svEAtZ_b_crL?E88sM!F5{pjFppAe za(Xit(&X^)Yx#ou!+lRO1AyTBgedBna|e7Of4lQs>+4ps`mOuV zV}oQ0Rhg^%znXc9D@vBDBoPw&y?&ydlcG(Nw-$<7s>`;hqaaqvW)ks%eTo-F!JZ{P zz|y;_#_2V$g+KXXa^bV_CGLfSNWg%rZ$NZjUZ@iXR{0H8x+{9p|DeJ0?4@>4Zntn& z^a3$Hhh7;qC0VqdzK$11ineR&JlPw`0$&OCfO%N9BymxoseTKd6ON~3D}6`8Y&Pq; z#&A@zIDWnf8~;Z;?XFaL$@u422S*PoTIWmTr<+3cR~ru?<~=Lga;>{(9LjRlx1{9n z`8X~Y_h3_nIl{CsD@rl=7x=R5Mt6hPtvCtvDn7z!_!h121n?gDSihe1MaLQ?Pw8(t z<0I8yI8(KLPNRMr{G3}%&SfvccV`BjaoMS7`2w41*BjRT^Nz-6f~wGWOV64NKHdI6 zOYY|tOqFnkX^GsE#jhrfBO7Yn;w~ZlWTQZ|@-Y%8n`)rjl2EdcyNa+dpIaB}gDDp) znbeL@t0r7w>_<+n|gD+EDhY^S4P~>mMqkl#BlaQ5=Qy1(tLBxoKahTg#QPry1%!e zRVrt84U^0S`+APv| zW&#HuAL}48h8y+>5Bir3cTYdKsDV=CJMbzKr}8REh3P%3=x8x_JNLia-w|{U z@oDDXw2-s-a9cMp>^pC~xmz@-8|P_6d)LAkKSLGkXX(Yqzgu|_-9>JA@C5j7Koh65 z1B)ydf+yFpX(^awek1=PNMlQUi)GT<1e;IiR}0?CH5FM$^;r-WJ)t8;MaF1qIA*5?UP35KpDBLpC_wRMixW^%Z zOUZM9h$8;%B4AoSq{Mxjo1WtGt+vY%vj{$h#7sQ{7Q!fcET66TIxPs7EjwgZBy5RE zx0c|S5y|XmyJP2MDqg+1>4;n~@q%zqca~%VUEz;cshqXZy_#Gtulyh%3rBCq1w;Z! z@F+U%a!FqDWMH^`DHD5mA!F+-x>Ezh(}A9Q?TXjN z8?VMUt_xLv`hu)Qo)*U>f~;(d{A$By_cJP39&xrr?pBa6uHeu1&~~V8jkFr6QUQ03<ps*^PFabMMy|iI3=?2soZnfR|WxJEZ!gfe84_bTWD@q z#yg;f&H8cct6Glp@=rbiJF8jq6~{U$Ewy70k5xr}oy&LB1}I7QQ!gFgb^`7K@-aUe zqMUq+=x59V?^JOVN?&~nI&DOYsOV2U=3#{)`>^6zjm9|bm4Usr>~N&VCK`^LUi7vR zJR9?f2QylR-%j;`U2}rgf*lAGm zxoni$H=Gi~+e2L7uFt~i4Ml}E6&{a++#ng2b@|>e&T((E?B^Z(zOXP+rlkh4K-a7q zh;p>ox`v$b>Yhe@?$ci#^Q~7Zz&;j+niMzQi*PPm6L^2H@CdL)SdR^Nib?WqqtB4qL=jXixaZNUu$QP(>!DOP&~5>cz=w9wMe$MjlTzDQ=c3DMFKSpIm$Fi1O=63-keN%)7GE{tH)}`VX}xH_#*bNjHGWA%LtL-< z1tT3Gbee;FuN1V?s`+~7?!Ibl*=jLme9yBGnYH9^x{@kp?^4)9dOBE6{Kq=hmku9D zUlZlX();3QcI;`P0zf5RHC*fC)w*R`a`_J)z(tpLpMgP72b2AS4OaJ*3@ZhvS zaY}OjGuHq(1vyc0Cp4!dP#kjedL#f~aY0T;w6%#d+TkVyKo}?txbgY#mjz7zxGZ$z z4L3oWqi+<(*VZ>4XuwS@2u2hM0RtuAP=L6ogcuqC2O-6P5@O;A2<9dS1O$wN!l5YO zjV}a(fk1E6K>$=74g`V3{w??)Hxd3Zu&5|R0*;ipNgD;e*$4^*0RtdNBnk;ZiXxCe z07L?W0U?k#!No}+Ns)_-{+t6>b`T%J@>x)zJJca zgYkF-&KeCwqi|>-3=4w-Q3!}N5P^n)uy8a63qwMHwvJewC(r@shLNzcbtL(F0f9jv z1Oi3^0)fCtIOwnKpMb$gz;G}G0Y^e1FvPEQFaiOAkbp@30X%ed!?^J9ko<4?^HKg{ z{~vMr&-TKf?G=(g_-6tCx9i`{$=T`uME^)I@;~bTw?D~0fIsw4(ggUE1@sGhH}o)t zDZfxJh^_RVI{|cmc{o+6p`=zTkf4$Vuxjd9SQYIr5AQO}T?GO0MY(b+M zpWW8!<3_qa0bajSS6XteR*TL*r1B4x=mHQoA&GS*7B@4UbwsQ(Cb)lZWWa=|7Q#T( zqtZiKJlqqGe=W_vq!7ZJ*Pk>hjUz&qtx?JD1keC&QZwFn#8yvb+1Q*3X#;an*38W1IYIZ49M zT&bY%c6n&6s6ODRK+2-`W1sR%x7^+!!_U_M>djs2ftN^?hmaINCZ(0Di1=K`*%7tfNoen++PaoAR+?P>+qzuWK>jmiQL=k?#& zsAE)9*u?gSfj2fu{s*`?S=+f{t$;`*;+GCF5Fi8!vjW1xFf0l^gvuUp-H*=&?miWZOYoUs^j^> z&Rj|JW0F~XVq)ruOQE8<03&nqldv4A)E2+}uj7-(Hh5R`TeUv60y;KEg?7q|q8}_* zIrSe~7O?6tt*o(mktAK9c)PoIcrJdXy%) z6U;-EXtOp;o@OKv9`K&b;QN5wHh+6*CsiqrXsHQ{Ao&zc&Alh-oew;d6bOe$GCzqv zSxo|R>5@IohD=vtzpx8xJrIwIqfK5?BXx3Nc2UdE=P}iN*0Rb~B>k?OZr)=fN-@`t1qFhf9_=v>HOVlHC#b0~p`)7;16Q15`J_{L z-^x%1W?au|7*CifE*P)yd3iW6?Xv|n-8JR(a}GHgnZEHR)WIRUJ;9z=t5oB$NnUGW zxyS=Sm00yK_V}zfB?V&@XBFRn6R>WwP_$!)}(z@*3t))`N!l-p;&`Vmwa|FPzNnV}@=X4m^}wtLA}dT0luI7**e!wN8t&YJHr zhc}mx3_frhV2E^Oyh#r(eCUg;qh%1^)?utP_X!Q1zUe24AKOnb@G_sBpcL1f8O3OA#jC)TvhW;bX6A zAKifmG1D9av@u%Wefis>kFA^3EA*V@p=iyJQ5|y1R_S zHV@5JfwETiC?q8aHhR7%7(XWu<{I)hjWvb*$&RRpU6ZMG;)WJqa(y`ig|0C0J&};izW3d=0 zV-@Y_EPyR~vs&aXOr}xSc;Gd%b4!G#K%dcOKFF%Ul0vo2e2Bt_cbU1J7HI8LF*h7_ zKY$TX4u0;pGZPVTtFjL;lS8h3@$Nl-nW_?5T38Cu|DF-k`PDV=y&30pr_PwY?FZAH z^h?LfkG9hp%EEJQJnFKkfCvjD4R5tsk(@fQo!R(7ei-=O^98wTZVaX$P#<~<^F-3;+r3$$dY7)#CCz@(m&4-}6m2UqBqw8gM(JGcklV&$&;b{0EOZtEYR7&-lP`5pl{c#kEh^-S4o?N$x24 z%?;@}WZ_*uuHV(ZviR896jJ7HUOW?KL5M#5uBCcPCe5w-c=cxs)4|?yn3HTNzif%B zYG)4DwxEOWp`>Yq`?I>^7iMQ$lqbHuJ5BA?#Eqy#&QL9%K%euIS({Z}`nz6+XLaw? z?spI)@5d7*2Z_%*g181O~?xoR48>oxB6SE227Y5a1|0*9;rV`2wi-eI{tZ~zsKxw zh}$9Q(|Od=7O{A~;@Ls*d;R#%SUd~*Md+3qL?dn>2ICz4)o!augs)>`!hJp%SyR2e zOnzYgaH)t=vMy`u#kKMmsacp;+WKJJ@{e1wz?lKHl+3Xj!+6$9)Nfr)&r+)C{dO}o zEvjW}<};D&XS;1JpB$zWO}UI;aJ`s+R6u4XFf~Vu0t;{DH%Ld=ehUHH3|JtzZ9Z38 zAKvf)*1EYJi+Suj8>+l8(D|s}4Wdnqy9x+Io0p)zDc@S8BXcN3lNvnwvSLyBvsT*Z zamJ@j;ix*vnre118?DZh&&Je=hsDuk?bh2Vn1;e60->DOed9-=u?Mk>dwM%kY{Pr{ z4t>g_t@3N`Dj`i`5p8*I1?r9A&!MbJ3*t(F+QZKJreJB0_jPrC$8<)UHFF!|*G5k* zHvP`r7z7L-sfgcJiMo_|i+qkuMX{?_qno1Zx&ojFyogT7!hBYeJ{w5>I(=NeSy(sx zXm0KreZf}J{LHn?H47V89oJndJfZYENbIVc`21rAXt(O;vYqL) z+n)|2drt^diDzXR5yp{`OKJ1uVMVpG%Nk_5zwLiDh@(7ULwx>TSmaQB(tankDmT=D z4xj<*;6Z?|nuh>wZRsB4u4FUvhx< z4+3${Dz}>bKbC(@q|$n7f9(Yi=f@ayi~eKJ$<=E!lj{tAQZg`?^$PA&Wyw&WC@B?v zu6!rJU^D9$xtg9h>$Nx8C|CDdqbt>;sCC8d8KB>OaN^nKsU}0HN{9pDe&4M9RKUik z`_w5qvDoMe&E=CZm114jr`ylY7u#Rn*lj*aKO65-m)&VEl9TFJZWJJ!J)%*lUwb>T5kcv&?kINh2)vJ*!6dmrKg6*cu7*9e!g@TRtNOqh z#%i4wzLd8kcR2BgsLrU2;U!Ab-`Rf$MFcvp-#9I?NN+7(OX5%dW+BeuYY>7tc}4qk zJbZxu0J_Fl_ExTG!soo%Z-Qi#a<=DQPEjN6nG3fw8ipn;0@ zWbPySo5?}!O^Now#;1236;tj=Bt`X#{tU0>N7jlYcb$i1`NU?euSOU~^`(6}REx~w z>RyC=V{q)5_$hI0BQ)M4URc}%tRzl(^t6ECp@7+2_>4wzz*I-K%ud}o)l^Mj7C@@f zk0-hIlri8SM~Q4It8nYd(OlEcro>e9NM$IxSgT&-XzZ)o+Fss);hNS37dA1P{-mGs{;4J2T zMxK(vT3TZFQKNC>a`h-q*nV=qgR&{NTc5r$UG3Qonf8-w${b6{+?xB+vvm5 z<@J}`HP+NB-nxzs_|sM5C=u{O4`3(fY8C9`*l=U9U6V@hr+~bt8vbE0`iiHYxW}$= z^kT$G`_Q*F2bQUI7V|=}nupYI%6+20?$7v%+lfP+jX!IA5#}{^Bj35aU)k(Dia2H9 zh;iMK``juyK}27>qk5ih&6YM@V6)V5U_V}Ejl|Wp%kcN~?ODXInrrNGr(DV-ZtW-% ziJmf&?VE`%K{BUy3m&s`^Eq^nOkh26b2b|)RqTrup1xBzGXawJJ5rl5%uccd9y`A2 zw|oT3g-le$>Eq?art9&lluu1AFg)oi4m5o>O=hDC?|zFYnrHIbW%8nySbH^I@p3rY zBDG^_epUxv^^WyPAj#(Ie%VxRzvgt86NJO)ZMnH4wn9r|tQa@ty55!?mytz>%;VrO zKi&0_%!Pc)vWEq0mxo9iMxsYNNelOqL*QoHwfM?-s#Cm)xGKzkr*Tat)KpN0svo{I`}t2)EB zoSy2|4BVWgLP!MHn(zUrKo8uC6)Ny-yYf0;ZVxa}AY2_{P8GLJO3i!wHrI+g{j*a= z=eZYV3}eEDi{1)XqM557D|+$M`s;tZDI+d#Xnfn}LfezZ`8B4k0We+X5l>1rS$Jv(?_`{y`kWEd7}a8!vM`JojqNS;wv!d;lD< z?$?`&)3Yv|q%pX#!0hVzSx8arCJwTLy8z?IkV}#gS4X)?j$3!+Wo$0-zV&+H7<=o; zF_}j>lhVGRnqEAvv$46{WD@ukJhaC5I1kC6!&s$;58RlEe!ek=OFa##X8S~M(Rf9Y zxncyabE#Q0Ezlciu8GXJew9p>iC6SzmlxM;00o{^h&{n)MBR`kXEKDo`3*OzzoNjU zBG%=Mgl4vkgamCa(xj^HcjiiP(Jr&wt?fN6Gh^c0?q$~D#?15al)q7X^Yd75eL zF*_*FI%63*(RWM+VJ*gAI97klsU@yHAQdC}_2LJHF~dmJIc6`exCm`tCb@nSrNgUm zWW5WAUp-6iQ*cyoQh$jAf~p~F%1W4Kth4)SIR9X#Dbmi1+?1)mE(zGuRq%MQd$8%c zc#;&FgCbtL?BXZVftk=?O+B*s&tK_Kj~yHR6IgU`0Rx#AGBV#EREpiN_GiZS4=AYf z9o|f?x*ejbA8{*tAfZId%v$B-8HAjlP1w|me^ z^!)Jso3E`+eq6w4pXABsDexRz7{!OtyJ;|U0$b(+TN^P2=JEnGTqI>L=58RE!lPqc zmGe>L!xVRJq2X8So-A@ndziUR?GvODBUD%iB3)RNh02dUTu-vPadYFtQSGhyFqJ3! zHXz&~!ZvLl9`>b>>2}7Tx@pp_`+CEHcc@4R#TXb#{q42+Uo1o$ciq ziB63EZ86c#&ovkI)fOJtr;nJHh zn-aBE_;TdST#Jo>tM&z(fY$0PNuUT_ML@pe4SGS7YXWrOdVT(_;)Akw6>-XkJr&RW zcEc|}xSu9}7dLv6Y4ldr@j{sL8rIE`{VF(9y4J14p(P>pc)!aVX>A*9=`f!la<`)> z($VK#WK(%-6-JCZUQ)Ur7T(YUe`rSkmz-O#S1sBZihW@G!ip$87-Hn-Id=>o$~69GiA8mf$`$?{KgS2 zkydf4_Sm&uy`0%U-{uEv6B5kzI+9DkmN!%I=4FjX8FPxf)aQIa0jO>9RQcsaRQb12 z)Byk1x|LEZU68_yFv8*JR{YU0n`5mG+>$5)1DLNY?9F`0^plRH9s`g&SSle(CFkM?sd0{N?=x9px`}6#`vyj*!@E`!_nOUKb@eA8+DE<^U+41 z#rHec0h0p50yVQwXM-q(j;Ga(g0zULn)lhWZM_q9qdsI~ywzFXnN4)BoVuPAWan;g z=91BF2drB~%w%t;49b;+-;8-RsKlN-f?@#d2SOOdNcuB%;wH8@xx zIqN2ZA{nbOd$-?_tiYSw;wM?csJPl<;^Y(nT$Hr$L{xRjFeb+F>U?LB32ZzsHB=|_ zcqDi}iZ%X#eLHfGd$wqz4 z4}^(9xXeyv^W-f9ne{?!6v8j`=~CVTM!gsi@aHxBXsh&ivXosw%>MPI=r<+vP039{ zu5sFPZCeW&Pp!Rw;4pxcyv$GV@#k{F`0n=?>KQA!wfb0AM8N8h>r=kS_qJTX>-dv&=``CXbE*_Nu*9%ZYqTss_r9#sokv_X*Xq2moC@FTPGq z9iJ0^TWI4Ge@6ckW*wGZA^S3sj@CCex`Sc2xZ7yyUi+h7cuB#YDvF8d2eJwZ$(mrg%9T z<~UAPbUkrGYVgFJzo{aJS>H{gCHE8id!#Zfa3+X#Ca{p|mfN+k)7g6(jHq>RzmIT(AkBp!3NNmUyQe*4 z<>4df^J|!Xci-p{RJ?b%DddtxietR`tp6}ZLZLxOt&H3+Ya;1O^v>dfYe%=>R}?}W zc)|69lKb`CdXuDzB6s)~#TK7gV^6j;o0TIum#=ACBqk*PUva=V+TwApZW8xho&NsL?%&3L+y6yE z{(b)kj6^{H=>Pl!@aD6{@=5XiEn@(D;(S<)8;(!v*T+G^5oizsBF^WEbFgs7x%^52 zk^oD9e);_CuzadC0VB|G zS)sttU+;gP%+<}w1!IHz zQ&wjuTSvElQ#lxnMdRRDuoVQ0K;o>Szv2;AFa!b%0l_gyBn*Q@gRNmOYd8*tg20e4 z5C(z9!*IW@2#d%5`Y-;a@<-0UE|5=MS>FYR`>SpLysrDZ8>?*f`=)IjaE`9FPL8g8 oQqcdjJiM(v&JpAAJM4eZ#O3oL`GY_BgMV`V1@gIQmjEgN0P-6z2><{9 diff --git a/crates/api/src/temp/2026-01-05T04:54:27+05:30-xc8652469-x0.1.0.tar.gz b/crates/api/src/temp/2026-01-05T04:54:27+05:30-xc8652469-x0.1.0.tar.gz deleted file mode 100644 index 37d60b0da92fc56b3442b6714c790f8e9239804a..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 7217 zcmV-19M0n(iwFP!00000|LvM*P*m-fsA+N#0R@zdWXaHpMi3-Ora^)P2@Ty%ZkmiB zsZkJ+AVD$$A~^|?Bqzxrl0k_IA~|RJ>YP(^&pmf)=A4$5l0PAi*;Ek-?T>TRFQ{!%R@ zlJcR=QSu-3DlaE4yJ!KRmK@P;=TGV{skBeLFr(Ptq4I9k)J4go72{6&j9MDxY5>G% zcljdMjTLbjf~LZ&Vqkz&ycM>IpHV41;tNk4`pn?B7g`BQG_kaz6u0=`R+ZRYMt}op z8Rn%w4qlzztGrz1=kVL8i~pg`+d>9-xufYOwp8>DAo9-56j4KS#iC)g>d;0(eZWbP zw8f{YVWmX(yidV~P3!=T4j-AHL$*qZ0n*}a+UG~V!!+Z`Jr9&`YDT_h!2}s>i~^24 z4lH^Iu4v~J(fZIh3>5x0k_(;wdG@JQr9hebz46G(RY2Zqf6-4<7PaKtQ=^lLizk3E z?eeV9cz2bTJZ@46(J5@dy}G%W?dmf2wQ()Qo8E$v*#P{KezA=PLM4q+=x|&_cI)r& z-qqRK!3}980~XSk|+_FwKWnU1++p)NFYRib&9Bs55Ygn zzu*57K!_yd|L=eBpa1_q0*(mszdYbKOsL{n4U$VU+>bP+`AyBA<%qxFSM zro73-gos7HTbVEu(`ZxK@9thzW@E`$SQ9$UbYa8FO7RCRILcBN-6ymp_r8q1;o=fK zwvw$6&p`Pp4Q*!Ax_IDyoeexZj(9YE5|1Zu)8!UzHsZbTM;G`$7gqjok~x~B8ygGW zQF=UKfCmkNZaw%#6DV3EkCG$DFz{7q2Gj^OI;;jMxao_PJS*XRugK|aS;DAP%XLwdhiw$M}n3@KYK>K?cL>0M_^1EqmyD1Ch*Cc5j$Ya~>bDg+! zV@3L2K5&@nxw{=7Yj!alR=+jraUqAt&TtQ|F!s6@O8J z(mjLGcMTjZ()luaiywClmNGH*hMB4IU0u$lyF{j4Wr%oFhej1}gKxdxgAo^$|cc9r9U%@Bo5Z*~Q5CIxo?! z8A0{W+l%WtD`Y!qMCGm{9OfU|@@Ass*5gWPJ+~TEM{6TQza3%|A9+fYQ6Jb?D$N@X zheloQFRZt;-JKzSf!i{FmH`)e6tBQQANc5==3QX9Fj2MwNdy6P9^1zeM(P}S9Fj4P zMV&1NNXeitL>AZI)_J>37mRBIjJ5FTV zZ50*+0@_03jL7xT7M;RyW|aQ+&wRwF<7?xlOb#V;2SdSwOo?(H0|!GYB#n~T#i(Tf zW%V5P4ivv5=W87(T>;UnEh7+`KOf3x&HJUPWg5jy$*_nl`IJtgaC)!>h}uKRkEJ znlVh%u%oYgJe9(b_ddbJB(+V#27p zv@~T0)u_u#=p1{%>Bl@z=kw7MF}BrPmsGUg5rQ}wB7jqv9&#Lndp-Lb@|JO$z`Jm3 zyJF-dtLZn`^}$PRT>}QBhTw!+)pk%Q&sJZejAwgxnDs~%|4-R((%EmBU-2Xtf9HH; zr3+&0h85dBPva$U>C?T_)AX%S?9u7L>lg6Z^N&=u0ZGRLm9jAjNDki`?_B zx#o}$$qTC>9OE5TGi;vUeg~I@Tt=n_*R%VMG?BsOj4S+B%-6dM*YkzDjFn^fZQhZt zCw+&F&9ipCcWnQ`b=tRF+5eD_R1WLCYM_zZbhyBNSeRcSnE$GmIpkJlY`Y?5&uvDb zRPutfAY!C@fy+sFWz+rw&8Er8yWY#!6I-GU%{P`7sP7R_i*j9e9)H<_6o~>0XBJX~ zpjj074ZJpT#pX_v!)!|ZesIgld2eRj@sVT?)uWFy(0v)&Xiw-M7}8=ALQL^~A-~$n z^FfIp=*~e1i>I0I78{N8Sxt14e<2xTQ5qLLudX%8m{O*8Lz{gFx&mUPt@||RQ+)lo z-q$CIp5wtw+|KovzNyac8O;r$1A=MGQhf0wC^BM)9p<7b<OeX#e8m8M&L4cU$ur~$4O+X!TFm+Z(a^$aj_nYAU#hy;f_9E+tcdz?$D5`gksJQ z-y9t*W@;MD2X$S`Lp;MRdFVcUZkb$xcFEwg*P)p)*Pn$8mKchDPp_pE}u zQPoi2yIjjK2zl*Tm~s>i?tHMIo0&XZHF1`(&=(05W4QYDa9K=?!~3Yr*YaIwQEuim zVc#C(^M{{X#QJF0sVfDNEbnIOnECJQJ$!utZ@1tG=+jDM_g~2?^Z$9m-aW0FATmXG zXu>y$;t_Z>X(<~6>rw953AeRNN8;a}bgFpypT?HZIQkz|o9c}x`f^5GnfatYjBpwZ z>QJll9v4uT?3?2IRHaPrYc_nr4>v!m=*npRu@_L|w8L+4ay+8gIO^1Uab+YFFSS>w zpFSmX<7R)C+ElpkdDa@o*}#*HWq66_UeRlhQA|{En#RWRpj^iEKp|`#^MI|21%3Eo zAcTDl;@9(G0C?f_EOmv{T4=`eTQPSfBPOc!{9X|jXySL?v%i$*Bvawqgx&M?7p=!y zbsakS9#-Vo-vWw#+#@{T=!U%=JmZiN8=MHx?u>(Nt(RCPoJIMa){P|T+i&bFjR2`g zEfUZV{4`Te27V-WRcPAZs^m;XZ#LTQpKS#l5p4q#H-&&y0d-h+ZJX-5ln8h2u0Zj( zss|{^j#V3A*LDD7-h+#NdHnaPf)uD3%cIk&=()Ou)3Pr+`A<$CTI{-sU91$)U`|sP z061_Wi|WzF@iKpN#PITbr=_$Wn(;yyD%k&)F@a)O&<#csdNP31A9nmFpZY|zECZ7#>gkPuZ9mx7(`2qiBWp?}7O!pA)>ZNd{ zc%SfKZeX6G#my8Jcyb)ztK>UXgyN@4p<>#gyFzC>&Q=+sfVB42N6E%MAgFX6;bPmb zEKyI0+_&XL<4!bsM{0DuSB)1U?{ACNZB@<)z(d_MKG%uOo?$88u!Y>{8wp0CnQU9q z!mT0{-gnlGvXidPD+ed{&j?H_(mSCI-#;VYc=$2*Q15-8Tl;G3ke|)>@A^cBIb@an zpM@)hQ<(+xUd_EfkGE~fx^CBULigir=yFRjDNl%CMb$?w*_eV^JkMlvQI%-fmDXH= zOw2sCry+fJKb_v()6jcGX&d8f!Ks@O_02FRG3}%zG^5Q{Wxn(0J~r8yJKk*u5~2r5mx&Nv_wI=*J&Ov85Vi0zij*M9cc}CUkA7dRkUYYz>ae ze0%J@BS*U4M3rrKfViXSn#NVO_`Ee&htEP$QSmn{->=Pgp6;tA!>B>?NKC?ujDEA2 z1n%Vi68en|`i(Z@;brdk(K0?0R^gt07&D)5I|2RvFFxwGq@c8g@-EBgF|`<01g4bi zDa&Vv-u_iiUxcV%k-@D77(UMyh?m3i|g8m!&N!# zNZe?~?kda`&uZ@CpnDwwPFXTvpM&hQ;diY@OEuE<4Q zuPr^pbEPS-38lsG_7Hy0hxjz zXMEl~jJw(E^TFPB^vk)(qUt5n-S;yw{02*3nCgJLGmZCW?*{?I9CxL+Vy3-k%eWmt z7G<12-h9g{e8GIu(nl625Pg+-Oq!ki4T7YGv2kaJCZZjor`W~4zs(IoJjzU(X^v+6 zYLVWz@@4_S)|A6=Oh~iUofn(Zlq&<6CtxW!tsf7p%rV3!s*r>_N|3$?G9 z#le_OIE2V{a&+-PnlG-E{K$2lg?=xR3Ar{x4V9P1e2@1gx#~bz_7s@)N_&c7ux0B! zcWRoGGibsa@MR<`?G}%nT&QEp3XhiIPVC5i<$wihzK78Tq~-*1JA@M4YGQ<|Emxz@ z?_n1c%xF%vOUJg)uQ~Eko;*FcKTlLtcwn0pT^t*GOqxRRtSfnGKF|86(XRieM&iPt zSQB#eSZ@niOWu+{7v}Nt+sxsP-F)wnPr%`<1R@r2v09E|S+;IU%C`Mk1Lb~%pe+@q zd;M9B5nFHm@HTU%ihm`gqyiw&ElXu85KYb*;fY>Ueo3?sup}~O&tK6jCT1v;N#%P` zH^V2OJ0CP@UrN;1Q7K+oVrWWht?5n?_Q-xfM?k4;G1>A??O1NTry762f@gGTd!pqu z)!2rI)+ljRc5Z~wc%27Zl7`$d!82Pv z^S+5z;V?0I5uq#yv5Z?fQ)76ceyWI07w5G!q9#=@w#ESp+J%lH`2pt9(%>5l**6s{ zm)lB%gg9Fn>)Q_~kcE~5(xBx}j#BSLwN_gS$%VFOolQlU!!&+?`vu#c8C|HtuPylL zn{lk!?E~D>C_jP-9UgR58}VPEO07uevUR&-#;j7Ss0Je4Kh)*{u1=B-wbd zT5*c>6f-HkTP0yN;mEukqo>v_ol|Bn(VeC+0uvLZND|M`tGH*?vrC>88-(hg)Zk=7 z6bmd)n|$o-uD6jdwS70FSF7qN(i<1s(&H5wYJQTQqD-F~@Y*g!R)whe2U9W$4|-k{ zL~R%r-EXuuStO#7@&yto!TyGm%C<6|nVk53eh>`WAdlwnw z_cT|EVwM!HN(@eGCVm@cRxehMwUJ8K>?{mFQRbx-yngazT1)#mJGa;LqPeY02e-o9 zo9Tem$wrZzPqUB8`iYCZxjT%-er@E5?dZyD@g~+_mYDfQkT2}M` zKqyua%O@DiEJmI2)gHq9>6SC>3L)>Jpl!0`J38Xx%zHYmw|a|$`JHKd4t5dVXeD(KF*#~#VqtU5_{fFrqeR~=PcgBI{C;YP!ocO9~+ zgH*7xF0RP^ve|7yC8zfZXss+JE^qN^N%iNfGWU70YYb`r+e#K|F|vV|);FdwsIPn< zBVt<=bMtw7;E=w+RssGra;=+T{I|3z=hW<05i=k5tI6Ma`Isq1mqqZ%gszg%qKc&I zkKZIbWKF%UTj4>F@11K7Vdh)5_T~;@HBq$>qdSdbPm;2D9@qSdvPUzexJKkYZfqaN&9(knB7}w|u@A@6TAqsO_N_VE#)ZlA$4Q3%>Tja0k^M5#` za8#P17>FEG{i$>qEG(!}sADXQ`Z01AH5D#h zLS}5r%ZE=fS4^+*NjZTy39p|yvpu{RJXSILDXbcIDT!cK(`vtb#cWjn!R*tC+e%Rm zzI{%N{N^svOebq==trt+_=ra!T(Fu5c4vqAwW553<&_SqKu_CI-ibz^N?gfn=9}7M z>UvB1Z>LXmQl|BQKM#^UpI1Jo)>f*GBFr!KbcpeI2p~Ssw9=4#HA#Y-lILCeQ){p93>D+jsF_*~zrWO#h-gPboY zs*3AYK#I$=QtMZ>a6#Intd4{QW1U6$_oDz44Y%(+RF=;T?51NZYR*Yu4AnanE1#vd z5sUIUXSC{EOd0EmSPcA)uy7#UJ|8{cp7z=rx-w2+N${D5Za}hH6D-lQ8CRAVa1!ZAjK}3I2ng( zHvETmBJ=f*xM?y^wlnDp@@C_%+crWnw831^S2IkLgR&1eW^h}>ThwMW1CO>J7c7#3 zW~vrVpT1qP5Kp7t6qf$n>1wr>eg$5v?_B$~pzq7HC>FzjeM(E@Wjz!a&2dKYW1GO$ zzsr1@hDRrbPc-@ITGUhZrHBrlmXz|LY%cDaPBmR_q+dP|6iK)o@6auMv{3P`64`O^;mze&BHb(T!RxydO>)V)TSfM#QqLWjpzYq zp_i6G??6ZB}ZWoTz%wCkrhf9=ep|Tn-ni=3(EU6mIBe zbl9(e%K=ZXP&+(T8~P%8f#FknhM&^c*2%nr+tW?q_{=LXIcZzH1lKnKO6fG=o?uSu zCYe+)1>Y^T-}e~5ujX-4#xZ_SqNwAQ8~91s%H$iSzza&oz7@?0VUrUdYFj8CwjG~Y z}QM}={JQ8oGLqGTkkUZo);ZQNyytkknM%WT(dOE#-KI0zS(TUAva)3X42v0Cjd^t>DNbm>ZguvLLQEu*{_HNGq{ABkZ z<3F7LLM8ux{v!c}Nc=hf`2+CfvO{u7bNwT009?XcNQ66zOZwO4K*120I0P!p<%V*y z@I<-(3L!2kA^O|qUzP5iAW2Ci zL=p@^AwW`4*e?M^T0>FR5-=nfh_pfgkrLuoXfz6j27=KD2?Y9Iga6({_}johDHs@q zl$1b2t$_$APzs2IN}{cS5(p_tYY7Py1S$ao10gV&1QLV*O2QxrYlJuwB#xB$J>3+C zN&E`z=I-o@utEJLD%RN!YMaz<;wmw4EaggK+xo_MaHya`_Sb$)Ehm|8o8ho^akh04e|g(y%7J diff --git a/crates/api/src/temp/LOCKFILE b/crates/api/src/temp/LOCKFILE deleted file mode 100644 index e69de29..0000000 diff --git a/crates/snapshot/src/engine/mod.rs b/crates/snapshot/src/engine/mod.rs index ba24979..a06b93d 100644 --- a/crates/snapshot/src/engine/mod.rs +++ b/crates/snapshot/src/engine/mod.rs @@ -92,7 +92,7 @@ impl SnapshotEngine { Ok(()) } - // TODO: ask someone about sync issues (i dont think there are any) + // TODO: fix sync issues if any (i dont think there are any) fn worker( interval: Duration, last_k: usize, diff --git a/crates/snapshot/src/lib.rs b/crates/snapshot/src/lib.rs index 1d77940..32311d2 100644 --- a/crates/snapshot/src/lib.rs +++ b/crates/snapshot/src/lib.rs @@ -31,8 +31,6 @@ use tar::Archive; use tempfile::tempdir; use uuid::Uuid; -// TODO: implement snapshot engine that runs in its own thread and wakes up in regular intervals - type VectorDbRestore = (Arc, Arc>, usize); pub struct Snapshot { diff --git a/crates/snapshot/src/registry/mod.rs b/crates/snapshot/src/registry/mod.rs index 3590bf1..6513d97 100644 --- a/crates/snapshot/src/registry/mod.rs +++ b/crates/snapshot/src/registry/mod.rs @@ -1,18 +1,3 @@ -// what do i need this to do? -// manage a source of snapshots and load and unload metadata -// load latest snapshot, load latest snapshot before x (ok but) -// manage a worker thread that is woken up at regular intervals or at the call of a function to take snapshot -// it accepts an arc -// -// broad architecture - what i want: -// - abstract snapshot source -> can be local directory or remote(define protocol) -// source operations: -// - add snapshot (with Snapshot) -// - read snapshot metadatas with paging -// - read Snapshot of specific snapshot - internal implementation: unpack and read manifest file`(dont bother with checksums verification) -// - make a proxy wrapper that deletes the temp file on destroy - caching is internal implementation -// - use std::path::{Path, PathBuf}; use defs::DbError; From 47e9d842de0116b4f645becf9950ef6b8fc3f0b5 Mon Sep 17 00:00:00 2001 From: Tanmay Arya Date: Mon, 5 Jan 2026 08:00:04 +0530 Subject: [PATCH 22/25] separate worker and main thread snapshot apis; more comprehensive unit test for snapshot reload --- crates/api/src/lib.rs | 46 ++++++++++++++---- crates/snapshot/src/engine/mod.rs | 79 +++++++++++++++++++------------ 2 files changed, 87 insertions(+), 38 deletions(-) diff --git a/crates/api/src/lib.rs b/crates/api/src/lib.rs index 390fb12..f712fec 100644 --- a/crates/api/src/lib.rs +++ b/crates/api/src/lib.rs @@ -432,21 +432,46 @@ mod tests { fn test_create_and_load_snapshot() { let (old_db, temp_dir) = create_test_db(); - let vec1 = vec![0.0, 1.0, 2.0]; - let point_id = old_db + let v1 = vec![0.0, 1.0, 2.0]; + let v2 = vec![3.0, 4.0, 5.0]; + let v3 = vec![6.0, 7.0, 8.0]; + + let id1 = old_db .insert( - vec1.clone(), + v1.clone(), Payload { content_type: ContentType::Text, - content: format!("Test content {}", 0), + content: "test".to_string() }, ) .unwrap(); - let temp_snapshot_dir = tempdir().unwrap(); + let id2 = old_db + .insert( + v2.clone(), + Payload { + content_type: ContentType::Text, + content: "test".to_string(), + }, + ) + .unwrap(); + + + let temp_snapshot_dir = tempdir().unwrap(); let snapshot_path = old_db.create_snapshot(temp_snapshot_dir.path()).unwrap(); + // insert v3 after snapshot + let id3 = old_db + .insert( + v3.clone(), + Payload { + content_type: ContentType::Text, + content: "test".to_string(), + }, + ) + .unwrap(); + let reload_config = DbRestoreConfig { data_path: temp_dir.path().to_path_buf(), snapshot_path, @@ -455,15 +480,18 @@ mod tests { std::mem::drop(old_db); let loaded_db = restore_from_snapshot(&reload_config).unwrap(); - assert!(loaded_db.get(point_id).is_ok()); + assert!(loaded_db.get(id1).unwrap_or(None).is_some()); + assert!(loaded_db.get(id2).unwrap_or(None).is_some()); + assert!(!loaded_db.get(id3).unwrap_or(None).is_some()); // v3 was inserted after snapshot was taken + - // check if vectors was restored - assert!(loaded_db.get(point_id).unwrap().unwrap().vector.unwrap() == vec1); + // vector restore check + assert!(loaded_db.get(id1).unwrap().unwrap().vector.unwrap() == v1); + assert!(loaded_db.get(id2).unwrap().unwrap().vector.unwrap() == v2); } #[test] fn test_snapshot_engine() { - //TODO: write proper unit test let (_db, _temp_dir) = create_test_db(); let db = Arc::new(Mutex::new(_db)); diff --git a/crates/snapshot/src/engine/mod.rs b/crates/snapshot/src/engine/mod.rs index a06b93d..7c98d1b 100644 --- a/crates/snapshot/src/engine/mod.rs +++ b/crates/snapshot/src/engine/mod.rs @@ -48,7 +48,8 @@ impl SnapshotEngine { Ok(()) } - pub fn snapshot(&mut self) -> Result<(), DbError> { + // notify the worker to take a snapshot now + pub fn worker_snapshot(&mut self) -> Result<(), DbError> { // acquire lock for worker_running let worker_running = self.worker_running.lock().map_err(|_| DbError::LockError)?; if !*worker_running { @@ -60,6 +61,16 @@ impl SnapshotEngine { Ok(()) } + // take a snapshot on the callers thread + pub fn snapshot(&mut self) -> Result<(), DbError> { + Self::take_snapshot( + &mut self.db, + &mut self.registry, + &mut self.snapshot_queue, + self.last_k, + ) + } + pub fn start_worker(&mut self) -> Result<(), DbError> { // acquire lock for worker_running let mut worker_running = self.worker_running.lock().map_err(|_| DbError::LockError)?; @@ -92,15 +103,49 @@ impl SnapshotEngine { Ok(()) } + // helper function to take snapshot + fn take_snapshot( + db: &mut Arc>, + registry: &mut Arc>, + snapshot_queue: &mut Arc>>, + last_k: usize, + ) -> Result<(), DbError> { + let snapshot_path = db + .lock() + .unwrap() + .create_snapshot(registry.lock().unwrap().dir().as_path()) + .unwrap(); + let snapshot_metadata = Metadata::parse(&snapshot_path).unwrap(); + + // add the snapshot to registry + registry + .lock() + .unwrap() + .add_snapshot(&snapshot_path) + .unwrap(); + + { + let mut queue = snapshot_queue.lock().unwrap(); + queue.push_back(snapshot_metadata); + + while queue.len() > last_k { + let old = queue.pop_front().unwrap(); + registry.lock().unwrap().mark_dead(old.small_id).unwrap(); + } + // drop queue lock + } + Ok(()) + } + // TODO: fix sync issues if any (i dont think there are any) fn worker( interval: Duration, last_k: usize, worker_running: Arc>, - db: Arc>, - registry: Arc>, + mut db: Arc>, + mut registry: Arc>, worker_cv: Arc, - snapshot_queue: Arc>>, + mut snapshot_queue: Arc>>, ) { loop { // acquire the lock and exit if its false @@ -112,31 +157,7 @@ impl SnapshotEngine { break; } - let snapshot_path = db - .lock() - .unwrap() - .create_snapshot(registry.lock().unwrap().dir().as_path()) - .unwrap(); - let snapshot_metadata = Metadata::parse(&snapshot_path).unwrap(); - - // add the snapshot to registry - registry - .lock() - .unwrap() - .add_snapshot(&snapshot_path) - .unwrap(); - - { - let mut queue = snapshot_queue.lock().unwrap(); - queue.push_back(snapshot_metadata); - - while queue.len() > last_k { - let old = queue.pop_front().unwrap(); - registry.lock().unwrap().mark_dead(old.small_id).unwrap(); - } - - // drop queue lock - } + Self::take_snapshot(&mut db, &mut registry, &mut snapshot_queue, last_k).unwrap(); let _ = worker_cv.wait_timeout(worker_running, interval).unwrap(); } From 3d6634535405437551f11be762b486f7b26a9b7e Mon Sep 17 00:00:00 2001 From: Tanmay Arya Date: Mon, 5 Jan 2026 09:52:47 +0530 Subject: [PATCH 23/25] add comprehensive unit test for snapshot engine; fix a gnarly bug in storage engine snapshot --- crates/api/src/lib.rs | 95 ++++++++++++++++++++----------- crates/snapshot/README.md | 0 crates/snapshot/src/engine/mod.rs | 23 +++++--- crates/snapshot/src/metadata.rs | 1 + crates/storage/src/rocks_db.rs | 4 +- 5 files changed, 82 insertions(+), 41 deletions(-) create mode 100644 crates/snapshot/README.md diff --git a/crates/api/src/lib.rs b/crates/api/src/lib.rs index f712fec..0f659ba 100644 --- a/crates/api/src/lib.rs +++ b/crates/api/src/lib.rs @@ -217,14 +217,11 @@ mod tests { // TODO: Add more exhaustive tests - use std::{sync::Mutex, thread::sleep, time::Duration}; + use std::sync::Mutex; use super::*; use defs::ContentType; - use snapshot::{ - engine::SnapshotEngine, - registry::{SnapshotRegistry, local::LocalRegistry}, - }; + use snapshot::{engine::SnapshotEngine, registry::local::LocalRegistry}; use tempfile::{TempDir, tempdir}; // Helper function to create a test database @@ -441,12 +438,11 @@ mod tests { v1.clone(), Payload { content_type: ContentType::Text, - content: "test".to_string() + content: "test".to_string(), }, ) .unwrap(); - let id2 = old_db .insert( v2.clone(), @@ -457,7 +453,6 @@ mod tests { ) .unwrap(); - let temp_snapshot_dir = tempdir().unwrap(); let snapshot_path = old_db.create_snapshot(temp_snapshot_dir.path()).unwrap(); @@ -482,8 +477,7 @@ mod tests { assert!(loaded_db.get(id1).unwrap_or(None).is_some()); assert!(loaded_db.get(id2).unwrap_or(None).is_some()); - assert!(!loaded_db.get(id3).unwrap_or(None).is_some()); // v3 was inserted after snapshot was taken - + assert!(loaded_db.get(id3).unwrap_or(None).is_none()); // v3 was inserted after snapshot was taken // vector restore check assert!(loaded_db.get(id1).unwrap().unwrap().vector.unwrap() == v1); @@ -493,43 +487,80 @@ mod tests { #[test] fn test_snapshot_engine() { let (_db, _temp_dir) = create_test_db(); - let db = Arc::new(Mutex::new(_db)); + + let registry_tempdir = tempdir().unwrap(); + let registry = Arc::new(Mutex::new( - LocalRegistry::new(Path::new( - "/home/tanmay/Documents/CodingRepos/vector-db/crates/api/src/temp", - )) - .unwrap(), + LocalRegistry::new(registry_tempdir.path()).unwrap(), )); - let interval = 5; - let last_k = 5; - let mut se = SnapshotEngine::new(interval, last_k, db.clone(), registry.clone()); - sleep(Duration::from_secs(1)); - - se.start_worker().unwrap(); - let vec1 = vec![0.0, 1.0, 2.0]; + let last_k = 4; + let mut se = SnapshotEngine::new(last_k, db.clone(), registry.clone()); - for _ in 0..30 { - sleep(Duration::from_secs(2)); + let v1 = vec![0.0, 1.0, 2.0]; + let v2 = vec![3.0, 4.0, 5.0]; + let v3 = vec![6.0, 7.0, 8.0]; - println!( - "{}", - registry.lock().unwrap().get_latest_snapshot().unwrap() - ); + let test_vectors = vec![v1.clone(), v2.clone(), v3.clone()]; + let mut inserted_ids = Vec::new(); - let _ = db + for (i, vector) in test_vectors.clone().into_iter().enumerate() { + se.snapshot().unwrap(); + let id = db .lock() .unwrap() .insert( - vec1.clone(), + vector.clone(), Payload { content_type: ContentType::Text, - content: format!("Test content {}", 0), + content: format!("{}", i), }, ) .unwrap(); + inserted_ids.push(id); + } + se.snapshot().unwrap(); + let snapshots = se.list_alive_snapshots().unwrap(); + + // asserting these cases: + // snapshot 0 : no vectors + // snapshot 1 : v1 + // snapshot 2 : v1, v2 + // snapshot 3 : v1, v2, v3 + + std::mem::drop(db); + std::mem::drop(se); + + for (i, snapshot) in snapshots.iter().enumerate() { + let temp_dir = tempdir().unwrap(); + let db = restore_from_snapshot(&DbRestoreConfig { + data_path: temp_dir.path().to_path_buf(), + snapshot_path: snapshot.path.clone(), + }) + .unwrap(); + for j in 0..i { + // test if point is present + assert!(db.get(inserted_ids[j]).unwrap_or(None).is_some()); + // test vector restore + assert!( + db.get(inserted_ids[j]).unwrap().unwrap().vector.unwrap() == test_vectors[j] + ); + // test payload restore + assert!( + db.get(inserted_ids[j]) + .unwrap() + .unwrap() + .payload + .unwrap() + .content + == format!("{}", j) + ); + } + for absent_id in inserted_ids.iter().skip(i) { + assert!(db.get(*absent_id).unwrap_or(None).is_none()); + } + std::mem::drop(db); } - se.stop_worker().unwrap(); } } diff --git a/crates/snapshot/README.md b/crates/snapshot/README.md new file mode 100644 index 0000000..e69de29 diff --git a/crates/snapshot/src/engine/mod.rs b/crates/snapshot/src/engine/mod.rs index 7c98d1b..fe138f8 100644 --- a/crates/snapshot/src/engine/mod.rs +++ b/crates/snapshot/src/engine/mod.rs @@ -9,8 +9,7 @@ use defs::{DbError, SnapshottableDb}; use crate::{metadata::Metadata, registry::SnapshotRegistry}; pub struct SnapshotEngine { - interval: Duration, - last_k: usize, + last_k: usize, // only retain the last k snapshots on disk. old/stale snapshots are marked as dead on the registry snapshot_queue: Arc>>, db: Arc>, registry: Arc>, @@ -19,13 +18,11 @@ pub struct SnapshotEngine { } impl SnapshotEngine { pub fn new( - interval: usize, last_k: usize, db: Arc>, registry: Arc>, ) -> Self { Self { - interval: Duration::from_secs(interval as u64), last_k, snapshot_queue: Arc::new(Mutex::new(VecDeque::new())), db, @@ -48,7 +45,7 @@ impl SnapshotEngine { Ok(()) } - // notify the worker to take a snapshot now + // notify the worker thread to take a snapshot now pub fn worker_snapshot(&mut self) -> Result<(), DbError> { // acquire lock for worker_running let worker_running = self.worker_running.lock().map_err(|_| DbError::LockError)?; @@ -71,7 +68,17 @@ impl SnapshotEngine { ) } - pub fn start_worker(&mut self) -> Result<(), DbError> { + pub fn list_alive_snapshots(&mut self) -> Result, DbError> { + Ok(self + .snapshot_queue + .lock() + .map_err(|_| DbError::LockError)? + .iter() + .cloned() + .collect()) + } + + pub fn start_worker(&mut self, interval: i64) -> Result<(), DbError> { // acquire lock for worker_running let mut worker_running = self.worker_running.lock().map_err(|_| DbError::LockError)?; if *worker_running { @@ -86,12 +93,12 @@ impl SnapshotEngine { let registry_clone = Arc::clone(&self.registry); let worker_cv_clone = Arc::clone(&self.worker_cv); let snapshot_queue_clone = Arc::clone(&self.snapshot_queue); - let interval_clone = self.interval; let last_k_clone = self.last_k; + let dur_interval = Duration::from_secs(interval as u64); let _ = std::thread::spawn(move || { Self::worker( - interval_clone, + dur_interval, last_k_clone, worker_running_clone, db_clone, diff --git a/crates/snapshot/src/metadata.rs b/crates/snapshot/src/metadata.rs index 8884b6f..cd73185 100644 --- a/crates/snapshot/src/metadata.rs +++ b/crates/snapshot/src/metadata.rs @@ -10,6 +10,7 @@ use uuid::Uuid; pub type SmallID = String; // Metadata is the data that can be parsed from the snapshot filename +#[derive(Debug, Clone)] pub struct Metadata { pub small_id: SmallID, pub date: SystemTime, diff --git a/crates/storage/src/rocks_db.rs b/crates/storage/src/rocks_db.rs index 571ffaf..b09c955 100644 --- a/crates/storage/src/rocks_db.rs +++ b/crates/storage/src/rocks_db.rs @@ -282,6 +282,9 @@ impl StorageEngine for RocksDbStorage { .as_ref() .ok_or(DbError::StorageInitializationError)? .cancel_all_background_work(true); + // drop db early + self.db = None; + std::fs::remove_dir_all(&self.path).map_err(|e| { DbError::StorageCheckpointError(format!("Couldn't remove existing data: {}", e)) })?; @@ -296,7 +299,6 @@ impl StorageEngine for RocksDbStorage { })?; // reinitialize db - self.db = None; self.db = Some(Self::initialize_db(&self.path)?); Ok(()) From 6789494f29a23f76752a1ebd372421944379c0ca Mon Sep 17 00:00:00 2001 From: Tanmay Arya Date: Mon, 5 Jan 2026 10:04:29 +0530 Subject: [PATCH 24/25] remove todo --- crates/index/src/flat/tests.rs | 1 - crates/index/src/kd_tree/tests.rs | 1 - 2 files changed, 2 deletions(-) diff --git a/crates/index/src/flat/tests.rs b/crates/index/src/flat/tests.rs index 2f08daa..6d43c3d 100644 --- a/crates/index/src/flat/tests.rs +++ b/crates/index/src/flat/tests.rs @@ -197,7 +197,6 @@ fn test_default() { #[test] fn test_serialize_and_deserialize_topo() { - // TODO: currently only tests topology and not vector restore; requires InMemory storage for vector restore testing (RocksDB seems to heavy to be used here for testing) let id1 = Uuid::new_v4(); let id2 = Uuid::new_v4(); let id3 = Uuid::new_v4(); diff --git a/crates/index/src/kd_tree/tests.rs b/crates/index/src/kd_tree/tests.rs index 0d44cc1..5b30952 100644 --- a/crates/index/src/kd_tree/tests.rs +++ b/crates/index/src/kd_tree/tests.rs @@ -705,7 +705,6 @@ fn test_kdtree_vs_flat_euclidean_5d() { #[test] fn test_serialize_and_deserialize_topo() { - // TODO: currently only tests topology and not vector restore; requires InMemory storage for vector restore testing (RocksDB seems to heavy to be used here for testing) let id1 = Uuid::new_v4(); let id2 = Uuid::new_v4(); let id3 = Uuid::new_v4(); From f3167f30b0b7fb7191cd11f132f0a483a4814759 Mon Sep 17 00:00:00 2001 From: Tanmay Arya Date: Mon, 5 Jan 2026 12:11:53 +0530 Subject: [PATCH 25/25] fix unit test for rocksdb --- crates/api/src/lib.rs | 2 +- crates/storage/src/rocks_db.rs | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/crates/api/src/lib.rs b/crates/api/src/lib.rs index 0f659ba..661a8d2 100644 --- a/crates/api/src/lib.rs +++ b/crates/api/src/lib.rs @@ -229,7 +229,7 @@ mod tests { let temp_dir = tempdir().unwrap(); let config = DbConfig { storage_type: StorageType::RocksDb, - index_type: IndexType::KDTree, + index_type: IndexType::Flat, data_path: temp_dir.path().to_path_buf(), dimension: 3, }; diff --git a/crates/storage/src/rocks_db.rs b/crates/storage/src/rocks_db.rs index b09c955..7d184d4 100644 --- a/crates/storage/src/rocks_db.rs +++ b/crates/storage/src/rocks_db.rs @@ -420,7 +420,6 @@ mod tests { fn test_create_and_load_checkpoint() { let (mut db, temp_dir) = create_test_db(); - let checkpoint_path = temp_dir.path().join("temp-checkpoint.tar.gz"); let id1 = Uuid::new_v4(); let id2 = Uuid::new_v4(); @@ -436,7 +435,7 @@ mod tests { ); let checkpoint = db - .checkpoint_at(&checkpoint_path) + .checkpoint_at(temp_dir.path()) .expect("Failed to create checkpoint"); assert!(