Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 10 additions & 10 deletions monitoring/grafana/provisioning/dashboards/dashboard.json
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "12.0.1",
"pluginVersion": "12.0.1+security-01",
"targets": [
{
"editorMode": "code",
Expand Down Expand Up @@ -149,7 +149,7 @@
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "12.0.1",
"pluginVersion": "12.0.1+security-01",
"targets": [
{
"datasource": {
Expand Down Expand Up @@ -219,7 +219,7 @@
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "12.0.1",
"pluginVersion": "12.0.1+security-01",
"targets": [
{
"editorMode": "code",
Expand Down Expand Up @@ -324,7 +324,7 @@
"sort": "none"
}
},
"pluginVersion": "12.0.1",
"pluginVersion": "12.0.1+security-01",
"targets": [
{
"editorMode": "code",
Expand Down Expand Up @@ -373,7 +373,7 @@
}
]
},
"unit": "ms"
"unit": "s"
},
"overrides": []
},
Expand Down Expand Up @@ -401,7 +401,7 @@
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "12.0.1",
"pluginVersion": "12.0.1+security-01",
"targets": [
{
"editorMode": "code",
Expand Down Expand Up @@ -508,7 +508,7 @@
"sort": "none"
}
},
"pluginVersion": "12.0.1",
"pluginVersion": "12.0.1+security-01",
"targets": [
{
"datasource": {
Expand Down Expand Up @@ -634,7 +634,7 @@
"sort": "none"
}
},
"pluginVersion": "12.0.1",
"pluginVersion": "12.0.1+security-01",
"targets": [
{
"datasource": {
Expand Down Expand Up @@ -733,7 +733,7 @@
"sort": "none"
}
},
"pluginVersion": "12.0.1",
"pluginVersion": "12.0.1+security-01",
"targets": [
{
"datasource": {
Expand Down Expand Up @@ -772,5 +772,5 @@
"timezone": "",
"title": "Semcache",
"uid": "chat-api-dashboard",
"version": 2
"version": 1
}
46 changes: 34 additions & 12 deletions src/cache/cache_impl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,15 +53,23 @@ where

fn is_full(&self) -> bool {
match &self.eviction_policy {
EvictionPolicy::EntryLimit(limit) => self.response_store.len() >= *limit,
EvictionPolicy::EntryLimit(limit) => {
debug!(
"Cache size: {}, limit: {}",
self.response_store.len(),
limit
);
self.response_store.len() >= *limit
}
EvictionPolicy::MemoryLimitMb(limit) => {
let response_store_memory_used_mb =
self.response_store.memory_usage_bytes() as f64 / 1024.0;
self.response_store.memory_usage_bytes() as f64 / (1024.0 * 1024.0);
let semantic_store_memory_used_mb =
self.semantic_store.memory_usage_bytes() as f64 / 1024.0;
self.semantic_store.memory_usage_bytes() as f64 / (1024.0 * 1024.0);
let total_memory_used_mb =
response_store_memory_used_mb + semantic_store_memory_used_mb;
let limit_mb = *limit as f64;
debug!("Cache size: {}, limit: {}", total_memory_used_mb, limit_mb);
total_memory_used_mb >= limit_mb
}
}
Expand Down Expand Up @@ -304,36 +312,50 @@ mod tests {

#[test]
fn insert_should_evict_when_memory_limit_reached() {
use std::sync::Arc;
use std::sync::atomic::{AtomicUsize, Ordering};

let embedding = vec![0.1, 0.2, 0.3];
let response = "A".repeat(100 * 1024).into_bytes();
let response = "A".repeat(400 * 1024).into_bytes(); // 400KB

// Track number of entries in semantic store for realistic memory reporting
let entry_count = Arc::new(AtomicUsize::new(0));
let entry_count_clone = Arc::clone(&entry_count);
let entry_count_clone2 = Arc::clone(&entry_count);

// given
let mut mock_store = MockSemanticStore::new();

mock_store.expect_put().times(3).returning(|_, _| Ok(()));
mock_store.expect_delete().times(2).returning(|_| Ok(()));
mock_store.expect_put().times(3).returning(move |_, _| {
entry_count_clone.fetch_add(1, Ordering::Relaxed);
Ok(())
});
mock_store.expect_delete().times(2).returning(move |_| {
entry_count_clone2.fetch_sub(1, Ordering::Relaxed);
Ok(())
});
mock_store
.expect_memory_usage_bytes()
.returning(|| 100 * 1024);
.returning(move || entry_count.load(Ordering::Relaxed) * 400 * 1024); // 400KB per entry

let response_store = ResponseStore::new();

// Set limit to 1MB - each entry uses ~0.8MB (400KB response + 400KB semantic)
let cache = CacheImpl::new(
Box::new(mock_store),
response_store,
0.9,
EvictionPolicy::MemoryLimitMb(300),
EvictionPolicy::MemoryLimitMb(1),
);

// when - add first entry
cache.insert(embedding.clone(), response.clone()).unwrap();
assert!(!cache.is_full()); // should have ~200 megabytes (100 string + overhead (32 bytes) + 100 semantic)
assert!(!cache.is_full()); // should have ~0.8MB which is under 1MB limit

// when - add second entry, this triggers eviction because memory exceeds limit of 300 (200
// string + overhead (2 * 32 bytes) + 100 semantic)
// when - add second entry, this should trigger eviction because 2 entries would be ~1.6MB
cache.insert(embedding.clone(), response.clone()).unwrap();
assert_eq!(cache.response_store.len(), 1); // evicted back to 1
assert!(!cache.is_full());
assert!(!cache.is_full()); // single entry is under limit

// when - add third entry, again triggers eviction
cache.insert(embedding.clone(), response.clone()).unwrap();
Expand Down
2 changes: 2 additions & 0 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ async fn main() {
panic!("Missing or malformed eviction policy in config")
});

info!("Eviction policy {:?}", eviction_policy);

let shared_state = Arc::new(AppState::new(similarity_threshold, eviction_policy));

// read through cache (proxy) routes
Expand Down